生信技能树单细胞数据挖掘笔记 (1):数据读入

生信技能树单细胞数据挖掘笔记 (2):创建 Seurat 对象并进行质控、筛选高变基因并可视化

生信技能树单细胞数据挖掘笔记 (3):降维与聚类

生信技能树单细胞数据挖掘笔记 (4):其他分析(周期判断、double 诊断、细胞类型注释)

生信技能树单细胞数据挖掘笔记 (5):轨迹分析

生信技能树单细胞数据挖掘笔记 (6):差异分析 (1)

生信技能树单细胞数据挖掘笔记 (7):差异分析 (2)

# 3、根据 count 矩阵转换 fpkm 并完成差异分析

# 3.1 导入 count 矩阵

nm_COUNT <- read.csv("GSE81861_CRC_NM_epithelial_cells_COUNT.csv/GSE81861_CRC_NM_epithelial_cells_COUNT.csv")
nm_COUNT <- data_input(nm_COUNT)
nm_COUNT[1:4,1:4]
dim(nm_COUNT)
tumor_COUNT <- read.csv("GSE81861_CRC_tumor_epithelial_cells_COUNT.csv/GSE81861_CRC_tumor_epithelial_cells_COUNT.csv")
tumor_COUNT <- data_input(tumor_COUNT)
tumor_COUNT[1:4,1:4]
dim(tumor_COUNT)
                RHC4104 RHC6087 RHL2880 RHC5949
ENSG00000000003       0     428      66     141
ENSG00000000005       0       0       0       0
ENSG00000000419       0     179       0       1
ENSG00000000457     465       0       0       0
[1] 56380   160
                RHC4075 RHC5563 RHC5552 RHC4874
ENSG00000000003       0       0       0       0
ENSG00000000005       0       0       0       0
ENSG00000000419       0       0       0       0
ENSG00000000457       0     133       0       0
[1] 56380   272

# 3.2 计算 fpkm 矩阵

my_FPKM <- function (counts,g_l){
  #### 根据有基因长度的基因,筛选矩阵子集
  #counts=nm_COUNT
  ng=intersect (rownames (counts),g_l$ensembl_id)
  length (ng)
  lengths=g_l [match (ng,g_l$ensembl_id),2]
  names (lengths) <- g_l [match (ng,g_l$ensembl_id),1]
  head (lengths)
  #### 计算样本文库大小,以及最后的 fpkm 计算
  counts <- counts [names (lengths),]
  counts [1:4,1:4]
  total_count <- colSums (counts)
  head (total_count)
  #根据 counts、length、total_count 计算 fpkm
  FPKM <- t (do.call ( rbind,
                     lapply (1:length (total_count),
                            function (i){
                              10^9*counts [,i]/lengths/total_count [i]
                              #lengths 向量自动遍历
                            }) ))
  FPKM [1:4,1:4]
  return (FPKM)
}
nm_my_FPKM <- my_FPKM (counts = nm_COUNT,
                      g_l = g_l)
colnames (nm_my_FPKM) <- colnames (nm_COUNT)
nm_my_FPKM [1:4,1:4]
dim (nm_my_FPKM)
tumor_my_FPKM <- my_FPKM (counts = tumor_COUNT,
                      g_l = g_l)
dim (tumor_FPKM)
colnames (tumor_my_FPKM) <- colnames (tumor_COUNT)
nm_my_FPKM [1:4,1:4]
                 RHC4104  RHC6087  RHL2880    RHC5949
ENSG00000000003   0.0000 160.5715 168.6839 22.7021034
ENSG00000000005   0.0000   0.0000   0.0000  0.0000000
ENSG00000000419   0.0000 219.5694   0.0000  0.5264304
ENSG00000000457 124.2016   0.0000   0.0000  0.0000000
[1] 50813   160
[1] 56380   272
                 RHC4104  RHC6087  RHL2880    RHC5949
ENSG00000000003   0.0000 160.5715 168.6839 22.7021034
ENSG00000000005   0.0000   0.0000   0.0000  0.0000000
ENSG00000000419   0.0000 219.5694   0.0000  0.5264304
ENSG00000000457 124.2016   0.0000   0.0000  0.0000000

# 3.3 差异分析

exp_my_FPKM <- cbind (nm_my_FPKM,tumor_my_FPKM)
dim (exp_my_FPKM) #转换 id 前
exp_my_FPKM [1:4,1:4] #转换 id 前
exp_my_FPKM <- id_change (exp_my_FPKM)
dim (exp_my_FPKM) #转换 id 后
exp_my_FPKM [1:4,1:4] #转换 id 后
my_FPKM_diff <- scRNA_deg (exp=exp_my_FPKM, group=group_dat)
head (my_FPKM_diff)
dim (my_FPKM_diff)
my_FPKM_diff <- my_FPKM_diff [my_FPKM_diff$p_val<0.01 & abs (my_FPKM_diff$avg_logFC)>0.8,]
dim (my_FPKM_diff)
exp_my_FPKM_diff <- exp_my_FPKM [match (rownames (my_FPKM_diff),rownames (exp_my_FPKM)),]
[1] 50813   432
                 RHC4104  RHC6087  RHL2880    RHC5949
ENSG00000000003   0.0000 160.5715 168.6839 22.7021034
ENSG00000000005   0.0000   0.0000   0.0000  0.0000000
ENSG00000000419   0.0000 219.5694   0.0000  0.5264304
ENSG00000000457 124.2016   0.0000   0.0000  0.0000000
[1] 50813   432
[1] 24931   432
[1] 24931   432
        RHC4104  RHC6087  RHL2880    RHC5949
TSPAN6   0.0000 160.5715 168.6839 22.7021034
TNMD     0.0000   0.0000   0.0000  0.0000000
DPM1     0.0000 219.5694   0.0000  0.5264304
SCYL3  124.2016   0.0000   0.0000  0.0000000
[1] "创建 seurat 对象..."
[1] "质控..."
[1] "归一化..."
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
[1] "差异分析..."
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s  
[1] 1231    5
[1] 112   5

# 3.4 可视化

热图

my_heatmap(exp_my_FPKM_diff)

image-20201123104427296

箱图

image-20201123104537756

image-20201123104625107

更新于 阅读次数

请我喝[茶]~( ̄▽ ̄)~*

amane 微信支付

微信支付

amane 支付宝

支付宝