# 前言
之前对拟南芥单细胞测序的文献数据进行复现,数据结果有点微妙,接下来进行拟时间分析
# 分生组织伪时间分析
作者首先查看了分生组织相关基因(WOX5,PIN1,RGF3,PLT1)在聚类中的表达情况,WOX5 和 PIN1 主要在 cluster12 中表达,RGF3 以及 PLT1 主要在 cluster19 中表达,对应到自己的数据中查看效果
pdf("cell_identify/meristematic.pdf",height = 14,width = 14) | |
FeaturePlot(wang,features=c('AT3G11260','AT1G73590','AT2G04025','AT3G20840'),cols=c("grey","yellow","red","brown") | |
,reduction = 'umap',pt.size = 1,label.size = 4) | |
dev.off() |
可以看到对应效果还是不错的,接下来选择 cluster12.14.19 进行伪时间分析
id<-c ("12","14","19") | |
cell.sub <- subset (wang@meta.data,seurat_clusters==id) | |
scRNAsub <- subset (wang, cells=row.names (cell.sub)) | |
dim (scRNAsub) | |
library (monocle) | |
dir.create ("pseudotime121419") | |
data <- as (as.matrix (scRNAsub@assays$RNA@counts), 'sparseMatrix') | |
pd <- new ('AnnotatedDataFrame', data = scRNAsub@meta.data) | |
fData <- data.frame (gene_short_name = row.names (data), row.names = row.names (data)) | |
fd <- new ('AnnotatedDataFrame', data = fData) | |
mycds <- newCellDataSet (data, | |
phenoData = pd, | |
featureData = fd, | |
expressionFamily = negbinomial.size ()) | |
mycds <- estimateSizeFactors (mycds) | |
mycds <- estimateDispersions (mycds, cores=4, relative_expr = TRUE) | |
## 选择代表性基因 | |
## 使用 monocle 选择的高变基因 | |
disp_table <- dispersionTable (mycds) | |
disp.genes <- subset (disp_table, mean_expression >= 0.1 & dispersion_empirical >= 1 * dispersion_fit)$gene_id | |
mycds <- setOrderingFilter (mycds, disp.genes) | |
## 降维以及细胞排序 | |
# 降维 | |
mycds <- reduceDimension (mycds, max_components = 2, method = 'DDRTree') | |
# 排序 | |
mycds <- orderCells (mycds) | |
#State 轨迹分布图 | |
plot1 <- plot_cell_trajectory (mycds, color_by = "State") | |
ggsave ("pseudotime121419/State.pdf", plot = plot1, width = 6, height = 5) | |
ggsave ("pseudotime121419/State.png", plot = plot1, width = 6, height = 5) | |
##Cluster 轨迹分布图 | |
plot2 <- plot_cell_trajectory (mycds, color_by = "seurat_clusters") | |
ggsave ("pseudotime121419/Cluster.pdf", plot = plot2, width = 6, height = 5) | |
ggsave ("pseudotime121419/Cluster.png", plot = plot2, width = 6, height = 5) | |
##Pseudotime 轨迹图 | |
plot3 <- plot_cell_trajectory (mycds, color_by = "Pseudotime") | |
ggsave ("pseudotime121419/Pseudotime.pdf", plot = plot3, width = 6, height = 5) | |
ggsave ("pseudotime121419/Pseudotime.png", plot = plot3, width = 6, height = 5) | |
## 合并作图 | |
plotc <- plot1|plot2|plot3 | |
ggsave ("pseudotime121419/Combination.pdf", plot = plotc, width = 10, height = 3.5) | |
ggsave ("pseudotime121419/Combination.png", plot = plotc, width = 10, height = 3.5) | |
## 保存结果 | |
write.csv (pData (mycds), "pseudotime121419/pseudotime.csv") | |
p1 <- plot_cell_trajectory (mycds, color_by = "State") + facet_wrap (~State, nrow = 1) | |
p2 <- plot_cell_trajectory (mycds, color_by = "seurat_clusters") + facet_wrap (~seurat_clusters, nrow = 1) | |
plotc <- p1/p2 | |
ggsave ("pseudotime121419/trajectory_facet.png", plot = plotc, width = 6, height = 5) |
结果也与文献中的有点出入,接下来查看一下 BEAM 热图和一些基因在轨迹中的表达情况
##BEAM 分析 | |
disp_table <- dispersionTable (mycds) | |
disp.genes <- subset (disp_table, mean_expression >= 0.5&dispersion_empirical >= 1*dispersion_fit) | |
disp.genes <- as.character (disp.genes$gene_id) | |
mycds_sub <- mycds [disp.genes,] | |
#State 轨迹分布图 | |
plot1 <- plot_cell_trajectory (mycds_sub, color_by = "State") | |
ggsave ("pseudotime121419/BEAM_State.pdf", plot = plot1, width = 6, height = 5) | |
ggsave ("pseudotime121419/BEAM_State.png", plot = plot1, width = 6, height = 5) | |
##Cluster 轨迹分布图 | |
plot2 <- plot_cell_trajectory (mycds_sub, color_by = "seurat_clusters") | |
ggsave ("pseudotime121419/BEAM_Cluster.pdf", plot = plot2, width = 6, height = 5) | |
ggsave ("pseudotime121419/BEAM_Cluster.png", plot = plot2, width = 6, height = 5) | |
##Pseudotime 轨迹图 | |
plot3 <- plot_cell_trajectory (mycds_sub, color_by = "Pseudotime") | |
ggsave ("pseudotime121419/BEAM_Pseudotime.pdf", plot = plot3, width = 6, height = 5) | |
ggsave ("pseudotime121419/BEAM_Pseudotime.png", plot = plot3, width = 6, height = 5) | |
## 合并作图 | |
plotc <- plot1|plot2|plot3 | |
ggsave ("pseudotime121419/BEAM_Combination.pdf", plot = plotc, width = 10, height = 3.5) | |
ggsave ("pseudotime121419/BEAM_Combination.png", plot = plotc, width = 10, height = 3.5) | |
## 保存结果 | |
beam_res <- BEAM (mycds_sub, branch_point = 1, cores = 8) | |
beam_res <- beam_res [order (beam_res$qval),] | |
beam_res <- beam_res [,c ("gene_short_name", "pval", "qval")] | |
mycds_sub_beam <- mycds_sub [row.names (subset (beam_res, qval < 1e-4)),] | |
pdf ("pseudotime121419/BEAM_pseudotime_heatmap2.pdf",width = 10, height = 10) | |
plot_genes_branched_heatmap (mycds_sub_beam, branch_point = 1, | |
num_clusters = 5, show_rownames = F) | |
dev.off () | |
## 寻找相应的基因绘制轨迹图 | |
matrix_dir="filtered_gene_bc_matrices/ref/" | |
barcode.path <- paste0 (matrix_dir,"barcodes.tsv") | |
features.path <- paste0 (matrix_dir,"genes.tsv") | |
matrix.path <- paste0 (matrix_dir, "matrix.mtx") | |
mat1 <- readMM (file = matrix.path) | |
feature.names = read.delim (features.path, | |
header = FALSE, | |
stringsAsFactors = FALSE) | |
barcode.names = read.delim (barcode.path, | |
header = FALSE, | |
stringsAsFactors = FALSE) | |
colnames (mat1) = barcode.names$V1 | |
rownames (mat1) = feature.names$V1 | |
mat1<-as.matrix (mat1) | |
gene<-t (as.matrix (mat1 [c ('AT3G11260','AT1G73590', | |
'AT2G04025','AT3G20840','AT1G50490'), | |
colnames (scRNAsub@assays$RNA@counts)])) | |
colnames (gene)<-c ("WOX5","PIN1","RGF3","PLT1","UBC20") | |
mycds_sub@phenoData@data<-cbind (mycds_sub@phenoData@data,gene) | |
mycds_sub@phenoData@data [mycds_sub@phenoData@data == 0] <- NA | |
p1<-plot_cell_trajectory (mycds_sub, color_by = "WOX5")+scale_color_gradient (na.value = "grey",low="yellow",high="red") | |
p2<-plot_cell_trajectory (mycds_sub, color_by = "PIN1")+scale_color_gradient (na.value = "grey",low="yellow",high="red") | |
p3<-plot_cell_trajectory (mycds_sub, color_by = "RGF3")+scale_color_gradient (na.value = "grey",low="yellow",high="red") | |
p4<-plot_cell_trajectory (mycds_sub, color_by = "PLT1")+scale_color_gradient (na.value = "grey",low="yellow",high="red") | |
p5<-plot_cell_trajectory (mycds_sub, color_by = "UBC20")+scale_color_gradient (na.value = "grey",low="yellow",high="red") | |
plotc <- p1|p2|p3|p4|p5 | |
ggsave ("pseudotime121419/meristematic.pdf", plot = plotc, width = 18, height = 5) | |
ggsave ("pseudotime121419/meristematic.png", plot = plotc, width = 18, height = 5) |
文献中没有给出相应的参数,结果还是与文献的结果有点差距
# root cap 伪时间分析
按照相同的方式对 root cap 组织进行伪时间分析
# 结语
对于此次的数据复现,前面的聚类步骤还是能与文献对应,但是后续的伪时间分析差距就有点大了,主要是因为拟时间分析我才刚刚入门学习,对一些分析过程中的参数不太了解,目前的数据与代码我已上传 github
,欢迎大家批评指正