三个男躁一个女,国精产品一区一手机的秘密,麦子交换系列最经典十句话,欧美国产综合欧美视频

<li id="jpjks"></li>

<sub id="jpjks"></sub>

登錄注冊寫文章

2022-07-12 Seurat復習

2022-07-12 Seurat復習

基本流程：數據導入，QC去除低質量細胞，歸一化，由于矩陣過于稀疏所以選取top1000~2000，對每個基因進行zscore，線性降維和非線性降維，聚類（基于線性降維的結果進行KNN,SNN）

rm( list = ls())

library(dplyr)

library(Seurat)

library(patchwork)

# Load the PBMC dataset

pbmc.data <- Read10X(data.dir = "F:/class/test/filtered_gene_bc_matrices/hg19/")

#### Method1對于不規范數據

dir.10x = 'F:/class/test/filtered_gene_bc_matrices/hg19/'

#首先處理genelist文件，考慮到只需要第二行

genes <- read.table(paste0(dir.10x, 'genes.tsv'), stringsAsFactors=F, header=F)$V2

genes <- make.unique(genes, sep = '.')#將重復的以一個點分開

#barcode是當列矩陣

barcodes <- readLines(paste0(dir.10x, 'barcodes.tsv'))

mtx <- Matrix::readMM(paste0(dir.10x, 'matrix.mtx'))

mtx <- as(mtx, 'dgCMatrix')#格式轉換

colnames(mtx) = barcodes#賦予列名

rownames(mtx) = genes#賦予行名

pbmc <- CreateSeuratObject(counts = mtx, project = "pbmc3k",min.cells = 3, min.features = 200)

#### Method2

# Initialize the Seurat object with the raw (non-normalized data).

pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)

pbmc

# Lets examine a few genes in the first thirty cells

pbmc.data[c("CD3D", "TCL1A", "MS4A1"), 1:30]

dense.size <- object.size(as.matrix(pbmc.data))

dense.size

sparse.size <- object.size(pbmc.data)

sparse.size

dense.size/sparse.size

#######QC計算線粒體比例，線粒體比例高是死細胞，過濾掉線粒體高的細胞

# The [[ operator can add columns to object metadata. This is a great place to stash QC stats

pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")

# Visualize QC metrics as a violin plot

#三張圖每個細胞檢測到的基因數量，每個細胞檢測到的count數量，每個細胞檢測到的線粒體比例

VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

# FeatureScatter is typically used to visualize feature-feature relationships, but can be used

# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.

#count和線粒體基因的的關系

plot1 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt")

#count和基因總數的關系

plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

plot1 + plot2

#根據數據特點選擇適合的過濾方式

pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)

######NormalizeData去除測序深度對數據分析的影響

#將每一個細胞的count先歸一化到10000，再對每一個基因表達值log

pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)

#pbmc <- NormalizeData(pbmc)

##########Feature selection 1500，3000

pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)

# Identify the 10 most highly variable genes

top10 <- head(VariableFeatures(pbmc), 10)

top10

# plot variable features with and without labels

plot1 <- VariableFeaturePlot(pbmc)

plot2 <- LabelPoints(plot = plot1, points = top10, repel=TRUE)

plot2

#FindVariableFeatures選擇基因的原理平均表達量，一個細胞的平均表達量在不同的細胞之間的方差有多大，方差越大就是細胞類群分開的依據

plot1 + plot2

all.genes <- rownames(pbmc)

pbmc <- ScaleData(pbmc, features = all.genes)#本質還是zscore，每一個基因在所有細胞中平均細胞表達量為0，方差為1

########降維Reduction

pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))

# Examine and visualize PCA results a few different ways

DimPlot(pbmc, reduction = "pca")#可視化

# FeaturePlot(pbmc, reduction = 'pca', features = c('CD79A', 'CD14', 'FCGR3A', 'CD4', 'CST3', 'PPBP'))

# FeaturePlot(pbmc, reduction = 'pca', features = c('nCount_RNA', 'nFeature_RNA'))

# loadings_sorted = dplyr::arrange(as.data.frame(loadings), desc(PC_1))

# DimHeatmap(pbmc, dims = 1:2, cells = 200, balanced = TRUE)

# print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)

# VizDimLoadings(pbmc, dims = 1:2, reduction = "pca")

# DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)

# DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE)

ElbowPlot(pbmc,ndims = 50)#做了50個pca可視化，縱軸奇異值

# NOTE: This process can take a long time for big datasets, comment out for expediency. More

# approximate techniques such as those implemented in ElbowPlot() can be used to reduce

# computation time

#pbmc <- JackStraw(pbmc, num.replicate = 100)

#pbmc <- ScoreJackStraw(pbmc, dims = 1:20)

#JackStrawPlot(pbmc, dims = 1:15)

#ElbowPlot(pbmc)

# Look at cluster IDs of the first 5 cells

#head(Idents(pbmc), 5)

# If you haven't installed UMAP, you can do so via reticulate::py_install(packages =

# 'umap-learn')

#選取前10的特征進行非線性降維，兩種方式umap,tsne,但是umap更好可以更真實的表現群與群的真實距離

pbmc <- RunUMAP(pbmc, dims = 1:10)

FeaturePlot(pbmc, features = c('FCGR3A', 'CD14'), reduction = 'umap')

#DimPlot(pbmc, reduction = "umap")

# note that you can set `label = TRUE` or use the LabelClusters function to help label

#######cluster分群

pbmc <- FindNeighbors(pbmc, dims = 1:10)# louvain cluster, graph based

pbmc <- FindClusters(pbmc, resolution = 0.5)#resolution越大群越多

DimPlot(pbmc, reduction = "umap", group.by = 'seurat_clusters', label=T)

#根據maker對細胞進行細胞注釋

FeaturePlot(pbmc, features = c("MS4A1", "TYROBP", "CD14",'FCGR3A', "FCER1A",

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "CCR7", "IL7R", "PPBP", "CD8A"))

new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono",

? ? ? ? ? ? ? ? ? ? "NK", "DC", "Platelet")

names(new.cluster.ids) <- levels(pbmc)

pbmc <- RenameIdents(pbmc, new.cluster.ids)

#重新定義的亞群

DimPlot(pbmc, reduction = "umap", label=T)

# ############################################################################################################

# ## DE analysis有很多小群無法判斷時

# ############################################################################################################

#利用差異基因的方式，來看所有群表達怎樣的marker，來確定每一個群表達什么樣的基因，再去推斷那個群可能是什么樣的細胞亞群

pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)#都所有亞群依次求差異基因

pbmc.markers %>% group_by(cluster) %>% slice_max(n = 2, order_by = avg_log2FC)#每個cluster top2的差異基因看

CD4.mem.DEGS <- FindMarkers(pbmc, ident.1 = 'Memory CD4 T', ident.2 = 'Naive CD4 T', min.pct = 0.25)#要計算哪一群的差異基因，要計算的差異基因對照的是什么

#test.use計算方式

# ############################################################################################################

# ## gene signature analysis

# ############################################################################################################

exhaustion_genes = list(c('PDCD1','CD160','FASLG','CD244','LAG3','TNFRSF1B','CCR5','CCL3',

? ? ? ? ? ? ? ? ? ? ? ? ? 'CCL4','CXCL10','CTLA4','LGALS1','LGALS3','PTPN13','RGS16','ISG20',

? ? ? ? ? ? ? ? ? ? ? ? ? 'MX1','IRF4','EOMES','PBX3','NFATC1','NR4A2','CKS2','GAS2','ENTPD1','CA2'))

pbmc = Seurat::AddModuleScore(pbmc, features = exhaustion_genes, name='exhaustion.score')

FeaturePlot(pbmc, features = 'exhaustion.score1', reduction = 'umap')

VlnPlot(pbmc,features = 'exhaustion.score1', group.by = 'seurat_clusters')

最后編輯于：2022.07.13 14:21:17

?著作權歸作者所有,轉載或內容合作請聯系作者
平臺聲明：文章內容（如有圖片或視頻亦包括在內）由作者上傳并發布，文章內容僅代表作者本人觀點，簡書系信息發布平臺，僅提供信息存儲服務。

推薦閱讀更多精彩內容

單細胞測序--Seurat包--官網經典流程
Seurat - Guided Clustering Tutorial, July 16, 2020 https:...
PhageNanoenzyme閱讀 7,426評論 0贊 5
單細胞測序--Seurat標準流程
單細胞測序--Seurat（上）--創建對象和數據預處理參考：https://zhuanlan.zhihu.co...
Seurat_Satija閱讀 7,807評論 0贊 16
2019-10-22 R語言Seurat包下游分析-1
下游分析 cellranger count 計算的結果只能作為錯略觀測的結果，如果需要進一步分析聚類細胞，還需要進...
流浪狗阿貍閱讀 9,896評論 0贊 21
捋一遍單細胞測序分群鑒定——基于seurat官網和網上資料~
?在看完seurat官方分群教程后 https://satijalab.org/seurat/v3.0/pbmc3...
申偉_4009閱讀 4,466評論 0贊 4
單細胞轉錄組分析---seurat 1例子
==================安裝seurant================ install.packa...
jjjscuedu閱讀 4,191評論 0贊 6

1贊2贊

贊賞

手機看全文

主站蜘蛛池模板：石景山区| 辽阳县| 革吉县| 卓尼县| 宁安市| 福鼎市| 天全县| 衡阳县| 贵港市| 阿拉尔市| 顺昌县| 五原县| 南康市| 博湖县| 福清市| 喀喇| 广安市| 类乌齐县| 保德县| 广饶县| 阜城县| 揭阳市| 府谷县| 江门市| 洪雅县| 田阳县| 独山县| 兴文县| 陆丰市| 宁乡县| 琼结县| 平昌县| 四会市| 温泉县| 仁寿县| 屯门区| 深泽县| 济源市| 泾川县| 互助| 红原县|

<em id="qacjq"></em>

<cite id="qacjq"></cite>