# 2024-08-07 突變數據整理

##############
#參考鏈接:[https://mp.weixin.qq.com/s/0VPAy8f9u5ol8ryFZbmk7Q]
# Install TCGAmutations from GitHub only when it is not already available, so
# that re-running the script does not trigger a network install every time.
if (!requireNamespace("TCGAmutations", quietly = TRUE)) {
  BiocManager::install("PoisonAlien/TCGAmutations")
}
library(TCGAmutations)
library(maftools)
library(dplyr)

# Project label for this analysis.
proj <- "TCGA-LIHC"

# Load the LIHC cohort. The object keeps the name `laml` because the later
# sections of this script read `laml@data` and pass `laml` to maftools plots.
laml <- tcga_load(study = "LIHC")
laml

###############
#載入分組的信息Cluster.rda
#將兩個表的樣本ID統一

#>   Hugo_Symbol Chromosome Start_Position End_Position
#> 1        NMT2         10       15154934     15154934
#> 2    ARHGAP42         11      100845194    100845194
#> 3        RAG1         11       36597064     36597064
#> 4      NLRP14         11        7064166      7064166
library(stringr)
# Count distinct samples in the mutation table, identifying a sample by the
# first 16 characters of its barcode. The original line read from `mut`, which
# is never created in this script; the loaded cohort's mutation table
# (`laml@data`) is used instead.
length(unique(str_sub(as.character(laml@data$Tumor_Sample_Barcode), 1, 16)))

# 以病人為中心,表達矩陣按病人ID去重復
#exprSet <- exp_id
#k = !duplicated(str_sub(colnames(exprSet),1,12));table(k)
#exprSet = exprSet[,k]
#調整meta的ID順序與exprSet列名一致
#meta=meta[match(str_sub(colnames(exprSet),1,12),meta$ID),]
#identical(meta$ID,str_sub(colnames(exprSet),1,12))
#colnames(exprSet) <- str_sub(colnames(exprSet),1,16)
#k = colnames(exprSet) %in% unique(str_sub(mut$Tumor_Sample_Barcode,1,16));table(k)
#expm = exprSet[,k]

# Work on separate names so that `laml` itself is not reassigned below.
maf_object <- laml
a <- laml@data

# ---- Build the grouping table ----
# Read the grouping sheet; its first column becomes the row names.
ex2 <- read.csv("SASP_group.csv", row.names = 1, check.names = FALSE)

# Keep only the `group` column. drop = FALSE keeps the result a data frame
# (and with it the row names) even though a single column is selected.
Cluster <- ex2[, "group", drop = FALSE]
table(Cluster$group)

# Two-level label: "high MRGPI" stays as is, any other non-missing value is
# labelled "low MRGPI".
Cluster$Cluster <- factor(
  ifelse(Cluster$group == "high MRGPI", "high MRGPI", "low MRGPI"),
  levels = c("high MRGPI", "low MRGPI")
)
#Cluster <- Cluster[, "Cluster", drop = FALSE]
save(Cluster, file = "Cluster.Rda")  # snapshot before filtering

# Keep only rows whose row name equals the first 12 characters of at least one
# Tumor_Sample_Barcode in the mutation data.
Cluster <- Cluster[
  rownames(Cluster) %in% substr(maf_object@data$Tumor_Sample_Barcode, 1, 12)
  , , drop = FALSE
]
save(Cluster, file = "Cluster.Rda")  # overwrite with the filtered table

load("Cluster.Rda")
# Convert `group` to a factor with a fixed level order (high first, then low).
Cluster$group <- factor(Cluster$group, levels = c("high MRGPI", "low MRGPI"))
# Keep only the `group` column (row names stay as they are). The column is
# selected by name instead of dropping position 2, so the result does not
# depend on how many columns the loaded table has or on their order.
Cluster <- Cluster[, "group", drop = FALSE]
class(Cluster$group)
# "factor"
table(Cluster$group)
# Counts recorded by the original author for their data:
# high MRGPI  low MRGPI
# 178        179
save(Cluster, file = "Cluster.Rda")

# Set up the grouping information.
# `Cluster` must be in memory here (load Cluster.Rda manually if it is not).
#
# Sort the rows by group. Row indexing with drop = FALSE reorders the rows and
# carries the row names along, so the result still has the original row names
# and the single `group` column. This replaces the earlier
# rownames_to_column() / column_to_rownames() round trip, which relies on the
# tibble package that this script never attaches.
Cluster <- Cluster[order(Cluster$group), , drop = FALSE]
save(Cluster, file = "Cluster.Rda")

# Uses the grouping table `Cluster` (see Cluster.Rda) and the mutation table `a`.
# Bring both tables onto the same sample ID: truncate the mutation barcodes to
# their first 12 characters so they can be compared with the row names of
# `Cluster`.
a$Tumor_Sample_Barcode <- substring(a$Tumor_Sample_Barcode, 1, 12)

# Grouping table with the row names exposed as a regular column named like the
# barcode column of the mutation table. Built with base R because the tibble
# helper rownames_to_column() is not attached by this script.
group <- data.frame(
  Tumor_Sample_Barcode = rownames(Cluster),
  Cluster,
  row.names = NULL,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Split the grouping table per group. %in% is used instead of == so that rows
# with a missing group are dropped rather than returned as all-NA rows.
group_high <- group[group$group %in% "high MRGPI", ]  # 178 in the author's run
group_low <- group[group$group %in% "low MRGPI", ]    # 179 in the author's run

# Subset the mutation table to the samples of each group.
maf_1 <- a[a$Tumor_Sample_Barcode %in% group_high$Tumor_Sample_Barcode, ]
maf_2 <- a[a$Tumor_Sample_Barcode %in% group_low$Tumor_Sample_Barcode, ]

# Fail with a clear message when the IDs did not match at all, instead of
# letting read.maf() fail on an empty table.
if (nrow(maf_1) == 0L || nrow(maf_2) == 0L) {
  stop(
    "No mutation records matched one of the groups; ",
    "check that the row names of `Cluster` match the truncated barcodes.",
    call. = FALSE
  )
}

# Build one MAF object per group.
maf.coad_high <- read.maf(maf = maf_1)
maf.coad_low <- read.maf(maf = maf_2)

# 3.2 Plotting
# Oncoplot (waterfall plot) for the "high MRGPI" group.
oncoplot(
  maf = maf.coad_high,
  top = 20,                        # number of top genes drawn
  fontSize = 0.6,                  # font size
  showTumorSampleBarcodes = FALSE
)


# Oncoplot (waterfall plot) for the "low MRGPI" group.
oncoplot(
  maf = maf.coad_low,
  top = 20,                        # number of top genes drawn
  fontSize = 0.6,                  # font size
  showTumorSampleBarcodes = FALSE
)

# Summary plots from maftools (plotmafSummary), drawn once per group so the two
# MAF objects can be compared.
#if (as.numeric(dev.cur()) != 1) graphics.off()
plotmafSummary(
  maf = maf.coad_high,
  rmOutlier = TRUE,
  #showBarcodes = FALSE,
  addStat = "median",
  dashboard = TRUE,
  titvRaw = FALSE
)
plotmafSummary(
  maf = maf.coad_low,
  rmOutlier = TRUE,
  #showBarcodes = FALSE,
  addStat = "median",
  dashboard = TRUE,
  titvRaw = FALSE
)



# ---- Gene-level views on the full cohort ----
# Per-gene mutation summary.
getGeneSummary(laml)

# Lollipop plot for a single gene, labelled with the HGVSp_Short column.
lollipopPlot(
  maf = laml,
  gene = "LIPG",
  AACol = "HGVSp_Short",
  showMutationRate = TRUE
)

# Oncoplot restricted to a hand-picked gene list.
g <- c(
  "ADRA1A", "TRPM8", "AR", "EPHX2", "WEE1", "PFKFB3", "PIM3", "ESRRA",
  "RORC", "LIPG", "P2RY1", "SFRP1", "PLK3", "AHCY", "PPARD", "ADCY1"
)
oncoplot(maf = laml, genes = g, fontSize = 0.7)
# 最后編輯于
# ©著作權歸作者所有,轉載或內容合作請聯系作者
# 平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

# 推薦閱讀更多精彩內容