my_RNAseq_processure

p_load(tidyverse, DESeq2, pheatmap, ggrepel, ggsci, reshape2, 
       GSVA, GSEABase, clusterProfiler, aPEAR)
dir.create('./GSEA_analysis')
color <- c(pal_npg(alpha = 1)(10),pal_nejm(alpha=1)(8))
### Construct the DESeq2 object
countData <- read.csv('count.csv', row.names = 1)
condition <- factor(gsub("_[0-9]$", "", colnames(countData)))
colData <- data.frame(row.names = colnames(countData), condition)
dds <- DESeqDataSetFromMatrix(countData, colData, design=~condition)
### Vst() for PCA analysis
rld <- vst(dds, blind=T)
p <- plotPCA(rld)
pcaData <- p$data
median <- pcaData %>% group_by(group) %>% summarize(X1=median(PC1), X2 = median(PC2))
xlab_text <- p$labels$x
ylab_text <- p$labels$y
p1 <- ggplot(pcaData, aes(PC1, PC2, color = group)) + 
    geom_point(size = 3) + 
    scale_color_manual(values = color[1:length(table(pcaData$group))]) +
    geom_text_repel(aes(X1, X2, label = group), data = median, family="serif") + 
    xlab(xlab_text) + ylab(ylab_text) +
    ggtitle("Sample_PCA_analysis") +
    theme_classic() +
    theme(legend.position = "none", text=element_text(family = "serif"))
ggsave("./GSEA_analysis/1.sample_PCA.pdf", p1, height=4, width=6)
  ### Correlation analysis on samples
rld_mat <- assay(rld)
rld_cor <- cor(rld_mat)
rld_cor_gg <- melt(rld_cor)
rld_cor_gg$value <- scale(rld_cor_gg$value)
p2 <- ggplot(rld_cor_gg, aes(x=Var1,y=Var2, fill=value)) +
    geom_raster() +
    scale_fill_gradient2(low='royalblue2',high='firebrick2',mid="grey95") +
    ggtitle("Sample_correlation") +
    xlab(NULL) + ylab(NULL) +
    theme_classic() +
    theme(legend.position = "none", text=element_text(family = "serif"))
ggsave("./GSEA_analysis/2.sample_correlation.pdf", p2, height=4, width=6)

  ### Construct the DESeq2 object and perform the analysis
countData <- read.csv('count.csv', row.names = 1)
  ### countData <- filter_all(countData, any_vars(. > 10))
condition <- factor(c(rep("ctrl",(ncol(countData)/2)),rep("treated",(ncol(countData)/2))))
colData <- data.frame(row.names = colnames(countData), condition)
dds <- DESeqDataSetFromMatrix(countData, colData, design=~condition)
dds <- DESeq(dds)
saveRDS(dds, './GSEA_analysis/3.DESeq2_result.rds')
res <- results(dds)
resdata <- as.data.frame(res)
  ### To avoid the padj value goes too low
resdata$padj[resdata$padj<1e-200] <- 1e-200
  ### Separate the change into "UP", "DN", and "Stable"
resdata$change = ifelse(resdata$padj < 0.05 & abs(resdata$log2FoldChange) >= 2, 
                          ifelse(resdata$log2FoldChange >= 2, 'Up', 'Dn'), 'Stable')
resdata <- na.omit(resdata)
write.csv(resdata, './GSEA_analysis/3.DESeq2_result.csv')
  ### Prepare the text information for labeling
resdata_sig <- subset(resdata, (padj < 0.05 & abs(log2FoldChange) >= 2))
resdata_sig <- resdata_sig %>% top_n(20, -log10(padj))
if(any(resdata$change == "Dn")){
    color_panel <- c('royalblue2', "grey95", 'firebrick2')
  } else {
    color_panel <- c("grey95", 'firebrick2')
  }
  ### Draw the graph
p3 <- ggplot(resdata, aes(log2FoldChange, -log(padj, 10), colour = change)) + 
    geom_point(alpha=0.8, size = 2) + 
    scale_color_manual(values=color_panel) + 
    xlab(expression("log"[2]*"FC")) + ylab(expression("-log"[10]*"FDR")) + 
    geom_vline(xintercept=c(-2,2),lty=4,col="lightgrey",lwd=0.8) + 
    geom_hline(yintercept = -log10(0.05),lty=4,col="lightgrey",lwd=0.8) +
    geom_label_repel(aes(label = rownames(resdata_sig)), data = resdata_sig, family="serif", 
                     max.overlaps = 100, max.iter = 1e9, force = 1e2) +
    ggtitle("Significant_genes (padj < 0.05 & log2FC > 2)",
            subtitle = paste0("Down: ",count(resdata$change == "Dn"),", ",
                              "Up: ",count(resdata$change == "Up"),sep="")) +
    theme_classic() +
    theme(legend.position = "none", text=element_text(family="serif"))
ggsave("./GSEA_analysis/3.volcano.pdf", p3, height=4, width=6)

  ### Prepare the DESeq2 result for GSEA analysis
dds <- readRDS('./GSEA_analysis/3.DESeq2_result.rds')
dds <- data.frame(results(dds))
log2FC_matrix <- na.omit(dds)
log2FC_matrix <- log2FC_matrix[log2FC_matrix$baseMean > 1,]
GSEA <- log2FC_matrix$stat
names(GSEA) <- rownames(log2FC_matrix)
GSEA <- sort(GSEA, decreasing=T)
  ### Load the geneset information based on human or mouse
if('GAPDH' %in% names(GSEA)){
    df <- readRDS("~/Desktop/Saved_index_for_R/GSEA_genesets_hs_2024.rds")
  } else {
    df <- readRDS("~/Desktop/Saved_index_for_R/GSEA_genesets_mm_2024.rds")
  }
  ### Perform the GSEA analysis
GSEA_ALL <- GSEA(GSEA, TERM2GENE=df, eps=0)
GSEA_matrix <- GSEA_ALL@result
  ### Save result
saveRDS(GSEA_ALL, './GSEA_analysis/4.GSEA_result.rds')
GSEA_matrix <- GSEA_matrix %>% select('ID', 'NES', 'p.adjust') %>% 
  tibble() %>% arrange(NES) %>% mutate(logP = -log10(p.adjust)) %>%
  mutate(color = if_else(NES > 2, 'royalblue2', if_else(NES < -2, 'firebrick2', 'grey95')))
GSEA_matrix$ID <- gsub('_',' ',GSEA_matrix$ID)
GSEA_top <- rbind(GSEA_matrix %>% top_n(-10, NES), GSEA_matrix %>% top_n(10, NES))
p4 <- ggplot(GSEA_matrix, aes(x = NES, y = logP, color = color)) + 
  geom_point() + theme_classic() + 
  geom_label_repel(aes(label = ID), GSEA_top, size = 2.5, direction="both") + 
  scale_color_manual(values = c('royalblue2','grey95','firebrick2')) +
  xlab('NES of enrichment') + ylab(expression("Log"[10]*"(p.adj)")) +
  geom_vline(xintercept=c(-2, 2),lty=4,col="grey",lwd=0.5) +
  theme(legend.position = 'none') + 
  ggtitle("GSEA_results_on_genesets")
ggsave("./GSEA_analysis/4_GSEA.pdf", p4, height=6, width=12)

  ### Perform filtering of the GSEA result
  # GSEA_matrix$Description <- gsub("WP_","",GSEA_matrix$Description)
  # GSEA_matrix$Description <- gsub("REACTOME_","",GSEA_matrix$Description)
  # GSEA_matrix$Description <- gsub("KEGG_","",GSEA_matrix$Description)
  # GSEA_matrix$Description <- gsub("GOMF_","",GSEA_matrix$Description)
  # GSEA_matrix$Description <- gsub("GOCC_","",GSEA_matrix$Description)
  # GSEA_matrix$Description <- gsub("GOBP_","",GSEA_matrix$Description)
GSEA_matrix <- GSEA_ALL@result
GSEA_matrix$Description <- gsub("_", " ", GSEA_matrix$ID)
# GSEA_matrix$Description <- tolower(GSEA_matrix$Description)
GSEA_matrix <- rbind(GSEA_matrix %>% top_n(100,NES), 
                       GSEA_matrix %>% top_n(-100,NES))
GSEA_matrix <- GSEA_matrix[unique(GSEA_matrix$ID),]
  ### Use the aPEAR package
set.seed(123)
data <- findPathClusters(GSEA_matrix, cluster = 'hier', minClusterSize = 3)
p5 <- plotPathClusters(GSEA_matrix, data$sim, data$clusters, 
                        repelLabels = TRUE, drawEllipses = TRUE)
ggsave("./GSEA_analysis/5.GSEA_network.pdf", p5, height=8, width=12)
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

推薦閱讀更多精彩內(nèi)容