2 min to read
RNAseq比对结果下游分析
简介
DESeq2要求的数据是raw count。
获取注释信息
library(rtracklayer)
gtf <- import("Homo_sapiens.GRCh37.87.chr.gtf.gz")
gtf <- gtf[gtf$gene_name!=""]
gtf <- gtf[!is.na(gtf$gene_name)]
## protein coding
protein <-
gtf$gene_name[gtf$transcript_biotype %in%
c("IG_C_gene", "IG_D_gene", "IG_J_gene", "IG_LV_gene",
"IG_M_gene", "IG_V_gene", "IG_Z_gene",
"nonsense_mediated_decay", "nontranslating_CDS",
"non_stop_decay", "polymorphic_pseudogene",
"protein_coding", "TR_C_gene", "TR_D_gene", "TR_gene",
"TR_J_gene", "TR_V_gene")]
## mitochondrial genes
mito <- gtf$gene_name[as.character(seqnames(gtf)) %in% "MT"]
## long noncoding
lincRNA <-
gtf$gene_name[gtf$transcript_biotype %in%
c("3prime_overlapping_ncrna", "ambiguous_orf",
"antisense_RNA", "lincRNA", "ncrna_host", "non_coding",
"processed_transcript", "retained_intron",
"sense_intronic", "sense_overlapping")]
## short noncoding
sncRNA <-
gtf$gene_name[gtf$transcript_biotype %in%
c("miRNA", "miRNA_pseudogene", "misc_RNA",
"misc_RNA_pseudogene", "Mt_rRNA", "Mt_tRNA",
"Mt_tRNA_pseudogene", "ncRNA", "pre_miRNA",
"RNase_MRP_RNA", "RNase_P_RNA", "rRNA", "rRNA_pseudogene",
"scRNA_pseudogene", "snlRNA", "snoRNA",
"snRNA_pseudogene", "SRP_RNA", "tmRNA", "tRNA",
"tRNA_pseudogene", "ribozyme", "scaRNA", "sRNA")]
## pseudogene
pseudogene <-
gtf$gene_name[gtf$transcript_biotype %in%
c("disrupted_domain", "IG_C_pseudogene", "IG_J_pseudogene",
"IG_pseudogene", "IG_V_pseudogene", "processed_pseudogene",
"pseudogene", "transcribed_processed_pseudogene",
"transcribed_unprocessed_pseudogene",
"translated_processed_pseudogene",
"translated_unprocessed_pseudogene", "TR_J_pseudogene",
"TR_V_pseudogene", "unitary_pseudogene",
"unprocessed_pseudogene")]
annotations <- list(protein=unique(protein),
mito=unique(mito),
lincRNA=unique(lincRNA),
sncRNA=unique(sncRNA),
pseudogene=unique(pseudogene))
annotations <- annotations[lengths(annotations)>0]
参考资料
[01]. GO analysis using clusterProfiler
[02]. 使用limma、Glimma和edgeR,RNA-seq数据分析易如反掌
[03]. 转录组入门(7):差异表达分析
Comments