mkdir -p palmisano/{data/{rawdata,trimdata},ref,QC,alignment/{bam,bed,bedgraph,peakCalling/SEACR,sam/{bowtie2_summary,fragmentLen}}}
reads=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/rawdata
index=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/ref
ref=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/ref/mm_39
qc=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/QC
projPath=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano
adapters=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano
trim=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/trimdata
sam=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/alignment/sam
bam=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/alignment/bam
bed=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/alignment/bed
bedgraph=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/alignment/bedgraph
reference=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/ref/mm39_bioconductor/BSgenome.Mmusculus.UCSC.mm39.mainChrs.fa
reference_index=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/ref/mm39_bioconductor/BSgenome.Mmusculus.UCSC.mm39.mainChrs.fa.fai
ref1=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/ref
cores=8
list_of_samples=(SRR26891276 SRR26891275 SRR26891273 SRR26891272 SRR26891271 SRR26891270)
samples=(Sham_WT1 Sham_WT2 Sham_WT3 nerve_crush_WT1 nerve_crush_WT2 nerve_crush_WT3)
for i in $list_of_samples; do echo $i; grep $i CUT_TAG.txt | awk '{print $8}' | awk '{gsub(/\;/,"\n")};{print $0}'>>samples.sh;done
Adding the wget command, -O option to specify the path, and renaming samples
for sample in $samples;do echo wget -O ${sample}_R1.fastq.gz>>names.sh; echo wget -O ${sample}_R2.fastq.gz>>names.sh;done
paste -d " " names.sh samples.sh>download.sh
bash download.sh
Content of the download.sh file:
wget -O Sham_WT1_R1.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/076/SRR26891276/SRR26891276_1.fastq.gz
wget -O Sham_WT1_R2.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/076/SRR26891276/SRR26891276_2.fastq.gz
wget -O Sham_WT2_R1.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/075/SRR26891275/SRR26891275_1.fastq.gz
wget -O Sham_WT2_R2.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/075/SRR26891275/SRR26891275_2.fastq.gz
wget -O Sham_WT3_R1.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/073/SRR26891273/SRR26891273_1.fastq.gz
wget -O Sham_WT3_R2.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/073/SRR26891273/SRR26891273_2.fastq.gz
wget -O nerve_crush_WT1_R1.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/072/SRR26891272/SRR26891272_1.fastq.gz
wget -O nerve_crush_WT1_R2.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/072/SRR26891272/SRR26891272_2.fastq.gz
wget -O nerve_crush_WT2_R1.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/071/SRR26891271/SRR26891271_1.fastq.gz
wget -O nerve_crush_WT2_R2.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/071/SRR26891271/SRR26891271_2.fastq.gz
wget -O nerve_crush_WT3_R1.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/070/SRR26891270/SRR26891270_1.fastq.gz
wget -O nerve_crush_WT3_R2.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR268/070/SRR26891270/SRR26891270_2.fastq.gz
bowtie2-build -f $reference $index/mm_39
for sample in $reads/*.fastq.gz; do
fastqc $sample -o ${qc}/
done
for sample in Sham_WT1 Sham_WT2 Sham_WT3 nerve_crush_WT1 nerve_crush_WT2 nerve_crush_WT3
do
trimmomatic PE -phred33 ${reads}/${sample}_R1.fastq.gz ${reads}/${sample}_R2.fastq.gz ${trim}/${sample}P_R1.fastq.gz ${trim}/${sample}UN_R1.fastq.gz ${trim}/${sample}P_R2.fastq.gz ${trim}/${sample}UN_R2.fastq.gz ILLUMINACLIP:${adapters}/NexteraPE-PE.fa:2:30:10 LEADING:25 TRAILING:25 SLIDINGWINDOW:4:25 MINLEN:70
done
for sample in $trim/*.fastq.gz
do
fastqc $sample -o ${qc}
done
for sample in Sham_WT1P Sham_WT2P Sham_WT3P nerve_crush_WT1P nerve_crush_WT2P nerve_crush_WT3P
do
bowtie2 --local --very-sensitive --no-mixed --no-discordant --phred33 -I 10 -X 700 -p ${cores} -x ${ref} -1 ${trim}/${sample}_R1.fastq.gz -2 ${trim}/${sample}_R2.fastq.gz -S ${projPath}/alignment/sam/${sample}_bowtie2.sam> ${projPath}/alignment/sam/bowtie2_summary/${sample}_bowtie2.txt
done
for sample in Sham_WT1 Sham_WT2 Sham_WT3 nerve_crush_WT1 nerve_crush_WT2 nerve_crush_WT3
do
## Conversion sam to bam and duplicate removal
amtools view -bS -F 4 ${sam}/${sample}P_bowtie2.sam >${bam}/${sample}.mapped.bam
samtools view -q 30 ${bam}/${sample}.mapped.bam>${bam}/${sample}.mapped_cleaned.bam
samtools sort -n -o ${bam}/${sample}_sorted.mapped.bam -O BAM ${bam}/${sample}.mapped.bam
samtools fixmate -m ${bam}/${sample}_sorted.mapped.bam ${bam}/${sample}_fixmate.bam
samtools sort -o ${bam}/${sample}_sorted_coord.bam ${bam}/${sample}_fixmate.bam
samtools markdup -r -s ${bam}/${sample}_sorted_coord.bam ${bam}/${sample}_deduplicated.bam
samtools sort -n -o ${bam}/${sample}_deduplicated_sort_name.bam -O BAM ${bam}/${sample}_deduplicated.bam
samtools index ${bam}/${sample}_deduplicated_sort_name.bam
## Convert into bed file format
bedtools bamtobed -i ${bam}/${sample}_deduplicated_sort_name.bam -bedpe >${bed}/${sample}.bed
## Keep the read pairs that are on the same chromosome and fragment length less than 1000bp.
awk '$1==$4 && $6-$2 < 1000 {print $0}' ${bed}/${sample}.bed >${bed}/${sample}.cleaned.bed
## Only extract the fragment related columns
cut -f 1,2,6 ${bed}/${sample}.cleaned.bed | sort -k1,1 -k2,2n -k3,3n >${bed}/${sample}.fragments.bed
done
samtools faidx $reference
cut -f1,2 $reference_index>$ref1/mm_39_main_chr.sizes
for sample in Sham_WT1 Sham_WT2 Sham_WT3 nerve_crush_WT1 nerve_crush_WT2 nerve_crush_WT3
do
## Extract the 9th column from the alignment sam file which is the fragment length
samtools view -F 0x04 ${sam}/${sample}P_bowtie2.sam | awk -F'\t' 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($9)}' | sort | uniq -c | awk -v OFS="\t" '{print $2, $1/2}' >${sam}/fragmentLen/${sample}_fragmentLen.txt
done
touch ${projPath}/depth.txt
for sample in Sham_WT1 Sham_WT2 Sham_WT3 nerve_crush_WT1 nerve_crush_WT2 nerve_crush_WT3
do
seqDepthDouble=`samtools view -F 0x04 ${sam}/${sample}P_bowtie2.sam | wc -l`
seqDepth=$((seqDepthDouble/2))
echo -e ${sample} $seqDepth>>${projPath}/depth_mainChr.txt
done
sampleList=("Sham_WT1" "Sham_WT2" "Sham_WT3" "nerve_crush_WT1" "nerve_crush_WT2" "nerve_crush_WT3")
seqDepth=(15984588 15361687 23989124 6288377 26387701 16008253)
chromSize=/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/ref/mm_39_main_chr.sizes
do
scale_factor=`echo "10000000 / ${size}" | bc -l`
echo "Scaling factor for ${sampleList[$i]} is: $scale_factor!"
bedtools genomecov -bg -scale $scale_factor -i ${bed}/${sampleList[$i]}.fragments.bed -g $chromSize > ${bedgraph}/${sampleList[$i]}.fragments.normalized.bedgraph
i=$((i+1))
done
seqDepth=(15984588 15361687 23989124 6288377 26387701 16008253)
sampleList=("Sham_WT1" "Sham_WT2" "Sham_WT3" "nerve_crush_WT1" "nerve_crush_WT2" "nerve_crush_WT3")
for i in ${!seqDepth[@]}
do
echo $i
bash $seacr ${bedgraph}/${sampleList[$i]}.fragments.normalized.bedgraph 0.01 non stringent $projPath/peakCalling/SEACR/${sampleList[$i]}_seacr_top0.01.peaks
done
library(repr)
options(repr.plot.width=7, repr.plot.height=10)
mappedReads <- idxstatsBam("/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/alignment/bam/nerve_crush_WT1_deduplicated.bam")
TotalMapped <- sum(mappedReads[, "mapped"])
ggplot(mappedReads, aes(x = seqnames, y = mapped)) + geom_bar(stat = "identity", fill="lightblue") +
coord_flip()+
theme(axis.text.x = element_text(color = "black", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
axis.text.y = element_text(color = "black", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),
axis.title.x = element_text(color = "black", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
axis.title.y = element_text(color = "black", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
plot.title = element_text(size=20, hjust = .5, vjust = .5, face="bold"))+
ggtitle("Mapped reads of nerve_crush_WT1")
bam="/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/alignment/bam/"
peaks="/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/peakCalling/SEACR/"
SampleID<-c("Sham_WT1", "Sham_WT2", "Sham_WT3", "nerve_crush_WT1", "nerve_crush_WT2", "nerve_crush_WT3")
Tissue<-c("Sham", "Sham", "Sham", "nerve_crush", "nerve_crush", "nerve_crush")
Factor<-c("H3K27ac", "H3K27ac", "H3K27ac", "H3K27ac", "H3K27ac", "H3K27ac")
Condition<-c("Sham", "Sham", "Sham", "nerve_crush", "nerve_crush", "nerve_crush")
Treatment<-c("Sham", "Sham", "Sham", "nerve_crush", "nerve_crush", "nerve_crush")
Replicate<-c(1, 1, 1, 2, 2, 2)
bamReads<-c(paste0(bam,"Sham_WT1_deduplicated.bam"), paste0(bam,"Sham_WT2_deduplicated.bam"), paste0(bam,"Sham_WT3_deduplicated.bam"), paste0(bam,"nerve_crush_WT1_deduplicated.bam"), paste0(bam,"nerve_crush_WT2_deduplicated.bam"), paste0(bam,"nerve_crush_WT3_deduplicated.bam"))
Peaks<-c(paste0(peaks,"Sham_WT1_seacr_top0.01.peaks.stringent.bed"), paste0(peaks,"Sham_WT2_seacr_top0.01.peaks.stringent.bed"), paste0(peaks,"Sham_WT3_seacr_top0.01.peaks.stringent.bed"), paste0(peaks,"nerve_crush_WT1_seacr_top0.01.peaks.stringent.bed"), paste0(peaks,"nerve_crush_WT2_seacr_top0.01.peaks.stringent.bed"), paste0(peaks,"nerve_crush_WT3_seacr_top0.01.peaks.stringent.bed"))
PeakCaller<-c("bed", "bed", "bed", "bed", "bed", "bed")
sample<-data.frame(SampleID, Tissue, Factor, Condition, Treatment, Replicate, bamReads, Peaks, PeakCaller)
Creating the dba object
H3K27<-dba(sampleSheet=sample)
Sham_WT1 Sham H3K27ac Sham Sham 1 bed
Sham_WT2 Sham H3K27ac Sham Sham 1 bed
Sham_WT3 Sham H3K27ac Sham Sham 1 bed
nerve_crush_WT1 nerve_crush H3K27ac nerve_crush nerve_crush 2 bed
nerve_crush_WT2 nerve_crush H3K27ac nerve_crush nerve_crush 2 bed
nerve_crush_WT3 nerve_crush H3K27ac nerve_crush nerve_crush 2 bed
H3K27
6 Samples, 16381 sites in matrix (21285 total):
ID Tissue Factor Condition Treatment Replicate
1 Sham_WT1 Sham H3K27ac Sham Sham 1
2 Sham_WT2 Sham H3K27ac Sham Sham 1
3 Sham_WT3 Sham H3K27ac Sham Sham 1
4 nerve_crush_WT1 nerve_crush H3K27ac nerve_crush nerve_crush 2
5 nerve_crush_WT2 nerve_crush H3K27ac nerve_crush nerve_crush 2
6 nerve_crush_WT3 nerve_crush H3K27ac nerve_crush nerve_crush 2
Intervals
1 15750
2 16795
3 16400
4 13719
5 16416
6 16483
summary(H3K27$binding[,3]-H3K27$binding[,2])
Min. 1st Qu. Median Mean 3rd Qu. Max.
260 5781 8190 12021 13381 218922
Recentering peaks was done at 5000 (it is around the first quartile value)
sample_counted_5000<-dba.count(H3K27, summits=5000)
Plotting heatmap
options(repr.plot.width=10, repr.plot.height=10)
dba.plotHeatmap(sample_counted_5000)
PCA plot
options(repr.plot.width=10, repr.plot.height=10)
dba.plotPCA(sample_counted_5000, attributes=DBA_TISSUE, label=DBA_ID)
The PCA plot shows that the nerve_crush_WT1 sample is an outlier, and might need to be removed. But in the current analysis
we won't remove that sample.
Normalizing the data
sample_counted_5000 <- dba.normalize(sample_counted_5000)
norm <- dba.normalize(sample_counted_5000, bRetrieve=TRUE)
norm
$norm.method 'lib'
$norm.factors 0.981968318328968 0.920722275438958 1.27213454965902 0.447159454402593 1.40984681155239 0.968168590618077
$lib.method 'full'
$lib.sizes 11882905 11141760 15394238 5411125 17060709 11715913
$control.subtract TRUE
$filter.value 1
Establishing a model design and contrast
Before running the differential analysis, we need to tell DiffBind how to model the data, including which comparison(s) we are interested in. This is done using the dba.contrast function, as follows:
sample_counted_5000<- dba.contrast(sample_counted_5000)
sample_counted_5000 <- dba.analyze(sample_counted_5000)
dba.show( sample_counted_5000, bContrasts=TRUE)
Applying Blacklist/Greylists...
No genome detected.
Analyzing...
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
A data.frame: 1 × 6
Factor Group Samples Group2 Samples2 DB.DESeq2
Treatment nerve_crush 3 Sham 3 1726
plot(sample_counted_5000, contrast=1)
Retrieving the differentially binding sites
sample_counted_5000.DB <- dba.report(sample_counted_5000)
sample_counted_5000.DB
GRanges object with 1726 ranges and 6 metadata columns:
seqnames ranges strand | Conc Conc_nerve_crush
|
462 chr1 119154467-119164467 * | 7.02148 7.65962
12331 chr7 46754071-46764071 * | 7.67465 8.21803
6652 chr18 42764517-42774517 * | 7.38153 7.96976
4393 chr14 70261305-70271305 * | 7.50893 8.03424
6081 chr17 44806598-44816598 * | 7.07149 7.66559
... ... ... ... . ... ...
1557 chr10 121961958-121971958 * | 6.71243 6.47058
387 chr1 91868519-91878519 * | 7.95556 8.09568
5601 chr16 49767488-49777488 * | 7.36808 7.52847
12249 chr7 36402601-36412601 * | 7.70801 7.53072
4245 chr14 54425111-54435111 * | 7.57095 7.36509
Conc_Sham Fold p-value FDR
462 5.84908 1.70877 3.42984e-29 5.10395e-25
12331 6.79269 1.38278 5.41112e-28 4.02614e-24
6652 6.37168 1.51013 6.07147e-26 3.01165e-22
4393 6.67439 1.26758 2.15278e-24 7.92300e-21
6081 6.04372 1.49343 2.66212e-24 7.92300e-21
... ... ... ... ...
1557 6.91950 -0.255146 0.00575962 0.0497507
387 7.80035 0.199681 0.00576039 0.0497507
5601 7.18760 0.221576 0.00576518 0.0497632
12249 7.86588 -0.208337 0.00577197 0.0497929
4245 7.75106 -0.223987 0.00578037 0.0498364
-------
seqinfo: 20 sequences from an unspecified genome; no seqlengths
sample_counted_5000.DB <- dba.report(sample_counted_5000)
sample_counted_5000.DB
df1 <- data.frame(samples=c("Sham", "Nerve_crush"),
peaks=c(285, 1176))
head(df1)
options(repr.plot.width=7, repr.plot.height=7)
p<-ggplot(data=df1, aes(x=samples, y=peaks)) +
geom_text(aes(label=peaks), vjust=-1)+
geom_bar(stat="identity", color="blue", fill="green")+ylim(0, 1500)+
theme(axis.text.x = element_text(size = 14), axis.title.x = element_text(size = 16, color="black", family = "Times"),
axis.text.y = element_text(size = 14), axis.title.y = element_text(size = 16, family = "Times"),
plot.title = element_text(size = 20, face = "bold", color = "black", hjust = 0.5, family = "Times"))+
ggtitle("The total number of differentially bound peaks")
p
ggsave(p, file="/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/ratings.pdf", scale=1)
toBlkList <- "/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/BlackList_mm39.bed"
blkList <- import.bed(toBlkList)
Saving the sample_counted_5000.DB object as a dataframe
The sample_counted_5000.DB object is a GRanges object
df2<-as.data.frame(sample_counted_5000.DB)
write.table(df2, file="/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/peakscounted_5000.csv", sep=',')
Downloading saved data
df2=read.csv("/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/peakscounted_5000.csv")
Converting dataframe to a GRanges object
Peaks_GR <- GRanges(seqnames = df2[, "seqnames"], IRanges(df2[,
"start"], df2[, "end"]))
values(Peaks_GR) <- DataFrame(Fold = df2$Fold, FDR = df2$FDR)
Blacklist removal
Peaks_GR <- Peaks_GR[!Peaks_GR %over% blkList]
Peaks_GR
Peak annotation
peakAnno <- annotatePeak(Peaks_GR, tssRegion = c(-2000, 2000), TxDb = TxDb.Mmusculus.UCSC.mm39.knownGene,
annoDb = "org.Mm.eg.db")
Vizualising peak annotation
The peakAnno object contains the information on annotation of individual peaks to genes.
p<-plotAnnoBar(peakAnno)+
theme(axis.text.x = element_text(color = "black", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
axis.text.y = element_text(color = "black", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),
axis.title.x = element_text(color = "black", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
axis.title.y = element_text(color = "black", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
plot.title = element_text(size=20, hjust = .5, vjust = .5, face="bold"))
p
ggsave(p, file="/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/peakAnno1.png", scale=1)
p<-plotDistToTSS(peakAnno)+theme(axis.text.x = element_text(color = "black", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
axis.text.y = element_text(color = "black", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),
axis.title.x = element_text(color = "black", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
axis.title.y = element_text(color = "black", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
plot.title = element_text(size=20, hjust = .5, vjust = .5, face="bold"))
p
ggsave(p, file="/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/peakAnno2.png", scale=1)
Converting the peakAnno object into the data frame
peakAnno_df<-as.data.frame(peakAnno)
Saving the peakAnno_df
write.table(peakAnno_df, "/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/peakAnno_df.csv", sep=",")
peakAnno_DF <- read.csv("/Users/alexeyefanov/Desktop/Programming/rnaseq_analysis/CUT_RUN/palmisano/data/peakAnno_df.csv")
Gene ontology and functional testing
Extraction of all genes which are included in the TxDb object to use as our universe of genes for pathway enrichment.
allGeneGR <- genes(TxDb.Mmusculus.UCSC.mm39.knownGene)
allGeneGR[1:2, ]
GRanges object with 2 ranges and 1 metadata column:
seqnames ranges strand | gene_id
|
100009600 chr9 20973689-20984392 - | 100009600
100009609 chr7 84584773-84613323 - | 100009609
-------
seqinfo: 61 sequences (1 circular) from mm39 genome
Gene Set testing
Transcription factors or epigenetic marks may act on specific sets of genes grouped by a common biological feature (shared Biological function, common regulation in RNAseq experiment etc).
A frequent step in CUT&TAG analysis is to test whether common gene sets are enriched for transcription factor binding or epigenetic marks.
Sources of well curated gene sets include GO consortium (gene’s function, biological process and cellular localisation), REACTOME (Biological Pathways) and MsigDB (Computationally and Experimentally derived).
Gene Set testing for CUT&TAG Gene set enrichment testing may be performed on the sets of genes with peaks associated to them. In this example we will consider genes with peaks within 2000bp of a gene’s TSS.
annotatedPeaksGR_TSS<-peakAnno_DF%>%filter(grepl("Promoter",annotation))
annotatedPeaksGR_TSS<-annotatedPeaksGR_TSS[order(annotatedPeaksGR_TSS$Fold, decreasing=TRUE),]
annotatedPeaksGR_TSS<-annotatedPeaksGR_TSS[!duplicated(annotatedPeaksGR_TSS$geneId),]
genesWithPeakInTSS<-annotatedPeaksGR_TSS$geneId
genesWithPeakInTSS[1:2]
Once we have our gene list and the universe of genes in the same format, we can use them in the enrichGO function to perform gene ontology analysis
For the ont argument, we can choose between the “BP”, “MF”, and “CC” subontologies, or “ALL” for all three.
GO_result <- enrichGO(gene = genesWithPeakInTSS, universe = allGeneIDs, OrgDb = org.Mm.eg.db,
ont = "BP")
GO_result_df <- data.frame(GO_result)
GO_result_df[1:5, ]
A data.frame: 5 × 9
ID Description GeneRatio BgRatio pvalue p.adjust qvalue geneID Count
GO:0007015 GO:0007015 actin filament organization 33/555 476/23649 3.584496e-08 0.0001335229 0.0001142948 19230/223864/56699/677884/104215/19353/18016/22437/107702/228543/442801/21916/50876/384619/18810/17918/20411/211401/23790/23999/56320/104027/50918/20779/13430/70435/11853/20971/22388/225115/541307/16765/76707 33
GO:0032535 GO:0032535 regulation of cellular component size 30/555 415/23649 6.054086e-08 0.0001335229 0.0001142948 19230/223864/56699/20358/19353/14533/22418/105722/20351/20354/68585/21916/50876/239667/16531/243743/23999/53416/15519/56320/50918/97998/13430/56637/70435/20562/22388/225115/541307/20499 30
GO:0007264 GO:0007264 small GTPase-mediated signal transduction 30/555 438/23649 1.958196e-07 0.0002879201 0.0002464579 223864/330662/56699/70527/12614/104215/19353/319939/216963/19158/240057/12977/20354/56508/228543/68585/16706/228355/17532/71544/26431/20779/13430/70727/20166/11853/104886/75687/14766/16765 30
GO:0008361 GO:0008361 regulation of cell size 20/555 223/23649 3.490232e-07 0.0003848853 0.0003294595 223864/20358/19353/22418/105722/20351/20354/68585/239667/16531/243743/23999/53416/15519/56320/97998/13430/56637/20562/20499 20
GO:0051348 GO:0051348 negative regulation of transferase activity 20/555 230/23649 5.718747e-07 0.0005045079 0.0004318557 18016/235584/13197/19246/70012/18810/14869/211770/22695/56013/23790/57320/18768/231103/12367/606496/97998/20779/72400/94245 20
Making the enrichment map
GO_result_plot <- pairwise_termsim(GO_result)
emapplot(GO_result_plot, showCategory = 20)
Making barplot
barplot(GO_result_plot, showCategory = 20)
Conclusion
- Totally, 1461 DB peaks were detected in the current study.
- 1176 DB peaks were detected in the nerve_crush samples vs 285 in the sham samples.
- 496 DB peaks were detected in the nerve_crush samples vs 7 in the sham samples, with threshold abs(logFoldChange)>0.5.
- According to the study data, activation of transcription occurs in the nerve_crush samples in comparison with the sham samples.
- GO terms corresponding to cell shape, cytoskeleton reorganization, and locomotion changes were enriched in the the nerve_crush samples in comparison with the sham samples
Back to bioinformatics project page: Back to bioinformatics project page
Back to home: Back to home