read GOseq result table
eGOseqs<-list.files(pattern="enrich.Rdata",path=file.path("2018-10-07-over-representation-analysis-5-useful-tools_files","output_copy_ORA4"))
eGOseqs.list2<-sapply(file.path("2018-10-07-over-representation-analysis-5-useful-tools_files","output_copy_ORA4",eGOseqs),function(x) mget(load(x))) # mget will return the value of the object(or objects) in a list. see https://stackoverflow.com/questions/29398630/load-data-frames-into-list
#names(eGOseqs.list2)
# Make list into data.frame
eGOseqs.list2.summary<-do.call("rbind",eGOseqs.list2)
#head(eGOseqs.list2.summary) # make sure those are file names
rownames(eGOseqs.list2.summary)<-1:nrow(eGOseqs.list2.summary)
#View(eGOseqs.list2.summary)
# eGOseqs.list2.summary (more than three genes in each KO)
eGOseqs.list2.summary<-eGOseqs.list2.summary[eGOseqs.list2.summary$numDEInCat>3,]
# focused on only very significant GO terms
eGOseqs.list2.summary<-eGOseqs.list2.summary[eGOseqs.list2.summary$over_represented_padjust<1e-4,]
head(eGOseqs.list2.summary)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0010200 4.858293e-26 1 91
## 2 GO:0006952 5.724864e-26 1 277
## 3 GO:0042742 1.352377e-23 1 153
## 4 GO:0080167 1.496577e-18 1 80
## 5 GO:0009751 6.725063e-18 1 91
## 6 GO:0006468 3.953862e-15 1 312
## numInCat term ontology over_represented_padjust
## 1 136 response to chitin BP 1.083144e-22
## 2 620 defense response BP 1.083144e-22
## 3 288 defense response to bacterium BP 1.705798e-20
## 4 134 response to karrikin BP 1.415762e-15
## 5 162 response to salicylic acid BP 5.089528e-15
## 6 774 protein phosphorylation BP 2.493569e-12
## Term
## 1 response to chitin
## 2 defense response
## 3 defense response to bacterium
## 4 response to karrikin
## 5 response to salicylic acid
## 6 protein phosphorylation
## Definition
## 1 A process that results in a change in state or activity of a cell or an organism (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a chitin stimulus.
## 2 Reactions, triggered in response to the presence of a foreign body or the occurrence of an injury, which result in restriction of damage to the organism attacked or prevention/recovery from the infection caused by the attack.
## 3 Reactions triggered in response to the presence of a bacterium that act to protect the cell or organism.
## 4 Any process that results in a change in state or activity of a cell or an organism (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a karrikin stimulus. Karrikins are signaling molecules in smoke from burning vegetation that trigger seed germination for many angiosperms (flowering plants).
## 5 Any process that results in a change in state or activity of a cell or an organism (in terms of movement, secretion, enzyme production, gene expression, etc.) as a result of a salicylic acid stimulus.
## 6 The process of introducing a phosphate group on to a protein.
## FC sample
## 1 all genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv
## 2 all genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv
## 3 all genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv
## 4 all genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv
## 5 all genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv
## 6 all genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv
DGE list of interest
# Ding (2018) Cell DEGs
getwd()
## [1] "/Volumes/data_personal/Kazu_blog/content/post"
DEG.objs<-list.files(path=file.path("2018-10-07-over-representation-analysis-5-useful-tools_files","output_copy_ORA4"),pattern="\\.csv$") # under construction
DEG.objs
## [1] "genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv"
## [2] "genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU.csv"
## [3] "trt.DEGs.int.rWT.rU.csv"
# read csv file
DEG.list<-lapply(DEG.objs, function(x) read_csv(file.path("2018-10-07-over-representation-analysis-5-useful-tools_files","output_copy_ORA4",x)))
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC.genotypenpr1.1 = col_double(),
## logFC.genotypenpr1.1.treatmenttreated = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## symbol2 = col_character(),
## full_name = col_character()
## )
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC.genotypenpr4.4D = col_double(),
## logFC.genotypenpr4.4D.treatmenttreated = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## symbol2 = col_character(),
## full_name = col_character()
## )
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## symbol2 = col_character(),
## full_name = col_character()
## )
names(DEG.list)<-gsub(".csv","",DEG.objs)
head(DEG.list)
## $genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU
## # A tibble: 6,694 x 10
## genes logFC.genotypen… logFC.genotypen… logCPM LR PValue FDR
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AT2G… -4.20 -0.720 6.69 1300. 5.30e-283 1.78e-278
## 2 AT5G… 3.26 0.530 4.76 1000. 5.96e-218 1.00e-213
## 3 AT1G… -2.02 -0.475 7.19 629. 2.69e-137 3.01e-133
## 4 AT1G… -2.26 -0.511 7.51 620. 2.83e-135 2.38e-131
## 5 AT3G… -3.16 0.888 5.33 618. 5.42e-135 3.64e-131
## 6 AT5G… -0.212 -6.72 3.89 607. 1.67e-132 9.35e-129
## 7 AT2G… 2.22 1.95 9.21 572. 7.80e-125 3.74e-121
## 8 AT5G… 1.82 1.83 9.40 570. 1.54e-124 6.46e-121
## 9 AT1G… -3.65 -1.52 3.37 565. 1.92e-123 7.17e-120
## 10 AT5G… 0.000149 -7.96 3.77 546. 2.42e-119 8.12e-116
## # ... with 6,684 more rows, and 3 more variables: AGI <chr>,
## # symbol2 <chr>, full_name <chr>
##
## $genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU
## # A tibble: 846 x 10
## genes logFC.genotypen… logFC.genotypen… logCPM LR PValue FDR
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AT5G… 3.04 0.628 4.76 904. 5.00e-197 1.68e-192
## 2 AT3G… -5.45 -0.183 1.98 459. 1.80e-100 3.03e- 96
## 3 AT5G… -3.30 0.354 3.10 444. 4.51e- 97 5.05e- 93
## 4 AT1G… -7.40 -3.70 2.00 432. 1.32e- 94 1.11e- 90
## 5 AT3G… -2.97 -0.301 3.36 372. 1.83e- 81 1.23e- 77
## 6 AT1G… -2.50 0.891 6.30 288. 2.54e- 63 1.42e- 59
## 7 AT2G… -1.42 -0.703 6.69 277. 7.63e- 61 3.66e- 57
## 8 AT5G… -3.04 0.434 4.19 211. 1.75e- 46 7.36e- 43
## 9 AT2G… -1.10 -0.237 5.87 209. 5.09e- 46 1.90e- 42
## 10 AT5G… -1.84 -0.489 2.78 187. 2.68e- 41 9.01e- 38
## # ... with 836 more rows, and 3 more variables: AGI <chr>, symbol2 <chr>,
## # full_name <chr>
##
## $trt.DEGs.int.rWT.rU
## # A tibble: 7,811 x 9
## genes logFC logCPM LR PValue FDR AGI symbol2 full_name
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 AT5G1… 8.57 3.89 675. 7.55e-149 2.54e-144 AT5G… AT-HSP… heat shock…
## 2 AT1G0… 4.00 4.60 592. 7.94e-131 1.33e-126 AT1G… <NA> <NA>
## 3 AT5G2… 6.16 4.19 588. 5.77e-130 6.46e-126 AT5G… ATWRKY… ARABIDOPSI…
## 4 AT4G2… 3.85 5.39 586. 1.92e-129 1.59e-125 AT4G… CRK14 cysteine-r…
## 5 AT5G1… 9.64 3.77 586. 2.37e-129 1.59e-125 AT5G… HSP17.… 17.6 kDa c…
## 6 AT3G2… 4.92 3.36 546. 9.68e-121 5.42e-117 AT3G… <NA> <NA>
## 7 AT4G2… 3.34 6.70 539. 2.78e-119 1.33e-115 AT4G… SGT1A <NA>
## 8 AT5G2… 3.82 6.70 535. 2.42e-118 1.02e-114 AT5G… AtDMR6… NA;DOWNY M…
## 9 AT5G0… 4.14 7.14 523. 1.07e-115 3.98e-112 AT5G… AtHsp7… NA;NA
## 10 AT1G2… 2.87 7.19 511. 3.43e-113 1.15e-109 AT1G… AtWAK1… NA;NA;cell…
## # ... with 7,801 more rows
A case study: all misregulated genes in npr1-1 (Ding (2018) Cell)
names(eGOseqs.list2)[1] # all misregulated genes in npr1-1 genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv
## [1] "2018-10-07-over-representation-analysis-5-useful-tools_files/output_copy_ORA4/genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.all.enrich.Rdata.GO.ORA.result"
results.table<-genes.in.enriched.category(enrich.result=eGOseqs.list2[[1]],gene.list=gsub("(.)(\\.[[:digit:]]+)","\\1",DEG.list[[1]]$genes))
results.table[1:10,1:10] # the first 10 rows and columns
## AGI GO:0010200 GO:0006952 GO:0042742 GO:0080167 GO:0009751
## 1 AT2G14560 0 0 0 0 1
## 2 AT1G21250 0 0 0 0 1
## 3 AT1G35710 0 0 0 0 0
## 4 AT3G02550 0 0 0 0 0
## 5 AT5G12030 0 0 0 0 0
## 6 AT2G34430 0 0 0 0 0
## 7 AT5G54270 0 0 0 0 0
## 8 AT1G13470 0 0 0 0 0
## 9 AT5G12020 0 0 0 0 0
## 10 AT5G22570 0 0 1 0 0
## GO:0006468 GO:0009651 GO:0009416 GO:0009408
## 1 0 0 0 0
## 2 1 0 0 0
## 3 1 0 0 0
## 4 0 0 0 0
## 5 0 1 0 1
## 6 0 0 1 0
## 7 0 0 1 0
## 8 0 0 0 0
## 9 0 1 0 1
## 10 0 0 0 0