Download this Rmd file

Please download and modify your input/output file location

Download RNA-seq data from NCBI

Let’s use published RNA-seq data from Arabidopsis thaliana mutants (npr1-1 and npr4-4D) Ding (2018).

library(tidyverse)
library(stringr)
library(edgeR)

Trimming unnecessary sequences from data (such as adaptors) by trimmomatic

Needs to trim? Do fastqc and see results

trim?

# working in directories containing fastq files
fastfiles<-list.files(pattern="fastq")
system(paste("fastqc ",fastfiles,sep=""))
# and then check reports to see if there are adaptor contamination
# It seems those files were not contaminated so that no need to trim read files.
fastfiles.SE<- fastfiles %>% as_tibble() %>% separate(value,into=c("SRA","type","compress"),sep="\\.") %>% separate(SRA,into=c("SRA","pair"),sep="_") %>% group_by(SRA) %>%summarize(num=n()) %>% filter(num==1) %>% select(SRA) %>% as_vector()
# trim by trimmomatic (not necessary this time)
# system("makedir trimmed")
#for (x in fastfiles) {
#system(paste("trimmomatic.sh SE -threads 4 ",x," trimmed/trimmed_",x," ILLUMINACLIP:Bradseq_adapter.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36",sep="")) # bug fixed
#}
# Or using inline loop?
#system("for i in *fastq.gz; do trimmomatic.sh SE -threads 4 $i trimmed/trimmed$i ILLUMINACLIP:Bradseq_adapter.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36; done")

Mapping to reference cDNA and creating count data file (copied from Julin’s scripts “01_20170617_Map and normalize”)

read Kallisto manual https://pachterlab.github.io/kallisto/starting.html

Build index

#system("kallisto index -i TAIR10_cdna_20110103_representative_gene_model_kallisto_index TAIR10_cdna_20110103_representative_gene_model.gz")
# This reference seq has been updated in 2012! Redo mapping (101018)
#system("kallisto index -i TAIR10_cdna_20110103_representative_gene_model_updated_kallisto_index TAIR10_cdna_20110103_representative_gene_model_updated")

mapping

# makin directory for mapped 
system("mkdir kallisto_sam_out2")
# reading fastq files
fastqfiles<-list.files(pattern="fastq.gz")
# fastq files with single end 
fastqfiles.SE<- fastqfiles %>% as_tibble() %>% separate(value,into=c("SRA","type","compress"),sep="\\.") %>% separate(SRA,into=c("SRA","pair"),sep="_") %>% group_by(SRA) %>%summarize(num=n()) %>% filter(num==1) %>% select(SRA) %>% as_vector()
for(x in fastqfiles.SE) {
system(paste("kallisto quant -i TAIR10_cdna_20110103_representative_gene_model_updated_kallisto_index  -o kallisto_sam_out2/",x," --single -l 200 -s 40 --pseudobam ",x,"_1.fastq.gz | samtools view -Sb - > kallisto_sam_out2/",x,"/",x,".bam",sep=""))
setwd(paste("kallisto_sam_out2/",x,sep=""))
system(paste("samtools sort *.bam -o ",x,".sorted.bam",sep=""))
system("samtools index *.sorted.bam")
setwd("../../")
}

samtools view -u alignment.sam | samtools sort -@ 4 - output_prefix # https://www.biostars.org/p/263589/

Move the counts to my local computer # Julin used lftp (https://www.networkworld.com/article/2833203/operating-systems/unix--flexibly-moving-files-with-lftp.html)

#In my computer, 
# lftp sftp://kazu@whitney.plb.ucdavis.edu
#  cd NGS/Ding_2018_Cell
#  mirror kallisto_sam_out2

remove unused files

system("cd  2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/kallisto_sam_out2")
system("rm */*.json")
system("rm */*.bam*")

compress tsv files

system("gzip */abundance.tsv")

Get counts into R

kallisto_files <- dir(path = "2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/kallisto_sam_out2",pattern="abundance.tsv",recursive = TRUE,full.names = TRUE)
kallisto_files
getwd()
kallisto_files2<-list.files(path="2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/kallisto_sam_out2")
kallisto_files2
kallisto_names <- str_split(kallisto_files,"/",simplify=TRUE)[,3]
kallisto_names
#![](/post/2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/Scan_QRcode.png)

combine count data in list

counts.list <- lapply(kallisto_files,read_tsv)
names(counts.list) <- kallisto_names
counts.list[[1]]

combine counts data

counts <- sapply(counts.list, dplyr::select, est_counts) %>% 
  bind_cols(counts.list[[1]][,"target_id"],.) 
counts[is.na(counts)] <- 0
colnames(counts) <- sub(".est_counts","",colnames(counts),fixed = TRUE)
counts
counts.updated<-counts

# write counts data
write_csv(counts.updated,"2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/counts.updated.csv.gz")

sample info

#sample.info<-read_csv("2018-10-01-differential-expression-analysis-with-public-available-sequencing-data/SraRunInfo.csv")
sample.info<-read_csv("http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?save=efetch&rettype=runinfo&db=sra&term=PRJNA434313") # Directly from ncbi site. This works!

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   ReleaseDate = col_datetime(format = ""),
##   LoadDate = col_datetime(format = ""),
##   spots = col_integer(),
##   bases = col_double(),
##   spots_with_mates = col_integer(),
##   avgLength = col_integer(),
##   size_MB = col_integer(),
##   InsertSize = col_integer(),
##   InsertDev = col_integer(),
##   Study_Pubmed_id = col_integer(),
##   ProjectID = col_integer(),
##   TaxID = col_integer()
## )

## See spec(...) for full column specifications.

sample.info2<-tibble(SampleName=c("GSM3014771","GSM3014772","GSM3014773","GSM3014774","GSM3014775","GSM3014776","GSM3014777","GSM3014778","GSM3014779","GSM3014780","GSM3014781","GSM3014782","GSM3014783","GSM3014784","GSM3014785","GSM3014786"),sample=c("WT_treated_1","WT_treated_2","WT_untreated_1","WT_untreated_2","npr1-1_treated_1","npr1-1_treated_2","npr1-1_untreated_1","npr1-1_untreated_2","npr4-4D_treated_1","npr4-4D_treated_2","npr4-4D_untreated_1","npr4-4D_untreated_2","npr1-1_npr4-4D_treated_1","npr1-1_npr4-4D_treated_2","npr1-1_npr4-4D_untreated_1","npr1-1_npr4-4D_untreated_2")) # # from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110702
# combine 
sample.info.final <- left_join(sample.info,sample.info2,by="SampleName") %>% dplyr::select(Run,sample,LibraryLayout) %>%  separate(sample,into=c("genotype1","genotype2","treatment","rep"),sep="_",fill="left",remove=FALSE) %>% unite(genotype,genotype1,genotype2,sep="_") %>% mutate(genotype=str_replace(genotype,"NA_","")) %>% unite(group,genotype,treatment,sep="_",remove=FALSE)

check libraries by barplot and MDS plots

counts<-read_csv("2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/counts.updated.csv.gz")

## Parsed with column specification:
## cols(
##   target_id = col_character(),
##   SRR6739807 = col_double(),
##   SRR6739808 = col_double(),
##   SRR6739809 = col_double(),
##   SRR6739810 = col_double(),
##   SRR6739811 = col_double(),
##   SRR6739812 = col_double(),
##   SRR6739813 = col_double(),
##   SRR6739814 = col_double(),
##   SRR6739815 = col_double(),
##   SRR6739816 = col_double(),
##   SRR6739817 = col_double(),
##   SRR6739818 = col_double()
## )

all(colnames(counts)[-1]==sample.info.final[sample.info.final$LibraryLayout=="SINGLE","Run"])

## [1] TRUE

dge <- DGEList(counts[,-1],
               group=as_vector(sample.info.final[sample.info.final$LibraryLayout=="SINGLE","group"]),
               samples=sample.info.final[sample.info.final$LibraryLayout=="SINGLE",],
               genes=counts$target_id)
# Filtering.
## From edgeR User guide (section 2.6. Filtering) "As a rule of thumb, genes are dropped if they can’t possibly be expressed in all the samples for any of the conditions. Users can set their own definition of genes being expressed. Usually a gene is required to have a count of 5-10 in a library to be considered expressed in that library. Users should also filter with count-per-million (CPM) rather than filtering on the counts directly, as the latter does not account for differences in library sizes between samples"
dge.nolow<-dge[rowSums(cpm(dge)> 5) >= 2,] # two replicates in this experiment. 
table(rowSums(cpm(dge)> 5) >= 2)

## 
## FALSE  TRUE 
## 16803 16799

# normalization
dge.nolow <- calcNormFactors(dge.nolow)

# lib.size check
ggplot(dge.nolow$samples,aes(x=sample,y=norm.factors,fill=genotype)) + geom_col() + 
  theme(axis.text.x  = element_text(angle=90, vjust=0.5,size = 7))

MDS plot

mds <- plotMDS(dge.nolow,method = "bcv",labels=dge.nolow$samples$group,gene.selection = "pairwise", ndim=3)

mds.pl <- as_tibble(mds$cmdscale.out) %>% 
  bind_cols(data.frame(sample=row.names(mds$cmdscale.out)),.) %>%
  left_join(dge.nolow$sample,by=c(sample="Run"))

## Warning: Column `sample`/`Run` joining factor and character vector,
## coercing into character vector

mds.pl %>% ggplot(aes(x=V1,y=V2,color=as.factor(treatment),label=genotype)) + geom_text(angle=30) + ggtitle("DIM 1 vs 2")

# edge.nolowR (model with glm)

# set references in genotype and treatment
dge.nolow$samples$genotype<-factor(dge.nolow$samples$genotype,levels=c("WT","npr4-4D","npr1-1"))
dge.nolow$samples$treatment<-factor(dge.nolow$samples$treatment,levels=c("untreated","treated"))
save(dge.nolow,file="2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/dge.nolow.Ding2018.Rdata")
# making model matrix
design.int <- with(dge.nolow$samples, model.matrix(~ genotype*treatment + rep))
# estimateDisp
dge.nolow.int <- estimateDisp(dge.nolow,design = design.int)
## fit linear model
fit.int <- glmFit(dge.nolow.int,design.int)
# get DEGs, treatment effects in Col (rWT = reference WT, rU = reference is untreated)
trt.lrt.rWT.rU <- glmLRT(fit.int,coef = c("treatmenttreated")) 
trt.DEGs.int.rWT.rU <- topTags(trt.lrt.rWT.rU,n = Inf,p.value = 0.05)$table;nrow(trt.DEGs.int.rWT.rU) #

## [1] 7862

# get DEGs, genotype effects in npr1-1 (no matter treated or untreated) (coef=c("genotypenpr1-1","genotypenpr1-1:treatmenttreated"))
genotypenpr1_genotypenpr1.treatmenttreated.lrt.rWT.rU <- glmLRT(fit.int,coef = c("genotypenpr1-1","genotypenpr1-1:treatmenttreated"))
genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU <- topTags(genotypenpr1_genotypenpr1.treatmenttreated.lrt.rWT.rU,n = Inf,p.value = 0.05)$table;nrow(genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU) #

## [1] 6460

# get DEGs, genotype effects in npr4-4D (no matter treated or untreated) (coef=c("genotypenpr4-4D","genotypenpr4-4D:treatmenttreated"))
genotypenpr4_genotypenpr4.treatmenttreated.lrt.rWT.rU <- glmLRT(fit.int,coef = c("genotypenpr4-4D","genotypenpr4-4D:treatmenttreated"))
genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU <- topTags(genotypenpr4_genotypenpr4.treatmenttreated.lrt.rWT.rU,n = Inf,p.value = 0.05)$table;nrow(genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU) # 846

## [1] 854

Download updated gene description

Most updated annotation can be downloaded from TAIR website (for subscribers). I used “TAIR Data 20180630”.

At.gene.name <-read_tsv("https://www.arabidopsis.org/download_files/Subscriber_Data_Releases/TAIR_Data_20180630/gene_aliases_20180702.txt.gz") # Does work from home when I use Pulse Secure.

## Parsed with column specification:
## cols(
##   name = col_character(),
##   symbol = col_character(),
##   full_name = col_character()
## )

# test %>% dplyr::select(name,symbol) %>% group_by(name) %>% spread(key=name,value=symbol)
# test %>% dplyr::select(name,symbol) %>% spread(key=symbol,value=name)
# test %>% dplyr::select(name,symbol) %>% group_by(name) %>% spread(key=symbol,value=name)
# test %>% dplyr::select(name,symbol) %>% group_by(name) %>% summarise(n())
# How to concatenate multiple symbols in the same AGI
At.gene.name <- At.gene.name %>% group_by(name) %>% summarise(symbol2=paste(symbol,collapse=";"),full_name=paste(full_name,collapse=";"))

At.gene.name %>% dplyr::slice(100:110) # OK

## # A tibble: 11 x 3
##    name     symbol2          full_name                                    
##    <chr>    <chr>            <chr>                                        
##  1 AT1G018… AtGEN1;GEN1      NA;ortholog of HsGEN1                        
##  2 AT1G019… ATSBT1.1;SBTI1.1 NA;NA                                        
##  3 AT1G019… AtGET3a;GET3a    Guided Entry of Tail-anchored protein 3a;Gui…
##  4 AT1G019… ARK2;AtKINUb     armadillo repeat kinesin 2;Arabidopsis thali…
##  5 AT1G019… BIG3;EDA10       BIG3;embryo sac development arrest 10        
##  6 AT1G019… AtBBE1;OGOX4     NA;oligogalacturonide oxidase 4              
##  7 AT1G020… GAE2             UDP-D-glucuronate 4-epimerase 2              
##  8 AT1G020… SEC1A            secretory 1A                                 
##  9 AT1G020… LAP6;PKSA        LESS ADHESIVE POLLEN 6;polyketide synthase A 
## 10 AT1G020… SPL8             squamosa promoter binding protein-like 8     
## 11 AT1G020… ATCSN7;COP15;CS… ARABIDOPSIS THALIANA COP9 SIGNALOSOME SUBUNI…

Make “output” directory for saving DEG results as csv files

# system("mkdir 2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/output")

Add gene descripion to DEG files

# WT (Col) treatment DEG
trt.DEGs.int.rWT.rU.desc<- trt.DEGs.int.rWT.rU %>% mutate(AGI=str_remove(genes,pattern="\\.[[:digit:]]+$")) %>% left_join(At.gene.name,by=c("AGI"="name"))
write_csv(trt.DEGs.int.rWT.rU.desc,path="2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/output/trt.DEGs.int.rWT.rU.csv")

# npr1-1 (either treated or untreated)
genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.desc <- genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU %>% mutate(AGI=str_remove(genes,pattern="\\.[[:digit:]]+$")) %>% left_join(At.gene.name,by=c("AGI"="name"))
write_csv(genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.desc,"2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/output/genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv")
# npr4-4 (either treated or untreated)
genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU.desc <- genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU %>% mutate(AGI=str_remove(genes,pattern="\\.[[:digit:]]+$")) %>% left_join(At.gene.name,by=c("AGI"="name"))
write_csv(genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU.desc,"2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files/output/genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU.csv")

table for DEGs summary

#DEG.objs<-list.files(path=file.path("2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files","output"),pattern="(DEGs)(.)(\\.csv)") # under construction
DEG.objs<-list.files(path=file.path("2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files","output"),pattern="\\.csv$") # under construction
DEG.objs

## [1] "genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU.csv"
## [2] "genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU.csv"
## [3] "trt.DEGs.int.rWT.rU.csv"

# read csv file
DEG.count.list<-lapply(DEG.objs, function(x) read_csv(paste(file.path("2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files","output"),"/",x,sep="")))

## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC.genotypenpr1.1 = col_double(),
##   logFC.genotypenpr1.1.treatmenttreated = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   symbol2 = col_character(),
##   full_name = col_character()
## )

## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC.genotypenpr4.4D = col_double(),
##   logFC.genotypenpr4.4D.treatmenttreated = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   symbol2 = col_character(),
##   full_name = col_character()
## )

## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   symbol2 = col_character(),
##   full_name = col_character()
## )

names(DEG.count.list)<-gsub(".csv","",DEG.objs)

DEGs.count<-sapply(DEG.count.list,function(x) {
                value <- nrow(x)
                ifelse(is.null(value),0,value)
        }  )
DEGs.count

## genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU 
##                                                       6460 
## genotypenpr4_genotypenpr4.treatmenttreated.DEGs.int.rWT.rU 
##                                                        854 
##                                        trt.DEGs.int.rWT.rU 
##                                                       7862

Plot gene expression pattern

For some reasons, labeller = label_wrap_gen(width=30) does not work here. Needs to make a function for plotting graph later to reduce redundancy of scripts.

# calculate normalized gene expression level
cpm.wide <- bind_cols(dge.nolow$gene,as_tibble(cpm(dge.nolow))) %>% as_tibble() %>% rename(transcript_ID=genes)
cpm.wide

## # A tibble: 16,799 x 13
##    transcript_ID SRR6739807 SRR6739808 SRR6739809 SRR6739810 SRR6739811
##    <chr>              <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
##  1 AT1G50920.1       169.       164.       175.       158.       184.  
##  2 AT1G15970.1        20.0       19.7       15.2       15.6       14.6 
##  3 AT1G73440.1         6.22       6.17       7.39       6.33       7.10
##  4 AT1G75120.1         2.44       2.20       3.55       4.94       5.59
##  5 AT1G17600.1        32.1       36.5       11.1        9.87      12.1 
##  6 AT1G77370.1        34.1       36.3       23.7       25.1       22.9 
##  7 AT1G10950.1       172.       171.       133.       148.       173.  
##  8 AT1G31870.1        40.8       38.5       26.8       26.9       33.3 
##  9 AT1G51360.1         7.40       7.64       9.07      10.4        6.82
## 10 AT1G50950.1         4.45       4.49       3.74       5.16       5.50
## # ... with 16,789 more rows, and 7 more variables: SRR6739812 <dbl>,
## #   SRR6739813 <dbl>, SRR6739814 <dbl>, SRR6739815 <dbl>,
## #   SRR6739816 <dbl>, SRR6739817 <dbl>, SRR6739818 <dbl>

#write_csv(cpm.wide,file.path("2018-10-01-differential-expression-analysis-with-public-available-sequencing-data_files","output","cpm_wide_Ding_2018_Cell.csv.gz"))
# plot
# format data for plot
plot.data<-DEG.count.list[["trt.DEGs.int.rWT.rU"]] %>% left_join(cpm.wide,by=c("genes"="transcript_ID")) %>% unite(AGI_desc,c("AGI","symbol2")) %>% dplyr::select(-LR,-PValue,-genes) 
# plot: use labeller = label_wrap_gen(width=10) for multiple line in each gene name
p1<-plot.data %>% filter(logFC>0 & FDR<1e-100) %>% dplyr::select(-logFC,-logCPM,-FDR,-full_name) %>%  gather(Run,value,-1) %>% left_join(sample.info.final,by="Run") %>% select(-Run,-sample,-group,-LibraryLayout) %>% ggplot(aes(x=fct_relevel(treatment,"untreated"),y=value,color=genotype,shape=rep)) + geom_jitter() + facet_wrap(AGI_desc~.,scale="free",ncol=5,labeller = label_wrap_gen(width=30))+theme(strip.text = element_text(size=6)) + labs(title="WT SA up-regulated genes")
p1

# SA down-regulated genes
plot.data2<-DEG.count.list[["trt.DEGs.int.rWT.rU"]] %>% left_join(cpm.wide,by=c("genes"="transcript_ID")) %>% unite(AGI_desc,c("AGI","symbol2")) %>% dplyr::select(-LR,-PValue,-genes) 
# plot: use labeller = label_wrap_gen(width=10) for multiple line in each gene name
p2<-plot.data2 %>% filter(logFC<0 & FDR<1e-50) %>% dplyr::select(-logFC,-logCPM,-FDR,-full_name) %>%  gather(Run,value,-1) %>% left_join(sample.info.final,by="Run") %>% select(-Run,-sample,-group,-LibraryLayout) %>% ggplot(aes(x=fct_relevel(treatment,"untreated"),y=value,color=genotype,shape=rep)) + geom_jitter() + facet_wrap(AGI_desc~.,scale="free",ncol=5,labeller = label_wrap_gen(width=30))+theme(strip.text = element_text(size=6)) + labs(title="WT SA down-regulated genes")
p2

# npr1-1

# npr1-1 misregulated genes 
plot.data3<-DEG.count.list[["genotypenpr1_genotypenpr1.treatmenttreated.DEGs.int.rWT.rU"]] %>% left_join(cpm.wide,by=c("genes"="transcript_ID")) %>% unite(AGI_desc,c("AGI","symbol2")) %>% dplyr::select(-LR,-PValue,-genes) 
# plot: use labeller = label_wrap_gen(width=10) for multiple line in each gene name
p3<-plot.data3 %>% filter(logFC.genotypenpr1.1>0 & FDR<1e-100) %>% dplyr::select(-logFC.genotypenpr1.1,-logFC.genotypenpr1.1.treatmenttreated,-FDR,-full_name,-logCPM) %>%  gather(Run,value,-1) %>% left_join(sample.info.final,by="Run") %>% select(-Run,-sample,-group,-LibraryLayout) %>% ggplot(aes(x=fct_relevel(treatment,"untreated"),y=value,color=genotype,shape=rep)) + geom_jitter() + facet_wrap(AGI_desc~.,scale="free",ncol=5,labeller = label_wrap_gen(width=30))+theme(strip.text = element_text(size=6)) + labs(title="logFC.genotypenpr1.1>0")
p3

# plot: use labeller = label_wrap_gen(width=10) for multiple line in each gene name
p4<-plot.data3 %>% filter(logFC.genotypenpr1.1<0 & FDR<1e-100) %>% dplyr::select(-logFC.genotypenpr1.1,-logFC.genotypenpr1.1.treatmenttreated,-FDR,-full_name,-logCPM) %>%  gather(Run,value,-1) %>% left_join(sample.info.final,by="Run") %>% select(-Run,-sample,-group,-LibraryLayout) %>% ggplot(aes(x=fct_relevel(treatment,"untreated"),y=value,color=genotype,shape=rep)) + geom_jitter() + facet_wrap(AGI_desc~.,scale="free",ncol=5,labeller = label_wrap_gen(width=30))+theme(strip.text = element_text(size=6)) + labs(title="logFC.genotypenpr1.1<0")
p4

# plot: use labeller = label_wrap_gen(width=10) for multiple line in each gene name
p5<-plot.data3 %>% filter(logFC.genotypenpr1.1.treatmenttreated>0 & FDR<1e-100) %>% dplyr::select(-logFC.genotypenpr1.1,-logFC.genotypenpr1.1.treatmenttreated,-FDR,-full_name,-logCPM) %>%  gather(Run,value,-1) %>% left_join(sample.info.final,by="Run") %>% select(-Run,-sample,-group,-LibraryLayout) %>% ggplot(aes(x=fct_relevel(treatment,"untreated"),y=value,color=genotype,shape=rep)) + geom_jitter() + facet_wrap(AGI_desc~.,scale="free",ncol=5,labeller = label_wrap_gen(width=30))+theme(strip.text = element_text(size=6)) + labs(title="logFC.genotypenpr1.1.treatmenttreated>0")
p5

# plot: use labeller = label_wrap_gen(width=10) for multiple line in each gene name
p6<-plot.data3 %>% filter(logFC.genotypenpr1.1.treatmenttreated<0 & FDR<1e-100) %>% dplyr::select(-logFC.genotypenpr1.1,-logFC.genotypenpr1.1.treatmenttreated,-FDR,-full_name,-logCPM) %>%  gather(Run,value,-1) %>% left_join(sample.info.final,by="Run") %>% select(-Run,-sample,-group,-LibraryLayout) %>% ggplot(aes(x=fct_relevel(treatment,"untreated"),y=value,color=genotype,shape=rep)) + geom_jitter() + facet_wrap(AGI_desc~.,scale="free",ncol=5,labeller = label_wrap_gen(width=30))+theme(strip.text = element_text(size=6)) + labs(title="logFC.genotypenpr1.1.treatmenttreated<0")
p6

Session info

sessionInfo()

## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14.1
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2  edgeR_3.22.5    limma_3.36.5    forcats_0.3.0  
##  [5] stringr_1.3.1   dplyr_0.7.7     purrr_0.2.5     readr_1.1.1    
##  [9] tidyr_0.8.2     tibble_1.4.2    ggplot2_3.1.0   tidyverse_1.2.1
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5 locfit_1.5-9.1   xfun_0.4         splines_3.5.1   
##  [5] haven_1.1.2      lattice_0.20-35  colorspace_1.3-2 htmltools_0.3.6 
##  [9] yaml_2.2.0       utf8_1.1.4       rlang_0.3.0.1    pillar_1.3.0    
## [13] glue_1.3.0       withr_2.1.2      modelr_0.1.2     readxl_1.1.0    
## [17] bindr_0.1.1      plyr_1.8.4       munsell_0.5.0    blogdown_0.10   
## [21] gtable_0.2.0     cellranger_1.1.0 rvest_0.3.2      evaluate_0.12   
## [25] labeling_0.3     knitr_1.21       curl_3.2         fansi_0.4.0     
## [29] broom_0.5.0      Rcpp_1.0.0       scales_1.0.0     backports_1.1.2 
## [33] jsonlite_1.6     hms_0.4.2        digest_0.6.18    stringi_1.2.4   
## [37] bookdown_0.9     grid_3.5.1       cli_1.0.1        tools_3.5.1     
## [41] magrittr_1.5     lazyeval_0.2.1   crayon_1.3.4     pkgconfig_2.0.2 
## [45] xml2_1.2.0       lubridate_1.7.4  assertthat_0.2.0 rmarkdown_1.11  
## [49] httr_1.3.1       rstudioapi_0.8   R6_2.3.0         nlme_3.1-137    
## [53] compiler_3.5.1

Differential expression analysis with public available sequencing data

Kazu Nozue