Last updated: 2015-05-18

Code version: d6e7e4826688a6225653e6ed572bd3d8600ed4ec

This is the QC of cell/library by looking at the number of cell captured in each well, the ERCC ratio, the number of unmapped reads, and the number of mapped reads. Higher ERCC ratio indicates that the RNA amount from the cell is lower, which migh be caused by cell death. The higher number of unmapped reads indicates lower quality of library preparation.

Library QC

##### list all the files
 wanted.files <- list.files(path="/mnt/gluster/home/ptung/test/ERCC/", pattern= "[0-9].counts.txt.summary")

##### get info you need
 all.tables <- lapply(wanted.files,function(x){
     read.table(paste("/mnt/gluster/home/ptung/test/ERCC/",x,sep=""),header=TRUE)[c(1,4,5),]
 })

#### create ERCC ratios
ERCC.ratios <- lapply(all.tables,function(x){
    x[1,2]/(sum(x[,2]))
})
ERCC.ratios <- unlist(ERCC.ratios)

#### create unmapped ratios
unmapped.ratios <- lapply(all.tables,function(x){
    x[3,2]/(sum(x[,2]))
})
unmapped.ratios <- unlist(unmapped.ratios)

##### create number of mapped reads
num.of.mapped.reads <- lapply(all.tables,function(x){
    (sum(x[,2]))-x[3,2]
})
num.of.mapped.reads <- unlist(num.of.mapped.reads)

##### create the table with all into
data1 <- data.frame(wanted.files,ERCC.ratios,unmapped.ratios,num.of.mapped.reads)

##### remove 4 individual cells
data1 <- data1[(data1$num.of.mapped.reads < 50000000),]

##### cell number capture on C1
data.cell.num <- read.csv("/mnt/gluster/home/ptung/LCL/cell_number.csv")
data1 <- data.frame(data1,data.cell.num)

##### QC cell number
index.c <- data1$cell.num == 1

##### QC mapped reads
index1 <- (data1[,"num.of.mapped.reads"] > 2500000)
sum(index1)
[1] 81
##### QC ERCC ratios
index2 <- (data1[,"ERCC.ratios"] < 0.05)
sum(index2)
[1] 83
##### QC unmapped ratios
index3 <- (data1[,"unmapped.ratios"] < 0.3)
sum(index3)
[1] 83
##### QC final ( | or, & and)
index.final <- index1 & index2 & index3 & index.c
index.final
 [1]  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
[12]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE
[23]  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
[34]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
[45]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[56]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
[67]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
[78]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[89] FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE
sum(index.final)
[1] 77
#######  keep the names of good cells
keep.cell <- data1$wanted.files[index.final]

######## plot cells with color coded
color.index <- as.numeric(index.final)+1

plot(data1[,c(4,2)],col=color.index)

plot of chunk unnamed-chunk-1

plot(data1[,c(4,3)],col=color.index)

plot of chunk unnamed-chunk-1

plot(data1[,4]/1000000,data1[,2]*100,col=color.index,pch=as.character(data1$cell.num),xlab="Number of mapped reads (million)",ylab="Spike-In (%)")
legend(12,25,c("Cells Removed","Cells Retained"),col=1:2,pch=20)

plot of chunk unnamed-chunk-1

plot(data1[,4]/1000000,data1[,3]*100,col=color.index,pch=as.character(data1$cell.num),xlab="Number of mapped reads (million)",ylab="Unmapped reads (%)")
legend(12,70,c("Cells Removed","Cells Retained"),col=1:2,pch=20)

plot of chunk unnamed-chunk-1

##### plot the good cells only
plot(data1[index.final,c(2,4)],pch=data1$cell.num[index.final],col=color.index[index.final])

plot of chunk unnamed-chunk-1

Session information

sessionInfo()
R version 3.1.1 (2014-07-10)
Platform: x86_64-unknown-linux-gnu (64-bit)

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] knitr_1.6

loaded via a namespace (and not attached):
[1] digest_0.6.4    evaluate_0.5.5  formatR_0.10    htmltools_0.2.6
[5] rmarkdown_0.5.1 stringr_0.6.2   tools_3.1.1     yaml_2.1.13