Last updated: 2015-09-11

Code version: 37f57b1c3b81f30e213726dfcc3da532197ac224

ERCC genes

We investigated potential ordering effect of wells on each place on gene expression across the 9 plates (batches). Leng et al. discussed ordering effect in single-cell RNA-seq experiments using Fluidigm C1 and implemented an algorithm that detects ordering effect of wells on gene expression in OEFinder.

Setup

source("functions.R")
library(edgeR)
Loading required package: limma
# Packages required to start OEFinder
# library(shiny)
# library(gdata)
# library(shinyFiles)
# library(EBSeq)

Prepare single cell data before filtering

Input annotation

anno <- read.table("../data/annotation.txt", header = TRUE,
                   stringsAsFactors = FALSE)
head(anno)
  individual batch well     sample_id
1      19098     1  A01 NA19098.1.A01
2      19098     1  A02 NA19098.1.A02
3      19098     1  A03 NA19098.1.A03
4      19098     1  A04 NA19098.1.A04
5      19098     1  A05 NA19098.1.A05
6      19098     1  A06 NA19098.1.A06

Input read counts.

reads <- read.table("../data/reads.txt", header = TRUE,
                    stringsAsFactors = FALSE)

Input molecule counts

molecules <- read.table("../data/molecules.txt", header = TRUE, stringsAsFactors = FALSE)

Remove bulk samples

single_samples <- anno$well != "bulk"
anno_single <- anno[ which(single_samples), ]
molecules_single <- molecules[ , which(single_samples)]
reads_single <- reads[ , which(single_samples)]
stopifnot(ncol(molecules_single) == nrow(anno_single),
          colnames(molecules_single) == anno_single$sample_id)

Prepare ERCC data

gene_rows_ercc <- grep("ERCC", rownames(molecules_single))
dim(molecules_single[gene_rows_ercc, ])
[1]  92 864

Output single cell samples to txt files.

if (!file.exists("../data/molecules-single-ercc.txt")) {
  write.table(molecules_single[gene_rows_ercc,], 
              file = "../data/molecules-single-ercc.txt",
              quote = FALSE,
              col.names = TRUE, row.names = TRUE)
}

if (!file.exists("../data/reads-single-ercc.txt")) {
  write.table(reads_single[gene_rows_ercc,], 
              file = "../data/reads-single-ercc.txt",
              quote = FALSE,
              col.names = TRUE, row.names = TRUE)
}

Prepare capture site identification file. A txt file with one column of capture site ID (A, B, C, …, H).

require(stringr)
Loading required package: stringr
capture_site <- str_extract(anno_single$well, "[aA-zZ]+")
table(capture_site)
capture_site
  A   B   C   D   E   F   G   H 
108 108 108 108 108 108 108 108 

Save capture_site to a txt file.

if (!file.exists("../data/capture-site-ercc.txt")) {
write.table(data.frame(site = capture_site), 
            file = "../data/capture-site-ercc.txt",
            quote = FALSE,
            col.names = FALSE, row.names = FALSE)
}

Molecule count data

For unnormalized data, OEFinder defaults the DESeq normalization method.

Upload molecules-single-ercc.txt and capture-site-single-ercc.txt to OEFinder Shiny GUI interface.

Output to singleCellSeq/data/OEFinder.

# runGitHub("OEFinder", "lengning")

OE_ercc <- read.csv("../data/OEFinder/ercc-OEgenes.csv",
                     stringsAsFactors = FALSE,
                     quote = "\"", sep = ",", header = TRUE)
colnames(OE_ercc) <- c("genes", "pvalue")
head(OE_ercc)
[1] genes  pvalue
<0 rows> (or 0-length row.names)
str(OE_ercc)
'data.frame':   0 obs. of  2 variables:
 $ genes : logi 
 $ pvalue: logi