Last updated: 2015-09-11
Code version: 37f57b1c3b81f30e213726dfcc3da532197ac224
We investigated potential ordering effect of wells on each place on gene expression across the 9 plates (batches). Leng et al. discussed ordering effect in single-cell RNA-seq experiments using Fluidigm C1 and implemented an algorithm that detects ordering effect of wells on gene expression in OEFinder.
source("functions.R")
library(edgeR)
Loading required package: limma
# Packages required to start OEFinder
# library(shiny)
# library(gdata)
# library(shinyFiles)
# library(EBSeq)
Input annotation
anno <- read.table("../data/annotation.txt", header = TRUE,
stringsAsFactors = FALSE)
head(anno)
individual batch well sample_id
1 19098 1 A01 NA19098.1.A01
2 19098 1 A02 NA19098.1.A02
3 19098 1 A03 NA19098.1.A03
4 19098 1 A04 NA19098.1.A04
5 19098 1 A05 NA19098.1.A05
6 19098 1 A06 NA19098.1.A06
Input read counts.
reads <- read.table("../data/reads.txt", header = TRUE,
stringsAsFactors = FALSE)
Input molecule counts
molecules <- read.table("../data/molecules.txt", header = TRUE, stringsAsFactors = FALSE)
Remove bulk samples
single_samples <- anno$well != "bulk"
anno_single <- anno[ which(single_samples), ]
molecules_single <- molecules[ , which(single_samples)]
reads_single <- reads[ , which(single_samples)]
stopifnot(ncol(molecules_single) == nrow(anno_single),
colnames(molecules_single) == anno_single$sample_id)
Prepare ERCC data
gene_rows_ercc <- grep("ERCC", rownames(molecules_single))
dim(molecules_single[gene_rows_ercc, ])
[1] 92 864
Output single cell samples to txt files.
if (!file.exists("../data/molecules-single-ercc.txt")) {
write.table(molecules_single[gene_rows_ercc,],
file = "../data/molecules-single-ercc.txt",
quote = FALSE,
col.names = TRUE, row.names = TRUE)
}
if (!file.exists("../data/reads-single-ercc.txt")) {
write.table(reads_single[gene_rows_ercc,],
file = "../data/reads-single-ercc.txt",
quote = FALSE,
col.names = TRUE, row.names = TRUE)
}
Prepare capture site identification file. A txt file with one column of capture site ID (A, B, C, …, H).
require(stringr)
Loading required package: stringr
capture_site <- str_extract(anno_single$well, "[aA-zZ]+")
table(capture_site)
capture_site
A B C D E F G H
108 108 108 108 108 108 108 108
Save capture_site to a txt file.
if (!file.exists("../data/capture-site-ercc.txt")) {
write.table(data.frame(site = capture_site),
file = "../data/capture-site-ercc.txt",
quote = FALSE,
col.names = FALSE, row.names = FALSE)
}
For unnormalized data, OEFinder defaults the DESeq normalization method.
Upload molecules-single-ercc.txt and capture-site-single-ercc.txt to OEFinder Shiny GUI interface.
Output to singleCellSeq/data/OEFinder.
# runGitHub("OEFinder", "lengning")
OE_ercc <- read.csv("../data/OEFinder/ercc-OEgenes.csv",
stringsAsFactors = FALSE,
quote = "\"", sep = ",", header = TRUE)
colnames(OE_ercc) <- c("genes", "pvalue")
head(OE_ercc)
[1] genes pvalue
<0 rows> (or 0-length row.names)
str(OE_ercc)
'data.frame': 0 obs. of 2 variables:
$ genes : logi
$ pvalue: logi