I am currently using some of the functionality implemented in scLVM to analyse your single-cell data.
I've made a few modifications to the plotting/identifying function getVariableGenes(... method="fdr" ...) that might be of interest as extra features (I've commented out the sections I have not updated):
# -----------------------------------------------------------------
# modification of getVariableGenes() to add more parameters:
# - (required) ERCC counts for overlay on plot
# - modifiable min bio dispersion parameter
# - adjustable ylim
# - possibility to have an interactive plot to identify points (genes)
# within a hand-drawn polygon
# - returns plotted X and Y values
# (& interactively selected points if applicable)
# for futher manipulation
getVariableGenes <- function (
nCountsEndo, nCountsERCC, fit, method = "fit", threshold = 0.1, minBiolDisp=0.5,
ylim = NULL,
fit_type = NULL, sfEndo = NULL, sfERCC = NULL, plot = T,
interactive=FALSE) {
if (!(method %in% c("fdr", "fit"))) {
stop("'method' needs to be either 'fdr' or 'fit'")
}
if (is.null(fit_type)) {
print("No 'fit_type' specified. Trying to guess its from parameter names")
if ("a0" %in% names(coefficients(fit)) & "a1tilde" %in%
names(coefficients(fit))) {
fit_type = "counts"
}
else {
if ("a" %in% names(coefficients(fit)) & "k" %in%
names(coefficients(fit))) {
fit_type = "log"
}
else {
if (is.call(fit$call)) {
fit_type = "logvar"
}
}
}
print(paste("Assuming 'fit_type' is ", "'", fit_type,
"'", sep = ""))
}
if (is.null(fit_type)) {
stop("Couldn't guess fit_type. Please specify it or run the getTechincalNoise \n function to obtain the fit")
}
if (!(fit_type %in% c("counts", "log", "logvar")) & !is.null(fit_type)) {
stop("'fit_type' needs to be either 'fdr' or 'fit'")
}
if (method == "fdr" & fit_type != "counts") {
stop("method='fdr', can only be used with fit_type 'counts'")
}
if (method == "fdr" & (is.null(sfERCC) | is.null(sfEndo))) {
stop("Please specify sfERCC and sfEndo when using method='fdr'")
}
if (method == "fdr") {
meansEndo <- rowMeans(nCountsEndo)
varsEndo <- rowVars(nCountsEndo)
cv2Endo <- varsEndo/meansEndo^2
meansERCC <- rowMeans(nCountsERCC)
varsERCC <- rowVars(nCountsERCC)
cv2ERCC <- varsERCC/meansERCC^2
minBiolDisp <- (minBiolDisp^2)
xi <- mean(1/sfERCC)
m <- ncol(nCountsEndo)
psia1thetaA <- mean(1/sfERCC) + (coefficients(fit)["a1tilde"] - xi) * mean(sfERCC/sfEndo)
cv2thA <- coefficients(fit)["a0"] + minBiolDisp + coefficients(fit)["a0"] * minBiolDisp
testDenomA <- (meansEndo * psia1thetaA + meansEndo^2 * cv2thA)/(1 + cv2thA/m)
pA <- 1 - pchisq(varsEndo * (m - 1)/testDenomA, m - 1)
padjA <- p.adjust(pA, "BH")
print(table(padjA < 0.1))
is_het = padjA < threshold
is_het[is.na(is_het)] = FALSE
if (plot == TRUE) {
if (is.null(ylim)) ylim <- c(0.1, 250)
plot(meansEndo, cv2Endo, log = "xy", col = 1 + is_het,
ylim = ylim, xlab = "Mean Counts", ylab = "CV2 Counts")
xg <- 10^seq(-3, 5, length.out = 100)
lines(xg, coefficients(fit)[1] + coefficients(fit)[2]/xg,
lwd = 2, col = "green")
try(points(meansERCC, cv2ERCC, pch = 20, cex = 1,
col = "blue"))
legend("bottomleft", c("Endo. genes", "Var. genes",
"ERCCs", "Fit"), pch = c(1, 1, 20, NA),
lty = c(NA, NA, NA, 1),
col = c("black", "red", "blue", "green"),
cex = 0.7)
}
}
# if (method == "fit" & fit_type == "log") {
# LCountsEndo <- log10(nCountsEndo + 1)
# LmeansEndo <- rowMeans(LCountsEndo)
# Lcv2Endo = rowVars(LCountsEndo)/LmeansEndo^2
# is_het = (fit$opts$offset * coefficients(fit)["a"] *
# 10^(-coefficients(fit)["k"] * LmeansEndo) < Lcv2Endo) &
# LmeansEndo > fit$opts$minmean
# if (plot == TRUE) {
# plot(LmeansEndo, Lcv2Endo, log = "y", col = 1 +
# is_het, xlab = "meansLogEndo", ylab = "cv2LogEndo")
# xg <- seq(0, 5.5, length.out = 100)
# lines(xg, fit$opts$offset * coefficients(fit)[1] *
# 10^(-coefficients(fit)[2] * xg), lwd = 2, col = "green")
# legend("bottomright", c("Endo. genes", "Var. genes",
# "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1),
# col = c("black", "red", "blue"), cex = 0.7)
# }
# }
# if (method == "fit" & fit_type == "counts") {
# meansEndo <- rowMeans(nCountsEndo)
# varsEndo <- rowVars(nCountsEndo)
# cv2Endo <- varsEndo/meansEndo^2
# is_het = (coefficients(fit)[[1]] + coefficients(fit)[[2]]/meansEndo) <
# cv2Endo
# if (plot == TRUE) {
# if (is.null(ylim)) ylim <- c(0.1, 95)
# plot(meansEndo, cv2Endo, log = "xy", col = 1 + is_het,
# ylim = ylim, xlab = "Mean Counts", ylab = "CV2 Counts")
# xg <- 10^seq(-3, 5, length.out = 100)
# lines(xg, coefficients(fit)[1] + coefficients(fit)[2]/xg,
# lwd = 2, col = "green")
# legend("bottomright", c("Endo. genes", "Var. genes",
# "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1),
# col = c("black", "red", "green"), cex = 0.7)
# }
# }
# if (method == "fit" & fit_type == "logvar") {
# LCountsEndo <- log10(nCountsEndo + 1)
# LmeansEndo <- rowMeans(LCountsEndo)
# LVarsEndo <- rowVars(LCountsEndo)
# xg = LmeansEndo
# Var_techEndo_logfit_loess = predict(fit, LmeansEndo)
# minVar_Endo = min(LVarsEndo[LmeansEndo > 2.5])
# if (any(xg > 2.5 & Var_techEndo_logfit_loess < 0.6 *
# minVar_Endo)) {
# idx = which(xg > 2.5 & Var_techEndo_logfit_loess <
# 0.6 * minVar_Endo)
# Var_techEndo_logfit_loess[idx] = 0.6 * minVar_Endo
# }
# is_het = (Var_techEndo_logfit_loess < LVarsEndo) & LmeansEndo >
# 0.3
# print(sum(is_het))
# if (plot == TRUE) {
# plot(LmeansEndo, LVarsEndo, log = "y", col = 1 +
# is_het, xlab = "meansLogEndo", ylab = "varsLogEndo")
# xg <- seq(0, 5.5, length.out = 100)
# Var_techEndo_logfit_loess = predict(fit, xg)
# if (any(xg > 2.5 & Var_techEndo_logfit_loess < 0.6 *
# minVar_Endo)) {
# idx_1 = which(xg > 2.5 & Var_techEndo_logfit_loess <
# 0.6 * minVar_Endo)[1]
# idx_end = length(Var_techEndo_logfit_loess)
# Var_techEndo_logfit_loess[idx_1:idx_end] = 0.6 *
# minVar_Endo
# }
# lines(xg, Var_techEndo_logfit_loess, lwd = 2, col = "green")
# legend("bottomright", c("Endo. genes", "Var. genes",
# "Fit"), pch = c(1, 1, NA), lty = c(NA, NA, 1),
# col = c("black", "red", "green"), cex = 0.7)
# }
# }
if (exists("meansEndo")) X <- meansEndo
if (exists("LmeansEndo")) X <- LmeansEndo
if (exists("cv2Endo")) Y <- cv2Endo
if (exists("Lcv2Endo")) Y <- Lcv2Endo
selected <- NULL
if (interactive) {
if(!require(sp)) stop("Interactiveness requires package sp")
if(!require(splancs)) stop("Interactiveness requires package splancs.")
poly <- getpoly( quiet=TRUE )
selected <- names(X)[point.in.polygon(X, Y, poly[,1], poly[,2])>0]
}
return( list(
X = X,
Y = Y,
is_het = is_het,
selected = selected
))
}