## ----package_loads, include=FALSE, eval=TRUE-----------------------------
library(Biobase)
library(knitr)
library(reshape2)
library(ggplot2)

knitr::opts_chunk$set(autodep=TRUE, cache=FALSE, warning=FALSE)
set.seed(0)

## ----init_monocle, include=FALSE, eval=TRUE------------------------------
library(HSMMSingleCell)
library(monocle)
data(HSMM_expr_matrix)
data(HSMM_gene_annotation)
data(HSMM_sample_sheet)

## ----load_data_tables, eval=FALSE----------------------------------------
#  #not run
#  HSMM_expr_matrix <- read.table("fpkm_matrix.txt")
#  HSMM_sample_sheet <- read.delim("cell_sample_sheet.txt")
#  HSMM_gene_annotation <- read.delim("gene_annotations.txt")

## ----build_cell_data_Set_fpkm, eval=FALSE--------------------------------
#  pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet)
#  fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation)
#  HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), phenoData = pd, featureData = fd)

## ----show_expression_family, eval=FALSE----------------------------------
#  # Not run
#  HSMM <- newCellDataSet(count_matrix,
#                         phenoData = pd,
#                         featureData = fd,
#                         expressionFamily=negbinomial.size())

## ----show_sparse_new_cell_dataset, eval=FALSE----------------------------
#  HSMM <- newCellDataSet(as(as.matrix(rpc_matrix), "sparseMatrix"),
#                         phenoData = pd,
#                         featureData = fd,
#                         lowerDetectionLimit=0.5,
#                         expressionFamily=negbinomial.size())

## ----chromium_load, eval=FALSE-------------------------------------------
#  cellranger_pipestance_path <- "/path/to/your/pipeline/output/directory"
#  gbm <- load_cellranger_matrix(cellranger_pipestance_path)
#  
#  gbm_cds <- newCellDataSet(exprs(gbm),
#                           phenoData = pData(gbm),
#                           featureData = fData(gbm),
#                           lowerDetectionLimit=0.5,
#                           expressionFamily=negbinomial.size())

## ----build_cell_data_Set_RPC, eval=TRUE----------------------------------
pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet)
fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation)

# First create a CellDataSet from the relative expression levels
HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix),   
                       phenoData = pd, 
                       featureData = fd,
                       lowerDetectionLimit=0.1,
                       expressionFamily=tobit(Lower=0.1))

# Next, use it to estimate RNA counts
rpc_matrix <- relative2abs(HSMM)

# Now, make a new CellDataSet using the RNA counts
HSMM <- newCellDataSet(as(as.matrix(rpc_matrix), "sparseMatrix"),
                       phenoData = pd, 
                       featureData = fd,
                       lowerDetectionLimit=0.5,
                       expressionFamily=negbinomial.size())


## ----estimate_size_and_dispersion, eval=TRUE-----------------------------
HSMM <- estimateSizeFactors(HSMM)
HSMM <- estimateDispersions(HSMM)

## ----detect_genes, eval=TRUE---------------------------------------------
HSMM <- detectGenes(HSMM, min_expr = 0.1)
print(head(fData(HSMM)))
expressed_genes <- row.names(subset(fData(HSMM), num_cells_expressed >= 10))

## ----show_pData, eval = TRUE---------------------------------------------
print(head(pData(HSMM)))

## ----select_cells, eval = FALSE------------------------------------------
#  valid_cells <- row.names(subset(pData(HSMM), Cells.in.Well == 1 & Control == FALSE & Clump == FALSE & Debris == FALSE & Mapped.Fragments > 1000000))
#  HSMM <- HSMM[,valid_cells]

## ----show_mRNA_totals, eval = TRUE, fig.width = 4, fig.height = 2, fig.align="center"----
pData(HSMM)$Total_mRNAs <- Matrix::colSums(exprs(HSMM))


HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6]
upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) + 2*sd(log10(pData(HSMM)$Total_mRNAs)))
lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) - 2*sd(log10(pData(HSMM)$Total_mRNAs)))
qplot(Total_mRNAs, data=pData(HSMM), color=Hours, geom="density") + 
  geom_vline(xintercept=lower_bound) + 
  geom_vline(xintercept=upper_bound)

HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound & 
             pData(HSMM)$Total_mRNAs < upper_bound]								  
HSMM <- detectGenes(HSMM, min_expr = 0.1)

## ----lognormal_plot, eval=TRUE, fig.width = 3, fig.height = 2, fig.align="center"----
# Log-transform each value in the expression matrix.
L <- log(exprs(HSMM[expressed_genes,]))

# Standardize each gene, so that they are all on the same scale,
# Then melt the data with plyr so we can plot it easily"
melted_dens_df <- melt(Matrix::t(scale(Matrix::t(L))))

# Plot the distribution of the standardized gene expression values.
qplot(value, geom="density", data=melted_dens_df) +  stat_function(fun = dnorm, size=0.5, color='red') + 
  xlab("Standardized log(FPKM)") +
  ylab("Density")

## ----setup_gates, fig.width = 3, fig.height = 4, fig.align="center", eval=TRUE----
MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5"))
ANPEP_id <- row.names(subset(fData(HSMM), gene_short_name == "ANPEP"))

cth <- newCellTypeHierarchy()
cth <- addCellType(cth, "Myoblast", classify_func=function(x) {x[MYF5_id,] >= 1})
cth <- addCellType(cth, "Fibroblast", classify_func=function(x)
       {x[MYF5_id,] < 1 & x[ANPEP_id,] > 1})

## ----count_cells_unsup, fig.width = 3, fig.height = 4, fig.align="center", eval=TRUE----
HSMM <- classifyCells(HSMM, cth, 0.1)

## ----count_cells_unsup_readout, fig.width = 5, fig.height = 5, fig.align="center", eval=TRUE----
table(pData(HSMM)$CellType)

pie <- ggplot(pData(HSMM), aes(x = factor(1), fill = factor(CellType))) +
  geom_bar(width = 1)
pie + coord_polar(theta = "y") + 
  theme(axis.title.x=element_blank(), axis.title.y=element_blank())

## ----cluster_cells_unsup_gene_pick, fig.width = 5, fig.height = 4, fig.align="center", eval=TRUE----
disp_table <- dispersionTable(HSMM)
unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)
HSMM <- setOrderingFilter(HSMM, unsup_clustering_genes$gene_id)
plot_ordering_genes(HSMM)

## ----cluster_cells_unsup_no_covariate, fig.width = 4, fig.height = 4, fig.align="center", eval=TRUE, warning=FALSE----
# HSMM@auxClusteringData[["tSNE"]]$variance_explained <- NULL
plot_pc_variance_explained(HSMM, return_all = F) # norm_method = 'log',
HSMM <- reduceDimension(HSMM, max_components=2, num_dim = 6, 
                        reduction_method = 'tSNE', verbose = T) 
HSMM <- clusterCells(HSMM,
                     num_clusters=2)
plot_cell_clusters(HSMM, 1, 2, color="CellType", markers=c("MYF5", "ANPEP"))

## ----cluster_cells_unsup_plot_by_media, fig.width = 4, fig.height = 4, fig.align="center", eval=TRUE----
plot_cell_clusters(HSMM, 1, 2, color="Media")

## ----cluster_cells_unsup_control_for_media, fig.width = 4, fig.height = 4, fig.align="center", eval=TRUE, warning=FALSE----
HSMM <- reduceDimension(HSMM, max_components=2, num_dim = 2, reduction_method = 'tSNE', 
                       residualModelFormulaStr="~Media + num_genes_expressed", verbose = T) #
HSMM <- clusterCells(HSMM, num_clusters=2)
plot_cell_clusters(HSMM, 1, 2, color="CellType")

## ----cluster_cells_unsup_plot_by_cell_type, fig.width = 6, fig.height = 4, fig.align="center", eval=TRUE----
HSMM <- clusterCells(HSMM, num_clusters=2)
plot_cell_clusters(HSMM, 1, 2, color="Cluster") + facet_wrap(~CellType)

## ----cluster_cells_diff_table, eval=TRUE---------------------------------
marker_diff <- markerDiffTable(HSMM[expressed_genes,], 
                                 cth, 
                                 residualModelFormulaStr="~Media + num_genes_expressed",
                                 cores=1)

## ----cluster_cells_semisup_show_marker_spec, fig.width = 4, fig.height = 4, fig.align="center", eval=TRUE----
candidate_clustering_genes <- row.names(subset(marker_diff, qval < 0.01))
marker_spec <- calculateMarkerSpecificity(HSMM[candidate_clustering_genes,], cth)
head(selectTopMarkers(marker_spec, 3))

## ----cluster_cells_semisup_pick_genes, fig.width = 4, fig.height = 4, fig.align="center", eval=TRUE----
semisup_clustering_genes <- unique(selectTopMarkers(marker_spec, 500)$gene_id)
HSMM <- setOrderingFilter(HSMM, semisup_clustering_genes)
plot_ordering_genes(HSMM)

## ----cluster_cells_semisup_clustering_no_impute, fig.width = 4, fig.height = 4, fig.align="center", eval=TRUE, warning = F----
plot_pc_variance_explained(HSMM, return_all = F) # norm_method = 'log',
HSMM <- reduceDimension(HSMM, max_components=2, num_dim = 2, reduction_method = 'tSNE', 
                       residualModelFormulaStr="~Media + num_genes_expressed", verbose = T) 
HSMM <- clusterCells(HSMM, num_clusters=2) 

plot_cell_clusters(HSMM, 1, 2, color="CellType")

## ----cluster_cells_semisup_clustering_with_impute, fig.width = 7, fig.height = 4, fig.align="center", eval=TRUE----
HSMM <- clusterCells(HSMM,
                     num_clusters=2, 
                     frequency_thresh=0.1,
                     cell_type_hierarchy=cth)
plot_cell_clusters(HSMM, 1, 2, color="CellType", markers = c("MYF5", "ANPEP"))

## ----count_cells_semisup_pie, fig.width = 5, fig.height = 5, fig.align="center", eval=TRUE----
pie <- ggplot(pData(HSMM), aes(x = factor(1), fill = factor(CellType))) +
  geom_bar(width = 1)
pie + coord_polar(theta = "y") + 
  theme(axis.title.x=element_blank(), axis.title.y=element_blank())

## ----select_myoblasts, eval=TRUE-----------------------------------------
HSMM_myo <- HSMM[,pData(HSMM)$CellType == "Myoblast"]	
HSMM_myo <- estimateDispersions(HSMM_myo)

## ----ordering_not_run, eval=FALSE----------------------------------------
#  diff_test_res <- differentialGeneTest(HSMM_myo[expressed_genes,],
#                                        fullModelFormulaStr="~Media")
#  ordering_genes <- row.names (subset(diff_test_res, qval < 0.01))

## ----select_ordering_genes, eval=TRUE------------------------------------
disp_table <- dispersionTable(HSMM_myo)
ordering_genes <- subset(disp_table, 
                         mean_expression >= 0.5 & 
                         dispersion_empirical >= 1 * dispersion_fit)$gene_id

## ----set_ordering_filter, eval=TRUE, fig.height=3, fig.width=3, fig.align="center"----
HSMM_myo <- setOrderingFilter(HSMM_myo, ordering_genes)
plot_ordering_genes(HSMM_myo)

## ----reduce_dimension, eval=TRUE-----------------------------------------
HSMM_myo <- reduceDimension(HSMM_myo, max_components=2)

## ----order_cells, eval=TRUE----------------------------------------------
HSMM_myo <- orderCells(HSMM_myo)

## ----plot_ordering_mst, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center", warning=FALSE----
plot_cell_trajectory(HSMM_myo, color_by="Hours")

## ----plot_ordering_mst_by_state, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center", warning=FALSE----
plot_cell_trajectory(HSMM_myo, color_by="State")

## ----plot_ordering_with_GM_state, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center", warning=FALSE----
GM_state <- function(cds){
  if (length(unique(pData(cds)$State)) > 1){
    T0_counts <- table(pData(cds)$State, pData(cds)$Hours)[,"0"]
    return(as.numeric(names(T0_counts)[which(T0_counts == max(T0_counts))]))
  }else { return (1) } 
}
HSMM_myo <- orderCells(HSMM_myo, root_state=GM_state(HSMM_myo))
plot_cell_trajectory(HSMM_myo, color_by="Pseudotime")

## ----init_hsmm_facet_state, include=TRUE, eval=TRUE, fig.width = 6, fig.height = 3, warning=FALSE----
plot_cell_trajectory(HSMM_myo, color_by="State") + facet_wrap(~State, nrow=1)

## ----init_hsmm_jitter_state, include=TRUE, eval=TRUE, fig.width = 3, fig.height = 3.5, fig.align="center", warning=FALSE----
blast_genes <- row.names(subset(fData(HSMM_myo), gene_short_name %in% c("CCNB2", "MYOD1", "MYOG")))
plot_genes_jitter(HSMM_myo[blast_genes,], grouping="State", min_expr=0.1)

## ----plot_markers_linear, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center"----
HSMM_expressed_genes <-  row.names(subset(fData(HSMM_myo), num_cells_expressed >= 10))
HSMM_filtered <- HSMM_myo[HSMM_expressed_genes,]
 
my_genes <- row.names(subset(fData(HSMM_filtered), 
                              gene_short_name %in% c("CDK1", "MEF2C", "MYH3"))) 
 
cds_subset <- HSMM_filtered[my_genes,]
plot_genes_in_pseudotime(cds_subset, color_by="Hours")

## ----select_ordering_genes_pca, eval=TRUE--------------------------------
HSMM_myo <- HSMM_myo[HSMM_expressed_genes,]
exprs_filtered <- t(t(exprs(HSMM_myo)/pData(HSMM_myo)$Size_Factor))
#nz_genes <- which(exprs_filtered != 0, arr.ind = T)
exprs_filtered@x <- log(exprs_filtered@x + 1)

# Calculate the variance across genes without converting to a dense
# matrix:
expression_means <- Matrix::rowMeans(exprs_filtered)
expression_vars <- Matrix::rowMeans((exprs_filtered - expression_means)^2)

# Filter out genes that are constant across all cells:
genes_to_keep <- expression_vars > 0
exprs_filtered <- exprs_filtered[genes_to_keep,]
expression_means <- expression_means[genes_to_keep]
expression_vars <- expression_vars[genes_to_keep]

# Here's how to take the top PCA loading genes, but using
# sparseMatrix operations the whole time, using irlba. Note
# that the v matrix from irlba is the loading matrix
set.seed(0)
irlba_pca_res <- irlba(t(exprs_filtered), 
                      nu=0, 
                      center=expression_means, 
                      scale=sqrt(expression_vars), 
                      right_only=TRUE)$v

row.names(irlba_pca_res) <- row.names(exprs_filtered)

# Here, we will just
# take the top 200 genes from components 2 and 3. 
# Component 1 usually is driven by technical noise.
# We could also use a more principled approach, 
# similar to what dpFeature does below
PC2_genes <- names(sort(abs(irlba_pca_res[, 2]), decreasing = T))[1:200]
PC3_genes <- names(sort(abs(irlba_pca_res[, 3]), decreasing = T))[1:200]

ordering_genes <- union(PC2_genes, PC3_genes)

## ----order_cells_pca_unsup, fig.width = 4, fig.height = 3, fig.align="center", warning=FALSE, eval=TRUE----
HSMM_myo <- setOrderingFilter(HSMM_myo, ordering_genes)
HSMM_myo <- reduceDimension(HSMM_myo, max_components=2)
HSMM_myo <- orderCells(HSMM_myo)
HSMM_myo <- orderCells(HSMM_myo, root_state=GM_state(HSMM_myo))
plot_cell_trajectory(HSMM_myo, color_by="Hours")

## ----set_ordering_gene_dpfeature, fig.height=3, fig.width=3, fig.align="center", eval=TRUE, warning=FALSE----
HSMM_myo <- detectGenes(HSMM_myo, min_expr=0.1)
fData(HSMM_myo)$use_for_ordering <- fData(HSMM_myo)$num_cells_expressed > 0.05 * ncol(HSMM_myo)

## ----plot_pc_variance, fig.width = 3, fig.height = 4, fig.align="center", eval=TRUE, warning=FALSE----
plot_pc_variance_explained(HSMM_myo, return_all = F) #look at the plot and decide how many dimensions you need. It is determined by a huge drop of variance at that dimension. pass that number to num_dim in the next function.

## ----perform tSNE dimension reduction, fig.width = 3, fig.height = 3, fig.align="center", eval=TRUE, warning=FALSE----
  HSMM_myo <- reduceDimension(HSMM_myo, max_components=2, norm_method = 'log', num_dim = 3, 
                              reduction_method = 'tSNE', verbose = T)

## ----cluster cells, fig.width = 3, fig.height = 3, fig.align="center", eval=TRUE, warning=FALSE----
HSMM_myo <- clusterCells(HSMM_myo, verbose = F)

## ----check the clustering results, include=TRUE, eval=TRUE, fig.width = 3, fig.height = 3.5, warning=FALSE, fig.show='hold'----
plot_cell_clusters(HSMM_myo, color_by = 'as.factor(Cluster)')
plot_cell_clusters(HSMM_myo, color_by = 'as.factor(Hours)')

## ----plot_rho_delta, fig.width = 3, fig.height = 3, fig.align="center", eval=TRUE, warning=FALSE----
plot_rho_delta(HSMM_myo, rho_threshold = 2, delta_threshold = 4 )

## ----recluster_cells_again, fig.width = 3, fig.height = 3, fig.align="center", eval=TRUE, warning=FALSE----
HSMM_myo <- clusterCells(HSMM_myo,  
                         rho_threshold = 2, 
                         delta_threshold = 4, 
                         skip_rho_sigma = T, 
                         verbose = F)

## ----check_clustering_again, include=TRUE, eval=TRUE, fig.width = 3, fig.height = 3.5, warning=FALSE, fig.show='hold'----
plot_cell_clusters(HSMM_myo, color_by = 'as.factor(Cluster)')
plot_cell_clusters(HSMM_myo, color_by = 'as.factor(Hours)')

## ----perform_DEG_analysis, fig.width = 3, fig.height = 3, fig.align="center", eval=TRUE, warning=FALSE----
clustering_DEG_genes <- differentialGeneTest(HSMM_myo[HSMM_expressed_genes,], 
                                             fullModelFormulaStr = '~Cluster', 
                                             cores = 1)

## ----order_cells_dp_feature, fig.width = 3, fig.height = 3, fig.align="center", eval=TRUE, warning=FALSE----
HSMM_ordering_genes <- row.names(clustering_DEG_genes)[order(clustering_DEG_genes$qval)][1:1000] 

HSMM_myo <- setOrderingFilter(HSMM_myo, ordering_genes = HSMM_ordering_genes)
HSMM_myo <- reduceDimension(HSMM_myo)
HSMM_myo <- orderCells(HSMM_myo)
HSMM_myo <- orderCells(HSMM_myo, root_state=GM_state(HSMM_myo))
plot_cell_trajectory(HSMM_myo, color_by="Hours")


## ----semi_sup_ordering_myoblast_cth, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center"----
CCNB2_id <- row.names(subset(fData(HSMM_myo), gene_short_name == "CCNB2"))
MYH3_id <- row.names(subset(fData(HSMM_myo), gene_short_name == "MYH3"))

cth <- newCellTypeHierarchy()
cth <- addCellType(cth, "Cycling myoblast", classify_func=function(x) {x[CCNB2_id,] >= 1})
cth <- addCellType(cth, "Myotube", classify_func=function(x) {x[MYH3_id,] >=1})
cth <- addCellType(cth, "Reserve cell", classify_func=function(x) {x[MYH3_id,] == 0 & x[CCNB2_id,] == 0})
HSMM_myo <- classifyCells(HSMM_myo, cth)

## ----semi_sup_ordering_markers, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center"----
marker_diff <- markerDiffTable(HSMM_myo[HSMM_expressed_genes,], 
                               cth, 
                               cores=1)
#semisup_clustering_genes <- row.names(subset(marker_diff, qval < 0.05))
semisup_clustering_genes <- row.names(marker_diff)[order(marker_diff$qval)][1:500] 

## ----semi_sup_ordering_trajectory, include=TRUE, eval=TRUE, fig.width = 6, fig.height = 2, warning=FALSE, fig.align="center"----
HSMM_myo <- setOrderingFilter(HSMM_myo, semisup_clustering_genes)
#plot_ordering_genes(HSMM_myo)
HSMM_myo <- reduceDimension(HSMM_myo, max_components=2)
HSMM_myo <- orderCells(HSMM_myo)
HSMM_myo <- orderCells(HSMM_myo, root_state=GM_state(HSMM_myo))
plot_cell_trajectory(HSMM_myo, color_by="CellType") + theme(legend.position="right")

## ----semi_sup_ordering_genes_in_pseudotime, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center"----
HSMM_filtered <- HSMM_myo[HSMM_expressed_genes,]

my_genes <- row.names(subset(fData(HSMM_filtered), 
                             gene_short_name %in% c("CDK1", "MEF2C", "MYH3"))) 

cds_subset <- HSMM_filtered[my_genes,]
plot_genes_branched_pseudotime(cds_subset, 
                               branch_point=1, 
                               color_by="Hours", 
                               ncol=1)

## ----select_genes, eval=TRUE---------------------------------------------
marker_genes <- row.names(subset(fData(HSMM_myo), 
                                 gene_short_name %in% c("MEF2C", "MEF2D", "MYF5", 
                                                        "ANPEP", "PDGFRA","MYOG", 
                                                        "TPM1",  "TPM2",  "MYH2", 
                                                        "MYH3",  "NCAM1", "TNNT1", 
                                                        "TNNT2", "TNNC1", "CDK1", 
                                                        "CDK2",  "CCNB1", "CCNB2", 
                                                        "CCND1", "CCNA1", "ID1")))

## ----basic_diff, eval=TRUE-----------------------------------------------
diff_test_res <- differentialGeneTest(HSMM_myo[marker_genes,], 
                                      fullModelFormulaStr="~Media")

# Select genes that are significant at an FDR < 10%
sig_genes <- subset(diff_test_res, qval < 0.1)

sig_genes[,c("gene_short_name", "pval", "qval")]

## ----plot_myog_jitter, eval=TRUE, fig.width = 4, fig.height = 2, fig.align="center"----
MYOG_ID1 <- HSMM_myo[row.names(subset(fData(HSMM_myo), 
                                      gene_short_name %in% c("MYOG", "CCNB2"))),]
plot_genes_jitter(MYOG_ID1, grouping="Media", ncol=2)

## ----setup_test_genes, eval=TRUE-----------------------------------------
to_be_tested <- row.names(subset(fData(HSMM), 
                                 gene_short_name %in% c("UBC", "NCAM1", "ANPEP"))) 
cds_subset <- HSMM[to_be_tested,]

## ----all_in_one_test, eval = TRUE, warning=FALSE-------------------------
diff_test_res <- differentialGeneTest(cds_subset, fullModelFormulaStr="~CellType")
diff_test_res[,c("gene_short_name", "pval", "qval")]

## ----jitter_plot_diff_res, eval=TRUE, fig.width = 8, fig.height = 2.5, fig.align="center"----
plot_genes_jitter(cds_subset, grouping="CellType", color_by="CellType", 
                  nrow=1, ncol=NULL, plot_trend=TRUE)

## ----piecewise_test, eval=FALSE------------------------------------------
#  full_model_fits <- fitModel(cds_subset,  modelFormulaStr="~CellType")
#  reduced_model_fits <- fitModel(cds_subset, modelFormulaStr="~1")
#  diff_test_res <- compareModels(full_model_fits, reduced_model_fits)
#  diff_test_res

## ----setup_test_genes_pt, eval=TRUE--------------------------------------
to_be_tested <- row.names(subset(fData(HSMM), 
                                 gene_short_name %in% c("MYH3", "MEF2C", "CCNB2", "TNNT1"))) 
cds_subset <- HSMM_myo[to_be_tested,]

## ----piecewise_test_pt, eval=TRUE----------------------------------------
diff_test_res <- differentialGeneTest(cds_subset,  fullModelFormulaStr="~sm.ns(Pseudotime)")

## ----all_in_one_test_pt, eval=TRUE---------------------------------------
diff_test_res[,c("gene_short_name", "pval", "qval")]

## ----plot_diff_res_pt, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center", warning=FALSE----
plot_genes_in_pseudotime(cds_subset, color_by="Hours")

## ----plot_diff_res_pt_heatmap, include=TRUE, eval=TRUE, fig.width = 5, fig.height = 4, fig.align="center", warning=FALSE----
diff_test_res <- differentialGeneTest(HSMM_myo[marker_genes,], 
                                      fullModelFormulaStr="~sm.ns(Pseudotime)")

sig_gene_names <- row.names(subset(diff_test_res, qval < 0.1))


plot_pseudotime_heatmap(HSMM_myo[sig_gene_names,], 
                        num_clusters = 3, 
                        cores = 1, 
                        show_rownames = T)

## ----plot_diff_res_multi, eval=TRUE, fig.width = 8, fig.height = 4, fig.align="center"----
to_be_tested <- row.names(subset(fData(HSMM), 
                                 gene_short_name %in% c("TPM1", "MYH3", "CCNB2", "GAPDH"))) 
cds_subset <- HSMM[to_be_tested,]

diff_test_res <- differentialGeneTest(cds_subset,  
                                      fullModelFormulaStr="~CellType + Hours", 
                                      reducedModelFormulaStr="~Hours")
diff_test_res[,c("gene_short_name", "pval", "qval")]
plot_genes_jitter(cds_subset, 
                  grouping="Hours", color_by="CellType", plot_trend=TRUE) + 
  facet_wrap( ~ feature_label, scales="free_y")

## ----init_treutlein, include=TRUE, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center", warning=FALSE----
lung <- load_lung()

plot_cell_trajectory(lung, color_by="Time")


## ----lung_beam_test, include=TRUE, eval=TRUE, warning=FALSE--------------
BEAM_res <- BEAM(lung, branch_point=1, cores = 1)
BEAM_res <- BEAM_res[order(BEAM_res$qval),]
BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")]

## ----lung_beam_branched_heatmap, include=TRUE, eval=TRUE, fig.width = 6, fig.height = 8, fig.align="center", warning=FALSE----
plot_genes_branched_heatmap(lung[row.names(subset(BEAM_res, qval < 1e-4)),], 
                            branch_point = 1, 
                            num_clusters = 4, 
                            cores = 1, 
                            use_gene_short_name = T, 
                            show_rownames = T)

## ----lung_beam_branched_pseudotime, include=TRUE, eval=TRUE, fig.width = 4, fig.height = 4, fig.align="center", warning=FALSE----
lung_genes <- row.names(subset(fData(lung), gene_short_name %in% c("Ccnd2", "Sftpb", "Pdpn")))
plot_genes_branched_pseudotime(lung[lung_genes,], 
                               branch_point=1, 
                               color_by="Time", 
                               ncol=1)

## ----citation, eval=TRUE-------------------------------------------------
citation("monocle")


## ----sessi---------------------------------------------------------------
sessionInfo()

