## ----GlobalOptions, results="hide", include=FALSE, cache=FALSE-----------
knitr::opts_chunk$set(fig.align="center", cache=FALSE, cache.path = "clusterExperimentTutorial_cache/", fig.path="clusterExperimentTutorial_figure/",error=FALSE, #make it stop on error
fig.width=6,fig.height=6,autodep=TRUE,out.width="600px",out.height="600px",
message=FALSE)
#knitr::opts_knit$set(stop_on_error = 2L) #really make it stop
#knitr::dep_auto()

## ----cache=FALSE---------------------------------------------------------
set.seed(14456) ## for reproducibility, just in case
library(scRNAseq)
data("fluidigm")

## ------------------------------------------------------------------------
assay(fluidigm)[1:5,1:10]
colData(fluidigm)[,1:5]
NCOL(fluidigm) #number of samples

## ----filter_high---------------------------------------------------------
se <- fluidigm[,colData(fluidigm)[,"Coverage_Type"]=="High"]

## ----filter--------------------------------------------------------------
wh_zero <- which(rowSums(assay(se))==0)
pass_filter <- apply(assay(se), 1, function(x) length(x[x >= 10]) >= 10)
se <- se[pass_filter,]
dim(se)

## ----normalization-------------------------------------------------------
fq <- round(limma::normalizeQuantiles(assay(se)))
assays(se) <- list(normalized_counts=fq)

## ----clusterMany---------------------------------------------------------
options(getClass.msg=FALSE) #get rid of annoying messages about cache until fixed internally in R
library(clusterExperiment)
ce<-clusterMany(se, clusterFunction="pam",ks=5:10,
      isCount=TRUE,dimReduce=c("PCA","var"),nVarDims=c(100,500,1000),
      nPCADims=c(5,15,50),run=TRUE)

## ----plotClusterEx1------------------------------------------------------
defaultMar<-par("mar")
plotCMar<-c(1.1,8.1,4.1,1.1)
par(mar=plotCMar)
plotClusters(ce,main="Clusters from clusterMany", whichClusters="workflow", sampleData=c("Biological_Condition","Cluster2"), axisLine=-1)

## ----plotCluster_newLabels-----------------------------------------------
cl<-clusterLabels(ce)
cl<-gsub("Features","",cl)
clusterLabels(ce)<-cl

## ----plotCluster_newOrder------------------------------------------------
cl<-clusterLabels(ce)
ndims<-sapply(sapply(strsplit(cl,","),function(x){strsplit(x[1],"=")}),function(x){x[2]})
ord<-order(as.numeric(ndims))
par(mar=plotCMar)
plotClusters(ce,main="Clusters from clusterMany", whichClusters=ord, sampleData=c("Biological_Condition","Cluster2"), axisLine=-1)

## ----clusterMatrix-------------------------------------------------------
head(clusterMatrix(ce)[,1:3])

## ------------------------------------------------------------------------
ce<-combineMany(ce)

## ----lookAtCombineMany---------------------------------------------------
head(clusterMatrix(ce)[,1:3])
par(mar=plotCMar)
plotClusters(ce,whichClusters="workflow")


## ----combineMany_changeLabel---------------------------------------------
wh<-which(clusterLabels(ce)=="combineMany")
if(length(wh)!=1) stop() else clusterLabels(ce)[wh]<-"combineMany,default"

## ----combineMany_newCombineMany------------------------------------------
ce<-combineMany(ce,proportion=0.7,clusterLabel="combineMany,0.7")
par(mar=plotCMar)
plotClusters(ce,whichClusters="workflow")


## ----combineMany_changeMinSize-------------------------------------------
ce<-combineMany(ce,proportion=0.7,minSize=3,clusterLabel="combineMany,final")
par(mar=plotCMar)
plotClusters(ce,whichClusters="workflow",main="Min. Size=3")

## ----plotCoClustering_quickstart-----------------------------------------
plotCoClustering(ce)

## ----makeDendrogram------------------------------------------------------
ce<-makeDendrogram(ce,dimReduce="var",ndims=500)
plotDendrogram(ce)

## ----makeDendrogram_show-------------------------------------------------
ce

## ----mergeClustersPlot---------------------------------------------------
mergeClusters(ce,mergeMethod="adjP",plot="mergeMethod")

## ----mergeClusters-------------------------------------------------------
ce<-mergeClusters(ce,mergeMethod="adjP",cutoff=0.01)
par(mar=plotCMar)
plotClusters(ce,whichClusters="workflow", sampleData=c("Biological_Condition","Cluster2"))
plotCoClustering(ce,whichClusters=c("mergeClusters","combineMany"),
                 sampleData=c("Biological_Condition","Cluster2"),annLegend=FALSE)

## ----plotHeatmap---------------------------------------------------------
plotHeatmap(ce,clusterSamplesData="dendrogramValue",breaks=.99,
            sampleData=c("Biological_Condition", "Cluster1", "Cluster2"))

## ----getBestFeatures-----------------------------------------------------
pairsAll<-getBestFeatures(ce,contrastType="Pairs",p.value=0.05,
                          number=nrow(ce),isCount=TRUE)
head(pairsAll)

## ----getBestFeatures_size------------------------------------------------
length(pairsAll$Feature)==length(unique(pairsAll$Feature))

## ----getBestFeatures_heatmap---------------------------------------------
plotHeatmap(ce, clusterSamplesData="dendrogramValue",
            clusterFeaturesData=unique(pairsAll[,"IndexInOriginal"]),
            main="Heatmap of features w/ significant pairwise differences",
            breaks=.99)

## ----show----------------------------------------------------------------
ce

## ----CEHelperCommands1---------------------------------------------------
head(clusterMatrix(ce))[,1:5]
primaryCluster(ce)

## ----CEHelperCommands2---------------------------------------------------
head(clusterLabels(ce),10)

## ----CEHelperCommands3---------------------------------------------------
head(clusterTypes(ce),10)

## ----SECommandsOnCE------------------------------------------------------
colData(ce)[,1:5]

## ----CEClusterLengend----------------------------------------------------
length(clusterLegend(ce))
clusterLegend(ce)[1:2]

## ----plotClusterEx1_redo-------------------------------------------------
par(mar=plotCMar)
plotClusters(ce,main="Clusters from clusterMany", whichClusters="workflow", 
             axisLine=-1)

## ----plotClusterEx1_newOrder---------------------------------------------
par(mar=plotCMar)
plotClusters(ce,whichClusters="clusterMany",
               main="Only Clusters from clusterMany",axisLine=-1)

## ----plotClusterEx1_addData----------------------------------------------
par(mar=plotCMar)
plotClusters(ce,whichClusters="workflow", sampleData=c("Biological_Condition","Cluster2"), 
               main="Workflow clusters plus other data",axisLine=-1)

## ----plotClusterEx1_setColors--------------------------------------------
par(mar=plotCMar)
ce_temp<-plotClusters(ce,whichClusters="workflow", sampleData=c("Biological_Condition","Cluster2"), 
               main="Clusters from clusterMany, different order",axisLine=-1,resetNames=TRUE,resetColors=TRUE,resetOrderSamples=TRUE)
clusterLegend(ce_temp)[c("mergeClusters","combineMany,final")]

## ----plotClusterEx1_forceColors,fig.width=18,fig.height=6----------------
par(mar=plotCMar)
par(mfrow=c(1,2))
plotClusters(ce_temp, sampleData=c("Biological_Condition","Cluster2"),
             whichClusters="workflow", existingColors="all",
             main="Workflow Clusters, fix the color of clusters",axisLine=-1)

plotClusters(ce_temp, sampleData=c("Biological_Condition","Cluster2"), 
             existingColors="all", whichClusters="clusterMany",
             main="clusterMany Clusters, fix the color of clusters",
             axisLine=-1)

## ----plotHeatmap_Ex1-----------------------------------------------------
par(mfrow=c(1,1))
par(mar=defaultMar)
plotHeatmap(ce,main="Heatmap with clusterMany")

## ----plotHeatmap_Ex1.1---------------------------------------------------
whClusterPlot<-1:2
plotHeatmap(ce,whichClusters=whClusterPlot, annLegend=FALSE)

## ----plotHeatmap_primaryCluster------------------------------------------
plotHeatmap(ce,clusterSamplesData="primaryCluster",
            whichClusters="primaryCluster",
            main="Heatmap with clusterMany",annLegend=FALSE)

## ----showCE_dendrogram---------------------------------------------------
show(ce)

## ----plotHeatmap_dendro--------------------------------------------------
plotHeatmap(ce,clusterSamplesData="dendrogramValue",
            whichClusters=c("mergeClusters","combineMany"),
            main="Heatmap with clusterMany",
            sampleData=c("Biological_Condition","Cluster2"),annLegend=FALSE)

## ----tableArguments, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'----
# simple table creation here
tabl <- "  
Argument| Dependencies |  Passed to | Argument passed to
---------------|-----------------|:-------------:|------:|
ks             | sequential=TRUE |  seqCluster   |  k0 
-    | sequential=FALSE, findBestK=FALSE, clusterFunction of type 'K' | clusterD | k
-              | sequential=FALSE, findBestK=FALSE, subsample=TRUE | subsampleClustering | k
-               | sequential=FALSE, findBestK=TRUE, clusterFunction of type 'K' | clusterD | kRange
dimReduce      | none            | transform     | dimReduce
nVarDims       | dimReduce in 'mad','cv','var' | transform | nVarDims
nPCADims       | dimReduce='PCA' | transform     | nPCADims
clusterFunction| none            | clusterD      | clusterFunction    
minSizes       | none            | clusterD      | minSize
distFunction   | subsample=FALSE | clusterD      | distFunction
alphas         | clusterFunction of type '01'| clusterD | alpha
findBestK      | clusterFunction of type 'K' | clusterD | findBestK
removeSil      | clusterFunction of type 'K' | clusterD | removeSil
silCutoff      | clusterFunction of type 'K' | clusterD | silCutoff
betas          | sequential=TRUE | seqCluster    | beta
"
cat(tabl) # output the table in a format good for HTML/PDF/docx conversion

## ----defineDist----------------------------------------------------------
corDist<-function(x){(1-cor(t(x),method="pearson"))/2}
spearDist<-function(x){(1-cor(t(x),method="spearman"))/2}

## ----visualizeSubsamplingD-----------------------------------------------
ceSub<-clusterSingle(ce,dimReduce="mad",ndims=1000,subsample=TRUE,clusterFunction="hierarchical01",subsampleArgs=list(k=8),clusterLabel="subsamplingCluster",clusterDArgs=list(minSize=5))
plotCoClustering(ceSub,colorScale=rev(seqPal5))

## ----visualizeSpearmanDist-----------------------------------------------
dSp<-spearDist(t(transform(ce,dimReduce="mad",nVarDims=1000)))
plotHeatmap(dSp,isSymmetric=TRUE,colorScale=rev(seqPal5))

## ----clusterManyDiffDist,fig.width=15,fig.height=6-----------------------
ceDist<-clusterMany(ce, k=7:9, alpha=c(0.35,0.4,0.45), clusterFunction=c("tight","hierarchical01","pam","hierarchicalK"),findBestK=FALSE,
               removeSil=c(FALSE),dist=c("corDist","spearDist"),
               isCount=TRUE,dimReduce=c("mad"),nPCADims=1000,run=TRUE)
clusterLabels(ceDist)<-gsub("clusterFunction","alg",clusterLabels(ceDist))
clusterLabels(ceDist)<-gsub("Dist","",clusterLabels(ceDist))
clusterLabels(ceDist)<-gsub("distFunction","dist",clusterLabels(ceDist))
clusterLabels(ceDist)<-gsub("hierarchical","hier",clusterLabels(ceDist))
par(mar=c(1.1,15.1,1.1,1.1))
plotClusters(ceDist,axisLine=-2,sampleData=c("Biological_Condition"))

## ----clusterManyCheckParam-----------------------------------------------
checkParam<-clusterMany(se, clusterFunction="pam", ks=2:10,
                        removeSil=c(TRUE,FALSE), isCount=TRUE,
                        dimReduce=c("PCA","var"),
                        nVarDims=c(100,500,1000),nPCADims=c(5,15,50),run=FALSE)
dim(checkParam$paramMatrix) #number of rows is the number of clusterings

## ----clusterManyCheckParam2,eval=FALSE-----------------------------------
#  # ce<-clusterMany(se,  paramMatrix=checkParam$paramMatrix, clusterDArgs=checkParam$clusterDArgs, seqArgs=checkParam$seqArgs,subsampleArgs=checkParam$subsampleArgs)
#  ce<-clusterMany(ce, clusterFunction="pam",ks=2:10,findBestK=TRUE,removeSil=c(TRUE), isCount=TRUE,dimReduce=c("PCA","var"),nVarDims=c(100,500,1000),nPCADims=c(5,15,50),run=TRUE)

## ----combineMany_detailed------------------------------------------------
plotCoClustering(ce)

## ----combineMany_chooseClusters------------------------------------------
wh<-grep("nVAR",clusterLabels(ce))
ce<-combineMany(ce,whichCluster=wh,proportion=0.7,minSize=3,
                clusterLabel="combineMany,nVAR")
plotCoClustering(ce)

## ----combineMany_showDifferent-------------------------------------------
wh<-grep("combineMany",clusterTypes(ce))
par(mar=plotCMar)
plotClusters(ce,whichClusters=rev(wh),axisLine=-1)

## ----makeDendrogram_reducedFeatures--------------------------------------
ce<-makeDendrogram(ce,dimReduce="var",ndims=500)
plotDendrogram(ce)

## ----showCe--------------------------------------------------------------
show(ce)

## ----remakeMakeDendrogram------------------------------------------------
ce<-makeDendrogram(ce,dimReduce="var",ndims=500,
                   whichCluster="combineMany,final")
plotDendrogram(ce)

## ----clusterTypeOfCombineMany--------------------------------------------
clusterTypes(ce)[which(clusterLabels(ce)=="combineMany,final")]

## ----getBackCombineMany--------------------------------------------------
ce<-setToCurrent(ce,whichCluster="combineMany,final")
show(ce)

## ----checkWhatDendro-----------------------------------------------------
ce

## ----mergeClusters_plot,fig.width=12-------------------------------------
mergeClusters(ce,mergeMethod="none",plotType="all")

## ----mergeClusters_ex----------------------------------------------------
ce<-mergeClusters(ce,cutoff=0.05,mergeMethod="adjP",clusterLabel="mergeClusters,v2")
ce

## ----mergeClusters_redo--------------------------------------------------
ce<-mergeClusters(ce,cutoff=0.15,mergeMethod="MB",
                  clusterLabel="mergeClusters,v3")
ce

## ----workflowTable-------------------------------------------------------
workflowClusterTable(ce)

## ----workflowDetails-----------------------------------------------------
head(workflowClusterDetails(ce),8)

## ----markFinal-----------------------------------------------------------
ce<-setToFinal(ce,whichCluster="mergeClusters,v2",
               clusterLabel="Final Clustering")
par(mar=plotCMar)
plotClusters(ce,whichClusters="workflow")

## ----rsecTable, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'----
# simple table creation here
tabl <- "  
| | Arguments Internally Fixed | Arguments for the user |  Correspondence | Notes |
|:-----------------|:-----------------|:-------------|:------|:------|
*clusterMany*     | |  | |
-| sequential=TRUE | k0s              | ks | only sets 'k0', no other k 
- | distFunction=NA | clusterFunction | | only tight or hierarchical01 
- | removeSil=FALSE | dimReduce | | 
- | subsample=TRUE | nVarDims | | 
- |  silCutoff=0 | nPCADims | | 
- |  | alphas | |
- |  | betas | |
- |  | minSizes | |
-  | | clusterDArgs | |
-  | | subsampleArgs | |
- |  | seqArgs | |
- |  | run | |
- |  | ncores | |
- |  | random.seed | |
- |  | isCount | |
- |  | transFun | |
*combineMany* | |  | |
-  | propUnassigned = *(default)* | combineProportion | proportion 
-  | combineMinSize    | minSize | |
*makeDendrogram* | |  | |
-  | ignoreUnassignedVar=TRUE | dendroReduce      | dimReduce | 
-  | unassignedSamples= *(default)* | dendroNDims       | ndims |  
*mergeClusters*  | |  | |
-  | plotType='none' | mergeMethod     | | 
-  | | mergeCutoff | cutoff | 
-  |  | isCount | | used for both mergeMethod and clusterMany 
"

cat(tabl) # output the table in a format good for HTML/PDF/docx conversion


## ----resetToManyClusters-------------------------------------------------
wh<-which(clusterLabels(ce)=="mergeClusters,v3")
if(length(wh)==1) primaryClusterIndex(ce)<-wh else stop()

## ----getBestFeatures_onlyTopPairs----------------------------------------
pairsAllTop<-getBestFeatures(ce,contrastType="Pairs",p.value=0.05)
dim(pairsAllTop)
head(pairsAllTop)

## ----getBestFeatures_oneAgainstAll---------------------------------------
best1vsAll<-getBestFeatures(ce,contrastType="OneAgainstAll",p.value=0.05)
head(best1vsAll)

## ----getBestFeatures_dendroFail,error=TRUE-------------------------------
bestDendroTest1<-getBestFeatures(ce,contrastType="Dendro",p.value=0.05)

## ----showCeAgain---------------------------------------------------------
show(ce)

## ----getBestFeatures_dendro----------------------------------------------
ce<-makeDendrogram(ce,dimReduce="var",ndims=500)
bestDendro<-getBestFeatures(ce,contrastType="Dendro",p.value=0.05)
head(bestDendro)

## ----dendroContrastLevels------------------------------------------------
levels((bestDendro)$Contrast)

## ----dendroWithNodeNames-------------------------------------------------
plotDendrogram(ce,show.node.label=TRUE)

## ----sessionInfo---------------------------------------------------------
sessionInfo()

