## ----knitsetup, message=FALSE, warning=FALSE, include=FALSE----------------
knitr::opts_knit$set(root.dir = ".")
knitr::opts_chunk$set(collapse = TRUE)
BiocStyle::markdown()
library("BiocStyle")

## ----install, eval=FALSE---------------------------------------------------
#  source("http://bioconductor.org/biocLite.R")
#  biocLite("BioCor")

## ----github, eval = FALSE--------------------------------------------------
#  library("devtools")
#  install_github("llrs/BioCor")

## ----load------------------------------------------------------------------
library("BioCor")
## Load libraries with the data of the pathways
library("org.Hs.eg.db")
library("reactome.db")
genesKegg <- as.list(org.Hs.egPATH)
genesReact <- as.list(reactomeEXTID2PATHID)

## ----GSEABase, eval = FALSE------------------------------------------------
#  library("GSEABase")
#  paths2Genes <- geneIds(getGmt("/path/to/c3.all.v2.5.symbols.gmt",
#                   collectionType=BroadCollection(category="c3"),
#                   geneIdType=SymbolIdentifier()))
#  
#  genes <- unlist(paths2Genes, use.names = FALSE)
#  pathways <- rep(names(paths2Genes), lengths(paths2Genes))
#  genes2paths <- split(pathways, genes)
#  
#  gmt_sim <- mgeneSim(names(genes2paths), genes2paths)

## ----pathSim---------------------------------------------------------------
(paths <- sample(unique(unlist(genesReact)), 2))
pathSim(paths[1], paths[2], genesReact)

(pathways <- sample(unique(unlist(genesReact)), 10))
mpathSim(pathways, genesReact)

## ----combineScores---------------------------------------------------------
sim <- mpathSim(pathways, genesReact)
methodsCombineScores <- c("avg", "max", "rcmax", "rcmax.avg", "BMA")
sapply(methodsCombineScores, combineScores, scores = sim)

## ----geneSim---------------------------------------------------------------
geneSim("672", "675", genesKegg)
geneSim("672", "675", genesReact)

mgeneSim(c("BRCA1" = "672", "BRCA2" = "675", "NAT2" = "10"), genesKegg)
mgeneSim(c("BRCA1" = "672", "BRCA2" = "675", "NAT2" = "10"), genesReact)

## ----clusterSim------------------------------------------------------------
clusterSim(c("672", "675"), c("100", "10", "1"), genesKegg)
clusterSim(c("672", "675"), c("100", "10", "1"), genesKegg, NULL)

clusters <- list(cluster1 = c("672", "675"),
                 cluster2 = c("100", "10", "1"),
                 cluster3 = c("18", "10", "83"))
mclusterSim(clusters, genesKegg, "rcmax.avg")
mclusterSim(clusters, genesKegg, "max")

## ----clusterGeneSim--------------------------------------------------------
clusterGeneSim(c("672", "675"), c("100", "10", "1"), genesKegg)
clusterGeneSim(c("672", "675"), c("100", "10", "1"), genesKegg, "max")

mclusterGeneSim(clusters, genesKegg, c("max", "rcmax.avg"))
mclusterGeneSim(clusters, genesKegg, c("max", "max"))

## ----merging---------------------------------------------------------------
kegg <- mgeneSim(c("672", "675", "10"), genesKegg)
react <- mgeneSim(c("672", "675", "10"), genesReact)
## We can sum it adding a weight to each origin
weighted.sum(c(kegg["672", "675"], react["672","675"]), w = c(0.3, 0.7))

## Or if we want to perform for all the matrix
## A list of matrices to merge
sim <- list("kegg" = kegg, "react" = react)
similarities(sim, weighted.sum, w = c(0.3, 0.7))
similarities(sim, weighted.prod, w = c(0.3, 0.7))
similarities(sim, prod)
similarities(sim, mean)

## ----sims------------------------------------------------------------------
lapply(sim, D2J)

## ----whole_db, eval=FALSE--------------------------------------------------
#  ## Omit those genes without a pathway
#  nas <- sapply(genesKegg, function(y){all(is.na(y)) | is.null(y)})
#  genesKegg2 <- genesKegg[!nas]
#  m <- mgeneSim(names(genesKegg2), genesKegg2, method  = NULL)

## ----whole_db2, eval=FALSE-------------------------------------------------
#  sim <- AintoB(m, B)

## ----hclust1, fig.cap="Gene clustering by similarities", fig.wide = TRUE----
genes.id <- c("10", "15", "16", "18", "2", "9", "52", "3855", "3880", "644", 
              "81327", "9128", "2073", "2893", "5142", "210", "81", 
              "1352", "672", "675")
genes.id <- mapIds(org.Hs.eg.db, keys = genes.id, keytype = "ENTREZID", column = "SYMBOL")
genes <- names(genes.id)
names(genes) <- genes.id
sum(lengths(genesReact[genes])) # Check the total number of pathways involved
react <- mgeneSim(genes, genesReact)
## We remove genes which are not in list (hence the warning):
nan <- genes %in% names(genesReact)
react <- react[nan, nan]
hc <- hclust(as.dist(1 - react))
plot(hc)

## ----hclust3, fig.cap="Clustering using clusterSim", fig.wide = TRUE-------
mycl <- cutree(hc, h = 0.2)
clusters <- split(genes[nan], as.factor(mycl))
names(clusters) <- paste0("cluster", seq_len(max(mycl)))
## Remember we can use two methods to compare clusters
sim_clus1 <- mclusterSim(clusters, genesReact)
plot(hclust(as.dist(1 - sim_clus1)))

## ----hclust3b, fig.cap="Clustering using clusterGeneSim", fig.wide = TRUE----
sim_clus2 <- mclusterGeneSim(clusters, genesReact)
plot(hclust(as.dist(1 - sim_clus2)))

## ----geneSimGOSemSim-------------------------------------------------------
geneSim("241", "251", genesKegg, "BMA")
geneSim("241", "251", genesReact, "BMA")
mgeneSim(c("835", "5261","241", "994"), genesKegg, "BMA", round = TRUE)
mgeneSim(c("835", "5261","241", "994"), genesReact, "BMA", round = TRUE)

## ----named-----------------------------------------------------------------
genes <- c("CDC45", "MCM10", "CDC20", "NMU", "MMP1")
genese <- mapIds(org.Hs.eg.db, keys = genes, column = "ENTREZID", 
                 keytype = "SYMBOL")
mgeneSim(genese, genesReact, "BMA")
mgeneSim(genese, genesKegg, "BMA")

## ----clusterSimGOSemSim----------------------------------------------------
gs1 <- c("835", "5261","241", "994", "514", "533")
gs2 <- c("578","582", "400", "409", "411")
clusterSim(gs1, gs2, genesReact, "BMA")
clusterSim(gs1, gs2, genesKegg, "BMA")

## ----wgcna, eval=FALSE-----------------------------------------------------
#  expr.sim <- WGCNA::cor(expr) # or bicor
#  
#  ## Combine the similarities
#  similarity <- similarities(c(list(exp = expr.sim), sim), mean, na.rm = TRUE)
#  
#  ## Choose the softThreshold
#  pSFT <- pickSoftThreshold.fromSimilarity(similarity)
#  
#  ## Or any other function we want
#  adjacency <- adjacency.fromSimilarity(similarity, power = pSFT$powerEstimate)
#  
#  ## Once we have the similarities we can calculate the TOM with TOM
#  TOM <- TOMsimilarity(adjacency) ## Requires adjacencies despite its name
#  dissTOM <- 1 - TOM
#  geneTree <- hclust(as.dist(dissTOM), method = "average")
#  ## We can use a clustering tool to group the genes
#  dynamicMods <- cutreeHybrid(dendro = geneTree, distM = dissTOM,
#                              deepSplit = 2, pamRespectsDendro = FALSE,
#                              minClusterSize = 30)
#  moduleColors <- labels2colors(dynamicMods$labels)

## ---- eval = FALSE---------------------------------------------------------
#  Error in FUN(X[[i]], ...) :
#    trying to get slot "geneAnno" from an object of a basic class ("list") with no slots

## ----session---------------------------------------------------------------
sessionInfo()