## ----results='hide', message=FALSE---------------------------------------
suppressPackageStartupMessages(require(maftools))
#read TCGA maf file for LAML
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools')
laml = read.maf(maf = laml.maf, removeSilent = TRUE, useAll = FALSE)

## ------------------------------------------------------------------------
#Typing laml shows basic summary of MAF file.
laml
#Shows sample summry.
getSampleSummary(laml)
#Shows frequently mutated genes.
getGeneSummary(laml)
#Shows all fields in MAF
getFields(laml)
#Writes maf summary to an output file with basename laml.
write.mafSummary(maf = laml, basename = 'laml')

## ----fig.height=6, fig.width=8-------------------------------------------
plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

## ---- fig.align='left',fig.height=6,fig.width=10, eval=T, fig.align='left'----
#We will draw oncoplots for top ten mutated genes. (Removing non-mutated samples from the plot for better visualization)
oncoplot(maf = laml, top = 10, removeNonMutated = TRUE)

## ----results='hide', message=FALSE---------------------------------------
#read TCGA maf file for LAML
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools')
all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools")
amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools")
del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools")
laml.plus.gistic = read.maf(maf = laml.maf, removeSilent = TRUE, useAll = FALSE, gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, isTCGA = TRUE)

## ---- fig.align='left',fig.height=6,fig.width=10, eval=T, fig.align='left'----
#We will draw oncoplots for top ten mutated genes. (Removing non-mutated samples from the plot for better visualization)
oncoplot(maf = laml.plus.gistic, top = 10, removeNonMutated = TRUE, sortByMutation = TRUE)

## ---- fig.height=7,fig.width=10, eval=T, fig.align='left'----------------
#Read FAB classification of TCGA LAML barcodes.
laml.fab.anno = system.file('extdata', 'tcga_laml_fab_annotation.txt', package = 'maftools')
laml.fab.anno = read.delim(laml.fab.anno, sep = '\t')
head(laml.fab.anno)
#Changing colors (You can use any colors, here in this example we will use a color palette from RColorBrewer)
col = RColorBrewer::brewer.pal(n = 8, name = 'Paired')
names(col) = c('Frame_Shift_Del','Missense_Mutation', 'Nonsense_Mutation', 'Multi_Hit', 'Frame_Shift_Ins',
               'In_Frame_Ins', 'Splice_Site', 'In_Frame_Del')
#We will plot same top ten mutated genes with FAB classification as annotation and using above defined colors.
oncoplot(maf = laml, top = 10, annotation = laml.fab.anno, removeNonMutated = TRUE, colors = col)

## ---- fig.height=2,fig.width=8,fig.align='center'------------------------
oncostrip(maf = laml, genes = c('DNMT3A','NPM1', 'RUNX1'), removeNonMutated = TRUE, showTumorSampleBarcodes = FALSE)

## ---- fig.align='default', fig.height=6, fig.width=8, eval = T-----------
laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE)
#plot titv summary
plotTiTv(res = laml.titv)

## ----fig.height=4,fig.width=8,fig.align='center'-------------------------
#Lets plot lollipop plot for DNMT3A, which is one of the most frequent mutated gene in Leukemia.
dnmt3a.lpop = lollipopPlot(maf = laml, gene = 'DNMT3A', AACol = 'Protein_Change', showMutationRate = TRUE, domainLabelSize = 3, defaultYaxis = FALSE)

## ----fig.height=4,fig.width=8,fig.align='center'-------------------------
#Lets plot mutations on KIT gene, without repel option.
kit.lpop = lollipopPlot(maf = laml, gene = 'KIT', AACol = 'Protein_Change', labelPos = c(416, 418), refSeqID = 'NM_000222', domainLabelSize = 3)
#Same plot with repel=TRUE
kit.lpop = lollipopPlot(maf = laml, gene = 'KIT', AACol = 'Protein_Change', labelPos = c(416, 418), refSeqID = 'NM_000222', repel = TRUE, domainLabelSize = 3)

## ---- warning=FALSE, message=FALSE, fig.height=4,fig.width=8,fig.align='center'----
laml.dnmt3a = lollipopPlot(maf = laml, gene = 'DNMT3A', AACol = 'Protein_Change', refSeqID = 'NM_175629', labelPos = 882, collapsePosLabel = TRUE, cBioPortal = TRUE, domainLabelSize = 3, defaultYaxis = FALSE)

## ---- fig.height=4,fig.width=8,fig.align='center'------------------------
tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt", package = "maftools")
plotCBSsegments(cbsFile = tcga.ab.009.seg, maf = laml, labelAll = TRUE)

## ---- results='hide', message=FALSE--------------------------------------
coad <- system.file("extdata", "coad.maf.gz", package = "maftools")
coad = read.maf(maf = coad)

## ---- fig.height=5,fig.width=12,fig.align='center'-----------------------
coad.rf = rainfallPlot(maf = coad, detectChangePoints = TRUE, fontSize = 12, pointSize = 0.6)

## ---- fig.align='center', fig.height=6, fig.width=10---------------------
laml.mutload = tcgaCompare(maf = laml, cohortName = 'Example-LAML')

## ---- fig.align='left',fig.width=7, fig.height=5, eval=T-----------------
geneCloud(input = laml, minMut = 3)

## ------------------------------------------------------------------------
vafPlot = plotVaf(maf = laml, vafCol = 'i_TumorVAF_WU', flip = TRUE)

## ------------------------------------------------------------------------
all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools")
amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools")
del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools")
laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, isTCGA = TRUE)

## ---- fig.align='left',fig.width=7, fig.height=5, eval=T-----------------
  gisticPlot(gistic = laml.gistic)

## ---- fig.align='left',fig.width=7, fig.height=5, eval=T-----------------
gr = plotGisticResults(gistic = laml.gistic)

## ------------------------------------------------------------------------
#We will run mutExclusive on top 10 mutated genes. 
laml.mut.excl = mutExclusive(maf = laml, top = 10)
head(laml.mut.excl)

## ---- fig.height=1.5,fig.width=8,fig.align='center'----------------------
oncostrip(maf = laml, genes = c('NPM1', 'RUNX1'), sort = TRUE, removeNonMutated = TRUE)

## ---- fig.align='default', fig.width=7,fig.height=5, message=F,results='hide', eval=T----
laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change', minMut = 5, pvalMethod = 'zscore')
head(laml.sig)

## ---- fig.align='default', fig.width=7,fig.height=5, eval= T-------------
plotOncodrive(res = laml.sig, fdrCutOff = 0.1, useFraction = TRUE)

## ---- fig.align='left',fig.width=7, fig.height=5-------------------------
laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10)
#Protein summary (Printing first 7 columns for display convenience)
laml.pfam$proteinSummary[,1:7, with = FALSE]
#Domain summary (Printing first 3 columns for display convenience)
laml.pfam$domainSummary[,1:3, with = FALSE]

## ------------------------------------------------------------------------
#MutsigCV results for TCGA-AML
laml.mutsig <- system.file("extdata", "LAML_sig_genes.txt.gz", package = "maftools")
laml.pancan = pancanComparision(mutsigResults = laml.mutsig, qval = 0.1, cohortName = 'LAML', inputSampleSize = 200, label = 1, normSampleSize = TRUE)

## ------------------------------------------------------------------------
laml.surv <- system.file("extdata", "laml_survival.tsv", package = "maftools")
laml.surv = read.delim(file = laml.surv, header = TRUE, stringsAsFactors = FALSE)
head(laml.surv)

#Survival analysis based on grouping of DNMT3A mutation status
laml.dnmt3a.surv = mafSurvival(maf = laml, clinicalData = laml.surv, genes = 'DNMT3A', time = 'days_to_last_followup', Status = 'Overall_Survival_Status', showConfInt = TRUE)

## ----results='hide', message=FALSE---------------------------------------
#Primary APL MAF
primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools")
primary.apl = read.maf(maf = primary.apl)
#Relapse APL MAF
relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools")
relapse.apl = read.maf(maf = relapse.apl)

## ------------------------------------------------------------------------
#We will consider only genes which are mutated in at-least in 5 samples in one of the cohort, to avoid bias due to single mutated genes.
pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', minMut = 5)
print(pt.vs.rt)

## ---- fig.width=6, fig.height=5, fig.align='center'----------------------
apl.pt.vs.rt.fp = forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.05, show = 'stat', color = c('royalblue', 'maroon'))

## ---- fig.height=3,fig.width=11, eval=T, fig.align='left'----------------
genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3")
coOncoplot(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', genes = genes, removeNonMutated = TRUE)

## ---- echo = TRUE, fig.align='center', fig.height=5, fig.width=7, eval=T----
#We will run this for sample TCGA.AB.2972
tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA.AB.2972', vafCol = 'i_TumorVAF_WU')
print(tcga.ab.2972.het$clusterMeans)
#Visualizing results
plotClusters(clusters = tcga.ab.2972.het)

## ---- fig.align='center', fig.height=5, fig.width=7, eval=T--------------
seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools')
tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA.AB.3009', segFile = seg, vafCol = 'i_TumorVAF_WU')
#Visualizing results. Highlighting those variants on copynumber altered variants.
plotClusters(clusters = tcga.ab.3009.het, genes = 'CN_altered', showCNvars = TRUE)

## ---- results='hide', message=F, fig.align='center',fig.width=7, fig.height=6, eval = T----
#we will specify for random 4 patients.
laml.math = math.score(maf = laml, vafCol = 'i_TumorVAF_WU', 
                       sampleName = c('TCGA.AB.3009', 'TCGA.AB.2849', 'TCGA.AB.3002', 'TCGA.AB.2972'))

## ---- eval=T-------------------------------------------------------------
print(laml.math)

## ---- eval=FALSE---------------------------------------------------------
#  #First we extract adjacent bases to the mutated locus and clssify them into 96 substitution classes. This also estimates APOBEC enrichment per sample.
#  laml.tnm = trinucleotideMatrix(maf = laml, ref_genome = '/path/to/hg19.fa',
#                                 prefix = 'chr', add = TRUE, ignoreChr = 'chr23', useSyn = TRUE)
#  # reading /path/to/hg19.fa (this might take few minutes)..
#  # Extracting 5' and 3' adjacent bases..
#  # Extracting +/- 20bp around mutated bases for background estimation..
#  # Estimating APOBEC enrichment scores..
#  # Performing one-way Fisher's test for APOBEC enrichment..
#  # APOBEC related mutations are enriched in 2.674% of samples (APOBEC enrichment score > 2 ; 5 of 187 samples)
#  # Creating mutation matrix..
#  # matrix of dimension 193x96

## ---- eval=FALSE---------------------------------------------------------
#  plotSignatures(laml.tnm)

## ---- echo=FALSE---------------------------------------------------------
sub.tbl = structure(list(n_mutations = c(3, 8, 4, 6, 6, 10, 4, 11, 7, 11, 
12, 8, 10, 11, 10, 9, 10, 9, 18, 11, 20, 8, 10, 10, 15, 9, 12, 
13, 17, 9, 15, 14, 10, 13, 9, 14, 10, 11, 13, 14, 19, 18, 14, 
13, 16, 15, 16, 15, 16, 22, 17, 19, 14, 18, 14, 20, 14, 25, 19, 
23, 17, 26, 14, 5, 14, 13, 9, 4, 8, 7, 10, 10, 14, 3, 1, 4, 12, 
10, 9, 2, 10, 8, 1, 2, 2, 20, 4, 2, 11, 3, 11, 24, 3, 6, 8, 11, 
2, 14, 12, 10, 10, 17, 15, 1, 13, 10, 12, 10, 1, 15, 2, 1, 9, 
4, 11, 6, 4, 7, 1, 5, 15, 1, 17, 17, 16, 2, 13, 16, 11, 14, 1, 
12, 20, 9, 14, 9, 5, 10, 5, 1, 8, 9, 8, 14, 2, 11, 2, 1, 13, 
1, 4, 2, 7, 15, 7, 6, 9, 21, 4, 7, 2, 14, 6, 5, 4, 2, 14, 10, 
5, 9, 6, 13, 9, 8, 8, 7, 6, 8, 11, 3, 10, 26, 17, 17, 35, 5, 
1), APOBEC_Enriched = c("yes", "yes", "yes", "no", "no", "yes", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
NA, "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", NA, 
"no", "no", "no", "no", "no", "no", NA, NA, "no", "no", "no", 
"no", "no", "no", NA, "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", NA, "no", "no", "no", "no", "no", "no", 
"no", "no", NA, "no", "no", "no", "no", "no", "no", "no", NA, 
"no", NA, "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", NA, "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no"), fraction_APOBEC_mutations = c(0.333, 
0.125, 0.25, 0.167, 0.167, 0.2, 0.25, 0.182, 0.143, 0.182, 0.167, 
0.125, 0.2, 0.091, 0.1, 0.111, 0.1, 0.111, 0.111, 0.091, 0.2, 
0.125, 0.1, 0.1, 0.067, 0.111, 0.083, 0.077, 0.118, 0.111, 0.067, 
0.071, 0.1, 0.077, 0.111, 0.071, 0.1, 0.091, 0.077, 0.071, 0.105, 
0.056, 0.071, 0.077, 0.125, 0.067, 0.062, 0.067, 0.062, 0.091, 
0.118, 0.105, 0.071, 0.056, 0.071, 0.1, 0.071, 0.04, 0.053, 0.043, 
0.059, 0.038, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0)), .Names = c("n_mutations", "APOBEC_Enriched", "fraction_APOBEC_mutations"
), row.names = c(NA, -187L), class = c("data.table", "data.frame"
))

data.table::setDT(sub.tbl)

sub.tbl$APOBEC_Enriched = factor(sub.tbl$APOBEC_Enriched, levels = c('yes', 'no')) #Set levels
    yp = pretty(x = c(1: max(sub.tbl[,n_mutations], na.rm = TRUE)))
    yp[length(yp)] = max(sub.tbl[,n_mutations], na.rm = TRUE)
    yp[1] = 1

    pieDat = sub.tbl[!is.na(APOBEC_Enriched), mean(fraction_APOBEC_mutations), APOBEC_Enriched]
    pieDat[,nonApobec := 1 - V1]
    colnames(pieDat)[2] = 'Apobec'
    pieDat = data.table::melt(pieDat, id.vars = 'APOBEC_Enriched', drop = FALSE)
    pieDat[,title := paste0(variable, ' [', round(value, digits = 3), ']')]
    pieDat$title = gsub(pattern = '^Apobec', replacement = 'tCw', x = pieDat$title)
    pieDat$title = gsub(pattern = '^nonApobec', replacement = 'non-tCw', x = pieDat$title)

    layout(matrix(c(1,2,1,3), 2, 2, byrow = TRUE), widths=c(2, 3))
    par(bty="n", mgp = c(0.5,0.5,0), las=1, tcl=-.25, font.main=4, xpd=NA)

    pieCol  = c("#084594", "#9ECAE1")

    boxplot(n_mutations ~ APOBEC_Enriched, data = sub.tbl,  xaxt="n", boxwex=0.6, outline = TRUE, lty=1,
            outwex=0, staplewex=0, frame.plot = FALSE, col = c('maroon', 'royalblue'), yaxt = 'n',
            ylim = c(min(yp), max(yp)),
            outcol="gray70", outcex = 0.8, outpch  = 16)
    title(main = 'Mutation load between APOBEC enriched \n and non-APOBEC enriched samples', cex.main=0.9)

    axis(side = 1, at = c(1, 2), labels = na.omit(sub.tbl[,.N,APOBEC_Enriched])[,paste0('N=', N)], las = 1, tick = FALSE)
    axis(side = 2, at = yp, lwd = 1.8, las = 1)

    pie(x = pieDat[APOBEC_Enriched %in% 'yes', value], col = pieCol,
        border="white", radius = 0.95, cex.main=0.6, labels =  pieDat[APOBEC_Enriched %in% 'yes', title], clockwise = TRUE)
    symbols(0,0,circles=.4, inches=FALSE, col="white", bg="white", lty=0, add=TRUE)
    title(main = 'Average tCw mutations in \n APOBEC enriched samples', cex.main=0.9)

    pie(x = pieDat[APOBEC_Enriched %in% 'no', value], col = pieCol,
        border="white",  radius = 0.95, cex.main=1.33, labels =  pieDat[APOBEC_Enriched %in% 'no', title], clockwise = TRUE)
    symbols(0,0,circles=.4, inches=FALSE, col="white", bg="white", lty=0, add=TRUE)
    title(main = 'Average tCw mutations in \n non-APOBEC enriched samples', cex.main = 0.9)
    

## ---- fig.height=5, fig.width=5, eval=F, message=FALSE-------------------
#  #Run main function with maximum 6 signatures.
#  require('NMF')
#  laml.sign = extractSignatures(mat = laml.tnm, nTry = 6, plotBestFitRes = FALSE)
#  # Warning : Found zero mutations for conversions A[T>G]C
#  # Comparing against experimentally validated 30 signatures.. (See http://cancer.sanger.ac.uk/cosmic/signatures for details.)
#  # Found Signature_1 most similar to validated Signature_1. CoSine-Similarity: 0.778739082321156
#  # Found Signature_2 most similar to validated Signature_1. CoSine-Similarity: 0.782748375695661

## ---- echo=F-------------------------------------------------------------
laml.sign = structure(list(signatures = structure(c(1.08748973712201e-18, 
0.00839574301030403, 1.08748973712201e-18, 1.08748973712201e-18, 
0.00240385240062567, 0.0077139799108362, 1.08748973712201e-18, 
1.08748973712201e-18, 0.0162307404236774, 1.08748973712201e-18, 
1.08748973712201e-18, 0.00375682293486133, 0.0149882188409168, 
0.0166193873319139, 1.08748973712201e-18, 0.0340641337293563, 
0.0177133495392653, 1.08748973712201e-18, 1.08748973712201e-18, 
0.010900522793394, 1.08748973712201e-18, 1.08748973712201e-18, 
0.0204384802376138, 1.08748973712201e-18, 0.016350784190091, 
0.00817539209504552, 0.00817539209504552, 0.00510268819463668, 
4.64672034428952e-06, 1.08748973712201e-18, 0.00226222985404151, 
0.0136256534917425, 0.0305962431704599, 1.08748973712201e-18, 
0.0644710774331736, 0.00640610796992356, 0.0940170090930234, 
0.0580805923991668, 0.0849820094561238, 1.08748973712201e-18, 
0.0397128561572591, 0.0074963059243485, 0.0626642796173211, 0.0351236523951451, 
0.0201728180626018, 0.014196017550947, 0.0643882909888946, 1.08748973712201e-18, 
1.08748973712201e-18, 0.00455330562748865, 0.00408769604752276, 
1.08748973712201e-18, 0.00272513069834851, 0.00650167581744194, 
0.016350784190091, 1.08748973712201e-18, 1.08748973712201e-18, 
0.0122630881425683, 1.08748973712201e-18, 0.00334016875229922, 
0.00136256534917425, 1.08748973712201e-18, 0.00545026139669701, 
0.00545026139669701, 4.42324620317284e-15, 1.08748973712201e-18, 
0.00737413388604955, 0.0269789492628818, 0.00988463958213221, 
0.0100520699314415, 1.08748973712201e-18, 0.00316631020385699, 
1.08748973712201e-18, 0.010900522793394, 1.08748973712201e-18, 
0.010900522793394, 0.00272513069834851, 1.08748973712201e-18, 
0.00394072268462079, 0.00852966039575841, 7.59566702198884e-17, 
0.0115340943434354, 1.08748973712201e-18, 0.00237636221356833, 
1.08748973712201e-18, 1.08748973712201e-18, 0.0136256534917425, 
0.00181567829546557, 0.00408769604752276, 1.08748973712201e-18, 
1.08748973712201e-18, 0.00545026139669701, 0.00408769604752276, 
0.00499540259887787, 0.00272513069834851, 0.00353514720450814, 
0.0134931114940269, 0.0108267734159331, 0.00758987521539011, 
0.00674655574701343, 0.0094753598393619, 0.0070321331529894, 
0.0109631530888968, 0.0109631530888968, 0.00344757540744538, 
0.00674655574701343, 0.00505991681026007, 0.0128545761394095, 
9.04499029878464e-19, 0.00742363125585919, 0.0101198336205201, 
9.04499029878464e-19, 9.04499029878464e-19, 0.00505991681026007, 
0.00505991681026007, 9.04499029878464e-19, 0.0118064725572735, 
0.00421659734188339, 9.04499029878464e-19, 0.00758987521539011, 
9.04499029878464e-19, 9.04499029878464e-19, 9.04499029878464e-19, 
0.00190175907624695, 0.00421372139194615, 0.00590323627863675, 
0.00703305451506787, 9.04499029878464e-19, 0.0131095012433463, 
0.0480692096974707, 0.0857521368222207, 0.0171181159082965, 9.04499029878464e-19, 
0.0526012816612792, 0.103417003771369, 0.108788211420591, 0.00746704363785783, 
0.0358397179449789, 0.051450983147391, 0.0119940343267555, 0.020404090976265, 
0.0173566988936855, 0.0225544149118068, 0.0312028203299371, 0.00337327787350671, 
0.00645838048724634, 9.04499029878464e-19, 0.00252995840513004, 
9.04499029878464e-19, 0.00187921658868539, 9.04499029878464e-19, 
0.00590323627863675, 0.00337327787350671, 9.04499029878464e-19, 
0.00421659734188339, 0.002149298816947, 9.04499029878464e-19, 
0.00168663893675336, 9.04499029878464e-19, 9.04499029878464e-19, 
0.0227696256461676, 0.0118064725572735, 0.0123023875952219, 0.00691512349188281, 
0.00147207029502386, 0.00642836104928948, 0.0160230698991569, 
0.0132200565053105, 0.00758987521539011, 9.04499029878464e-19, 
0.0109631530888968, 9.04499029878464e-19, 9.04499029878464e-19, 
0.00337327787350671, 0.00430756215200655, 0.0115872086847547, 
0.00337327787350667, 0.0086313879649405, 0.00168663893675336, 
0.00105917939270956, 0.000843319468376679, 0.00758987521539011, 
9.04499029878464e-19, 0.00309283703504524, 9.04499029878464e-19, 
0.00252995840513004, 0.00421659734188339, 9.04499029878464e-19, 
9.04499029878464e-19, 0.00112484084993519, 9.04499029878464e-19, 
0.00287194214691947), .Dim = c(96L, 2L), .Dimnames = list(c("A[C>A]A", 
"A[C>A]C", "A[C>A]G", "A[C>A]T", "C[C>A]A", "C[C>A]C", "C[C>A]G", 
"C[C>A]T", "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "T[C>A]A", 
"T[C>A]C", "T[C>A]G", "T[C>A]T", "A[C>G]A", "A[C>G]C", "A[C>G]G", 
"A[C>G]T", "C[C>G]A", "C[C>G]C", "C[C>G]G", "C[C>G]T", "G[C>G]A", 
"G[C>G]C", "G[C>G]G", "G[C>G]T", "T[C>G]A", "T[C>G]C", "T[C>G]G", 
"T[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T", "C[C>T]A", 
"C[C>T]C", "C[C>T]G", "C[C>T]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", 
"G[C>T]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T", "A[T>A]A", 
"A[T>A]C", "A[T>A]G", "A[T>A]T", "C[T>A]A", "C[T>A]C", "C[T>A]G", 
"C[T>A]T", "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "T[T>A]A", 
"T[T>A]C", "T[T>A]G", "T[T>A]T", "A[T>C]A", "A[T>C]C", "A[T>C]G", 
"A[T>C]T", "C[T>C]A", "C[T>C]C", "C[T>C]G", "C[T>C]T", "G[T>C]A", 
"G[T>C]C", "G[T>C]G", "G[T>C]T", "T[T>C]A", "T[T>C]C", "T[T>C]G", 
"T[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T", "C[T>G]A", 
"C[T>G]C", "C[T>G]G", "C[T>G]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", 
"G[T>G]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T"), c("Signature_1", 
"Signature_2"))), coSineSimMat = structure(c(0.76789895507522, 
0.757733629596811, 0.171803248681684, 0.199391522195904, 0.407671912943102, 
0.372979035914154, 0.344078922420868, 0.319857408370786, 0.573357292983596, 
0.562412460243176, 0.685700701802704, 0.686217358302521, 0.377725890462418, 
0.386689478887272, 0.382312659188403, 0.407516946456442, 0.339149804914427, 
0.305305965796845, 0.386629499233586, 0.15685755480318, 0.350678506033931, 
0.562433289508901, 0.268840367435164, 0.322933955777266, 0.108666524311962, 
0.0628339785033974, 0.49126593617209, 0.527932757462746, 0.47172512923794, 
0.461711639647726, 0.362590079921887, 0.387794528034913, 0.154909499589746, 
0.154613800740969, 0.303423806321064, 0.204479833568232, 0.570031076792535, 
0.740784602225925, 0.445644443725404, 0.510768207280784, 0.248807908838572, 
0.28784224944225, 0.140154287925718, 0.0725523826114571, 0.407829024906403, 
0.610157444568381, 0.256945337078229, 0.227615984891259, 0.43572741633734, 
0.391864627867027, 0.296855754287958, 0.345602091793204, 0.105681572106723, 
0.0918011629446175, 0.0955192240249301, 0.0892005087879189, 0.35734783741945, 
0.351111836488432, 0.570462074592721, 0.532907409369077), .Dim = c(2L, 
30L), .Dimnames = list(c("Signature_1", "Signature_2"), c("Signature_1", 
"Signature_2", "Signature_3", "Signature_4", "Signature_5", "Signature_6", 
"Signature_7", "Signature_8", "Signature_9", "Signature_10", 
"Signature_11", "Signature_12", "Signature_13", "Signature_14", 
"Signature_15", "Signature_16", "Signature_17", "Signature_18", 
"Signature_19", "Signature_20", "Signature_21", "Signature_22", 
"Signature_23", "Signature_24", "Signature_25", "Signature_26", 
"Signature_27", "Signature_28", "Signature_29", "Signature_30"
))), nmfObj = NULL), .Names = c("signatures", "coSineSimMat", "nmfObj"))

## ---- fig.width=7, fig.height=5, fig.align='center', eval = T------------
plotSignatures(laml.sign)

## ------------------------------------------------------------------------
require('corrplot')
corrplot::corrplot(corr = laml.sign$coSineSimMat, col = RColorBrewer::brewer.pal(n = 9, name = 'Oranges'), is.corr = FALSE, tl.cex = 0.6, tl.col = 'black', cl.cex = 0.6)

## ------------------------------------------------------------------------
var.file = system.file('extdata', 'variants.tsv', package = 'maftools')
#This is what input looks like
var = read.delim(var.file, sep = '\t')
head(var)

## ---- results='hide', eval=F, message=F----------------------------------
#  #Annotate
#  var.maf = oncotate(maflite = var.file, header = TRUE)

## ---- eval = F-----------------------------------------------------------
#  #Results from oncotate. First 20 columns.
#  var.maf[1:10, 1:20, with = FALSE]

## ---- eval=T-------------------------------------------------------------
var.annovar = system.file("extdata", "variants.hg19_multianno.txt", package = "maftools")
var.annovar.maf = annovarToMaf(annovar = var.annovar, Center = 'CSI-NUS', refBuild = 'hg19', 
                               tsbCol = 'Tumor_Sample_Barcode', table = 'ensGene')


## ------------------------------------------------------------------------
#Read sample ICGC data for ESCA
esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools")
esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, addHugoSymbol = TRUE)
#Printing first 16 columns for display convenience.
print(esca.maf[1:5,1:16, with = FALSE])

## ---- eval=FALSE---------------------------------------------------------
#  laml.mutsig.corrected = prepareMutSig(maf = laml)
#  # Converting gene names for 1 variants from 1 genes
#  #    Hugo_Symbol MutSig_Synonym N
#  # 1:    ARHGAP35          GRLF1 1
#  # Original symbols are preserved under column OG_Hugo_Symbol.

## ------------------------------------------------------------------------
##Extract data for samples 'TCGA.AB.3009' and 'TCGA.AB.2933'  (Printing just 5 rows for display convenience)
subsetMaf(maf = laml, tsb = c('TCGA.AB.3009', 'TCGA.AB.2933'))[1:5]
##Same as above but return output as an MAF object
subsetMaf(maf = laml, tsb = c('TCGA.AB.3009', 'TCGA.AB.2933'), mafObj = TRUE)

## ------------------------------------------------------------------------
##Select all Splice_Site mutations from DNMT3A and NPM1
subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), query = "Variant_Classification == 'Splice_Site'")

##Same as above but include only 'i_transcript_name' column in the output
subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), query = "Variant_Classification == 'Splice_Site'", fields = 'i_transcript_name')

## ------------------------------------------------------------------------
sessionInfo()

