## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"---------------------------------
BiocStyle::latex(use.unsrturl=FALSE)

## ----setup, include=FALSE, cache=FALSE-------------------------------------------------------
library(knitr)
# set global chunk options for knitr
opts_chunk$set(comment=NA, warning=FALSE, message=FALSE, fig.path='figure/systemPipeR-')
options(formatR.arrow=TRUE, width=95)
unlink("test.db")

## ----eval=TRUE-------------------------------------------------------------------------------
library(systemPipeR)

## ----eval=FALSE------------------------------------------------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow="varseq")
#  setwd("varseq")

## ----eval=FALSE------------------------------------------------------------------------------
#  source("systemPipeVARseq_Fct.R")

## ----eval=TRUE-------------------------------------------------------------------------------
targetspath <- system.file("extdata", "targetsPE.txt", package="systemPipeR")
targets <- read.delim(targetspath, comment.char = "#")[,1:5]
targets

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="param/trimPE.param", mytargets="targetsPE.txt")[1:4] # Note: subsetting!
#  filterFct <- function(fq, cutoff=20, Nexceptions=0) {
#      qcount <- rowSums(as(quality(fq), "matrix") <= cutoff)
#      fq[qcount <= Nexceptions] # Retains reads where Phred scores are >= cutoff with N exceptions
#  }
#  preprocessReads(args=args, Fct="filterFct(fq, cutoff=20, Nexceptions=0)", batchsize=100000)
#  writeTargetsout(x=args, file="targets_PEtrim.txt", overwrite=TRUE)

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="bwa.param", mytargets="targets.txt")
#  fqlist <- seeFastq(fastq=infile1(args), batchsize=100000, klength=8)
#  pdf("./results/fastqReport.pdf", height=18, width=4*length(fqlist))
#  seeFastqPlot(fqlist)
#  dev.off()

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="bwa.param", mytargets="targets.txt")
#  sysargs(args)[1] # Command-line parameters for first FASTQ file

## ----eval=FALSE------------------------------------------------------------------------------
#  bampaths <- runCommandline(args=args)

## ----eval=FALSE------------------------------------------------------------------------------
#  moduleload(modules(args))
#  system("bwa index -a bwtsw ./data/tair10.fasta")
#  resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", cores(args)), memory="10gb")
#  reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01",
#                    resourceList=resources)
#  waitForJobs(reg)

## ----eval=FALSE------------------------------------------------------------------------------
#  file.exists(outpaths(args))

## ----eval=FALSE------------------------------------------------------------------------------
#  library(gmapR); library(BiocParallel); library(BatchJobs)
#  gmapGenome <- GmapGenome(reference(args), directory="data", name="gmap_tair10chr", create=TRUE)
#  args <- systemArgs(sysma="gsnap.param", mytargets="targetsPE.txt")
#  f <- function(x) {
#      library(gmapR); library(systemPipeR)
#      args <- systemArgs(sysma="gsnap.param", mytargets="targetsPE.txt")
#      gmapGenome <- GmapGenome(reference(args), directory="data", name="gmap_tair10chr", create=FALSE)
#      p <- GsnapParam(genome=gmapGenome, unique_only=TRUE, molecule="DNA", max_mismatches=3)
#      o <- gsnap(input_a=infile1(args)[x], input_b=infile2(args)[x], params=p, output=outfile1(args)[x])
#  }
#  funs <- makeClusterFunctionsTorque("torque.tmpl")
#  param <- BatchJobsParam(length(args), resources=list(walltime="20:00:00", nodes="1:ppn=1", memory="6gb"), cluster.functions=funs)
#  register(param)
#  d <- bplapply(seq(along=args), f)
#  writeTargetsout(x=args, file="targets_gsnap_bam.txt")

## ----eval=FALSE------------------------------------------------------------------------------
#  read_statsDF <- alignStats(args=args)
#  write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t")

## ----eval=FALSE------------------------------------------------------------------------------
#  symLink2bam(sysargs=args, htmldir=c("~/.html/", "somedir/"),
#              urlbase="http://biocluster.ucr.edu/~tgirke/",
#  	    urlfile="./results/IGVurl.txt")

## ----eval=FALSE------------------------------------------------------------------------------
#  writeTargetsout(x=args, file="targets_bam.txt")
#  system("java -jar CreateSequenceDictionary.jar R=./data/tair10.fasta O=./data/tair10.dict")
#  # system("java -jar /opt/picard/1.81/CreateSequenceDictionary.jar R=./data/tair10.fasta O=./data/tair10.dict")
#  args <- systemArgs(sysma="gatk.param", mytargets="targets_bam.txt")
#  resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", 1), memory="10gb")
#  reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01",
#                    resourceList=resources)
#  waitForJobs(reg)
#  writeTargetsout(x=args, file="targets_gatk.txt")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="sambcf.param", mytargets="targets_bam.txt")
#  resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", 1), memory="10gb")
#  reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01",
#                    resourceList=resources)
#  waitForJobs(reg)
#  writeTargetsout(x=args, file="targets_sambcf.txt")

## ----eval=FALSE------------------------------------------------------------------------------
#  library(gmapR); library(BiocParallel); library(BatchJobs)
#  args <- systemArgs(sysma="vartools.param", mytargets="targets_gsnap_bam.txt")
#  f <- function(x) {
#      library(VariantTools); library(gmapR); library(systemPipeR)
#      args <- systemArgs(sysma="vartools.param", mytargets="targets_gsnap_bam.txt")
#      gmapGenome <- GmapGenome(systemPipeR::reference(args), directory="data", name="gmap_tair10chr", create=FALSE)
#      tally.param <- TallyVariantsParam(gmapGenome, high_base_quality = 23L, indels = TRUE)
#      bfl <- BamFileList(infile1(args)[x], index=character())
#      var <- callVariants(bfl[[1]], tally.param)
#      sampleNames(var) <- names(bfl)
#      writeVcf(asVCF(var), outfile1(args)[x], index = TRUE)
#  }
#  funs <- makeClusterFunctionsTorque("torque.tmpl")
#  param <- BatchJobsParam(length(args), resources=list(walltime="20:00:00", nodes="1:ppn=1", memory="6gb"), cluster.functions=funs)
#  register(param)
#  d <- bplapply(seq(along=args), f)
#  writeTargetsout(x=args, file="targets_vartools.txt")

## ----eval=FALSE------------------------------------------------------------------------------
#  library(VariantAnnotation)
#  args <- systemArgs(sysma="filter_gatk.param", mytargets="targets_gatk.txt")
#  filter <- "totalDepth(vr) >= 2 & (altDepth(vr) / totalDepth(vr) >= 0.8) & rowSums(softFilterMatrix(vr))>=1"
#  # filter <- "totalDepth(vr) >= 20 & (altDepth(vr) / totalDepth(vr) >= 0.8) & rowSums(softFilterMatrix(vr))==6"
#  filterVars(args, filter, varcaller="gatk", organism="A. thaliana")
#  writeTargetsout(x=args, file="targets_gatk_filtered.txt")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="filter_sambcf.param", mytargets="targets_sambcf.txt")
#  filter <- "rowSums(vr) >= 2 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)"
#  # filter <- "rowSums(vr) >= 20 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)"
#  filterVars(args, filter, varcaller="bcftools", organism="A. thaliana")
#  writeTargetsout(x=args, file="targets_sambcf_filtered.txt")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="filter_vartools.param", mytargets="targets_vartools.txt")
#  filter <- "(values(vr)$n.read.pos.ref + values(vr)$n.read.pos) >= 2 & (values(vr)$n.read.pos / (values(vr)$n.read.pos.ref + values(vr)$n.read.pos) >= 0.8)"
#  # filter <- "(values(vr)$n.read.pos.ref + values(vr)$n.read.pos) >= 20 & (values(vr)$n.read.pos / (values(vr)$n.read.pos.ref + values(vr)$n.read.pos) >= 0.8)"
#  filterVars(args, filter, varcaller="vartools", organism="A. thaliana")
#  writeTargetsout(x=args, file="targets_vartools_filtered.txt")

## ----eval=FALSE------------------------------------------------------------------------------
#  library("GenomicFeatures")
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
#  txdb <- loadDb("./data/tair10.sqlite")
#  fa <- FaFile(systemPipeR::reference(args))
#  variantReport(args=args, txdb=txdb, fa=fa, organism="A. thaliana")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt")
#  txdb <- loadDb("./data/tair10.sqlite")
#  fa <- FaFile(systemPipeR::reference(args))
#  variantReport(args=args, txdb=txdb, fa=fa, organism="A. thaliana")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_vartools_filtered.txt")
#  txdb <- loadDb("./data/tair10.sqlite")
#  fa <- FaFile(systemPipeR::reference(args))
#  variantReport(args=args, txdb=txdb, fa=fa, organism="A. thaliana")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
#  combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous"))
#  write.table(combineDF, "./results/combineDF_nonsyn_gatk.xls", quote=FALSE, row.names=FALSE, sep="\t")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt")
#  combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous"))
#  write.table(combineDF, "./results/combineDF_nonsyn_sambcf.xls", quote=FALSE, row.names=FALSE, sep="\t")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_vartools_filtered.txt")
#  combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous"))
#  write.table(combineDF, "./results/combineDF_nonsyn_vartools.xls", quote=FALSE, row.names=FALSE, sep="\t")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
#  write.table(varSummary(args), "./results/variantStats_gatk.xls", quote=FALSE, col.names = NA, sep="\t")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt")
#  write.table(varSummary(args), "./results/variantStats_sambcf.xls", quote=FALSE, col.names = NA, sep="\t")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_vartools_filtered.txt")
#  write.table(varSummary(args), "./results/variantStats_vartools.xls", quote=FALSE, col.names = NA, sep="\t")

## ----eval=FALSE------------------------------------------------------------------------------
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
#  varlist <- sapply(names(outpaths(args))[1:4], function(x) as.character(read.delim(outpaths(args)[x])$VARID))
#  vennset_gatk <- overLapper(varlist, type="vennsets")
#  args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt")
#  varlist <- sapply(names(outpaths(args))[1:4], function(x) as.character(read.delim(outpaths(args)[x])$VARID))
#  vennset_bcf <- overLapper(varlist, type="vennsets")
#  pdf("./results/vennplot_var.pdf")
#  vennPlot(list(vennset_gatk, vennset_bcf), mymain="", mysub="GATK: red; BCFtools: blue", colmode=2, ccol=c("blue", "red"))
#  dev.off()

## ----sessionInfo, results='asis'-------------------------------------------------------------
toLatex(sessionInfo())

