## ---- echo=FALSE, results="hide", warning=FALSE--------------------------
suppressPackageStartupMessages({
  library(NADfinder)
  library(BSgenome.Mmusculus.UCSC.mm10)
  library(rtracklayer)
})
knitr::opts_chunk$set(warning=FALSE, message=FALSE, eval=TRUE)

## ----quickStart, fig.width=6, fig.height=4-------------------------------
## load the library
library(NADfinder)
library(SummarizedExperiment)
## bam file path
path <- "path/to/your/bam/files"
f <- c(genome="genome.bam", nucleosome="nucleosome.bam")
## window size for tile of genome. Here we set it to a big window size,
## ie., 50k because of the following two reasons:
## 1. peaks of NADs are wide;
## 2. data will be smoothed better with bigger window size.
ws <- 50000
## step for tile of genome
step <- 1000
## Set the background. 
## 0.25 means 25% of the lower ratios will be used for background training.
backgroundPercentage <- 0.25
## Count the reads for each window with a given step in the genome.
## The output will be a GRanges object.
library(BSgenome.Mmusculus.UCSC.mm10)

## ----eval=FALSE----------------------------------------------------------
#  se <- tileCount(reads=file.path(path, f),
#                  genome=Mmusculus,
#                  windowSize=ws,
#                  step=step)

## ------------------------------------------------------------------------
data(single.count)
se <- single.count

## ------------------------------------------------------------------------
## Calculate ratios for peak calling. We use nucleosome vs genomic DNA.
dat <- log2se(se, nucleosomeCols = "nucleosome.bam", genomeCols="genome.bam")
## Smooth the ratios for each chromosome.
## We found that for each chromosome, the ratios are higher in 5'end than 3'end.
datList <- smoothRatiosByChromosome(dat, N=100)
## check the difference between the cumulative percentage tag allocation 
## in genome and nucleosome samples.
cumulativePercentage(datList[["chr18"]])

## ------------------------------------------------------------------------
## check the results of smooth function
chr18 <- datList[["chr18"]] ## we only have reads in chr18 in test data.
chr18subset <- subset(chr18, seq.int(floor(length(chr18)/10))*10)
chr18 <- assays(chr18subset)
ylim <- range(c(chr18$ratio[, 1], 
                chr18$bcRatio[, 1], 
                chr18$smoothedRatio[, 1]))
plot(chr18$ratio[, 1], 
     ylim=ylim*c(.9, 1.1), 
     type="l", main="chr18")
abline(h=0, col="cyan", lty=2)
points(chr18$bcRatio[, 1], type="l", col="green")
points(chr18$smoothedRatio[, 1], type="l", col="red")
legend("topright", 
       legend = c("raw_ratio", "background_corrected", "smoothed"), 
       fill = c("black", "green", "red"))
## call peaks for each chromosome
peaks <- lapply(datList, trimPeaks, 
                backgroundPercentage=backgroundPercentage, 
                cutoffPvalue=0.05, countFilter=1000)
## plot the peaks in "chr18"
peaks.subset <- countOverlaps(rowRanges(chr18subset), peaks$chr18)>=1
peaks.run <- rle(peaks.subset)
run <- cumsum(peaks.run$lengths)
run <- data.frame(x0=c(1, run[-length(run)]), x1=run)
run <- run[peaks.run$values, , drop=FALSE]
rect(xleft = run$x0, ybottom = ylim[2]*.75, 
     xright = run$x1, ytop = ylim[2]*.8,
     col = "black")
## convert list to a GRanges object
peaks.gr <- unlist(GRangesList(peaks))

## ------------------------------------------------------------------------
## output the peaks
write.csv(as.data.frame(unname(peaks.gr)), "peaklist.csv", row.names=FALSE)
## export peaks to a bed file.
library(rtracklayer)
export(peaks.gr, "peaklist.bed", "BED")

## ------------------------------------------------------------------------
library(NADfinder)
## bam file path
path <- "path/to/your/bam/files"
f <- c("G26.bam", "G28.bam", "G29.bam", "N26.bam", "N28.bam", "N29.bam")
ws <- 50000
step <- 1000
library(BSgenome.Mmusculus.UCSC.mm10)

## ----eval=FALSE----------------------------------------------------------
#  se <- tileCount(reads=file.path(path, f),
#                  genome=Mmusculus,
#                  windowSize=ws,
#                  step=step)

## ------------------------------------------------------------------------
data(triplicates.counts)
se <- triplicates.counts

## Calculate ratios for nucleoli vs genomic sample.
gps <- c("26", "28", "29")
se <- log2se(se, 
             nucleosomeCols = paste0("N", gps, ".bam"),
             genomeCols = paste0("G", gps, ".bam"))
getCorrelations(se, "chr18")
seList<- smoothRatiosByChromosome(se, chr="chr18")
cumulativePercentage(seList[["chr18"]])
peaks <- lapply(seList, callPeaks, 
                cutoffAdjPvalue=0.05, countFilter=1000)
peaks <- unlist(GRangesList(peaks))
peaks

## ----fig.height=1.5------------------------------------------------------
ideo <- readRDS(system.file("extdata", "ideo.mm10.rds", 
                            package = "NADfinder"))
plotSig(ideo=ideo, grList=GRangesList(peaks), mcolName="AveSig", 
        layout=list("chr18"), 
        parameterList=list(types="heatmap"))

## ----sessionInfo---------------------------------------------------------
sessionInfo()

