## ----echo=FALSE, results='hide', warning=FALSE---------------------------
library(dagLogo)

## ----setenv, eval=FALSE, echo=TRUE---------------------------------------
#  Sys.setenv(R_GSCMD=file.path("C:", "Program Files", "gs",
#                               "gs9.06", "bin", "gswin32c.exe"))

## ----fetchSequences------------------------------------------------------
library(dagLogo)
library(biomaRt)
try({##just in case biomaRt server does not response
    mart <- useMart("ensembl", "dmelanogaster_gene_ensembl")
    dat <- read.csv(system.file("extdata", "dagLogoTestData.csv", 
                                package="dagLogo"))
    dat <- dat[1:5,] ##subset to speed sample
    dat
    seq <- fetchSequence(as.character(dat$entrez_geneid), 
                         anchorPos=as.character(dat$NCBI_site), 
                         mart=mart, 
                         upstreamOffset=7, 
                         downstreamOffset=7)
    head(seq@peptides)
})

## ----fetchSequences2-----------------------------------------------------
try({
    seq <- fetchSequence(as.character(dat$entrez_geneid), 
                         anchorAA="*",
                         anchorPos=as.character(dat$peptide), 
                         mart=mart, 
                         upstreamOffset=7, 
                         downstreamOffset=7)
    head(seq@peptides)
})

## ----fetchSequences3-----------------------------------------------------
if(interactive()){
    dat <- read.csv(system.file("extdata", "peptides4dagLogo.csv",
                package="dagLogo"))

    tail(dat)
    mart <- useMart("ensembl", "hsapiens_gene_ensembl")
    seq <- fetchSequence(toupper(as.character(dat$symbol)), 
                         type="hgnc_symbol",
                         anchorAA="s",
                         anchorPos=as.character(dat$peptides), 
                         mart=mart, 
                         upstreamOffset=7, 
                         downstreamOffset=7)
    head(seq@peptides)
}

## ----formatSequence------------------------------------------------------
dat <- unlist(read.delim(system.file("extdata", "grB.txt", package="dagLogo"), 
                         header=F, as.is=TRUE))
head(dat)
##prepare proteome from a fasta file
proteome <- prepareProteome(fasta=system.file("extdata", 
                                              "HUMAN.fasta",
                                              package="dagLogo"))
##prepare object of dagPeptides
seq <- formatSequence(seq=dat, proteome=proteome, 
                      upstreamOffset=14, downstreamOffset=15)

## ----prepareProteome0----------------------------------------------------
if(interactive()){
    library(UniProt.ws)
    UniProt.ws <- UniProt.ws(taxId=9606)
    proteome <- prepareProteome(UniProt.ws=UniProt.ws)
}

## ----prepareProteome-----------------------------------------------------
bg <- buildBackgroundModel(seq, bg="wholeGenome", proteome=proteome)

## ----testDAU-------------------------------------------------------------
t0 <- testDAU(seq, bg)
t1 <- testDAU(seq, bg, group="classic")
t2 <- testDAU(seq, bg, group="charge")
t3 <- testDAU(seq, bg, group="chemistry")
t4 <- testDAU(seq, bg, group="hydrophobicity")

## ----dagHeatmap,fig.cap="DAG heatmap",fig.width=6,fig.height=6-----------
dagHeatmap(t0) ##Plot a heatmap to show the results

## ----dagLogo0,fig.cap="ungrouped results",fig.width=6,fig.height=4-------
dagLogo(t0) ##Plot a logo to show the ungrouped results

## ----dagLogo1,fig.cap="classic grouped",fig.width=6,fig.height=4---------
##Plot a logo to show the classic grouped results
dagLogo(t1, namehash=nameHash(t1@group), legend=TRUE)

## ----dagLogo2,fig.cap="charge grouped",fig.width=6,fig.height=4----------
##Plot a logo to show the charge grouped results
dagLogo(t2, namehash=nameHash(t2@group), legend=TRUE)

## ----dagLogo3,fig.cap="chemistry grouped",fig.width=6,fig.height=4-------
##Plot a logo to show the chemistry grouped results
dagLogo(t3, namehash=nameHash(t3@group), legend=TRUE)

## ----dagLogo4,fig.cap="hydrophobicity grouped",fig.width=6,fig.height=4----
##Plot a logo to show the hydrophobicity grouped results
dagLogo(t4, namehash=nameHash(t4@group), legend=TRUE)

## ----CAPmotif,fig.cap="Catobolite Activator Protein Motif",fig.width=6,fig.height=4----
library(motifStack)
protein<-read.table(file.path(find.package("motifStack"),"extdata","cap.txt"))
protein<-t(protein[,1:20])
motif<-pcm2pfm(protein)
motif<-new("pfm", mat=motif, name="CAP", 
            color=colorset(alphabet="AA",colorScheme="chemistry"))
##The DNA-binding helix-turn-helix motif of the CAP family ploted by motifStack
plot(motif)

## ----CAPdagLogo,fig.cap="Catobolite Activator Protein Motif",fig.width=6,fig.height=4----
library(Biostrings)
cap <- as.character(readAAStringSet(system.file("extdata", 
                                                "cap.fasta", 
                                                package="dagLogo")))
data(ecoli.proteome)
seq <- formatSequence(seq=cap, proteome=ecoli.proteome)
bg <- buildBackgroundModel(seq, bg="wholeGenome", 
                           proteome=ecoli.proteome, 
                           permutationSize=10L)
##The DNA-binding helix-turn-helix motif of the CAP family ploted by dagLogo
t0 <- testDAU(seq, bg)
dagLogo(t0)

## ----CAPgroup,fig.cap="Catobolite Activator Protein Motif",fig.width=6,fig.height=4----
## The DNA-binding helix-turn-helix motif of the CAP family grouped by chemistry
t1 <- testDAU(seq, bg, group="chemistry")
dagLogo(t1, namehash=nameHash(t1@group), legend=TRUE)

## ----sessionInfo---------------------------------------------------------
sessionInfo()

