## ----global_options, include=FALSE---------------------------------------
knitr::opts_chunk$set(warning = FALSE, message = FALSE, eval = FALSE)

## ----load_libraries, results='hide'--------------------------------------
#  library(RCAS)

## ----sample_data---------------------------------------------------------
#  library(RCAS)
#  data(queryRegions) #sample queryRegions in BED format()
#  data(gff)          #sample GFF file

## ----RCAS_import_data----------------------------------------------------
#  #library(RCAS)
#  #queryRegions <- importBed(filePath = <path to BED file>, sampleN = 10000)
#  #gff <- importGtf(filePath = <path to GTF file>)

## ----queryGFF------------------------------------------------------------
#  overlaps <- queryGff(queryRegions = queryRegions, gffData = gff)
#  #data.table is used to do quick summary operations
#  overlaps.dt <- data.table(as.data.frame(overlaps))

## ----query_gene_types----------------------------------------------------
#  biotype_col <- grep('gene_biotype', colnames(overlaps.dt), value = T)
#  df <- overlaps.dt[,length(unique(overlappingQuery)), by = biotype_col]
#  colnames(df) <- c("feature", "count")
#  df$percent <- round(df$count / length(queryRegions) * 100, 1)
#  df <- df[order(count, decreasing = TRUE)]
#  p <- plot_ly(data = df,
#               type = "bar",
#               x = df$feature,
#               y = df$percent,
#               text = paste("count:", df$count), color=df$feature)
#  layout(p = p,
#         margin = list(l=100, r=100, b=150),
#         xaxis = list(showticklabels = TRUE,  tickangle = 90),
#         yaxis = list(title = paste("percentage of query regions,",
#                                    "n =", length(queryRegions))))
#  

## ----getTxdbFeatures-----------------------------------------------------
#  txdbFeatures <- getTxdbFeaturesFromGRanges(gff)

## ----summarizeQueryRegions-----------------------------------------------
#  summary <- summarizeQueryRegions(queryRegions = queryRegions,
#                                   txdbFeatures = txdbFeatures)
#  
#  df <- data.frame(summary)
#  df$percent <- round((df$count / length(queryRegions)), 3) * 100
#  p <- plot_ly( data = df,
#                x = rownames(df),
#                y = df$percent,
#                type = 'bar',
#                text = paste("count:", df$count),
#                color = rownames(df)
#                )
#  layout(p = p,
#         xaxis = list(title = 'features'),
#         yaxis = list(title = paste("percentage of query regions,",
#                                    "n =", length(queryRegions)
#                                    )
#                      ),
#         margin = list(b = 150, r = 50)
#         )

## ----getTargetedGenesTable-----------------------------------------------
#  dt <- getTargetedGenesTable(queryRegions = queryRegions,
#                             txdbFeatures = txdbFeatures)
#  dt <- dt[order(transcripts, decreasing = TRUE)]
#  
#  DT::datatable(dt[1:100],
#            extensions = c('Buttons', 'FixedColumns'),
#            options = list(fixedColumns = TRUE,
#                           scrollX = TRUE,
#                           dom = 'Bfrtip',
#                           buttons = c('copy', 'print', 'csv','excel', 'pdf')),
#            filter = 'bottom'
#            )

## ----transcriptBoundaryCoverage------------------------------------------
#  cvgF <- getFeatureBoundaryCoverage(queryRegions = queryRegions, featureCoords = txdbFeatures$transcripts, flankSize = 1000, boundaryType = 'fiveprime', sampleN = 10000)
#  cvgT <- getFeatureBoundaryCoverage(queryRegions = queryRegions, featureCoords = txdbFeatures$transcripts, flankSize = 1000, boundaryType = 'threeprime', sampleN = 10000)
#  
#  plotFeatureBoundaryCoverage(cvgF = cvgF, cvgT = cvgT, featureName = 'transcripts')
#  

## ----coverageprofilelist-------------------------------------------------
#  cvgList <- calculateCoverageProfileList(queryRegions = queryRegions,
#                                         targetRegionsList = txdbFeatures,
#                                         sampleN = 10000)
#  
#  p <- plotly::plot_ly(data = cvgList, type = 'scatter', mode = 'lines')
#  for (f in unique(cvgList$feature)){
#    data <- cvgList[cvgList$feature == f,]
#    p <- add_trace(p = p, data = data, x = ~bins, y = ~meanCoverage,
#                   legendgroup = f, showlegend = FALSE, opacity = 1, color = f)
#    p <- add_ribbons(p = p, data = data, x = ~bins,
#                     ymin = data$meanCoverage - data$standardError*1.96,
#                     ymax = data$meanCoverage + data$standardError*1.96,
#                     legendgroup = f,
#                     name = f, color = f
#                     )
#  }
#  layout(p, font = list(size = 14))

## ----motif_analysis------------------------------------------------------
#  motifResults <- runMotifRG(queryRegions = queryRegions,
#                             genomeVersion = 'hg19',
#                             motifN = 2, nCores = 2)
#  
#  par(mfrow = c(1,2), mar = c(2,2,2,2))
#  for (i in 1:length(motifResults$motifs)) {
#    motifPattern <- motifResults$motifs[[i]]@pattern
#    motifRG::plotMotif(match = motifResults$motifs[[i]]@match$pattern,
#                       main = paste0('Motif-',i,': ',motifPattern),
#                       entropy = TRUE)
#  }

## ----motif_analysis_table------------------------------------------------
#  summary <- getMotifSummaryTable(motifResults)
#  DT::datatable(summary,
#            extensions = c('Buttons', 'FixedColumns'),
#            options = list(fixedColumns = TRUE,
#                           scrollX = TRUE,
#                           dom = 'Bfrtip',
#                           buttons = c('copy', 'print', 'csv','excel', 'pdf')),
#            filter = 'bottom'
#            )

## ----GO analysis---------------------------------------------------------
#  
#  #get all genes from the GTF data
#  backgroundGenes <- unique(gff$gene_id)
#  #get genes that overlap query regions
#  targetedGenes <- unique(overlaps$gene_id)
#  
#  #run TopGO
#  goBP <- runTopGO(ontology = 'BP',
#                        species = 'human',
#                        backgroundGenes = backgroundGenes,
#                        targetedGenes = targetedGenes)
#  
#  goBP <- goBP[order(goBP$foldEnrichment, decreasing = TRUE),]
#  rownames(goBP) <- goBP$GO.ID
#  goBP <- subset(goBP, select = -c(Annotated,classicFisher, bh, GO.ID))
#  DT::datatable(goBP[1:10,],
#            extensions = c('Buttons', 'FixedColumns'),
#            options = list(fixedColumns = TRUE,
#                           scrollX = TRUE,
#                           dom = 'Bfrtip',
#                           buttons = c('copy', 'print', 'csv', 'excel', 'pdf')),
#            filter = 'bottom'
#            )

## ----msigdb_analysis-----------------------------------------------------
#  #geneSets <- parseMsigdb(< path to msigdbFile>)
#  data(geneSets)
#  resultsGSEA <- runGSEA(geneSetList = geneSets,
#                         backgroundGenes = backgroundGenes,
#                         targetedGenes = targetedGenes)
#  
#  datatable(resultsGSEA[1:10,],
#      extensions = 'FixedColumns',
#    options = list(
#      dom = 't',
#      scrollX = TRUE,
#      scrollCollapse = TRUE
#    ))
#  

