## ---- echo=FALSE---------------------------------------------------------
suppressPackageStartupMessages({
    library(Organism.dplyr)
    library(GenomicRanges)
    library(ggplot2)
})

## ---- eval=FALSE---------------------------------------------------------
#  library(Organism.dplyr)

## ---- eval=FALSE---------------------------------------------------------
#  src <- src_organism("TxDb.Hsapiens.UCSC.hg38.knownGene")

## ---- eval=FALSE---------------------------------------------------------
#  path <- "path/to/my.sqlite"
#  src <- src_organism("TxDb.Hsapiens.UCSC.hg38.knownGene", path)

## ------------------------------------------------------------------------
supportedOrganisms()

## ---- eval=FALSE---------------------------------------------------------
#  src <- src_ucsc("human", path)

## ------------------------------------------------------------------------
src <- src_organism(dbpath = hg38light())
src

## ------------------------------------------------------------------------
src_tbls(src)

## ------------------------------------------------------------------------
tbl(src, "id")

## ------------------------------------------------------------------------
colnames(tbl(src, "id"))

## ------------------------------------------------------------------------
tbl(src, "id") %>%
    filter(symbol %like% "SNORD%") %>%
    dplyr::select(entrez, map, ensembl, symbol) %>%
    distinct() %>% arrange(symbol) %>% collect()

## ------------------------------------------------------------------------
inner_join(tbl(src, "id"), tbl(src, "id_go")) %>%
    filter(symbol == "ADA") %>%
    dplyr::select(entrez, ensembl, symbol, go, evidence, ontology)

## ------------------------------------------------------------------------
txcount <- inner_join(tbl(src, "id"), tbl(src, "ranges_tx")) %>%
    dplyr::select(symbol, tx_id) %>%
    group_by(symbol) %>%
    summarise(count = count(symbol)) %>%
    dplyr::select(symbol, count) %>%
    arrange(desc(count)) %>%
    collect(n=Inf)

txcount

library(ggplot2)
ggplot(txcount, aes(x = symbol, y = count)) + 
    geom_bar(stat="identity") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    ggtitle("Transcript count") +
    labs(x = "Symbol") +
    labs(y = "Count")

## ------------------------------------------------------------------------
inner_join(tbl(src, "id"), tbl(src, "ranges_gene")) %>%
    filter(symbol %in% c("ADA", "NAT2")) %>%
    dplyr::select(gene_chrom, gene_start, gene_end, gene_strand,
                  symbol, map) %>%
    collect() %>% GenomicRanges::GRanges()

## ------------------------------------------------------------------------
keytypes(src)

## ------------------------------------------------------------------------
columns(src)

## ------------------------------------------------------------------------
head(keys(src))

## ------------------------------------------------------------------------
head(keys(src, "symbol"))

## ------------------------------------------------------------------------
keytype <- "symbol"
keys <- c("ADA", "NAT2")
columns <- c("entrez", "tx_id", "tx_name","exon_id")
select_tbl(src, keys, columns, keytype)

## ------------------------------------------------------------------------
mapIds(src, keys, column = "tx_name", keytype)
mapIds(src, keys, column = "tx_name", keytype, multiVals="CharacterList")

## ------------------------------------------------------------------------
supportedFilters()

## ------------------------------------------------------------------------
EnsemblFilter("ENSG00000196839")
SymbolFilter("SNORD", "startsWith")

## ------------------------------------------------------------------------
filters <- list(SymbolFilter("SNORD", "startsWith"))
transcripts_tbl(src, filter=filters)
filters <- list(
    SymbolFilter("SNORD", "startsWith"),
    GRangesFilter(GenomicRanges::GRanges("chr15:25062333-25065121"))
)
transcripts(src, filter=filters)

## ------------------------------------------------------------------------
transcripts_tbl(src, filter = list(
    SymbolFilter("ADA"),
    TxStartFilter(44619810,"<")
))

## ------------------------------------------------------------------------
sessionInfo()

