## ----style, echo = FALSE, results = 'asis'-------------------------------
BiocStyle::markdown()

## ----global_options, include=FALSE--------------------------------------------------------------------------
knitr::opts_chunk$set(fig.width=10, fig.height=7, warning=FALSE, message=FALSE)
options(width=110)
set.seed(123)

## -----------------------------------------------------------------------------------------------------------
## load ABAEnrichment package
library(ABAEnrichment)
## create input vector with candidate genes
gene_ids = c('NCAPG', 'APOL4', 'NGFR', 'NXPH4', 'C21orf59', 'CACNG2', 'AGTR1', 'ANO1', 
  'BTBD3', 'MTUS1', 'CALB1', 'GYG1', 'PAX2')
genes = rep(1, length(gene_ids))
names(genes) = gene_ids
genes

## ---- eval=FALSE--------------------------------------------------------------------------------------------
#  ## run enrichment analyses with default parameters for the adult and developing human brain
#  res_adult = aba_enrich(genes, dataset='adult')
#  res_devel = aba_enrich(genes, dataset='5_stages')

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis with less cutoffs and randomsets to save computation time
res_devel = aba_enrich(genes, dataset='5_stages', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## extract first element from the output list, which contains the statistics
fwers_devel = res_devel[[1]]
## see results for the brain regions with highest enrichment for children (3-11 yrs, age_category 3)
fwers_3 = fwers_devel[fwers_devel[,1]==3, ]
head(fwers_3)

## -----------------------------------------------------------------------------------------------------------
res_devel[2:3]

## ----eval=FALSE---------------------------------------------------------------------------------------------
#  ## run enrichment analysis, with randomsets dependent on gene length
#  res_len = aba_enrich(genes, gene_len=TRUE)
#  ## run the same analysis using gene coordinates from GRCh38 instead of the default GRCh37
#  res_len_hg20 = aba_enrich(genes, gene_len=TRUE, ref_genome='grch38')

## -----------------------------------------------------------------------------------------------------------
## assign random scores to the genes used above
genes = sample(1:50, length(gene_ids))
names(genes) = gene_ids
genes

## ---- results='hide'----------------------------------------------------------------------------------------
## test for enrichment of expressed genes with high scores in the adult brain using the Wilcoxon rank-sum test
res_wilcox = aba_enrich(genes, test='wilcoxon', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
head(res_wilcox[[1]])

## -----------------------------------------------------------------------------------------------------------
## create input vector with a candidate region on chromosome 8
## and background regions on chromosome 7, 8 and 9
genes = c(1, rep(0,6))
names(genes) = c('8:82000000-83000000', '7:1300000-56800000', '7:74900000-148700000',
  '8:7400000-44300000', '8:47600000-146300000', '9:0-39200000', '9:69700000-140200000')
genes

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis for the adult human brain
res_region = aba_enrich(genes, dataset='adult', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## look at the results from the enrichment analysis
fwers_region = res_region[[1]]
head(fwers_region)
## see which genes are located in the candidate region
input_genes = res_region[[2]]
candidate_genes = input_genes[input_genes==1]
candidate_genes

## -----------------------------------------------------------------------------------------------------------
## get expression data for the first 5 brain regions from the last aba_enrich-analysis
top_regions = fwers_region[1:5, 'structure_id']
top_regions
expr = get_expression(top_regions, background=FALSE)
head(expr)

## ---- eval=FALSE--------------------------------------------------------------------------------------------
#  ## get expression data independent from previous aba_enrich analysis
#  regions = c('Allen:12926', 'Allen:4738', 'Allen:4671', 'Allen:12909', 'Allen:4718')
#  gene_ids = c('CHMP4C', 'FABP12', 'FABP4', 'FABP5', 'FABP9', 'IMPA1',
#    'PAG1', 'PMP2', 'SLC10A5', 'SNX16', 'ZFAND1')
#  expr2 = get_expression(regions, gene_ids=gene_ids, dataset='adult', background=FALSE)

## -----------------------------------------------------------------------------------------------------------
## get expression data for the first 5 brain regions from the last aba_enrich-analysis
top_regions = fwers_region[1:5, 'structure_id']
plot_expression(top_regions, background=FALSE)

## -----------------------------------------------------------------------------------------------------------
## plot the same expression data without dendrogram
plot_expression(top_regions, dendro=FALSE, background=FALSE)

## -----------------------------------------------------------------------------------------------------------
## plot expression of some genes for the frontal neocortex (Allen:10161) in age category 3 (children, 3-11 yrs)
gene_ids = c('ENSG00000157764', 'ENSG00000163041', 'ENSG00000182158', 'ENSG00000147889',
  'ENSG00000103126', 'ENSG00000184634')
plot_expression('Allen:10161', gene_ids=gene_ids, dataset='5_stages', age_category=3)

## -----------------------------------------------------------------------------------------------------------
## get IDs of the substructures of the frontal neocortex (Allen:10161) for which expression data are available
subs = get_sampled_substructures('Allen:10161')
subs
## get the full name of those substructures
get_name(subs)
## get the superstructures of the frontal neocortex (from general to special)
supers = get_superstructures('Allen:10161')
supers
## get the full names of those superstructures
get_name(supers)

## -----------------------------------------------------------------------------------------------------------
## get structure IDs of brain regions that contain 'accumbens' in their names
get_id('accumbens')
## get structure IDs of brain regions that contain 'telencephalon' in their names
get_id('telencephalon')

## -----------------------------------------------------------------------------------------------------------
## get all brain regions included in ABAEnrichment together with thier IDs
all_regions = get_id('')
head(all_regions)

## ---- results='hide'----------------------------------------------------------------------------------------
## run an enrichment analysis with 7 candidate and 7 background genes for the developing brain
gene_ids = c('FOXJ1', 'NTS', 'LTBP1', 'STON2', 'KCNJ6', 'AGT', 
  'ANO3', 'TTR', 'ELAVL4', 'BEAN1', 'TOX', 'EPN3', 'PAX2', 'KLHL1')
genes = rep(c(1,0), each=7)
names(genes) = gene_ids
res = aba_enrich(genes, dataset='5_stages', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
head(res[[1]])
## see which candidate genes are annotated to brain regions with a FWER < 0.01
anno = get_annotated_genes(res, fwer_threshold=0.1)
anno

## -----------------------------------------------------------------------------------------------------------
## find out which of the above genes have expression above the 70% and 90% expression-cutoff, respectively,
## in the basal ganglia of the adult human brain (Allen:4276)
anno2 = get_annotated_genes(structure_ids='Allen:4276', dataset='adult', 
  cutoff_quantiles=c(0.7,0.9), genes=gene_ids)
anno2

## -----------------------------------------------------------------------------------------------------------
## use previously defined input genes vector
genes

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis with developmental effect score
res_dev_effect = aba_enrich(genes, dataset='dev_effect', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## see the 5 brain regions with the lowest FWERs
top_regions = res_dev_effect[[1]][1:5, ]
top_regions

## -----------------------------------------------------------------------------------------------------------
## plot developmental effect score of the 5 brain structures with the lowest FWERs
plot_expression(top_regions[ ,'structure_id'])

## -----------------------------------------------------------------------------------------------------------
sessionInfo()