## ----warning=FALSE, message=FALSE------------------------------------------
library(FamAgg)

data(minnbreast)
## Subsetting to only few families of the whole data set.
mbsub <- minnbreast[minnbreast$famid %in% 4:14, ]
mbped <- mbsub[, c("famid", "id", "fatherid", "motherid", "sex")]
## Renaming column names.
colnames(mbped) <- c("family", "id", "father", "mother", "sex")
## Defining the optional argument age.
endage <- mbsub$endage
names(endage) <- mbsub$id
## Create the object.
fad <- FAData(pedigree=mbped, age=endage)

## --------------------------------------------------------------------------
## Use the pedigree method to access the full pedigree
## data.frame,
head(pedigree(fad))

## or access individual columns using $.
## The ID of the father (0 representing "founders"):
head(fad$father)
## Mother:
head(fad$mother)
## Sex:
head(fad$sex)

## We can also access the age of each individual, if
## provided.
head(age(fad))

## --------------------------------------------------------------------------
## Extract the pedigree information from family "4"...
nrow(family(fad, family=4))

head(family(fad, family=4))

## ...which is the same as extracting the family pedigree
## for an individual of this family.
head(family(fad, id=3))

## Note that IDs are internally always converted to character,
## thus, using id=3 and id="3" return the same information.
head(family(fad, id="3"))

## --------------------------------------------------------------------------
## Subset the object to a single family.
fam4 <- fad[fad$family == "4", ]
table(fam4$family)

## ----family-4-pedigree, message=FALSE, fig.align='center'------------------
switchPlotfun("ks2paint")
## By supplying device="plot", we specify that we wish to visualize the
## pedigree in an R plot. This is the default for "ks2paint", anyway.
plotPed(fad, id=3, device="plot")

## --------------------------------------------------------------------------
## Build the pedigree for individual 3.
fullPed <- buildPed(fad, id="3")
nrow(fullPed)

## --------------------------------------------------------------------------
## Find the subpedigree for individuals 21, 22 and 17.
buildPed(fad, id=c(21, 22, 17), prune=TRUE)

## ----family-sub-pedigree, message=FALSE, fig.align='center'----------------
plotPed(fad, id=c(21, 22, 17), prune=TRUE)

## ----pedigree-family-14, message=FALSE, fig.align='center'-----------------
plotPed(fad, family="14", cex=0.4)

## --------------------------------------------------------------------------
## Check if we have individual 26064 from the second branch in the pedigree
## of individual 440.
any(buildPed(fad, id="440")$id == "26064")

## What for the pedigree of 447?
any(buildPed(fad, id="447")$id == "26064")

## --------------------------------------------------------------------------
## Find founders for family 4.
findFounders(fad, "4")

## --------------------------------------------------------------------------
## Find the closest common ancestor.
getCommonAncestor(fad, id=c(21, 22, 17))

## --------------------------------------------------------------------------
## Get the children of ID 4.
getChildren(fad, id="4", max.generations=1)

## Get the offsprings.
getChildren(fad, id="4")

## Get all ancestors.
getAncestors(fad, id="4")

## Get the siblings.
getSiblings(fad, id=c("4"))

## --------------------------------------------------------------------------
## Add the trait information to the FAData object.
cancer <- mbsub$cancer
names(cancer) <- as.character(mbsub$id)
trait(fad) <- cancer

## Identify the affected founders.
## First all affected individuals.
affIds <- affectedIndividuals(fad)
## Identify founders for each family.
founders <- lapply(unique(fad$family), function(z){
    return(findFounders(fad, family=z))
})
names(founders) <- unique(fad$family)

## Track the affected founder.
affFounders <- lapply(founders, function(z){
    return(z[z %in% affIds])
})
## Interestingly, not all founders are affected! It seems in some cases
## parents of the affected participants in the screening phase have also
## been included.
affFounders <- affFounders[unlist(lapply(affFounders, length)) > 0]

## The number of families analyzed.
length(founders)

## The number of families with affected founder.
length(affFounders)

## --------------------------------------------------------------------------
kin2affFounders <- shareKinship(fad, unlist(affFounders))

## How many of these are affected?
sum(kin2affFounders %in% affIds)

## How many affected are not related to an affected founder?
sum(!(affIds %in% kin2affFounders))

## --------------------------------------------------------------------------
## Get all individuals sharing kinship with individual 4.
shareKinship(fad, id="4")

## --------------------------------------------------------------------------
## Estimate generation levels for all families.
estimateGenerations(fad)[1:3]

## --------------------------------------------------------------------------
gens <- generationsFrom(fad, id="4")

## ----family-four-gens-rel-to-four, message=FALSE, fig.align='center'-------
plotPed(fad, family=4, label2=gens)

## ----results='hide', message=FALSE-----------------------------------------
## Extract the cancer trait information.
tcancer <- mbsub$cancer
names(tcancer) <- mbsub$id
## Set the trait.
trait(fad) <- tcancer

## --------------------------------------------------------------------------
## Extract the trait information.
head(trait(fad))

## We can also extract the IDs of the affected individuals.
head(affectedIndividuals(fad))

## Or the IDs of the phenotyped individuals.
head(phenotypedIndividuals(fad))

## ----family-pedigree-affected, message=FALSE, fig.align='center'-----------
## Plotting the pedigree for family "9".
plotPed(fad, family="9")

## ----family-pedigree-affected-highlighted, message=FALSE, fig.align='center'----
## Plotting the pedigree for family "9".
plotPed(fad, family="9", highlight.ids=list(a=c("185", "201", "198"),
					    b=c("193")))

## --------------------------------------------------------------------------
## Transform the full pedigree to a graph.
fullGraph <- ped2graph(pedigree(fad))

## In addition, build the graph for a single family.
singleFam <- ped2graph(family(fad, family=4))

## ----graph-plots, fig.align='center'---------------------------------------
## Build the layout.
plot(fullGraph)
lay <- layout_(singleFam, on_grid())
plot(singleFam, layout=lay)

## --------------------------------------------------------------------------
subgr <- connectedSubgraph(singleFam, nodes=c("7", "8", "27", "17"))

## ----subgraph-plot, fig.align='center'-------------------------------------
## Plot the graph.
plot(subgr)
## Similar to buildPed/plotPed with prune=TRUE.
plotPed(fad, id=c("7", "8", "17", "27"), prune=TRUE)

## ----message=FALSE---------------------------------------------------------
## Import a "ped" file.
pedFile <- system.file("txt/minnbreastsub.ped.gz", package="FamAgg")
## Quick glance at the file.
readLines(pedFile, n=1)
fad <- FAData(pedFile)

head(pedigree(fad))

## ----message=FALSE---------------------------------------------------------
## Create the FAData by reading data from a txt file.
pedFile <- system.file("txt/minnbreastsub.txt", package="FamAgg")
fad <- FAData(pedigree=pedFile, header=TRUE, id.col="id",
	      family.col="famid", father.col="fatherid",
	      mother.col="motherid")

## --------------------------------------------------------------------------
tmpF <- tempfile()

## Subset the pedigree to family 4
fam4 <- fad[fad$family == 4, ]

## Export data in ped format.
export(fam4, tmpF, format="ped")

## ----warning=TRUE, message=FALSE-------------------------------------------
library(FamAgg)
set.seed(18011977)
data(minnbreast)
## Subset the dataset to reduce processing time.
mbsub <- minnbreast[minnbreast$famid %in% c(4:60, 432), ]
## Uncomment the line below to use the whole dataset instead.
## mbsub <- minnbreast

## Define the number of simulations we perform.
## nsim <- 10000
nsim <- 1000

mbped <- mbsub[, c("famid", "id", "fatherid", "motherid", "sex")]
## Renaming column names.
colnames(mbped) <- c("family", "id", "father", "mother", "sex")
## Create the FAData object.
fad <- FAData(pedigree=mbped)

## Define the trait.
tcancer <- mbsub$cancer
names(tcancer) <- as.character(mbsub$id)

## ----warning=FALSE, message=TRUE-------------------------------------------
## Calculate the genealogical index of familiality.
gi <- genealogicalIndexTest(fad, trait=tcancer, traitName="cancer", nsim=nsim)

## Display the result.
result(gi)

## ----warning=FALSE, eval=FALSE---------------------------------------------
#  ## Calculate the genealogical index of familiality using random sampling from
#  ## a sex matched control set.
#  giSexMatch <- genealogicalIndexTest(fad, trait=tcancer, traitName="cancer", nsim=nsim,
#  				controlSetMethod="getSexMatched")
#  
#  ## Use an external vector to perform the matching.
#  ## The results are essentially identical.
#  giExtMatch <- genealogicalIndexTest(fad, trait=tcancer, traitName="cancer", nsim=nsim,
#  				controlSetMethod="getExternalMatched", match.using=fad$sex)

## ----message=FALSE---------------------------------------------------------
## Evaluate the proportion of male and femal cases.
table(gi$sex[affectedIndividuals(gi)])

## We can use the gender information to perform stratified sampling, i.e.
## in each permutation a random set of 3 male and 15 females will be selected.
giStrata <- genealogicalIndexTest(fad, trait=tcancer, traitName="cancer", nsim=nsim,
				  strata=fad$sex)

result(giStrata)

## ----mbreast-genealogical-index-result, message=FALSE, warning=FALSE, fig.align='center'----
## Plot the result.
plotRes(giStrata)

## ----message=FALSE---------------------------------------------------------
library(gap)

## Adding the trait information, so the extracted pedigree data.frame will
## also contain a column "affected" with that information.
trait(fad) <- tcancer

## Extract the pedigree and re-format it for the gif function.
pedi <- pedigree(fad)
## Remove singletons.
pedi <- removeSingletons(pedi)
pedi[is.na(pedi$father), "father"] <- 0
pedi[is.na(pedi$mother), "mother"] <- 0

## Identify the affected individuals.
affIds <- as.numeric(pedi$id[which(pedi$affected == 1)])

## Execute the gif method contained in the gap package.
gifRes <- gif(pedi[, c("id", "father", "mother")], affIds)

## Calculate the GIF using FamAgg's genealogicalIndexTest.
gifT <- genealogicalIndexTest(fad, trait=tcancer, nsim=100)

## Comparing the results:
gifRes[[1]] == result(gifT)$genealogical_index

## ----message=FALSE, warning=FALSE------------------------------------------
## Perform the analysis (no strata etc) separately for each family.
giFam <- genealogicalIndexTest(fad, trait=tcancer, nsim=nsim,
			       perFamilyTest=TRUE, traitName="Cancer")

## Display the result from the analysis.
head(result(giFam))

## ----warning=FALSE---------------------------------------------------------
## Estimate the risk for each individual using the familial incidence rate method.
## We use the endage provided in the Minnesota Breast Cancer Record as
## a measure for time at risk.
fr <- familialIncidenceRate(fad, trait=tcancer, timeAtRisk=mbsub$endage)

## ----mbreast-mean-fr-per-family, message=FALSE, warning=FALSE, fig.align='center'----
## Split the FIR by family and average the values within each.
frFam <- split(fr, f=fad$family)
frFamAvg <- lapply(frFam, mean, na.rm=TRUE)

## Sort and plot the averages.
frFamAvg <- sort(unlist(frFamAvg), decreasing=TRUE)
plot(frFamAvg, type="h", xaxt="n", xlab="", ylab="mean FIR",
     main="Per family averaged familial incidence rate")
axis(side=1, las=2, at=1:length(frFamAvg), label=names(frFamAvg))

## ----warning=FALSE, message=FALSE------------------------------------------
## Estimate the risk for each individual using the familial incidence rate method.
## We use the endage provided in the Minnesota Breast Cancer Record as
## a measure for time at risk.
frTest <- familialIncidenceRateTest(fad, trait=tcancer, traitName="cancer",
				    timeAtRisk=mbsub$endage, nsim=nsim)

## --------------------------------------------------------------------------
head(familialIncidenceRate(frTest))
head(frTest$fir)

## --------------------------------------------------------------------------
head(result(frTest))

## --------------------------------------------------------------------------
frRes <- result(frTest)
frSig <- frRes[which(frRes$padj < 0.05), ]

## Split by family.
frFam <- split(frSig, frSig$family)
frRes <- data.frame(family=names(frFam), no_sign_fir=unlist(lapply(frFam, nrow)))
## Determine the number of phenotyped and affected individuals per family.
noPheNAff <- sapply(names(frFam), function(z){
    fam <- family(frTest, family=z)
    return(c(no_pheno=sum(!is.na(fam$affected)),
	     no_aff=length(which(fam$affected == 1))
	     ))
})
frRes <- cbind(frRes, t(noPheNAff))

## Display the number of phenotyped and affected individuals as well as
## the number of individuals within the families with a significant FIR.
frRes[order(frRes[, "no_sign_fir"], decreasing=TRUE), ]

## --------------------------------------------------------------------------
## Perform the kinship sum test.
kinSum <- kinshipSumTest(fad, trait=tcancer, traitName="cancer",
			 nsim=nsim, strata=fad$sex)
head(result(kinSum))

## --------------------------------------------------------------------------
## Extract the IDs of the individuals with significant kinship. By default, the raw
## p-values are adjusted for multiple hypothesis testing using the method from
## Benjamini and Hochberg.
kinSumRes <- result(kinSum)
kinSumIds <- as.character(kinSumRes[kinSumRes$padj < 0.1, "affected_id"])

## From which families are these?
table(kinSumRes[kinSumIds, "family"])

## ----mbreast-family-432-FIR-compared-to-others, message=FALSE, warning=FALSE, fig.align='center'----
## Get the familial ratio of the significant in this family, of all in this family,
## and of all others.
famId <- kinSumRes[1, "family"]

## Extract the family.
fam <- family(kinSum, family=famId)

## Stratify individuals in affected/unaffected.
strat <- rep("All, unaff.", length(kinSum$id))
strat[which(kinSum$affected > 0)] <- "All, aff."
strat[kinSum$id %in% fam$id] <- paste0("Fam ", famId, ", unaff.")
strat[kinSum$id %in% fam$id[which(fam$affected > 0)]] <- paste0("Fam ",famId,", aff.")

famData <- data.frame(fr=fr, group=strat)
boxplot(fr~group, data=famData, na.rm=TRUE, ylab="FIR",
	col=rep(c("#FBB4AE", "#B3CDE3"), 2))

## ----mbreast-family-432-affected, message=FALSE, warning=FALSE, fig.align='center'----
## Plot the pedigree for the family of the selected individual removing
## all individuals that were not phenotypes.
plotPed(kinSum, id=kinSumIds[1], cex=0.3, only.phenotyped=TRUE)

## ----mbreast-family-432-affecte-res, message=FALSE, warning=FALSE, fig.align='center'----
plotRes(kinSum, id=kinSumIds[1])

## ----message=FALSE---------------------------------------------------------
## Calculate the kinship test.
kinGroup <- kinshipGroupTest(fad, trait=tcancer, traitName="cancer",
			     nsim=nsim, strata=fad$sex)
head(result(kinGroup))

## --------------------------------------------------------------------------
kinGroupRes <- result(kinGroup)
## Create a data.frame with the summarized results.
resTab <- data.frame(total_families=length(unique(kinGroup$family)),
		     ratio_sign=length(unique(
			 kinGroupRes[kinGroupRes$ratio_padj < 0.05, "family"]
		     )),
		     kinship_sign=length(unique(
			 kinGroupRes[kinGroupRes$kinship_padj < 0.05, "family"]
		     ))
		     )
resTab

## ----mbreast-family-432-affecte-res-kinship, message=FALSE, warning=FALSE, fig.align='center'----
plotPed(kinGroup, id=kinGroupRes[kinGroupRes$family == "432", "group_id"][1],
	prune=TRUE, label1=fr)

## ----message=FALSE---------------------------------------------------------
## First we load the trait/affected information into the FAData object.
trait(fad) <- tcancer

## Next we determine the number of phenotyped individuals per family.
famAff <- pedigree(fad)[, c("family", "affected")]
## Exclude individuals that were not phenotyped.
famAff <- famAff[!is.na(famAff$affected), ]
## Calculate the number of phenotyped per family.
famSize <- table(famAff$family)

keepFams <- names(famSize)[famSize < 22]

## Extract the family and restrict to those on which we can perform the analysis.
famCliq <- fad$family
famCliq <- famCliq[famCliq %in% keepFams]

## ----message=FALSE---------------------------------------------------------
probRes <- probabilityTest(fad, trait=tcancer, traitName="Cancer",
			   cliques=famCliq, nsim=nsim)
probResTab <- result(probRes)
head(probResTab)

## ----mbreast-prob-clique-1, message=FALSE, warning=FALSE, fig.align='center'----
plotPed(probRes, id=probResTab[1, "group_id"])

