### R code from vignette source 'vignettes/GWASTools/inst/doc/Affymetrix.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: Affymetrix.Rnw:67-87
###################################################
library(GWASTools)
library(GWASdata)
# Load the SNP annotation (simple data frame)
data(affy_snp_annot)
# Create a SnpAnnotationDataFrame
snpAnnot <- SnpAnnotationDataFrame(affy_snp_annot)
# names of columns
varLabels(snpAnnot)
# data
head(pData(snpAnnot))
# Add metadata to describe the columns
meta <- varMetadata(snpAnnot)
meta[c("snpID", "chromosome", "position", "rsID", "probeID"), 
  "labelDescription"] <- c("unique integer ID for SNPs",                      
  paste("integer code for chromosome: 1:22=autosomes,",
   "23=X, 24=pseudoautosomal, 25=Y, 26=Mitochondrial, 27=Unknown"), 
  "base pair position on chromosome (build 36)",
  "RS identifier",
  "unique ID from Affymetrix")
varMetadata(snpAnnot) <- meta


###################################################
### code chunk number 2: Affymetrix.Rnw:109-136
###################################################
# Load the scan annotation (simple data frame)
data(affy_scan_annot)
# Create a ScanAnnotationDataFrame
scanAnnot <- ScanAnnotationDataFrame(affy_scan_annot)
# names of columns
varLabels(scanAnnot)
# data
head(pData(scanAnnot))
# Add metadata to describe the columns
meta <- varMetadata(scanAnnot)
meta[c("scanID", "subjectID", "family", "father", "mother",
  "CoriellID", "race", "sex", "status", "genoRunID", "plate",
  "alleleFile", "chpFile"), "labelDescription"] <- 
   c("unique integer ID for scans", 
  "subject identifier (may have multiple scans)",
  "family identifier", 
  "father identifier as subjectID",
  "mother identifier as subjectID",
  "Coriell subject identifier",
  "HapMap population group",
  "sex coded as M=male and F=female",
  "simulated case/control status" ,
  "genotyping instance identifier",
  "plate containing samples processed together for genotyping chemistry",
  "data file with intensities",
  "data file with genotypes and quality scores")
varMetadata(scanAnnot) <- meta


###################################################
### code chunk number 3: Affymetrix.Rnw:198-208
###################################################
geno.nc.file <- "tmp.geno.nc"
# get snp annotation data frame for function
snp <- affy_snp_annot[,c("snpID", "chromosome", "position")]
ncdfCreate(ncdf.filename = geno.nc.file,
       snp.annotation = snp,
       variables = c("genotype"),
       array.name = "AffyGenomeWideSNP_6",
       genome.build = "36",
       n.samples = 3,
       precision = "single")


###################################################
### code chunk number 4: Affymetrix.Rnw:217-223
###################################################
# first 3 samples only
scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "chpFile")]
names(scan.annotation) <- c("scanID", "scanName", "file")
# indicate which column of SNP annotation is referenced in data files
snp.annotation <- affy_snp_annot[,c("snpID", "probeID")]
names(snp.annotation) <- c("snpID", "snpName")


###################################################
### code chunk number 5: Affymetrix.Rnw:230-236
###################################################
col.nums <- as.integer(c(2,3))
names(col.nums) <- c("snp", "geno")
# Define a path to the raw data CHP text files which are read by 
#   ncdfAddData to access the raw genotypic data
path <- system.file("extdata", "affy_raw_data",
  package="GWASdata")


###################################################
### code chunk number 6: Affymetrix.Rnw:245-276
###################################################
diag.geno.file <- "diag.geno.RData"
diag.geno <- ncdfAddData(path = path,
  ncdf.filename = geno.nc.file, 
  snp.annotation = snp.annotation,
  scan.annotation = scan.annotation, 
  sep.type = "\t",
  skip.num = 1,
  col.total = 6,
  col.nums = col.nums,
  scan.name.in.file = -1,
  scan.start.index = 1,
  diagnostics.filename = diag.geno.file,
  verbose = FALSE)
# Look at the values included in the "diag.geno" object which holds 
#   all output from the function call
names(diag.geno)
# `read.file' is a vector indicating whether (1) or not (0) each file 
#   specified in the `files' argument was read successfully
table(diag.geno$read.file)
# `row.num' is a vector of the number of rows read from each file
table(diag.geno$row.num) 
# `sample.match' is a vector indicating whether (1) or not (0) 
#   the sample name inside the raw text file matches that in the 
#   sample annotation data.frame
table(diag.geno$sample.match)
# `snp.chk' is a vector indicating whether (1) or not (0) 
#   the raw text file has the expected set of SNP names
table(diag.geno$snp.chk)
# `chk' is a vector indicating whether (1) or not (0) all previous
#   checks were successful and the data were written to the NetCDF file
table(diag.geno$chk)


###################################################
### code chunk number 7: Affymetrix.Rnw:285-291
###################################################
(genofile <- NcdfGenotypeReader(geno.nc.file))
# Take out genotype data for the first 3 samples and 
#   the first 5 SNPs
(genos <- getGenotype(genofile, snp=c(1,5), scan=c(1,3)))
# Close the NetCDF file
close(genofile)


###################################################
### code chunk number 8: Affymetrix.Rnw:297-320
###################################################
check.geno.file <- "check.geno.RData"
check.geno <- ncdfCheckGenotype(path = path,
  ncdf.filename = geno.nc.file, 
  snp.annotation = snp.annotation,
  scan.annotation = scan.annotation, 
  sep.type = "\t",
  skip.num = 1,
  col.total = 6,
  col.nums = col.nums,
  scan.name.in.file = -1,
  check.scan.index = 1:3,
  n.scans.loaded = 3,
  diagnostics.filename = check.geno.file,
  verbose = FALSE)
# Look at the values included in the "check.geno" object which holds 
#   all output from the function call
names(check.geno)
# 'snp.order' is a vector indicating whether (1) or not (0) the snp ids 
#   are in the same order in each file.
table(check.geno$snp.order)
# 'geno.chk' is a vector indicating whether (1) or not (0) the genotypes 
#   in the netCDF match the text file
table(check.geno$geno.chk)


###################################################
### code chunk number 9: Affymetrix.Rnw:359-369
###################################################
qxy.nc.file <- "tmp.qxy.nc"
# get snp annotation data frame for function
snp <- affy_snp_annot[,c("snpID", "chromosome", "position")]
ncdfCreate(ncdf.filename = qxy.nc.file,
       snp.annotation = snp,
       variables = c("quality","X","Y"),
       array.name = "AffyGenomeWideSNP_6",
       genome.build = "36",
       n.samples = 3,
       precision = "single")


###################################################
### code chunk number 10: Affymetrix.Rnw:378-384
###################################################
# first 3 samples only
scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "chpFile")]
names(scan.annotation) <- c("scanID", "scanName", "file")
# indicate which column of SNP annotation is referenced in data files
snp.annotation <- affy_snp_annot[,c("snpID", "probeID")]
names(snp.annotation) <- c("snpID", "snpName")


###################################################
### code chunk number 11: Affymetrix.Rnw:391-397
###################################################
col.nums <- as.integer(c(2,4))
names(col.nums) <- c("snp","qs")
# Define a path to the raw data CHP text files which are read by 
#   ncdfAddData to access the raw genotypic data
path <- system.file("extdata", "affy_raw_data",
  package="GWASdata")


###################################################
### code chunk number 12: Affymetrix.Rnw:406-419
###################################################
diag.qual.file <- "diag.qual.RData"
diag.qual <- ncdfAddData(path = path,
  ncdf.filename = qxy.nc.file, 
  snp.annotation = snp.annotation,
  scan.annotation = scan.annotation, 
  sep.type = "\t",
  skip.num = 1,
  col.total = 6,
  col.nums = col.nums,
  scan.name.in.file = -1,
  scan.start.index = 1,
  diagnostics.filename = diag.qual.file,
  verbose = FALSE)


###################################################
### code chunk number 13: Affymetrix.Rnw:426-437
###################################################
scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "alleleFile")]
names(scan.annotation) <- c("scanID", "scanName", "file")
diag.xy.file <- "diag.xy.RData"
diag.xy <- ncdfAddIntensity(path = path,
  ncdf.filename = qxy.nc.file, 
  snp.annotation = snp.annotation, 
  scan.annotation = scan.annotation, 
  scan.start.index = 1, 
  n.consecutive.scans = 3,
  diagnostics.filename = diag.xy.file,
  verbose = FALSE)


###################################################
### code chunk number 14: Affymetrix.Rnw:444-451
###################################################
# Open the NetCDF file we just created
(intenfile <- NcdfIntensityReader(qxy.nc.file))
# Take out the normalized X intensity values for the first 
#     5 SNPs for the first 3 samples
(xinten <- getX(intenfile, snp=c(1,5), scan=c(1,3)))
# Close the NetCDF file
close(intenfile)


###################################################
### code chunk number 15: Affymetrix.Rnw:457-476
###################################################
scan.annotation <- affy_scan_annot[1:5,
  c("scanID", "genoRunID", "chpFile", "alleleFile")]
names(scan.annotation) <- c("scanID", "scanName", "file", "inten.file")
check.qxy.file <- "check.qxy.RData"
check.qxy <- ncdfCheckIntensity(path = path,
  intenpath = path,
  ncdf.filename = qxy.nc.file, 
  snp.annotation = snp.annotation,
  scan.annotation = scan.annotation, 
  sep.type = "\t",
  skip.num = 1,
  col.total = 6,
  col.nums = col.nums,
  scan.name.in.file = -1,
  check.scan.index = 1:3,
  n.scans.loaded = 3,
  affy.inten = TRUE,
  diagnostics.filename = check.qxy.file,
  verbose = FALSE)


###################################################
### code chunk number 16: Affymetrix.Rnw:564-574
###################################################
bl.nc.file <- "tmp.bl.nc"
# get snp annotation data frame for function
snp <- affy_snp_annot[,c("snpID", "chromosome", "position")]
ncdfCreate(ncdf.filename = bl.nc.file,
       snp.annotation = snp,
       variables = c("BAlleleFreq","LogRRatio"),
       array.name = "AffyGenomeWideSNP_6",
       genome.build = "36",
       n.samples = 3,
       precision = "single")


###################################################
### code chunk number 17: Affymetrix.Rnw:586-600
###################################################
xyNC <- NcdfIntensityReader(qxy.nc.file)
genoNC <- NcdfGenotypeReader(geno.nc.file)
BAFfromGenotypes(xyNC, genoNC,
           bl.ncdf.filename = bl.nc.file, 
           min.n.genotypes = 0,
           call.method ="by.study")
close(xyNC)
close(genoNC)
# Open the NetCDF file we just created
(blfile <- NcdfIntensityReader(bl.nc.file))
# Look at the BAlleleFrequency values for the first 5 SNPs
(baf <- getBAlleleFreq(blfile, snp=c(1,5), scan=c(1,3)))
# Close the NetCDF file
close(blfile)


###################################################
### code chunk number 18: Affymetrix.Rnw:605-608
###################################################
file.remove(geno.nc.file, qxy.nc.file, bl.nc.file)
file.remove(diag.geno.file, diag.qual.file, diag.xy.file)
file.remove(check.geno.file, check.qxy.file)


