## This is a MALDIquant example file. It is released into public domain with the
## right to use it for any purpose but without any warranty.


## workflow demo


## load necessary packages
library("MALDIquant")


## load example spectra
data("fiedler2009subset", package="MALDIquant")


## check raw data
## (vapply() is used instead of sapply() so that every check is guaranteed to
## yield exactly one value of the expected type per spectrum)

## any empty spectra? (empty spectra are ignored in subsequent baseline
## correction/peak detection; you could find/remove them by calling
## findEmptyMassObjects/removeEmptyMassObjects)
## see ?isEmpty, ?findEmptyMassObjects, ?removeEmptyMassObjects
any(vapply(fiedler2009subset, isEmpty, logical(1)))
# FALSE

## any spectra with irregular mass values/intervals? (spectra with
## missing/filtered mass values/irregular mass intervals may compromise
## subsequent baseline correction and peak detection.)
any(!vapply(fiedler2009subset, isRegular, logical(1)))
# FALSE

## do the lengths of the spectra differ? (if they differ you have to adjust
## the corresponding halfWindowSize in subsequent baseline correction and
## peak detection.)
## every spectrum is compared against the length of the first one
any(length(fiedler2009subset[[1]]) !=
    vapply(fiedler2009subset, length, integer(1)))
# FALSE


## preprocessing
## (the steps are chained inside a local scope, so only the fully preprocessed
## result is stored in 'spectra')
spectra <- local({
  ## step 1: variance stabilization by a square root transformation
  stabilized <- transformIntensity(fiedler2009subset, method="sqrt")

  ## step 2: smoothing with a 21 point (2 * halfWindowSize + 1)
  ## Savitzky-Golay filter
  ## (halfWindowSize may need tuning for your data; a simple moving average
  ## is an alternative)
  ## see ?smoothIntensity, ?.savitzkyGolay, ?.movingAverage
  smoothed <- smoothIntensity(stabilized, method="SavitzkyGolay",
                              halfWindowSize=10)

  ## step 3: baseline removal
  ## (the number of iterations may need tuning for your spectra; high
  ## resolution spectra need a much lower iteration number [or halfWindowSize,
  ## for some other baseline estimation algorithms])
  ## see ?removeBaseline, ?estimateBaseline
  baselineCorrected <- removeBaseline(smoothed, method="SNIP",
                                      iterations=100)

  ## step 4: intensity calibration (normalization); other calibration
  ## methods are available
  ## see ?calibrateIntensity
  calibrateIntensity(baselineCorrected, method="TIC")
})


## peak detection
## (tune halfWindowSize [use a smaller value for high resolution spectra] and
## SNR [a higher threshold rejects more noise peaks, i.e. yields fewer false
## positives, but decreases sensitivity] to your data)
## see ?detectPeaks, ?estimateNoise
peaks <- detectPeaks(
  spectra,
  method="MAD",
  halfWindowSize=20,
  SNR=2
)


## alignment of the spectra by warping, in three steps:
##   a) build the reference peaks (determineWarpingFunctions could also do
##      this automatically)
##   b) compute one warping function per MassPeaks object
##   c) apply the warping functions to the MassPeaks objects
## (the tolerance argument may need tuning: increase it for low resolution
## spectra with a high mass error, decrease it for high resolution spectra
## with a small mass error)
## see ?referencePeaks, ?determineWarpingFunctions
refPeaks <- referencePeaks(peaks)
warpingFunctions <- determineWarpingFunctions(
  peaks,
  reference=refPeaks,
  tolerance=0.002
)
peaks <- warpMassPeaks(peaks, warpingFunctions)


## binning of the aligned peaks
## see ?binPeaks
peaks <- binPeaks(peaks)


## merge technical replicates
## 1. create factors for correct assignment
## (the labels assume that the technical replicates of each biological sample
## are stored consecutively in 'peaks')
nTechRep <- 2

## fail early with a clear message instead of building a label vector whose
## length does not match 'peaks'
if (length(peaks) %% nTechRep != 0) {
  stop("length(peaks) must be a multiple of nTechRep", call.=FALSE)
}
nBiologicalSamples <- length(peaks)/nTechRep

## seq_len() is used because 1:n would yield c(1, 0) for n == 0
samples <- factor(rep(seq_len(nBiologicalSamples), each=nTechRep),
                  levels=seq_len(nBiologicalSamples))

## 2. remove peaks which do not occur in all technical replicates of a sample
## (minFrequency=1 is applied within each group defined by 'labels')
## see ?filterPeaks
peaks <- filterPeaks(peaks, labels=samples, minFrequency=1)

## 3. merge technical replicates (one mean MassPeaks object per sample)
## see ?mergeMassPeaks
peaks <- mergeMassPeaks(peaks, labels=samples, method="mean")



## prepare for statistical analysis
## 1. get cancer/control indices
## The class is derived from the first file path stored in the metadata of
## each MassPeaks object. vapply() stops with an error if an object has no
## file entry; sapply() would return a list in that case and every sample
## would silently be labelled "control".
filenames <- vapply(peaks, function(x)metaData(x)$file[1], character(1))

## "/tumor/" is a literal path component, not a regular expression
cancer <- grepl(pattern="/tumor/", x=filenames, fixed=TRUE)
classes <- factor(ifelse(cancer, "cancer", "control"),
                  levels=c("cancer", "control"))

## 2. keep only peaks which occur in all samples
## (minFrequency=1; use a lower value to retain less frequent peaks)
## see ?filterPeaks
peaks <- filterPeaks(peaks, minFrequency=1)

## 3. export MassPeaks objects as matrix
## see ?intensityMatrix
training <- intensityMatrix(peaks)


## 'training' and 'classes' can now be used by any statistical tool, e.g. sda
