%% $Id: panTree.Rd 185 2014-09-03 08:09:01Z larssn $

\name{panTree}
\alias{panTree}
\title{
  Constructing pan-genome trees
}
\description{
  Creates a pan-genome tree based on a pan-matrix and a distance function.
}
\usage{
panTree(pan.matrix, dist.FUN=distManhattan, nboot=0, linkage="average", ...)
}
\arguments{
  \item{pan.matrix}{A \code{Panmat} object, see \code{\link{panMatrix}}.}
  \item{dist.FUN}{A valid distance function, see Details below.}
  \item{nboot}{Number of bootstrap samples. The default (0) means no bootstrapping.}
  \item{linkage}{The linkage function used by \code{\link{hclust}}, see Details below.}
  \item{...}{Additional parameters passed on to the specified distance function, see Details below.}
}
\details{
  A pan-genome tree is a graphical display of the genomes in a pan-genome study, based on some pan-matrix (Snipen & Ussery, 2010). \code{\link{panTree}} is a constructor that computes a \code{Pantree} object; use \code{\link{plot.Pantree}} to actually plot the tree.
  
  The parameter \samp{dist.FUN} must be a function that takes as input a numerical matrix (\code{Panmat} object) and returns a \code{\link{dist}} object. See \code{\link{distManhattan}} or \code{\link{distJaccard}} for examples of such functions. Any additional arguments (\samp{...}) are passed on to this function.
  
  If you want to have bootstrap-values in the tree, set \samp{nboot} to some appropriate number (e.g. \samp{nboot=100}).
  
  The tree is created by \code{\link{hclust}} (hierarchical clustering), by default using the \samp{average} linkage function as described in Snipen & Ussery (2010). You may specify alternatives with the parameter \samp{linkage}; see \code{\link{hclust}} for details.
}
\value{
  This function returns a \code{Pantree} object, which is a small (S3) extension to a \code{\link{list}} with 4 components. These components are named \samp{Htree}, \samp{Nboot}, \samp{Nbranch} and \samp{Dist.FUN}.
  
  \samp{Htree} is a \code{\link{hclust}} object. This is the actual tree.
  
  \samp{Nboot} is the number of bootstrap samples.
  
  \samp{Nbranch} is a vector listing the number of times each split/clade in the tree was observed in the bootstrap procedure.
  
  \samp{Dist.FUN} is the name of the distance function used to construct the tree.
}

\references{
  Snipen, L., Ussery, D.W. (2010). Standard operating procedure for computing pangenome trees. Standards in Genomic Sciences, 2:135-141.
}
\author{
  Lars Snipen and Kristian Hovde Liland.
}

\seealso{
  \code{\link{panMatrix}}, \code{\link{distManhattan}}, \code{\link{distJaccard}}, \code{\link{plot.Pantree}}.
}
\examples{
# Loading a Panmat object, constructing a tree with default settings
# (Manhattan distances, average linkage, no bootstrapping) and plotting it
data(list="Mpneumoniae.blast.panmat",package="micropan")
my.tree <- panTree(Mpneumoniae.blast.panmat)
plot(my.tree)

# Computing some weights to be used in the distManhattan
# function below...
w <- geneWeights(Mpneumoniae.blast.panmat,type="shell")
# Creating another tree with scaled and weighted distances and bootstrap values.
# nboot must be set explicitly to get bootstrap values (its default is 0);
# scale and weights are passed on to the distance function through '...'
my.tree <- panTree(Mpneumoniae.blast.panmat, nboot=100, scale=0.1, weights=w)

# ...and plotting with alternative labels and colors from Mpneumoniae.table.
# Both vectors are named by GID.tag.
data(list="Mpneumoniae.table",package="micropan")
labels <- Mpneumoniae.table$Strain
names(labels) <- Mpneumoniae.table$GID.tag
cols <- Mpneumoniae.table$Color
names(cols) <- Mpneumoniae.table$GID.tag
plot(my.tree, leaf.lab=labels, col=cols,
     cex=0.8, xlab="Shell-weighted Manhattan distances")
}
