% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/contentscraper.R
\name{ContentScraper}
\alias{ContentScraper}
\title{ContentScraper}
\usage{
ContentScraper(webpage, patterns, patnames, excludepat,
  ManyPerPattern = FALSE, astext = TRUE, encod)
}
\arguments{
\item{webpage}{character, a web page as text.}

\item{patterns}{character vector, one or more XPath patterns to extract from the web page.}

\item{patnames}{character vector, given names for each XPath pattern to extract.}

\item{excludepat}{character vector, one or more XPath patterns to exclude from the extracted content.}

\item{ManyPerPattern}{boolean, if \code{FALSE} only the first element matched by the pattern is extracted (as in blogs, where one page has one article/post and one title). Otherwise, if set to \code{TRUE}, all nodes matching the pattern are extracted (as in galleries, listings or comments, where one page has many elements with the same pattern).}

\item{astext}{boolean, default is TRUE, HTML and PHP tags are stripped from the extracted piece.}

\item{encod}{character, sets the web page character encoding.}
}
\value{
Returns a named list of extracted content.
}
\description{
From a given web page as text (\emph{character}) and a set of named XPath patterns, this function extracts the selected parts of the HTML document and returns them as a list of extracted contents.
}
\examples{

pageinfo<-LinkExtractor("http://glofile.com/index.php/2017/06/08/athletisme-m-a-rome/")
# Retrieve the web page header and data

Data<-ContentScraper(pageinfo[[1]][[10]],c("//head/title","//*/article"),c("title", "article"))
# Extract the title and the article from the web page content using XPath patterns

}
\author{
salim khalil
}
