% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulate_MDP.R
\name{simulate_MDP}
\alias{simulate_MDP}
\title{Simulate Trajectories in a MDP}
\usage{
simulate_MDP(
  model,
  n = 100,
  start = NULL,
  horizon = NULL,
  return_states = FALSE,
  epsilon = NULL,
  delta_horizon = 0.001,
  engine = "cpp",
  verbose = FALSE,
  ...
)
}
\arguments{
\item{model}{a MDP model.}

\item{n}{number of trajectories.}

\item{start}{probability distribution over the states for choosing the
starting states for the trajectories. Defaults to "uniform".}

\item{horizon}{number of epochs for the simulation. If \code{NULL} then the
horizon for the model is used.}

\item{return_states}{logical; return visited states.}

\item{epsilon}{the probability of random actions when using an epsilon-greedy policy.
The default is 0 for solved models and 1 for unsolved models.}

\item{delta_horizon}{precision used to determine the horizon for infinite-horizon problems.}

\item{engine}{\code{'cpp'} or \code{'r'} to perform simulation using a faster C++
or a native R implementation.}

\item{verbose}{report used parameters.}

\item{...}{further arguments are ignored.}
}
\value{
A list with elements:
\itemize{
\item \code{avg_reward}: The average discounted reward.
\item \code{reward}: Reward for each trajectory.
\item \code{action_cnt}: Action counts.
\item \code{state_cnt}: State counts.
\item \code{states}: a vector with state ids.
Rows represent trajectories.
}

A vector with state ids (in the final epoch or all). Attributes containing action
counts and rewards for each trajectory may be available.
}
\description{
Simulate trajectories through a MDP. The start state for each
trajectory is randomly chosen using the specified start distribution. In each epoch,
the current state is used to choose an action from an epsilon-greedy policy and
the state is then updated.
}
\details{
A native R implementation is available (\code{engine = 'r'}) and the default is a
faster C++ implementation (\code{engine = 'cpp'}).

Both implementations support parallel execution using the package
\pkg{foreach}. To enable parallel execution, a parallel backend like
\pkg{doParallel} needs to be available and registered (see
\code{\link[doParallel:registerDoParallel]{doParallel::registerDoParallel()}}).
Note that small simulations are slower using parallelization. Therefore, C++ simulations
with n * horizon less than 100,000 are always executed using a single worker.
}
\examples{
data(Maze)

# solve the MDP with no discounting
sol <- solve_MDP(Maze, discount = 1)
sol

# U in the policy is an estimate of the utility of being in a state
# when using the optimal policy.
policy(sol)
matrix(policy(sol)[[1]]$action, nrow = 3, dimnames = list(1:3, 1:4))[3:1, ]

## Example 1: simulate 100 trajectories following the policy, only the final state is returned
sim <- simulate_MDP(sol, n = 100, horizon = 10, verbose = TRUE)
sim

# Note that all simulations start at s_1 and that the simulated avg. reward
# is therefore an estimate of the U value for the start state s_1.
policy(sol)[[1]][1,] 

# Calculate proportion of actions taken in the simulation
round_stochastic(sim$action_cnt / sum(sim$action_cnt), 2)

# reward distribution
hist(sim$reward)

## Example 2: simulate with start states drawn from a uniform distribution
#             over all states and return all visited states
sim <- simulate_MDP(sol, n = 100, start = "uniform", horizon = 10, return_states = TRUE)
sim$avg_reward

# how often was each state visited?
table(sim$states)
matrix(table(sim$states),nrow = 3, dimnames = list(1:3, 1:4))[3:1, ]
}
\seealso{
Other MDP: 
\code{\link{MDP}()},
\code{\link{POMDP_accessors}},
\code{\link{solve_MDP}()},
\code{\link{transition_graph}()}
}
\author{
Michael Hahsler
}
\concept{MDP}
