% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/7_1_textTopics.R
\name{textTopics}
\alias{textTopics}
\title{This function creates and trains a BERTopic model (based on the bertopic Python package) on a
text-variable in a tibble/data.frame. (EXPERIMENTAL)}
\usage{
textTopics(
  data,
  variable_name,
  embedding_model = "distilroberta",
  umap_model = "default",
  hdbscan_model = "default",
  vectorizer_model = "default",
  representation_model = "mmr",
  num_top_words = 10,
  n_gram_range = c(1, 3),
  stopwords = "english",
  min_df = 5,
  bm25_weighting = FALSE,
  reduce_frequent_words = TRUE,
  set_seed = 8,
  save_dir = "./results"
)
}
\arguments{
\item{data}{(tibble/data.frame) A tibble with a text-variable to be analysed, and optional
numeric/categorical variables that you might want to use for later analyses testing the
significance of topics in relation to these variables.}

\item{variable_name}{(string) Name of the text-variable in the data tibble that you want
to perform topic modeling on.}

\item{embedding_model}{(string) Name of the embedding model to use such as "miniLM", "mpnet",
"multi-mpnet", "distilroberta".}

\item{umap_model}{(string) The dimension reduction algorithm, currently only "default"
is supported.}

\item{hdbscan_model}{(string) The clustering algorithm to use, currently only "default"
is supported.}

\item{vectorizer_model}{(string) Name of the vectorizer model, currently only "default"
is supported.}

\item{representation_model}{(string) Name of the representation model used for topics,
including "keybert" or "mmr".}

\item{num_top_words}{(integer) Determine the number of top words presented for each topic.}

\item{n_gram_range}{(vector) Vector of length two indicating the ngram range used for
the vectorizer model.}

\item{stopwords}{(string) Name of the stopword dictionary to use.}

\item{min_df}{(integer) The minimum document frequency of terms.}

\item{bm25_weighting}{(boolean) Determine whether bm25_weighting is used for ClassTfidfTransformer.}

\item{reduce_frequent_words}{(boolean) Determine whether frequent words are reduced by ClassTfidfTransformer.}

\item{set_seed}{(integer) The random seed for initialization of the umap model.}

\item{save_dir}{(string) The directory for saving results.}
}
\value{
A folder containing the model, data, folder with terms and values for each topic,
and the document-topic matrix. Moreover, the model itself is returned formatted as a data.frame
together with metadata.
}
\description{
This function creates and trains a BERTopic model (based on the bertopic Python package) on a
text-variable in a tibble/data.frame. (EXPERIMENTAL)
}
