# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.

#' Spark ML -- TrainClassifier
#'
#' Trains a classification model.
#'
#' The currently supported classifiers are:
#' \itemize{
#'   \item Logistic Regression Classifier
#'   \item Decision Tree Classifier
#'   \item Random Forest Classifier
#'   \item Gradient Boosted Trees Classifier
#'   \item Naive Bayes Classifier
#'   \item Multilayer Perceptron Classifier
#' }
#' in addition to any generic learner that inherits from Predictor.
#'
#' This module featurizes the given data into a vector of doubles and
#' passes it to the given learner.
#'
#' The \code{reindexLabel} and \code{labels} parameters interact as follows:
#' \itemize{
#'   \item \code{reindexLabel = FALSE}, \code{labels} empty: assume all
#'     double values, don't use metadata, assume natural ordering.
#'   \item \code{reindexLabel = TRUE}, \code{labels} empty: index the label
#'     column, using the natural ordering of the string indexer.
#'   \item \code{reindexLabel = FALSE}, \code{labels} specified: assume the
#'     user knows the indexing and apply the label values. Currently only
#'     string type is supported.
#'   \item \code{reindexLabel = TRUE}, \code{labels} specified: validate that
#'     the labels match the column type, try to recast to the label type,
#'     and reindex the label column.
#' }
#'
#' Arguments left as \code{NULL} are not forwarded to the estimator, so the
#' estimator keeps its own setting for them.
#'
#' @param x An object accepted by \code{spark_dataframe()}; it supplies both
#'   the training data and the Spark connection
#' @param featuresCol The name of the features column
#' @param labelCol The name of the label column
#' @param labels Sorted label values on the labels column
#' @param model Classifier to run
#' @param numFeatures Number of features to hash to
#' @param reindexLabel Re-index the label column
#' @param only.model If \code{TRUE}, return the fitted model object instead
#'   of the transformed data
#' @return When \code{only.model} is \code{TRUE}, the model object built from
#'   the configured estimator and its fitted model. Otherwise the result of
#'   applying the fitted model's \code{transform} to the input data,
#'   registered with \code{sdf_register()}.
#' @export
ml_train_classifier <- function(x, featuresCol=NULL, labelCol=NULL, labels=NULL, model=NULL, numFeatures=0, reindexLabel=TRUE, only.model=FALSE)
{
  df <- spark_dataframe(x)
  sc <- spark_connection(df)

  estimator_class <- "com.microsoft.ml.spark.TrainClassifier"
  estimator <- invoke_new(sc, estimator_class)

  # Optional arguments default to NULL. Only forward the ones the caller
  # actually supplied: as.array(NULL) raises an error in R, and pushing a
  # NULL into a setter would overwrite the estimator's own setting.
  if (!is.null(featuresCol)) {
    estimator <- invoke(estimator, "setFeaturesCol", featuresCol)
  }
  if (!is.null(labelCol)) {
    estimator <- invoke(estimator, "setLabelCol", labelCol)
  }
  if (!is.null(labels)) {
    # NOTE(review): as.array() presumably keeps a single label from being
    # serialized as a scalar -- confirm against sparklyr's serializer.
    estimator <- invoke(estimator, "setLabels", as.array(labels))
  }
  if (!is.null(model)) {
    estimator <- invoke(estimator, "setModel", model)
  }

  # These two always have a value (non-NULL defaults), so always set them.
  estimator <- estimator %>%
    invoke("setNumFeatures", as.integer(numFeatures)) %>%
    invoke("setReindexLabel", as.logical(reindexLabel))

  fitted <- invoke(estimator, "fit", df)

  # The fitted model is passed as both the second and third argument,
  # exactly as the wrapper has always done.
  mod_model <- sparklyr:::new_ml_model(estimator, fitted, fitted)

  if (only.model) {
    return(mod_model)
  }

  transformed <- invoke(mod_model$model, "transform", df)

  sdf_register(transformed)
}
