# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.

#' Spark ML -- EnsembleByKey
#'
#'     The ``EnsembleByKey`` first performs a grouping operation on a set of keys,
#'         and then averages the selected columns. It can handle scalar or vector columns,
#'         and the dimensions of the vector columns are automatically inferred by materializing
#'         the first row of the column. To avoid materialization you can provide the vector dimensions
#'         through the ``setVectorDims`` function, which takes a mapping from
#'         columns (String) to dimension (Int). You can also choose to squash or keep the original
#'         dataset with the ``collapseGroup`` parameter.
#'
#'     Arguments left as ``NULL`` are not forwarded to the underlying transformer, so the
#'         transformer keeps whatever value it already has for that parameter.
#' @param x Object to transform; it is passed to ``spark_dataframe``. When
#'     ``unfit.model`` is ``TRUE`` it is instead used directly as the Spark connection.
#' @param colNames Names of the output columns, one per ensembled column
#' @param collapseGroup Whether to collapse all items in group to one entry
#' @param cols Cols to ensemble
#' @param keys Keys to group by
#' @param strategy How to ensemble the scores, ex: mean
#' @param vectorDims the dimensions of any vector columns, used to avoid materialization
#' @param unfit.model If ``TRUE``, ``x`` is treated as a Spark connection and the
#'     configured (not yet applied) transformer object is returned
#' @return The result of ``sdf_register`` on the transformed data, or the configured
#'     transformer object when ``unfit.model`` is ``TRUE``.
#' @export
ml_ensemble_by_key <- function(x, colNames=NULL, collapseGroup=TRUE, cols=NULL, keys=NULL,
                               strategy="mean", vectorDims=NULL, unfit.model=FALSE)
{
  # `unfit.model` used to be read without ever being defined; it is now an
  # explicit, defaulted argument so existing calls keep working.
  return_model_only <- isTRUE(unfit.model)

  if (return_model_only) {
    sc <- x
  } else {
    df <- spark_dataframe(x)
    sc <- spark_connection(df)
  }

  # Calls `setter` on `jobj` with the converted value, unless the value is NULL
  # (as.array(NULL) is an error, and there is nothing meaningful to send).
  set_param <- function(jobj, setter, value, convert = identity) {
    if (is.null(value)) {
      return(jobj)
    }
    invoke(jobj, setter, convert(value))
  }

  mod <- invoke_new(sc, "com.microsoft.ml.spark.EnsembleByKey")

  # Setters are applied in the same order as before. The as.array() coercion is
  # kept from the original code -- presumably so that a single name is still
  # sent as an array rather than a scalar string; confirm against sparklyr.
  mod <- set_param(mod, "setColNames", colNames, as.array)
  mod <- set_param(mod, "setCollapseGroup", collapseGroup, as.logical)
  mod <- set_param(mod, "setCols", cols, as.array)
  mod <- set_param(mod, "setKeys", keys, as.array)
  mod <- set_param(mod, "setStrategy", strategy)
  mod <- set_param(mod, "setVectorDims", vectorDims)

  # With only a connection there is no data frame to transform, so hand back
  # the configured transformer instead of failing on an undefined `df`.
  if (return_model_only) {
    return(mod)
  }

  transformed <- invoke(mod, "transform", df)

  sdf_register(transformed)
}
