# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.

#' Spark ML -- SummarizeData
#'
#' Compute summary statistics for a dataset by running the
#' \code{com.microsoft.ml.spark.SummarizeData} transformer over it.
#'
#' Four groups of statistics can be toggled independently:
#' \itemize{
#'   \item counts
#'   \item basic
#'   \item sample
#'   \item percentiles
#' }
#'
#' \code{errorThreshold} (default 0.0) is the error threshold used for
#' quantiles.
#'
#' @param x Object passed to \code{spark_dataframe()} to obtain the Spark
#'   DataFrame to summarize
#' @param basic Compute basic statistics
#' @param counts Compute count statistics
#' @param errorThreshold Threshold for quantiles - 0 is exact
#' @param percentiles Compute percentiles
#' @param sample Compute sample statistics
#' @return The result of \code{sdf_register()} applied to the transformed
#'   DataFrame
#' @export
ml_summarize_data <- function(x,
                              basic = TRUE,
                              counts = TRUE,
                              errorThreshold = 0.0,
                              percentiles = TRUE,
                              sample = TRUE) {
  input_sdf <- spark_dataframe(x)
  connection <- spark_connection(input_sdf)

  # Instantiate the JVM-side transformer, then apply each setter in turn.
  # Every argument is coerced on the R side before being handed to invoke().
  summarizer <- invoke_new(connection, "com.microsoft.ml.spark.SummarizeData")
  summarizer <- invoke(summarizer, "setBasic", as.logical(basic))
  summarizer <- invoke(summarizer, "setCounts", as.logical(counts))
  summarizer <- invoke(
    summarizer, "setErrorThreshold", as.double(errorThreshold)
  )
  summarizer <- invoke(summarizer, "setPercentiles", as.logical(percentiles))
  summarizer <- invoke(summarizer, "setSample", as.logical(sample))

  # Run the configured transformer and register its output.
  summary_sdf <- invoke(summarizer, "transform", input_sdf)
  sdf_register(summary_sdf)
}
