# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.

#' Spark ML -- SelectColumns
#'
#' ``SelectColumns`` takes a list of column names and returns a DataFrame
#' consisting of only those columns.  Any columns in the DataFrame that are
#' not in the selection list are dropped.
#'
#' This wrapper instantiates \code{com.microsoft.ml.spark.SelectColumns} on
#' the Spark connection, passes \code{cols} to its \code{setCols} setter and,
#' unless \code{unfit.model = TRUE}, calls \code{transform} on the Spark
#' DataFrame behind \code{x} and registers the result.
#'
#' Example using the Python API of the same transformer:
#'
#'     >>> import pandas as pd
#'     >>> from mmlspark import SelectColumns
#'     >>> from pyspark.sql import SparkSession
#'     >>> spark = SparkSession.builder.appName("Test SelectCol").getOrCreate()
#'     >>> tmp1 = {"col1": [1, 2, 3, 4, 5],
#'     ...         "col2": [6, 7, 8, 9, 10],
#'     ...         "col3": [5, 4, 3, 2, 1] }
#'     >>> pddf = pd.DataFrame(tmp1)
#'     >>> list(pddf.columns)
#'     ['col1', 'col2', 'col3']
#'     >>> data = spark.createDataFrame(pddf)
#'     >>> data2 = SelectColumns(cols = ["col1", "col2"]).transform(data)
#'     >>> data2.columns
#'     ['col1', 'col2']
#'
#' @param x An object that \code{spark_dataframe()} can convert to a Spark
#'   DataFrame. When \code{unfit.model = TRUE}, \code{x} is instead used
#'   directly as the Spark connection.
#' @param cols Character vector of the column names to keep. Must not be
#'   \code{NULL}.
#' @param unfit.model If \code{TRUE}, do not transform any data: \code{x} is
#'   treated as the Spark connection and the parameterised (not yet applied)
#'   transformer object is returned. Defaults to \code{FALSE}.
#' @return With \code{unfit.model = FALSE}, the result of
#'   \code{sdf_register()} applied to the transformed DataFrame. With
#'   \code{unfit.model = TRUE}, the transformer object returned by
#'   \code{setCols}.
#' @examples
#' \dontrun{
#' sc <- sparklyr::spark_connect(master = "local")
#' tbl <- sparklyr::sdf_copy_to(
#'   sc, data.frame(col1 = 1:5, col2 = 6:10, col3 = 5:1)
#' )
#' ml_select_columns(tbl, cols = c("col1", "col2"))
#' }
#' @export
ml_select_columns <- function(x, cols = NULL, unfit.model = FALSE)
{
  # Fail early with a readable message; as.array(NULL) below would otherwise
  # raise an obscure attribute error.
  if (is.null(cols)) {
    stop("`cols` must be a character vector of column names", call. = FALSE)
  }

  if (unfit.model) {
    # No data to transform: the caller hands us the connection itself.
    sc <- x
  } else {
    df <- spark_dataframe(x)
    sc <- spark_connection(df)
  }

  mod <- invoke_new(sc, "com.microsoft.ml.spark.SelectColumns")

  # as.array() so that even a single name is sent to the JVM as an array.
  mod_parameterized <- invoke(mod, "setCols", as.array(cols))

  # In unfit mode there is no `df`; return the configured transformer as is.
  if (unfit.model) {
    return(mod_parameterized)
  }

  transformed <- invoke(mod_parameterized, "transform", df)

  sdf_register(transformed)
}
