package com.microsoft.ml.spark.featurize;

import com.microsoft.ml.spark.core.contracts.HasAdditionalPythonMethods;
import com.microsoft.ml.spark.core.contracts.HasFeaturesCol;
import com.microsoft.ml.spark.core.contracts.Wrappable;
import java.io.IOException;
import org.apache.spark.annotation.DeveloperApi;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.Model;
import org.apache.spark.ml.feature.FastVectorAssembler;
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.linalg.SQLDataTypes$;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.StringArrayParam;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.Tuple3;
import scala.collection.Iterable;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.BitSet;
import scala.collection.immutable.BitSet$;
import scala.collection.immutable.HashSet;
import scala.collection.immutable.HashSet$;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

/* compiled from: AssembleFeatures.scala */
@ScalaSignature(bytes = "\u0006\u0001\u00055x!B\u0001\u0003\u0011\u0003i\u0011\u0001E!tg\u0016l'\r\\3GK\u0006$XO]3t\u0015\t\u0019A!A\u0005gK\u0006$XO]5{K*\u0011QAB\u0001\u0006gB\f'o\u001b\u0006\u0003\u000f!\t!!\u001c7\u000b\u0005%Q\u0011!C7jGJ|7o\u001c4u\u0015\u0005Y\u0011aA2p[\u000e\u0001\u0001C\u0001\b\u0010\u001b\u0005\u0011a!\u0002\t\u0003\u0011\u0003\t\"\u0001E!tg\u0016l'\r\\3GK\u0006$XO]3t'\u0015y!\u0003GAg!\t\u0019b#D\u0001\u0015\u0015\u0005)\u0012!B:dC2\f\u0017BA\f\u0015\u0005\u0019\te.\u001f*fMB\u0019\u0011D\t\u0013\u000e\u0003iQ!a\u0007\u000f\u0002\tU$\u0018\u000e\u001c\u0006\u0003\u000fuQ!!\u0002\u0010\u000b\u0005}\u0001\u0013AB1qC\u000eDWMC\u0001\"\u0003\ry'oZ\u0005\u0003Gi\u0011Q\u0003R3gCVdG\u000fU1sC6\u001c(+Z1eC\ndW\r\u0005\u0002\u000fK\u0019!\u0001C\u0001\u0001''\u0015)sE\f\u001c:!\rA\u0013fK\u0007\u00029%\u0011!\u0006\b\u0002\n\u000bN$\u0018.\\1u_J\u0004\"A\u0004\u0017\n\u00055\u0012!!F!tg\u0016l'\r\\3GK\u0006$XO]3t\u001b>$W\r\u001c\t\u0003_Qj\u0011\u0001\r\u0006\u0003cI\n\u0011bY8oiJ\f7\r^:\u000b\u0005M\"\u0011\u0001B2pe\u0016L!!\u000e\u0019\u0003\u001d!\u000b7OR3biV\u0014Xm]\"pYB\u0011qfN\u0005\u0003qA\u0012\u0011b\u0016:baB\f'\r\\3\u0011\u0005eQ\u0014BA\u001e\u001b\u0005U!UMZ1vYR\u0004\u0016M]1ng^\u0013\u0018\u000e^1cY\u0016D\u0001\"P\u0013\u0003\u0006\u0004%\tEP\u0001\u0004k&$W#A 
\u0011\u0005\u0001\u001beBA\nB\u0013\t\u0011E#\u0001\u0004Qe\u0016$WMZ\u0005\u0003\t\u0016\u0013aa\u0015;sS:<'B\u0001\"\u0015\u0011!9UE!A!\u0002\u0013y\u0014\u0001B;jI\u0002BQ!S\u0013\u0005\u0002)\u000ba\u0001P5oSRtDC\u0001\u0013L\u0011\u0015i\u0004\n1\u0001@\u0011\u0015IU\u0005\"\u0001N)\u0005!\u0003bB(&\u0005\u0004%\t\u0001U\u0001\u0013G>dW/\u001c8t)>4U-\u0019;ve&TX-F\u0001R!\t\u0011V+D\u0001T\u0015\t!F$A\u0003qCJ\fW.\u0003\u0002W'\n\u00012\u000b\u001e:j]\u001e\f%O]1z!\u0006\u0014\u0018-\u001c\u0005\u00071\u0016\u0002\u000b\u0011B)\u0002'\r|G.^7ogR{g)Z1ukJL'0\u001a\u0011\t\u000bi+CQA.\u0002+\u001d,GoQ8mk6t7\u000fV8GK\u0006$XO]5{KV\tA\fE\u0002\u0014;~J!A\u0018\u000b\u0003\u000b\u0005\u0013(/Y=\t\u000b\u0001,C\u0011A1\u0002+M,GoQ8mk6t7\u000fV8GK\u0006$XO]5{KR\u0011!mY\u0007\u0002K!)Am\u0018a\u00019\u0006)a/\u00197vK\"9a-\nb\u0001\n\u00039\u0017\u0001G8oK\"{G/\u00128d_\u0012,7)\u0019;fO>\u0014\u0018nY1mgV\t\u0001\u000eE\u0002SS.L!A[*\u0003\u000bA\u000b'/Y7\u0011\u0005Ma\u0017BA7\u0015\u0005\u001d\u0011un\u001c7fC:Daa\\\u0013!\u0002\u0013A\u0017!G8oK\"{G/\u00128d_\u0012,7)\u0019;fO>\u0014\u0018nY1mg\u0002BQ!]\u0013\u0005\u0006I\f1dZ3u\u001f:,\u0007j\u001c;F]\u000e|G-Z\"bi\u0016<wN]5dC2\u001cX#A6\t\u000bQ,C\u0011A;\u00027M,Go\u00148f\u0011>$XI\\2pI\u0016\u001c\u0015\r^3h_JL7-\u00197t)\t\u0011g\u000fC\u0003eg\u0002\u00071\u000eC\u0004yK\t\u0007I\u0011A=\u0002!9,XNY3s\u001f\u001a4U-\u0019;ve\u0016\u001cX#\u0001>\u0011\u0005I[\u0018B\u0001?T\u0005!Ie\u000e\u001e)be\u0006l\u0007B\u0002@&A\u0003%!0A\tok6\u0014WM](g\r\u0016\fG/\u001e:fg\u0002Bq!!\u0001&\t\u000b\t\u0019!A\nhKRtU/\u001c2fe>3g)Z1ukJ,7/\u0006\u0002\u0002\u0006A\u00191#a\u0002\n\u0007\u0005%ACA\u0002J]RDq!!\u0004&\t\u0003\ty!A\ntKRtU/\u001c2fe>3g)Z1ukJ,7\u000fF\u0002c\u0003#Aq\u0001ZA\u0006\u0001\u0004\t)\u0001\u0003\u0005\u0002\u0016\u0015\u0012\r\u0011\"\u0001h\u0003-\tG\u000e\\8x\u00136\fw-Z:\t\u000f\u0005eQ\u0005)A\u0005Q\u0006a\u0011\r\u001c7po&k\u0017mZ3tA!1\u0011QD\u0013\u0005\u0006I\fabZ3u\u00032dwn^%nC\u
001e,7\u000fC\u0004\u0002\"\u0015\"\t!a\t\u0002\u001dM,G/\u00117m_^LU.Y4fgR\u0019!-!\n\t\r\u0011\fy\u00021\u0001l\u0011\u001d\tI#\nC!\u0003W\t1AZ5u)\rY\u0013Q\u0006\u0005\t\u0003_\t9\u00031\u0001\u00022\u00059A-\u0019;bg\u0016$\b\u0007BA\u001a\u0003\u0007\u0002b!!\u000e\u0002<\u0005}RBAA\u001c\u0015\r\tI$H\u0001\u0004gFd\u0017\u0002BA\u001f\u0003o\u0011q\u0001R1uCN,G\u000f\u0005\u0003\u0002B\u0005\rC\u0002\u0001\u0003\r\u0003\u000b\ni#!A\u0001\u0002\u000b\u0005\u0011q\t\u0002\u0004?\u0012\n\u0014\u0003BA%\u0003\u001f\u00022aEA&\u0013\r\ti\u0005\u0006\u0002\b\u001d>$\b.\u001b8h!\r\u0019\u0012\u0011K\u0005\u0004\u0003'\"\"aA!os\"9\u0011qK\u0013\u0005\n\u0005e\u0013!F4fi\u000e{G.^7ogR{g+Z2u_JL'0\u001a\u000b\u0005\u00037\n\u0019\bE\u0003\u0002^\u00055tH\u0004\u0003\u0002`\u0005%d\u0002BA1\u0003Oj!!a\u0019\u000b\u0007\u0005\u0015D\"\u0001\u0004=e>|GOP\u0005\u0002+%\u0019\u00111\u000e\u000b\u0002\u000fA\f7m[1hK&!\u0011qNA9\u0005\u0011a\u0015n\u001d;\u000b\u0007\u0005-D\u0003\u0003\u0005\u0002v\u0005U\u0003\u0019AA<\u0003Y\u0019w\u000e\\;n]:\u000bW.Z:U_\u001a+\u0017\r^;sSj,\u0007c\u0001\b\u0002z%\u0019\u00111\u0010\u0002\u0003-\r{G.^7o\u001d\u0006lWm\u001d+p\r\u0016\fG/\u001e:ju\u0016Dq!a 
&\t\u0003\t\t)\u0001\u0005u_\nKGoU3u)\u0011\t\u0019)a%\u0011\t\u0005\u0015\u0015qR\u0007\u0003\u0003\u000fSA!!#\u0002\f\u0006I\u0011.\\7vi\u0006\u0014G.\u001a\u0006\u0004\u0003\u001b#\u0012AC2pY2,7\r^5p]&!\u0011\u0011SAD\u0005\u0019\u0011\u0015\u000e^*fi\"A\u0011QSA?\u0001\u0004\t9*A\u0004j]\u0012L7-Z:\u0011\tMi\u0016Q\u0001\u0005\b\u00037+C\u0011IAO\u0003\u0011\u0019w\u000e]=\u0015\u0007\u001d\ny\n\u0003\u0005\u0002\"\u0006e\u0005\u0019AAR\u0003\u0015)\u0007\u0010\u001e:b!\r\u0011\u0016QU\u0005\u0004\u0003O\u001b&\u0001\u0003)be\u0006lW*\u00199\t\u000f\u0005-V\u0005\"\u0011\u0002.\u0006yAO]1og\u001a|'/\\*dQ\u0016l\u0017\r\u0006\u0003\u00020\u0006m\u0006\u0003BAY\u0003ok!!a-\u000b\t\u0005U\u0016qG\u0001\u0006if\u0004Xm]\u0005\u0005\u0003s\u000b\u0019L\u0001\u0006TiJ,8\r\u001e+za\u0016D\u0001\"!0\u0002*\u0002\u0007\u0011qV\u0001\u0007g\u000eDW-\\1)\t\u0005%\u0016\u0011\u0019\t\u0005\u0003\u0007\fI-\u0004\u0002\u0002F*\u0019\u0011qY\u000f\u0002\u0015\u0005tgn\u001c;bi&|g.\u0003\u0003\u0002L\u0006\u0015'\u0001\u0004#fm\u0016dw\u000e]3s\u0003BL\u0007cA\n\u0002P&\u0019\u0011\u0011\u001b\u000b\u0003\u0019M+'/[1mSj\f'\r\\3\t\r%{A\u0011AAk)\u0005i\u0001\"CAm\u001f\u0005\u0005I\u0011BAn\u0003-\u0011X-\u00193SKN|GN^3\u0015\u0005\u0005u\u0007\u0003BAp\u0003Sl!!!9\u000b\t\u0005\r\u0018Q]\u0001\u0005Y\u0006twM\u0003\u0002\u0002h\u0006!!.\u0019<b\u0013\u0011\tY/!9\u0003\r=\u0013'.Z2u\u0001")
/* loaded from: input_file:com/microsoft/ml/spark/featurize/AssembleFeatures.class */
/*
 * Estimator that assembles the configured input columns into a single vector
 * column (named by the featuresCol param) and yields an AssembleFeaturesModel.
 *
 * Params declared here:
 *   - columnsToFeaturize       : names of the columns to featurize
 *   - oneHotEncodeCategoricals : default true
 *   - numberOfFeatures         : size passed to HashingTF.setNumFeatures (no default set here)
 *   - allowImages              : default false
 *   - featuresCol              : default "features"
 *
 * NOTE(review): this source is decompiler output of Scala code. Calls of the form
 * `Xxx.class.method(this)` / `Xxx.Cclass.method(this)` look like renderings of the
 * Scala trait implementation classes (`Xxx$class`) — confirm before compiling as Java.
 */
public class AssembleFeatures extends Estimator<AssembleFeaturesModel> implements HasFeaturesCol, Wrappable, DefaultParamsWritable {
    private final String uid;
    private final StringArrayParam columnsToFeaturize;
    private final Param<Object> oneHotEncodeCategoricals;
    private final IntParam numberOfFeatures;
    private final Param<Object> allowImages;
    // Not final: it is assigned through the HasFeaturesCol trait setter below, which is
    // an ordinary method (Java forbids assigning a final field outside a constructor).
    private Param<String> featuresCol;

    /** Loads a saved instance from {@code str} by delegating to the companion object. */
    public static Object load(String str) {
        return AssembleFeatures$.MODULE$.load(str);
    }

    /** Returns the reader provided by the companion object. */
    public static MLReader<AssembleFeatures> read() {
        return AssembleFeatures$.MODULE$.read();
    }

    /** Returns the writer supplied by the DefaultParamsWritable trait implementation. */
    public MLWriter write() {
        return DefaultParamsWritable.class.write(this);
    }

    /**
     * Saves this estimator to {@code str} via the MLWritable trait implementation.
     *
     * @throws IOException declared by the underlying save call
     */
    public void save(String str) throws IOException {
        MLWritable.class.save(this, str);
    }

    @Override // com.microsoft.ml.spark.core.contracts.HasAdditionalPythonMethods
    public String additionalPythonMethods() {
        return HasAdditionalPythonMethods.Cclass.additionalPythonMethods(this);
    }

    /** Param holding the name of the output features column. */
    @Override // com.microsoft.ml.spark.core.contracts.HasFeaturesCol
    public Param<String> featuresCol() {
        return this.featuresCol;
    }

    /** Trait-generated setter; called once from the constructor to install the featuresCol param. */
    @Override // com.microsoft.ml.spark.core.contracts.HasFeaturesCol
    public void com$microsoft$ml$spark$core$contracts$HasFeaturesCol$_setter_$featuresCol_$eq(Param param) {
        this.featuresCol = param;
    }

    @Override // com.microsoft.ml.spark.core.contracts.HasFeaturesCol
    public HasFeaturesCol setFeaturesCol(String str) {
        return HasFeaturesCol.Cclass.setFeaturesCol(this, str);
    }

    @Override // com.microsoft.ml.spark.core.contracts.HasFeaturesCol
    public String getFeaturesCol() {
        return HasFeaturesCol.Cclass.getFeaturesCol(this);
    }

    /** Unique identifier given at construction time. */
    public String uid() {
        return this.uid;
    }

    /** Param: "Columns to featurize". */
    public StringArrayParam columnsToFeaturize() {
        return this.columnsToFeaturize;
    }

    /** Current value of {@link #columnsToFeaturize()}. */
    public final String[] getColumnsToFeaturize() {
        return (String[]) $(columnsToFeaturize());
    }

    /** Sets {@link #columnsToFeaturize()} and returns this estimator. */
    public AssembleFeatures setColumnsToFeaturize(String[] strArr) {
        return (AssembleFeatures) set(columnsToFeaturize(), strArr);
    }

    /** Param: "One-hot encode categoricals" (default true). */
    public Param<Object> oneHotEncodeCategoricals() {
        return this.oneHotEncodeCategoricals;
    }

    /** Current value of {@link #oneHotEncodeCategoricals()}. */
    public final boolean getOneHotEncodeCategoricals() {
        return BoxesRunTime.unboxToBoolean($(oneHotEncodeCategoricals()));
    }

    /** Sets {@link #oneHotEncodeCategoricals()} and returns this estimator. */
    public AssembleFeatures setOneHotEncodeCategoricals(boolean z) {
        return (AssembleFeatures) set(oneHotEncodeCategoricals(), BoxesRunTime.boxToBoolean(z));
    }

    /** Param: "Number of features to hash string columns to". */
    public IntParam numberOfFeatures() {
        return this.numberOfFeatures;
    }

    /** Current value of {@link #numberOfFeatures()}. */
    public final int getNumberOfFeatures() {
        return BoxesRunTime.unboxToInt($(numberOfFeatures()));
    }

    /** Sets {@link #numberOfFeatures()} and returns this estimator. */
    public AssembleFeatures setNumberOfFeatures(int i) {
        return (AssembleFeatures) set(numberOfFeatures(), BoxesRunTime.boxToInteger(i));
    }

    /** Param: "Allow featurization of images" (default false). */
    public Param<Object> allowImages() {
        return this.allowImages;
    }

    /** Current value of {@link #allowImages()}. */
    public final boolean getAllowImages() {
        return BoxesRunTime.unboxToBoolean($(allowImages()));
    }

    /** Sets {@link #allowImages()} and returns this estimator. */
    public AssembleFeatures setAllowImages(boolean z) {
        return (AssembleFeatures) set(allowImages(), BoxesRunTime.boxToBoolean(z));
    }

    /**
     * Fits the estimator on {@code dataset} and returns the resulting model.
     *
     * <p>Every dataset column is visited by {@code AssembleFeatures$$anonfun$fit$1}
     * (defined elsewhere; presumably it classifies each column into
     * {@code columnNamesToFeaturize} — confirm). When there are columns to hash, a
     * {@link HashingTF} sized by {@link #getNumberOfFeatures()} is created, the string
     * columns are hashed, and an {@code int[]} is derived by mapping each row to a
     * {@link BitSet} and reducing (presumably the set of used hash slots — confirm);
     * the selected-features column name is then appended to the columns to vectorize.
     *
     * @param dataset input data
     * @return a model carrying the column bookkeeping, the optional hashing transform,
     *         the optional slot array, a configured {@link FastVectorAssembler} and the
     *         one-hot-encoding flag
     */
    public AssembleFeaturesModel fit(Dataset<?> dataset) {
        ColumnNamesToFeaturize columnNamesToFeaturize = new ColumnNamesToFeaturize();
        HashSet apply = HashSet$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(getColumnsToFeaturize()));
        String[] columns = dataset.columns();
        scala.collection.mutable.HashSet hashSet = new scala.collection.mutable.HashSet();
        hashSet.$plus$plus$eq(Predef$.MODULE$.refArrayOps(columns));
        Dataset<Row> df = dataset.toDF();
        Predef$.MODULE$.refArrayOps(columns).foreach(new AssembleFeatures$$anonfun$fit$1(this, dataset, columnNamesToFeaturize, apply, hashSet, df));
        List<String> columnsToVectorize = getColumnsToVectorize(columnNamesToFeaturize);

        Option hashingTransform;
        List vectorizeColumns;
        Option selectedSlots;
        if (columnNamesToFeaturize.colNamesToHash().isEmpty()) {
            // Nothing to hash: no HashingTF and no slot array.
            hashingTransform = None$.MODULE$;
            vectorizeColumns = columnsToVectorize;
            selectedSlots = None$.MODULE$;
        } else {
            HashingTF numFeatures = new HashingTF().setInputCol(AssembleFeaturesUtilities$.MODULE$.getTokenizedColumnName(df)).setOutputCol(AssembleFeaturesUtilities$.MODULE$.getHashedFeaturesColumnName(df)).setNumFeatures(getNumberOfFeatures());
            Dataset<Row> hashStringColumns = AssembleFeaturesUtilities$.MODULE$.hashStringColumns(df, columnNamesToFeaturize.colNamesToHash(), numFeatures);
            int[] iArr = (int[]) ((BitSet) hashStringColumns.select(numFeatures.getOutputCol(), Predef$.MODULE$.wrapRefArray(new String[0])).map(new AssembleFeatures$$anonfun$4(this), Encoders$.MODULE$.kryo(ClassTag$.MODULE$.apply(BitSet.class))).reduce(new AssembleFeatures$$anonfun$5(this))).toArray(ClassTag$.MODULE$.Int());
            hashingTransform = new Some(numFeatures);
            vectorizeColumns = (List) columnsToVectorize.$colon$plus(AssembleFeaturesUtilities$.MODULE$.getSelectedFeaturesColumnName(df), List$.MODULE$.canBuildFrom());
            selectedSlots = new Some(iArr);
        }

        // Record the final column list before building the assembler from it.
        columnNamesToFeaturize.colNamesToVectorize().$plus$plus$eq(vectorizeColumns);
        return new AssembleFeaturesModel(uid(), columnNamesToFeaturize, hashingTransform, selectedSlots, new FastVectorAssembler().setInputCols((String[]) vectorizeColumns.toArray(ClassTag$.MODULE$.apply(String.class))).setOutputCol(getFeaturesCol()), getOneHotEncodeCategoricals());
    }

    /**
     * Builds the de-duplicated list of column names to feed to the vector assembler.
     *
     * <p>Categorical column names come first (the map's values when one-hot encoding is
     * enabled, its keys otherwise), followed by the conversion-column names mapped through
     * {@code AssembleFeatures$$anonfun$6}. With one-hot encoding enabled the list is
     * additionally filtered by {@code AssembleFeatures$$anonfun$7} (predicate defined
     * elsewhere).
     */
    private List<String> getColumnsToVectorize(ColumnNamesToFeaturize columnNamesToFeaturize) {
        boolean oneHot = getOneHotEncodeCategoricals();
        Iterable values = oneHot ? columnNamesToFeaturize.categoricalColumns().values() : columnNamesToFeaturize.categoricalColumns().keys();
        List<String> list = (List) ((Seq) columnNamesToFeaturize.conversionColumnNamesMap().keys().toSeq().map(new AssembleFeatures$$anonfun$6(this, columnNamesToFeaturize), Seq$.MODULE$.canBuildFrom())).toList().$colon$colon$colon(values.toList()).distinct();
        return oneHot ? (List) list.filter(new AssembleFeatures$$anonfun$7(this, columnNamesToFeaturize)) : list;
    }

    /**
     * Folds {@code iArr} into an immutable {@link BitSet}, starting from an empty set and
     * combining with {@code AssembleFeatures$$anonfun$toBitSet$1} (presumably element
     * insertion — confirm).
     */
    public BitSet toBitSet(int[] iArr) {
        return (BitSet) Predef$.MODULE$.intArrayOps(iArr).foldLeft(BitSet$.MODULE$.apply(Nil$.MODULE$), new AssembleFeatures$$anonfun$toBitSet$1(this));
    }

    /* renamed from: copy, reason: merged with bridge method [inline-methods] and merged with bridge method [inline-methods] */
    /**
     * Copies this estimator, keeping its uid and param values and applying
     * {@code paramMap} on top.
     *
     * <p>Previously this returned a brand-new default instance, silently dropping every
     * configured param and the supplied overrides.
     */
    public Estimator<AssembleFeaturesModel> m340copy(ParamMap paramMap) {
        return (Estimator<AssembleFeaturesModel>) defaultCopy(paramMap);
    }

    /** Returns {@code structType} with the vector-typed features column appended. */
    @DeveloperApi
    public StructType transformSchema(StructType structType) {
        return structType.add(new StructField(getFeaturesCol(), SQLDataTypes$.MODULE$.VectorType(), StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4()));
    }

    /* renamed from: fit, reason: collision with other method in class */
    public /* bridge */ /* synthetic */ Model m341fit(Dataset dataset) {
        return fit((Dataset<?>) dataset);
    }

    /**
     * Creates the estimator with the given uid, declares all params and registers
     * their defaults.
     *
     * <p>Statement order matters: each param must be created before the
     * {@code setDefault} call that references it.
     */
    public AssembleFeatures(String str) {
        this.uid = str;
        com$microsoft$ml$spark$core$contracts$HasFeaturesCol$_setter_$featuresCol_$eq(new Param(this, "featuresCol", "The name of the features column"));
        HasAdditionalPythonMethods.Cclass.$init$(this);
        MLWritable.class.$init$(this);
        DefaultParamsWritable.class.$init$(this);
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{featuresCol().$minus$greater("features")}));
        this.columnsToFeaturize = new StringArrayParam(this, "columnsToFeaturize", "Columns to featurize", new AssembleFeatures$$anonfun$3(this));
        this.oneHotEncodeCategoricals = new BooleanParam(this, "oneHotEncodeCategoricals", "One-hot encode categoricals");
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{oneHotEncodeCategoricals().$minus$greater(BoxesRunTime.boxToBoolean(true))}));
        this.numberOfFeatures = new IntParam(this, "numberOfFeatures", "Number of features to hash string columns to");
        this.allowImages = new BooleanParam(this, "allowImages", "Allow featurization of images");
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{allowImages().$minus$greater(BoxesRunTime.boxToBoolean(false))}));
    }

    /** Creates the estimator with a random uid prefixed by "AssembleFeatures". */
    public AssembleFeatures() {
        this(Identifiable$.MODULE$.randomUID("AssembleFeatures"));
    }
}
