package com.microsoft.ml.spark.featurize.text;

import com.microsoft.ml.spark.core.contracts.HasAdditionalPythonMethods;
import com.microsoft.ml.spark.core.contracts.HasInputCol;
import com.microsoft.ml.spark.core.contracts.HasOutputCol;
import com.microsoft.ml.spark.featurize.text.TextFeaturizerParams;
import java.io.IOException;
import java.util.NoSuchElementException;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.Model;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.attribute.AttributeGroup;
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.feature.IDF;
import org.apache.spark.ml.feature.NGram;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.StopWordsRemover;
import org.apache.spark.ml.feature.StopWordsRemover$;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.ArrayType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import scala.Predef$;
import scala.StringContext;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;

/* compiled from: TextFeaturizer.scala */
@ScalaSignature(bytes = "\u0006\u0001\t-q!B\u0001\u0003\u0011\u0003y\u0011A\u0004+fqR4U-\u0019;ve&TXM\u001d\u0006\u0003\u0007\u0011\tA\u0001^3yi*\u0011QAB\u0001\nM\u0016\fG/\u001e:ju\u0016T!a\u0002\u0005\u0002\u000bM\u0004\u0018M]6\u000b\u0005%Q\u0011AA7m\u0015\tYA\"A\u0005nS\u000e\u0014xn]8gi*\tQ\"A\u0002d_6\u001c\u0001\u0001\u0005\u0002\u0011#5\t!AB\u0003\u0013\u0005!\u00051C\u0001\bUKb$h)Z1ukJL'0\u001a:\u0014\u000bE!\"$a;\u0011\u0005UAR\"\u0001\f\u000b\u0003]\tQa]2bY\u0006L!!\u0007\f\u0003\r\u0005s\u0017PU3g!\rYBEJ\u0007\u00029)\u0011QDH\u0001\u0005kRLGN\u0003\u0002\n?)\u0011q\u0001\t\u0006\u0003C\t\na!\u00199bG\",'\"A\u0012\u0002\u0007=\u0014x-\u0003\u0002&9\t)B)\u001a4bk2$\b+\u0019:b[N\u0014V-\u00193bE2,\u0007C\u0001\t(\r\u0011\u0011\"\u0001\u0001\u0015\u0014\u000b\u001dJ\u0003gM\u001e\u0011\u0007)ZS&D\u0001\u001f\u0013\tacDA\u0005FgRLW.\u0019;peB\u0011\u0001CL\u0005\u0003_\t\u00111\u0003V3yi\u001a+\u0017\r^;sSj,'/T8eK2\u0004\"\u0001E\u0019\n\u0005I\u0012!\u0001\u0006+fqR4U-\u0019;ve&TXM\u001d)be\u0006l7\u000f\u0005\u00025s5\tQG\u0003\u00027o\u0005I1m\u001c8ue\u0006\u001cGo\u001d\u0006\u0003q\u0019\tAaY8sK&\u0011!(\u000e\u0002\f\u0011\u0006\u001c\u0018J\u001c9vi\u000e{G\u000e\u0005\u00025y%\u0011Q(\u000e\u0002\r\u0011\u0006\u001cx*\u001e;qkR\u001cu\u000e\u001c\u0005\t\u007f\u001d\u0012)\u0019!C!\u0001\u0006\u0019Q/\u001b3\u0016\u0003\u0005\u0003\"AQ#\u000f\u0005U\u0019\u0015B\u0001#\u0017\u0003\u0019\u0001&/\u001a3fM&\u0011ai\u0012\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005\u00113\u0002\u0002C%(\u0005\u0003\u0005\u000b\u0011B!\u0002\tULG\r\t\u0005\u0006\u0017\u001e\"\t\u0001T\u0001\u0007y%t\u0017\u000e\u001e \u0015\u0005\u0019j\u0005\"B 
K\u0001\u0004\t\u0005\"B&(\t\u0003yE#\u0001\u0014\t\u000bE;C\u0011\u0001*\u0002\u001fM,G/V:f)>\\WM\\5{KJ$\"a\u0015+\u000e\u0003\u001dBQ!\u0016)A\u0002Y\u000bQA^1mk\u0016\u0004\"!F,\n\u0005a3\"a\u0002\"p_2,\u0017M\u001c\u0005\u00065\u001e\"\taW\u0001\u0011g\u0016$Hk\\6f]&TXM]$baN$\"a\u0015/\t\u000bUK\u0006\u0019\u0001,\t\u000by;C\u0011A0\u0002#M,G/T5o)>\\WM\u001c'f]\u001e$\b\u000e\u0006\u0002TA\")Q+\u0018a\u0001CB\u0011QCY\u0005\u0003GZ\u00111!\u00138u\u0011\u0015)w\u0005\"\u0001g\u0003M\u0019X\r\u001e+pW\u0016t\u0017N_3s!\u0006$H/\u001a:o)\t\u0019v\rC\u0003VI\u0002\u0007\u0011\tC\u0003jO\u0011\u0005!.\u0001\btKR$v\u000eT8xKJ\u001c\u0017m]3\u0015\u0005M[\u0007\"B+i\u0001\u00041\u0006\"B7(\t\u0003q\u0017AF:fiV\u001bXm\u0015;pa^{'\u000fZ:SK6|g/\u001a:\u0015\u0005M{\u0007\"B+m\u0001\u00041\u0006\"B9(\t\u0003\u0011\u0018!G:fi\u000e\u000b7/Z*f]NLG/\u001b<f'R|\u0007oV8sIN$\"aU:\t\u000bU\u0003\b\u0019\u0001,\t\u000bU<C\u0011\u0001<\u00025M,G\u000fR3gCVdGo\u0015;pa^{'\u000f\u001a'b]\u001e,\u0018mZ3\u0015\u0005M;\b\"B+u\u0001\u0004\t\u0005\"B=(\t\u0003Q\u0018\u0001D:fiN#x\u000e],pe\u0012\u001cHCA*|\u0011\u0015)\u0006\u00101\u0001B\u0011\u0015ix\u0005\"\u0001\u007f\u0003-\u0019X\r^+tK:;%/Y7\u0015\u0005M{\b\"B+}\u0001\u00041\u0006bBA\u0002O\u0011\u0005\u0011QA\u0001\u000fg\u0016$hj\u0012:b[2+gn\u001a;i)\r\u0019\u0016q\u0001\u0005\u0007+\u0006\u0005\u0001\u0019A1\t\u000f\u0005-q\u0005\"\u0001\u0002\u000e\u0005I1/\u001a;CS:\f'/\u001f\u000b\u0004'\u0006=\u0001BB+\u0002\n\u0001\u0007a\u000bC\u0004\u0002\u0014\u001d\"\t!!\u0006\u0002\u001dM,GOT;n\r\u0016\fG/\u001e:fgR\u00191+a\u0006\t\rU\u000b\t\u00021\u0001b\u0011\u001d\tYb\nC\u0001\u0003;\t\u0011b]3u+N,\u0017\n\u0012$\u0015\u0007M\u000by\u0002\u0003\u0004V\u00033\u0001\rA\u0016\u0005\b\u0003G9C\u0011AA\u0013\u00035\u0019X\r^'j]\u0012{7M\u0012:fcR\u00191+a\n\t\rU\u000b\t\u00031\u0001b\u0011\u001d\tYc\nC\u0005\u0003[\t\u0001c]3u!\u0006\u0014\u0018-\\%oi\u0016\u0014h.\u00197\u0016\r\u0005=\u0012QGA*)!\t\t$a\u0012\u0002L\u0005=\u0003\
u0003BA\u001a\u0003ka\u0001\u0001\u0002\u0005\u00028\u0005%\"\u0019AA\u001d\u0005\u0005i\u0015\u0003BA\u001e\u0003\u0003\u00022!FA\u001f\u0013\r\tyD\u0006\u0002\b\u001d>$\b.\u001b8h!\rQ\u00131I\u0005\u0004\u0003\u000br\"!\u0004)ja\u0016d\u0017N\\3Ti\u0006<W\r\u0003\u0005\u0002J\u0005%\u0002\u0019AA\u0019\u0003\u0015iw\u000eZ3m\u0011\u001d\ti%!\u000bA\u0002\u0005\u000bAA\\1nK\"9Q+!\u000bA\u0002\u0005E\u0003\u0003BA\u001a\u0003'\"\u0001\"!\u0016\u0002*\t\u0007\u0011q\u000b\u0002\u0002)F!\u00111HA-!\r)\u00121L\u0005\u0004\u0003;2\"aA!os\"9\u0011\u0011M\u0014\u0005\n\u0005\r\u0014\u0001E4fiB\u000b'/Y7J]R,'O\\1m+\u0019\t)'a\u001b\u0002pQ1\u0011\u0011LA4\u0003[B\u0001\"!\u0013\u0002`\u0001\u0007\u0011\u0011\u000e\t\u0005\u0003g\tY\u0007\u0002\u0005\u00028\u0005}#\u0019AA\u001d\u0011\u001d\ti%a\u0018A\u0002\u0005#\u0001\"!\u0016\u0002`\t\u0007\u0011q\u000b\u0005\b\u0003g:C\u0011IA;\u0003\r1\u0017\u000e\u001e\u000b\u0004[\u0005]\u0004\u0002CA=\u0003c\u0002\r!a\u001f\u0002\u000f\u0011\fG/Y:fiB\"\u0011QPAF!\u0019\ty(!\"\u0002\n6\u0011\u0011\u0011\u0011\u0006\u0004\u0003\u0007{\u0012aA:rY&!\u0011qQAA\u0005\u001d!\u0015\r^1tKR\u0004B!a\r\u0002\f\u0012a\u0011QRA<\u0003\u0003\u0005\tQ!\u0001\u0002X\t\u0019q\fJ\u0019\t\u000f\u0005Eu\u0005\"\u0011\u0002\u0014\u0006!1m\u001c9z)\rI\u0013Q\u0013\u0005\t\u0003/\u000by\t1\u0001\u0002\u001a\u0006)Q\r\u001f;sCB!\u00111TAQ\u001b\t\tiJC\u0002\u0002 
z\tQ\u0001]1sC6LA!a)\u0002\u001e\nA\u0001+\u0019:b[6\u000b\u0007\u000fC\u0004\u0002(\u001e\"\t!!+\u0002\u001fQ\u0014\u0018M\\:g_Jl7k\u00195f[\u0006$B!a+\u00028B!\u0011QVAZ\u001b\t\tyK\u0003\u0003\u00022\u0006\u0005\u0015!\u0002;za\u0016\u001c\u0018\u0002BA[\u0003_\u0013!b\u0015;sk\u000e$H+\u001f9f\u0011!\tI,!*A\u0002\u0005-\u0016AB:dQ\u0016l\u0017\rC\u0004\u0002>\u001e\"I!a0\u0002\u001d9,W\rZ:U_.,g.\u001b>feR\u0019a+!1\t\u0011\u0005e\u00161\u0018a\u0001\u0003WCq!!2(\t\u0013\t9-A\twC2LG-\u0019;f\u0013:\u0004X\u000f\u001e+za\u0016$B!!3\u0002PB\u0019Q#a3\n\u0007\u00055gC\u0001\u0003V]&$\b\u0002CAi\u0003\u0007\u0004\r!a5\u0002\u0013%t\u0007/\u001e;UsB,\u0007\u0003BAW\u0003+LA!a6\u00020\nAA)\u0019;b)f\u0004X\rC\u0004\u0002\\\u001e\"I!!8\u0002\u0019\u0005\u0004\b/\u001a8e\u0007>dW/\u001c8\u0015\r\u0005-\u0016q\\Aq\u0011!\tI,!7A\u0002\u0005-\u0006\u0002CAr\u00033\u0004\r!!:\u0002\u0007\r|G\u000e\u0005\u0003\u0002.\u0006\u001d\u0018\u0002BAu\u0003_\u00131b\u0015;sk\u000e$h)[3mIB\u0019Q#!<\n\u0007\u0005=hC\u0001\u0007TKJL\u0017\r\\5{C\ndW\r\u0003\u0004L#\u0011\u0005\u00111\u001f\u000b\u0002\u001f!I\u0011q_\t\u0002\u0002\u0013%\u0011\u0011`\u0001\fe\u0016\fGMU3t_24X\r\u0006\u0002\u0002|B!\u0011Q B\u0004\u001b\t\tyP\u0003\u0003\u0003\u0002\t\r\u0011\u0001\u00027b]\u001eT!A!\u0002\u0002\t)\fg/Y\u0005\u0005\u0005\u0013\tyP\u0001\u0004PE*,7\r\u001e")
/* loaded from: input_file:com/microsoft/ml/spark/featurize/text/TextFeaturizer.class */
/**
 * Estimator that assembles a text-featurization {@link Pipeline} and fits it on a dataset.
 *
 * <p>The stages, in execution order, are: an optional {@link RegexTokenizer}, an optional
 * {@link StopWordsRemover}, an optional {@link NGram}, a {@link HashingTF} (always present) and an
 * optional {@link IDF}. Which optional stages are used, and how each stage is configured, is
 * controlled by the params declared in {@code TextFeaturizerParams}.
 *
 * <p>This source is decompiler output of a Scala class; several constructs (trait initialisers,
 * {@code _setter_} hooks, anonymous function classes) reflect the Scala compilation scheme.
 */
public class TextFeaturizer extends Estimator<TextFeaturizerModel> implements TextFeaturizerParams, HasInputCol, HasOutputCol {
    /** Unique identifier of this stage; fixed at construction. */
    private final String uid;

    // The param fields below are assigned through the "_setter_" methods, i.e. outside the
    // constructor body, so they cannot be declared final in Java source.
    private Param<String> outputCol;
    private Param<String> inputCol;
    private BooleanParam useTokenizer;
    private BooleanParam tokenizerGaps;
    private IntParam minTokenLength;
    private Param<String> tokenizerPattern;
    private BooleanParam toLowercase;
    private BooleanParam useStopWordsRemover;
    private BooleanParam caseSensitiveStopWords;
    private Param<String> defaultStopWordLanguage;
    private Param<String> stopWords;
    private BooleanParam useNGram;
    private IntParam nGramLength;
    private BooleanParam binary;
    private IntParam numFeatures;
    private BooleanParam useIDF;
    private IntParam minDocFreq;

    /** Loads a saved instance from {@code str} by delegating to the companion object. */
    public static Object load(String str) {
        return TextFeaturizer$.MODULE$.load(str);
    }

    /** Returns the companion object's reader for this class. */
    public static MLReader<TextFeaturizer> read() {
        return TextFeaturizer$.MODULE$.read();
    }

    /** Param holding the name of the output column. */
    @Override // com.microsoft.ml.spark.core.contracts.HasOutputCol
    public Param<String> outputCol() {
        return this.outputCol;
    }

    /** Installs the {@code outputCol} param; called from the constructor. */
    @Override // com.microsoft.ml.spark.core.contracts.HasOutputCol
    public void com$microsoft$ml$spark$core$contracts$HasOutputCol$_setter_$outputCol_$eq(Param param) {
        this.outputCol = param;
    }

    /** Sets the output column name via the {@code HasOutputCol} trait implementation. */
    @Override // com.microsoft.ml.spark.core.contracts.HasOutputCol
    public HasOutputCol setOutputCol(String str) {
        return HasOutputCol.Cclass.setOutputCol(this, str);
    }

    /** Returns the output column name via the {@code HasOutputCol} trait implementation. */
    @Override // com.microsoft.ml.spark.core.contracts.HasOutputCol
    public String getOutputCol() {
        return HasOutputCol.Cclass.getOutputCol(this);
    }

    /** Param holding the name of the input column. */
    @Override // com.microsoft.ml.spark.core.contracts.HasInputCol
    public Param<String> inputCol() {
        return this.inputCol;
    }

    /** Installs the {@code inputCol} param; called from the constructor. */
    @Override // com.microsoft.ml.spark.core.contracts.HasInputCol
    public void com$microsoft$ml$spark$core$contracts$HasInputCol$_setter_$inputCol_$eq(Param param) {
        this.inputCol = param;
    }

    /** Sets the input column name via the {@code HasInputCol} trait implementation. */
    @Override // com.microsoft.ml.spark.core.contracts.HasInputCol
    public HasInputCol setInputCol(String str) {
        return HasInputCol.Cclass.setInputCol(this, str);
    }

    /** Returns the input column name via the {@code HasInputCol} trait implementation. */
    @Override // com.microsoft.ml.spark.core.contracts.HasInputCol
    public String getInputCol() {
        return HasInputCol.Cclass.getInputCol(this);
    }

    // ---- Param definitions (TextFeaturizerParams) ------------------------------------------

    /** Param deciding whether {@link #fit} adds a {@link RegexTokenizer} stage. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public BooleanParam useTokenizer() {
        return this.useTokenizer;
    }

    /** Param forwarded to {@code RegexTokenizer.setGaps}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public BooleanParam tokenizerGaps() {
        return this.tokenizerGaps;
    }

    /** Param forwarded to {@code RegexTokenizer.setMinTokenLength}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public IntParam minTokenLength() {
        return this.minTokenLength;
    }

    /** Param forwarded to {@code RegexTokenizer.setPattern}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public Param<String> tokenizerPattern() {
        return this.tokenizerPattern;
    }

    /** Param forwarded to {@code RegexTokenizer.setToLowercase}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public BooleanParam toLowercase() {
        return this.toLowercase;
    }

    /** Param deciding whether {@link #fit} adds a {@link StopWordsRemover} stage. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public BooleanParam useStopWordsRemover() {
        return this.useStopWordsRemover;
    }

    /** Param forwarded to {@code StopWordsRemover.setCaseSensitive}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public BooleanParam caseSensitiveStopWords() {
        return this.caseSensitiveStopWords;
    }

    /**
     * Param naming the built-in stop word list to load; the value {@code "custom"} makes
     * {@link #fit} use {@link #stopWords()} instead.
     */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public Param<String> defaultStopWordLanguage() {
        return this.defaultStopWordLanguage;
    }

    /** Param holding the custom stop words as one string; {@link #fit} splits it on {@code ","}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public Param<String> stopWords() {
        return this.stopWords;
    }

    /** Param deciding whether {@link #fit} adds an {@link NGram} stage. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public BooleanParam useNGram() {
        return this.useNGram;
    }

    /** Param forwarded to {@code NGram.setN}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public IntParam nGramLength() {
        return this.nGramLength;
    }

    /** Param forwarded to {@code HashingTF.setBinary}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public BooleanParam binary() {
        return this.binary;
    }

    /** Param forwarded to {@code HashingTF.setNumFeatures}; also used by {@link #transformSchema}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public IntParam numFeatures() {
        return this.numFeatures;
    }

    /** Param deciding whether {@link #fit} adds an {@link IDF} stage. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public BooleanParam useIDF() {
        return this.useIDF;
    }

    /** Param forwarded to {@code IDF.setMinDocFreq}. */
    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public IntParam minDocFreq() {
        return this.minDocFreq;
    }

    // ---- Trait field hooks (TextFeaturizerParams) ------------------------------------------
    // Each method below stores the param object for one TextFeaturizerParams field.
    // NOTE(review): presumably invoked from TextFeaturizerParams.Cclass.$init$ — confirm.

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$useTokenizer_$eq(BooleanParam booleanParam) {
        this.useTokenizer = booleanParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$tokenizerGaps_$eq(BooleanParam booleanParam) {
        this.tokenizerGaps = booleanParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$minTokenLength_$eq(IntParam intParam) {
        this.minTokenLength = intParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$tokenizerPattern_$eq(Param param) {
        this.tokenizerPattern = param;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$toLowercase_$eq(BooleanParam booleanParam) {
        this.toLowercase = booleanParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$useStopWordsRemover_$eq(BooleanParam booleanParam) {
        this.useStopWordsRemover = booleanParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$caseSensitiveStopWords_$eq(BooleanParam booleanParam) {
        this.caseSensitiveStopWords = booleanParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$defaultStopWordLanguage_$eq(Param param) {
        this.defaultStopWordLanguage = param;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$stopWords_$eq(Param param) {
        this.stopWords = param;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$useNGram_$eq(BooleanParam booleanParam) {
        this.useNGram = booleanParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$nGramLength_$eq(IntParam intParam) {
        this.nGramLength = intParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$binary_$eq(BooleanParam booleanParam) {
        this.binary = booleanParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$numFeatures_$eq(IntParam intParam) {
        this.numFeatures = intParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$useIDF_$eq(BooleanParam booleanParam) {
        this.useIDF = booleanParam;
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public void com$microsoft$ml$spark$featurize$text$TextFeaturizerParams$_setter_$minDocFreq_$eq(IntParam intParam) {
        this.minDocFreq = intParam;
    }

    // ---- Param value getters ---------------------------------------------------------------
    // Each getter delegates to the matching method of the TextFeaturizerParams trait implementation.

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final boolean getUseTokenizer() {
        return TextFeaturizerParams.Cclass.getUseTokenizer(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final boolean getTokenizerGaps() {
        return TextFeaturizerParams.Cclass.getTokenizerGaps(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final int getMinTokenLength() {
        return TextFeaturizerParams.Cclass.getMinTokenLength(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final String getTokenizerPattern() {
        return TextFeaturizerParams.Cclass.getTokenizerPattern(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final boolean getToLowercase() {
        return TextFeaturizerParams.Cclass.getToLowercase(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final boolean getUseStopWordsRemover() {
        return TextFeaturizerParams.Cclass.getUseStopWordsRemover(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final boolean getCaseSensitiveStopWords() {
        return TextFeaturizerParams.Cclass.getCaseSensitiveStopWords(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final String getDefaultStopWordLanguage() {
        return TextFeaturizerParams.Cclass.getDefaultStopWordLanguage(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final String getStopWords() {
        return TextFeaturizerParams.Cclass.getStopWords(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final boolean getUseNGram() {
        return TextFeaturizerParams.Cclass.getUseNGram(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final int getNGramLength() {
        return TextFeaturizerParams.Cclass.getNGramLength(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final boolean getBinary() {
        return TextFeaturizerParams.Cclass.getBinary(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final int getNumFeatures() {
        return TextFeaturizerParams.Cclass.getNumFeatures(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final boolean getUseIDF() {
        return TextFeaturizerParams.Cclass.getUseIDF(this);
    }

    @Override // com.microsoft.ml.spark.featurize.text.TextFeaturizerParams
    public final int getMinDocFreq() {
        return TextFeaturizerParams.Cclass.getMinDocFreq(this);
    }

    /**
     * Returns the {@link MLWriter} for this estimator.
     *
     * <p>NOTE(review): {@code DefaultParamsWritable.class} looks like the decompiler's rendering of
     * the Scala trait implementation class ({@code DefaultParamsWritable$class}) — confirm before
     * compiling this source.
     */
    public MLWriter write() {
        return DefaultParamsWritable.class.write(this);
    }

    /**
     * Saves this estimator to {@code str}.
     *
     * <p>NOTE(review): {@code MLWritable.class} looks like the decompiler's rendering of the Scala
     * trait implementation class ({@code MLWritable$class}) — confirm before compiling this source.
     *
     * @throws IOException declared by the underlying save operation
     */
    public void save(String str) throws IOException {
        MLWritable.class.save(this, str);
    }

    /** Returns the extra Python methods text supplied by {@code HasAdditionalPythonMethods}. */
    @Override // com.microsoft.ml.spark.core.contracts.HasAdditionalPythonMethods
    public String additionalPythonMethods() {
        return HasAdditionalPythonMethods.Cclass.additionalPythonMethods(this);
    }

    /** Returns the unique identifier given at construction. */
    public String uid() {
        return this.uid;
    }

    // ---- Fluent setters: each stores the value in the matching param and returns this -------

    public TextFeaturizer setUseTokenizer(boolean z) {
        return (TextFeaturizer) set(useTokenizer(), BoxesRunTime.boxToBoolean(z));
    }

    public TextFeaturizer setTokenizerGaps(boolean z) {
        return (TextFeaturizer) set(tokenizerGaps(), BoxesRunTime.boxToBoolean(z));
    }

    public TextFeaturizer setMinTokenLength(int i) {
        return (TextFeaturizer) set(minTokenLength(), BoxesRunTime.boxToInteger(i));
    }

    public TextFeaturizer setTokenizerPattern(String str) {
        return (TextFeaturizer) set(tokenizerPattern(), str);
    }

    public TextFeaturizer setToLowercase(boolean z) {
        return (TextFeaturizer) set(toLowercase(), BoxesRunTime.boxToBoolean(z));
    }

    public TextFeaturizer setUseStopWordsRemover(boolean z) {
        return (TextFeaturizer) set(useStopWordsRemover(), BoxesRunTime.boxToBoolean(z));
    }

    public TextFeaturizer setCaseSensitiveStopWords(boolean z) {
        return (TextFeaturizer) set(caseSensitiveStopWords(), BoxesRunTime.boxToBoolean(z));
    }

    public TextFeaturizer setDefaultStopWordLanguage(String str) {
        return (TextFeaturizer) set(defaultStopWordLanguage(), str);
    }

    public TextFeaturizer setStopWords(String str) {
        return (TextFeaturizer) set(stopWords(), str);
    }

    public TextFeaturizer setUseNGram(boolean z) {
        return (TextFeaturizer) set(useNGram(), BoxesRunTime.boxToBoolean(z));
    }

    public TextFeaturizer setNGramLength(int i) {
        return (TextFeaturizer) set(nGramLength(), BoxesRunTime.boxToInteger(i));
    }

    public TextFeaturizer setBinary(boolean z) {
        return (TextFeaturizer) set(binary(), BoxesRunTime.boxToBoolean(z));
    }

    public TextFeaturizer setNumFeatures(int i) {
        return (TextFeaturizer) set(numFeatures(), BoxesRunTime.boxToInteger(i));
    }

    public TextFeaturizer setUseIDF(boolean z) {
        return (TextFeaturizer) set(useIDF(), BoxesRunTime.boxToBoolean(z));
    }

    public TextFeaturizer setMinDocFreq(int i) {
        return (TextFeaturizer) set(minDocFreq(), BoxesRunTime.boxToInteger(i));
    }

    /**
     * Sets the param called {@code str} on stage {@code m} to {@code t} and returns the stage.
     *
     * <p>Public only because the anonymous function classes of this class need access to it.
     *
     * @param m   stage to configure
     * @param str name of the param, looked up with {@code getParam}
     * @param t   value to store
     * @return the object returned by {@code m.set}, cast back to the stage type
     */
    @SuppressWarnings("unchecked") // set(...) is declared to return the general Params type
    public <M extends PipelineStage, T> M com$microsoft$ml$spark$featurize$text$TextFeaturizer$$setParamInternal(M m, String str, T t) {
        return (M) m.set(m.getParam(str), t);
    }

    /**
     * Reads the param called {@code str} from stage {@code m} using {@code getOrDefault}.
     *
     * @param m   stage to read from
     * @param str name of the param, looked up with {@code getParam}
     * @return the value returned by {@code getOrDefault}, untyped
     */
    public <M extends PipelineStage, T> Object com$microsoft$ml$spark$featurize$text$TextFeaturizer$$getParamInternal(M m, String str) {
        return m.getOrDefault(m.getParam(str));
    }

    /**
     * Builds the featurization pipeline from the current params, fits it on {@code dataset} and
     * wraps the result in a {@link TextFeaturizerModel} whose parent is this estimator.
     *
     * @param dataset data to fit on; its schema is validated through {@link #transformSchema}
     * @return the fitted model
     */
    public TextFeaturizerModel fit(Dataset<?> dataset) {
        // Probe useTokenizer: if it cannot be read, derive it from the input column's type.
        // NOTE(review): the constructor registers a default of true, so this fallback presumably
        // only applies when that default has been cleared — confirm.
        try {
            getUseTokenizer();
        } catch (NoSuchElementException notSet) {
            setUseTokenizer(needsTokenizer(dataset.schema()));
        }
        transformSchema(dataset.schema());

        // Stages are prepended as they are created and the list is reversed afterwards, so the
        // final order is the creation order. The holder is an ObjectRef because it is handed to
        // TextFeaturizer$$anonfun$1 below.
        ObjectRef stages = ObjectRef.create(Nil$.MODULE$);
        if (getUseTokenizer()) {
            prependStage(stages, new RegexTokenizer()
                    .setGaps(getTokenizerGaps())
                    .setPattern(getTokenizerPattern())
                    .setMinTokenLength(getMinTokenLength())
                    .setToLowercase(getToLowercase()));
        }
        if (getUseStopWordsRemover()) {
            StopWordsRemover remover = new StopWordsRemover().setCaseSensitive(getCaseSensitiveStopWords());
            // "custom" selects the user-supplied comma-separated list; any other value (including
            // null) is treated as the name of a built-in list.
            if ("custom".equals(getDefaultStopWordLanguage())) {
                prependStage(stages, remover.setStopWords(getStopWords().split(",")));
            } else {
                prependStage(stages, remover.setStopWords(
                        StopWordsRemover$.MODULE$.loadDefaultStopWords(getDefaultStopWordLanguage())));
            }
        }
        if (getUseNGram()) {
            prependStage(stages, new NGram().setN(getNGramLength()));
        }
        prependStage(stages, new HashingTF().setBinary(getBinary()).setNumFeatures(getNumFeatures()));
        if (getUseIDF()) {
            prependStage(stages, new IDF().setMinDocFreq(getMinDocFreq()));
        }
        stages.elem = ((List) stages.elem).reverse();

        // Pair every stage with its index and pass the pairs through anonfun$1.
        // NOTE(review): anonfun$1 is not visible here; presumably it chains each stage's
        // input/output columns — confirm.
        List chained = (List) ((List) ((List) stages.elem).zip(
                RichInt$.MODULE$.to$extension0(Predef$.MODULE$.intWrapper(0), ((List) stages.elem).length()),
                List$.MODULE$.canBuildFrom()))
                .map(new TextFeaturizer$$anonfun$1(this, stages), List$.MODULE$.canBuildFrom());

        // Every stage except the last one, in reverse order, mapped through anonfun$2; the result
        // is the third constructor argument of the model.
        // NOTE(review): presumably the intermediate column names to drop — confirm.
        List intermediate = (List) ((List) chained.reverse().tail())
                .map(new TextFeaturizer$$anonfun$2(this), List$.MODULE$.canBuildFrom());

        PipelineStage[] stageArray = (PipelineStage[]) chained.toArray(ClassTag$.MODULE$.apply(PipelineStage.class));
        return (TextFeaturizerModel) new TextFeaturizerModel(
                uid(),
                new Pipeline().setStages(stageArray).fit(dataset),
                intermediate).setParent(this);
    }

    /** Prepends {@code stage} to the Scala list held in {@code stages}. */
    private static void prependStage(ObjectRef stages, Object stage) {
        stages.elem = ((List) stages.elem).$colon$colon(stage);
    }

    /* renamed from: copy, reason: merged with bridge method [inline-methods] and merged with bridge method [inline-methods] */
    /** Returns a copy of this estimator produced by {@code defaultCopy} with {@code paramMap} applied. */
    public Estimator<TextFeaturizerModel> m401copy(ParamMap paramMap) {
        return defaultCopy(paramMap);
    }

    /**
     * Validates the input column type and returns {@code structType} extended with the output column.
     *
     * @param structType schema of the incoming dataset
     * @return the schema with a field built from an {@link AttributeGroup} (output column name,
     *         {@code numFeatures}) appended
     * @throws IllegalArgumentException if the output column already exists, or if a check in
     *         {@link #validateInputType} fails
     */
    public StructType transformSchema(StructType structType) {
        validateInputType(structType.apply((String) $(inputCol())).dataType());
        if (Predef$.MODULE$.refArrayOps(structType.fieldNames()).contains($(outputCol()))) {
            throw new IllegalArgumentException("Output column " + $(outputCol()) + " already exists.");
        }
        AttributeGroup outputAttributes =
                new AttributeGroup((String) $(outputCol()), BoxesRunTime.unboxToInt($(numFeatures())));
        return appendColumn(structType, outputAttributes.toStructField());
    }

    /** Null-safe equality of two data types; two nulls compare equal. */
    private static boolean sameType(DataType actual, DataType expected) {
        return actual != null ? actual.equals(expected) : expected == null;
    }

    /** Returns true when the input column of {@code structType} has the string type. */
    private boolean needsTokenizer(StructType structType) {
        DataType inputType = structType.apply((String) $(inputCol())).dataType();
        return sameType(inputType, StringType$.MODULE$);
    }

    /**
     * Checks that {@code dataType} suits the configured first stage:
     * string when the tokenizer is used, array of strings when only n-grams are used, and any
     * array type otherwise.
     *
     * <p>In each branch the {@code require} nested inside the {@code if} cannot succeed once it is
     * reached (the type has just been found equal to a different type); it exists so that this
     * particular mismatch fails with its own message object instead of the general one.
     */
    private void validateInputType(DataType dataType) {
        DataType stringType = StringType$.MODULE$;
        if (getUseTokenizer()) {
            // Input must be a plain string; an array of strings gets the dedicated message.
            if (sameType(dataType, ArrayType$.MODULE$.apply(StringType$.MODULE$))) {
                Predef$.MODULE$.require(sameType(dataType, stringType),
                        new TextFeaturizer$$anonfun$validateInputType$1(this, dataType));
            }
            Predef$.MODULE$.require(sameType(dataType, stringType),
                    new TextFeaturizer$$anonfun$validateInputType$2(this, dataType));
        } else if (getUseNGram()) {
            // Input must be exactly an array of strings; a plain string gets the dedicated message.
            if (sameType(dataType, stringType)) {
                Predef$.MODULE$.require(sameType(dataType, ArrayType$.MODULE$.apply(StringType$.MODULE$)),
                        new TextFeaturizer$$anonfun$validateInputType$3(this, dataType));
            }
            Predef$.MODULE$.require(sameType(dataType, ArrayType$.MODULE$.apply(StringType$.MODULE$)),
                    new TextFeaturizer$$anonfun$validateInputType$4(this, dataType));
        } else {
            // Input must be some array type; a plain string gets the dedicated message.
            if (sameType(dataType, stringType)) {
                Predef$.MODULE$.require(dataType instanceof ArrayType,
                        new TextFeaturizer$$anonfun$validateInputType$5(this, dataType));
            }
            Predef$.MODULE$.require(dataType instanceof ArrayType,
                    new TextFeaturizer$$anonfun$validateInputType$6(this, dataType));
        }
    }

    /**
     * Returns a new schema consisting of the fields of {@code structType} followed by
     * {@code structField}; requires that no existing field already has that name.
     */
    private StructType appendColumn(StructType structType, StructField structField) {
        boolean nameIsFree = !Predef$.MODULE$.refArrayOps(structType.fieldNames()).contains(structField.name());
        Predef$.MODULE$.require(nameIsFree, new TextFeaturizer$$anonfun$appendColumn$1(this, structField));
        StructField[] extended = (StructField[]) Predef$.MODULE$.refArrayOps(structType.fields())
                .$colon$plus(structField, ClassTag$.MODULE$.apply(StructField.class));
        return new StructType(extended);
    }

    /* renamed from: fit, reason: collision with other method in class */
    /** Bridge method: forwards to {@link #fit(Dataset)}. */
    public /* bridge */ /* synthetic */ Model m402fit(Dataset dataset) {
        return fit((Dataset<?>) dataset);
    }

    /**
     * Creates an estimator with the given identifier, runs the trait initialisers, installs the
     * input/output column params and registers the defaults:
     * outputCol = {@code uid + "_output"}, useTokenizer = true, tokenizerGaps = true,
     * minTokenLength = 0, tokenizerPattern = {@code \s+}, toLowercase = true,
     * useStopWordsRemover = false, caseSensitiveStopWords = false,
     * defaultStopWordLanguage = "english", useNGram = false, nGramLength = 2,
     * numFeatures = 262144, binary = false, useIDF = true, minDocFreq = 1.
     * No default is registered here for inputCol or stopWords.
     *
     * <p>NOTE(review): {@code MLWritable.class} / {@code DefaultParamsWritable.class} look like
     * decompiler renderings of Scala trait implementation classes — confirm before compiling.
     *
     * @param str unique identifier of this stage
     */
    public TextFeaturizer(String str) {
        this.uid = str;
        HasAdditionalPythonMethods.Cclass.$init$(this);
        MLWritable.class.$init$(this);
        DefaultParamsWritable.class.$init$(this);
        TextFeaturizerParams.Cclass.$init$(this);
        com$microsoft$ml$spark$core$contracts$HasInputCol$_setter_$inputCol_$eq(new Param(this, "inputCol", "The name of the input column"));
        com$microsoft$ml$spark$core$contracts$HasOutputCol$_setter_$outputCol_$eq(new Param(this, "outputCol", "The name of the output column"));
        setDefault(outputCol(), str + "_output");
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{useTokenizer().$minus$greater(BoxesRunTime.boxToBoolean(true))}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{tokenizerGaps().$minus$greater(BoxesRunTime.boxToBoolean(true))}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{minTokenLength().$minus$greater(BoxesRunTime.boxToInteger(0))}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{tokenizerPattern().$minus$greater("\\s+")}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{toLowercase().$minus$greater(BoxesRunTime.boxToBoolean(true))}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{useStopWordsRemover().$minus$greater(BoxesRunTime.boxToBoolean(false))}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{caseSensitiveStopWords().$minus$greater(BoxesRunTime.boxToBoolean(false))}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{defaultStopWordLanguage().$minus$greater("english")}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{useNGram().$minus$greater(BoxesRunTime.boxToBoolean(false)), nGramLength().$minus$greater(BoxesRunTime.boxToInteger(2))}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{numFeatures().$minus$greater(BoxesRunTime.boxToInteger(262144)), binary().$minus$greater(BoxesRunTime.boxToBoolean(false))}));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{useIDF().$minus$greater(BoxesRunTime.boxToBoolean(true)), minDocFreq().$minus$greater(BoxesRunTime.boxToInteger(1))}));
    }

    /** Creates an estimator with a random identifier prefixed with {@code "TextFeaturizer"}. */
    public TextFeaturizer() {
        this(Identifiable$.MODULE$.randomUID("TextFeaturizer"));
    }
}
