Source code for LightGBMRegressor

# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.


import sys
# Python 3 has no ``basestring`` builtin; alias it to ``str`` so code written
# against the Python 2 name keeps working.
# Check the numeric major version instead of comparing ``sys.version`` strings:
# string comparison is lexicographic and only orders versions correctly by
# accident (a major version of "10" would sort before "3").
if sys.version_info[0] >= 3:
    basestring = str

from pyspark.ml.param.shared import *
from pyspark import keyword_only
from pyspark.ml.util import JavaMLReadable, JavaMLWritable
from pyspark.ml.wrapper import JavaTransformer, JavaEstimator, JavaModel
from pyspark.ml.common import inherit_doc
from mmlspark.Utils import *

@inherit_doc
class LightGBMRegressor(ComplexParamsMixin, JavaMLReadable, JavaMLWritable, JavaEstimator):
    """
    Estimator wrapper around the JVM class ``com.microsoft.ml.spark.LightGBMRegressor``.

    Args:

        alpha (double): parameter for Huber loss and Quantile regression (default: 0.9)
        baggingFraction (double): Bagging fraction (default: 1.0)
        baggingFreq (int): Bagging frequency (default: 0)
        baggingSeed (int): Bagging seed (default: 3)
        defaultListenPort (int): The default listen port on executors, used for testing (default: 12400)
        earlyStoppingRound (int): Early stopping round (default: 0)
        featureFraction (double): Feature fraction (default: 1.0)
        featuresCol (str): features column name (default: features)
        labelCol (str): label column name (default: label)
        learningRate (double): Learning rate or shrinkage rate (default: 0.1)
        maxBin (int): Max bin (default: 255)
        maxDepth (int): Max depth (default: -1)
        minSumHessianInLeaf (double): Minimal sum hessian in one leaf (default: 0.001)
        numIterations (int): Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)
        numLeaves (int): Number of leaves (default: 31)
        objective (str): The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)
        parallelism (str): Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)
        predictionCol (str): prediction column name (default: prediction)
        timeout (double): Timeout in seconds (default: 120.0)
    """

    @keyword_only
    def __init__(self, alpha=0.9, baggingFraction=1.0, baggingFreq=0, baggingSeed=3, defaultListenPort=12400, earlyStoppingRound=0, featureFraction=1.0, featuresCol="features", labelCol="label", learningRate=0.1, maxBin=255, maxDepth=-1, minSumHessianInLeaf=0.001, numIterations=100, numLeaves=31, objective="regression", parallelism="data_parallel", predictionCol="prediction", timeout=120.0):
        """
        Create the backing Java object, declare every Param with its default,
        then apply the keyword arguments that were actually passed.
        """
        super(LightGBMRegressor, self).__init__()
        self._java_obj = self._new_java_obj("com.microsoft.ml.spark.LightGBMRegressor")

        # Each Param is declared and immediately given its default value.
        self.alpha = Param(self, "alpha", "alpha: parameter for Huber loss and Quantile regression (default: 0.9)")
        self._setDefault(alpha=0.9)
        self.baggingFraction = Param(self, "baggingFraction", "baggingFraction: Bagging fraction (default: 1.0)")
        self._setDefault(baggingFraction=1.0)
        self.baggingFreq = Param(self, "baggingFreq", "baggingFreq: Bagging frequence (default: 0)")
        self._setDefault(baggingFreq=0)
        self.baggingSeed = Param(self, "baggingSeed", "baggingSeed: Bagging seed (default: 3)")
        self._setDefault(baggingSeed=3)
        self.defaultListenPort = Param(self, "defaultListenPort", "defaultListenPort: The default listen port on executors, used for testing (default: 12400)")
        self._setDefault(defaultListenPort=12400)
        self.earlyStoppingRound = Param(self, "earlyStoppingRound", "earlyStoppingRound: Early stopping round (default: 0)")
        self._setDefault(earlyStoppingRound=0)
        self.featureFraction = Param(self, "featureFraction", "featureFraction: Feature fraction (default: 1.0)")
        self._setDefault(featureFraction=1.0)
        self.featuresCol = Param(self, "featuresCol", "featuresCol: features column name (default: features)")
        self._setDefault(featuresCol="features")
        self.labelCol = Param(self, "labelCol", "labelCol: label column name (default: label)")
        self._setDefault(labelCol="label")
        self.learningRate = Param(self, "learningRate", "learningRate: Learning rate or shrinkage rate (default: 0.1)")
        self._setDefault(learningRate=0.1)
        self.maxBin = Param(self, "maxBin", "maxBin: Max bin (default: 255)")
        self._setDefault(maxBin=255)
        self.maxDepth = Param(self, "maxDepth", "maxDepth: Max depth (default: -1)")
        self._setDefault(maxDepth=-1)
        self.minSumHessianInLeaf = Param(self, "minSumHessianInLeaf", "minSumHessianInLeaf: Minimal sum hessian in one leaf (default: 0.001)")
        self._setDefault(minSumHessianInLeaf=0.001)
        self.numIterations = Param(self, "numIterations", "numIterations: Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)")
        self._setDefault(numIterations=100)
        self.numLeaves = Param(self, "numLeaves", "numLeaves: Number of leaves (default: 31)")
        self._setDefault(numLeaves=31)
        self.objective = Param(self, "objective", "objective: The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)")
        self._setDefault(objective="regression")
        self.parallelism = Param(self, "parallelism", "parallelism: Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)")
        self._setDefault(parallelism="data_parallel")
        self.predictionCol = Param(self, "predictionCol", "predictionCol: prediction column name (default: prediction)")
        self._setDefault(predictionCol="prediction")
        self.timeout = Param(self, "timeout", "timeout: Timeout in seconds (default: 120.0)")
        self._setDefault(timeout=120.0)

        # keyword_only records the explicitly passed kwargs either on the
        # instance or on the decorated function; support both locations.
        if hasattr(self, "_input_kwargs"):
            kwargs = self._input_kwargs
        else:
            kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, alpha=0.9, baggingFraction=1.0, baggingFreq=0, baggingSeed=3, defaultListenPort=12400, earlyStoppingRound=0, featureFraction=1.0, featuresCol="features", labelCol="label", learningRate=0.1, maxBin=255, maxDepth=-1, minSumHessianInLeaf=0.001, numIterations=100, numLeaves=31, objective="regression", parallelism="data_parallel", predictionCol="prediction", timeout=120.0):
        """
        Set the (keyword only) parameters

        Args:

            alpha (double): parameter for Huber loss and Quantile regression (default: 0.9)
            baggingFraction (double): Bagging fraction (default: 1.0)
            baggingFreq (int): Bagging frequency (default: 0)
            baggingSeed (int): Bagging seed (default: 3)
            defaultListenPort (int): The default listen port on executors, used for testing (default: 12400)
            earlyStoppingRound (int): Early stopping round (default: 0)
            featureFraction (double): Feature fraction (default: 1.0)
            featuresCol (str): features column name (default: features)
            labelCol (str): label column name (default: label)
            learningRate (double): Learning rate or shrinkage rate (default: 0.1)
            maxBin (int): Max bin (default: 255)
            maxDepth (int): Max depth (default: -1)
            minSumHessianInLeaf (double): Minimal sum hessian in one leaf (default: 0.001)
            numIterations (int): Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)
            numLeaves (int): Number of leaves (default: 31)
            objective (str): The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)
            parallelism (str): Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)
            predictionCol (str): prediction column name (default: prediction)
            timeout (double): Timeout in seconds (default: 120.0)

        Returns:
            The result of ``self._set(**kwargs)`` for the keyword arguments
            that were explicitly passed.
        """
        if hasattr(self, "_input_kwargs"):
            kwargs = self._input_kwargs
        else:
            # When keyword_only stores the kwargs on the decorated function,
            # they live on *this* method. Reading them from __init__ would
            # re-apply the constructor's arguments instead of the ones passed
            # to this call.
            kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)

    def setAlpha(self, value):
        """
        Args:

            alpha (double): parameter for Huber loss and Quantile regression (default: 0.9)
        """
        self._set(alpha=value)
        return self

    def getAlpha(self):
        """
        Returns:

            double: parameter for Huber loss and Quantile regression (default: 0.9)
        """
        return self.getOrDefault(self.alpha)

    def setBaggingFraction(self, value):
        """
        Args:

            baggingFraction (double): Bagging fraction (default: 1.0)
        """
        self._set(baggingFraction=value)
        return self

    def getBaggingFraction(self):
        """
        Returns:

            double: Bagging fraction (default: 1.0)
        """
        return self.getOrDefault(self.baggingFraction)

    def setBaggingFreq(self, value):
        """
        Args:

            baggingFreq (int): Bagging frequency (default: 0)
        """
        self._set(baggingFreq=value)
        return self

    def getBaggingFreq(self):
        """
        Returns:

            int: Bagging frequency (default: 0)
        """
        return self.getOrDefault(self.baggingFreq)

    def setBaggingSeed(self, value):
        """
        Args:

            baggingSeed (int): Bagging seed (default: 3)
        """
        self._set(baggingSeed=value)
        return self

    def getBaggingSeed(self):
        """
        Returns:

            int: Bagging seed (default: 3)
        """
        return self.getOrDefault(self.baggingSeed)

    def setDefaultListenPort(self, value):
        """
        Args:

            defaultListenPort (int): The default listen port on executors, used for testing (default: 12400)
        """
        self._set(defaultListenPort=value)
        return self

    def getDefaultListenPort(self):
        """
        Returns:

            int: The default listen port on executors, used for testing (default: 12400)
        """
        return self.getOrDefault(self.defaultListenPort)

    def setEarlyStoppingRound(self, value):
        """
        Args:

            earlyStoppingRound (int): Early stopping round (default: 0)
        """
        self._set(earlyStoppingRound=value)
        return self

    def getEarlyStoppingRound(self):
        """
        Returns:

            int: Early stopping round (default: 0)
        """
        return self.getOrDefault(self.earlyStoppingRound)

    def setFeatureFraction(self, value):
        """
        Args:

            featureFraction (double): Feature fraction (default: 1.0)
        """
        self._set(featureFraction=value)
        return self

    def getFeatureFraction(self):
        """
        Returns:

            double: Feature fraction (default: 1.0)
        """
        return self.getOrDefault(self.featureFraction)

    def setFeaturesCol(self, value):
        """
        Args:

            featuresCol (str): features column name (default: features)
        """
        self._set(featuresCol=value)
        return self

    def getFeaturesCol(self):
        """
        Returns:

            str: features column name (default: features)
        """
        return self.getOrDefault(self.featuresCol)

    def setLabelCol(self, value):
        """
        Args:

            labelCol (str): label column name (default: label)
        """
        self._set(labelCol=value)
        return self

    def getLabelCol(self):
        """
        Returns:

            str: label column name (default: label)
        """
        return self.getOrDefault(self.labelCol)

    def setLearningRate(self, value):
        """
        Args:

            learningRate (double): Learning rate or shrinkage rate (default: 0.1)
        """
        self._set(learningRate=value)
        return self

    def getLearningRate(self):
        """
        Returns:

            double: Learning rate or shrinkage rate (default: 0.1)
        """
        return self.getOrDefault(self.learningRate)

    def setMaxBin(self, value):
        """
        Args:

            maxBin (int): Max bin (default: 255)
        """
        self._set(maxBin=value)
        return self

    def getMaxBin(self):
        """
        Returns:

            int: Max bin (default: 255)
        """
        return self.getOrDefault(self.maxBin)

    def setMaxDepth(self, value):
        """
        Args:

            maxDepth (int): Max depth (default: -1)
        """
        self._set(maxDepth=value)
        return self

    def getMaxDepth(self):
        """
        Returns:

            int: Max depth (default: -1)
        """
        return self.getOrDefault(self.maxDepth)

    def setMinSumHessianInLeaf(self, value):
        """
        Args:

            minSumHessianInLeaf (double): Minimal sum hessian in one leaf (default: 0.001)
        """
        self._set(minSumHessianInLeaf=value)
        return self

    def getMinSumHessianInLeaf(self):
        """
        Returns:

            double: Minimal sum hessian in one leaf (default: 0.001)
        """
        return self.getOrDefault(self.minSumHessianInLeaf)

    def setNumIterations(self, value):
        """
        Args:

            numIterations (int): Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)
        """
        self._set(numIterations=value)
        return self

    def getNumIterations(self):
        """
        Returns:

            int: Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)
        """
        return self.getOrDefault(self.numIterations)

    def setNumLeaves(self, value):
        """
        Args:

            numLeaves (int): Number of leaves (default: 31)
        """
        self._set(numLeaves=value)
        return self

    def getNumLeaves(self):
        """
        Returns:

            int: Number of leaves (default: 31)
        """
        return self.getOrDefault(self.numLeaves)

    def setObjective(self, value):
        """
        Args:

            objective (str): The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)
        """
        self._set(objective=value)
        return self

    def getObjective(self):
        """
        Returns:

            str: The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)
        """
        return self.getOrDefault(self.objective)

    def setParallelism(self, value):
        """
        Args:

            parallelism (str): Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)
        """
        self._set(parallelism=value)
        return self

    def getParallelism(self):
        """
        Returns:

            str: Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)
        """
        return self.getOrDefault(self.parallelism)

    def setPredictionCol(self, value):
        """
        Args:

            predictionCol (str): prediction column name (default: prediction)
        """
        self._set(predictionCol=value)
        return self

    def getPredictionCol(self):
        """
        Returns:

            str: prediction column name (default: prediction)
        """
        return self.getOrDefault(self.predictionCol)

    def setTimeout(self, value):
        """
        Args:

            timeout (double): Timeout in seconds (default: 120.0)
        """
        self._set(timeout=value)
        return self

    def getTimeout(self):
        """
        Returns:

            double: Timeout in seconds (default: 120.0)
        """
        return self.getOrDefault(self.timeout)

    @classmethod
    def read(cls):
        """ Returns an MLReader instance for this class. """
        return JavaMMLReader(cls)

    @staticmethod
    def getJavaPackage():
        """ Returns package name String. """
        return "com.microsoft.ml.spark.LightGBMRegressor"

    @staticmethod
    def _from_java(java_stage):
        """
        Build the Python-side object for ``java_stage`` by delegating to
        ``from_java`` with this class's qualified name.
        """
        # Replace the last component of this class's module path with the
        # class name to form the name handed to from_java.
        parent_package = LightGBMRegressor.__module__.rsplit(".", 1)[0]
        return from_java(java_stage, parent_package + ".LightGBMRegressor")

    def _create_model(self, java_model):
        """ Wrap the fitted Java model in the Python model class ``M``. """
        return M(java_model)
class M(ComplexParamsMixin, JavaModel, JavaMLWritable, JavaMLReadable):
    """
    Model fitted by :class:`LightGBMRegressor`.

    This class is left empty on purpose.
    All necessary methods are exposed through inheritance.
    """

    @classmethod
    def read(cls):
        """ Returns an MLReader instance for this class. """
        return JavaMMLReader(cls)

    @staticmethod
    def getJavaPackage():
        """ Returns package name String. """
        # NOTE(review): the estimator returns a fully qualified JVM class name
        # here, while this returns the bare string "M" - confirm it is intended.
        return "M"

    @staticmethod
    def _from_java(java_stage):
        """
        Build the Python-side object for ``java_stage`` by delegating to
        ``from_java`` with this class's qualified name.
        """
        # Replace the last component of this class's module path with the
        # class name to form the name handed to from_java.
        parent_package = M.__module__.rsplit(".", 1)[0]
        return from_java(java_stage, parent_package + ".M")