# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.
import sys
# Python 3 has no ``basestring`` builtin; alias it to ``str`` so code written
# against the Python 2 name keeps working.
# The numeric major version is compared instead of the ``sys.version`` string:
# string comparison is lexicographic and would misjudge a major version >= 10.
if sys.version_info[0] >= 3:
    basestring = str
from pyspark.ml.param.shared import *
from pyspark import keyword_only
from pyspark.ml.util import JavaMLReadable, JavaMLWritable
from pyspark.ml.wrapper import JavaTransformer, JavaEstimator, JavaModel
from pyspark.ml.common import inherit_doc
from mmlspark.Utils import *
@inherit_doc
class LightGBMRegressor(ComplexParamsMixin, JavaMLReadable, JavaMLWritable, JavaEstimator):
    """
    Python wrapper for the JVM estimator ``com.microsoft.ml.spark.LightGBMRegressor``.

    Every parameter is exposed as a ``Param`` attribute with a matching
    ``setX`` / ``getX`` pair. Setters return ``self`` so calls can be chained.

    Args:
        alpha (double): parameter for Huber loss and Quantile regression (default: 0.9)
        baggingFraction (double): Bagging fraction (default: 1.0)
        baggingFreq (int): Bagging frequency (default: 0)
        baggingSeed (int): Bagging seed (default: 3)
        defaultListenPort (int): The default listen port on executors, used for testing (default: 12400)
        earlyStoppingRound (int): Early stopping round (default: 0)
        featureFraction (double): Feature fraction (default: 1.0)
        featuresCol (str): features column name (default: features)
        labelCol (str): label column name (default: label)
        learningRate (double): Learning rate or shrinkage rate (default: 0.1)
        maxBin (int): Max bin (default: 255)
        maxDepth (int): Max depth (default: -1)
        minSumHessianInLeaf (double): Minimal sum hessian in one leaf (default: 0.001)
        numIterations (int): Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)
        numLeaves (int): Number of leaves (default: 31)
        objective (str): The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)
        parallelism (str): Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)
        predictionCol (str): prediction column name (default: prediction)
        timeout (double): Timeout in seconds (default: 120.0)
    """

    @keyword_only
    def __init__(self, alpha=0.9, baggingFraction=1.0, baggingFreq=0, baggingSeed=3, defaultListenPort=12400, earlyStoppingRound=0, featureFraction=1.0, featuresCol="features", labelCol="label", learningRate=0.1, maxBin=255, maxDepth=-1, minSumHessianInLeaf=0.001, numIterations=100, numLeaves=31, objective="regression", parallelism="data_parallel", predictionCol="prediction", timeout=120.0):
        """
        Create the backing JVM object, declare every Param with its default,
        then apply the keyword arguments the caller actually passed.

        Arguments are keyword-only (enforced by ``@keyword_only``); see the
        class docstring for the meaning of each one.
        """
        super(LightGBMRegressor, self).__init__()
        self._java_obj = self._new_java_obj("com.microsoft.ml.spark.LightGBMRegressor")

        # Declare each Param and register its default value.
        self.alpha = Param(self, "alpha", "alpha: parameter for Huber loss and Quantile regression (default: 0.9)")
        self._setDefault(alpha=0.9)
        self.baggingFraction = Param(self, "baggingFraction", "baggingFraction: Bagging fraction (default: 1.0)")
        self._setDefault(baggingFraction=1.0)
        self.baggingFreq = Param(self, "baggingFreq", "baggingFreq: Bagging frequence (default: 0)")
        self._setDefault(baggingFreq=0)
        self.baggingSeed = Param(self, "baggingSeed", "baggingSeed: Bagging seed (default: 3)")
        self._setDefault(baggingSeed=3)
        self.defaultListenPort = Param(self, "defaultListenPort", "defaultListenPort: The default listen port on executors, used for testing (default: 12400)")
        self._setDefault(defaultListenPort=12400)
        self.earlyStoppingRound = Param(self, "earlyStoppingRound", "earlyStoppingRound: Early stopping round (default: 0)")
        self._setDefault(earlyStoppingRound=0)
        self.featureFraction = Param(self, "featureFraction", "featureFraction: Feature fraction (default: 1.0)")
        self._setDefault(featureFraction=1.0)
        self.featuresCol = Param(self, "featuresCol", "featuresCol: features column name (default: features)")
        self._setDefault(featuresCol="features")
        self.labelCol = Param(self, "labelCol", "labelCol: label column name (default: label)")
        self._setDefault(labelCol="label")
        self.learningRate = Param(self, "learningRate", "learningRate: Learning rate or shrinkage rate (default: 0.1)")
        self._setDefault(learningRate=0.1)
        self.maxBin = Param(self, "maxBin", "maxBin: Max bin (default: 255)")
        self._setDefault(maxBin=255)
        self.maxDepth = Param(self, "maxDepth", "maxDepth: Max depth (default: -1)")
        self._setDefault(maxDepth=-1)
        self.minSumHessianInLeaf = Param(self, "minSumHessianInLeaf", "minSumHessianInLeaf: Minimal sum hessian in one leaf (default: 0.001)")
        self._setDefault(minSumHessianInLeaf=0.001)
        self.numIterations = Param(self, "numIterations", "numIterations: Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)")
        self._setDefault(numIterations=100)
        self.numLeaves = Param(self, "numLeaves", "numLeaves: Number of leaves (default: 31)")
        self._setDefault(numLeaves=31)
        self.objective = Param(self, "objective", "objective: The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)")
        self._setDefault(objective="regression")
        self.parallelism = Param(self, "parallelism", "parallelism: Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)")
        self._setDefault(parallelism="data_parallel")
        self.predictionCol = Param(self, "predictionCol", "predictionCol: prediction column name (default: prediction)")
        self._setDefault(predictionCol="prediction")
        self.timeout = Param(self, "timeout", "timeout: Timeout in seconds (default: 120.0)")
        self._setDefault(timeout=120.0)

        # ``keyword_only`` records the explicitly-passed kwargs either on the
        # instance or on the decorated function, depending on the PySpark
        # version; support both locations.
        if hasattr(self, "_input_kwargs"):
            kwargs = self._input_kwargs
        else:
            kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, alpha=0.9, baggingFraction=1.0, baggingFreq=0, baggingSeed=3, defaultListenPort=12400, earlyStoppingRound=0, featureFraction=1.0, featuresCol="features", labelCol="label", learningRate=0.1, maxBin=255, maxDepth=-1, minSumHessianInLeaf=0.001, numIterations=100, numLeaves=31, objective="regression", parallelism="data_parallel", predictionCol="prediction", timeout=120.0):
        """
        Set the (keyword only) parameters

        Only the arguments explicitly passed by the caller are applied; the
        defaults in the signature are not written over existing values.

        Args:
            alpha (double): parameter for Huber loss and Quantile regression (default: 0.9)
            baggingFraction (double): Bagging fraction (default: 1.0)
            baggingFreq (int): Bagging frequency (default: 0)
            baggingSeed (int): Bagging seed (default: 3)
            defaultListenPort (int): The default listen port on executors, used for testing (default: 12400)
            earlyStoppingRound (int): Early stopping round (default: 0)
            featureFraction (double): Feature fraction (default: 1.0)
            featuresCol (str): features column name (default: features)
            labelCol (str): label column name (default: label)
            learningRate (double): Learning rate or shrinkage rate (default: 0.1)
            maxBin (int): Max bin (default: 255)
            maxDepth (int): Max depth (default: -1)
            minSumHessianInLeaf (double): Minimal sum hessian in one leaf (default: 0.001)
            numIterations (int): Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)
            numLeaves (int): Number of leaves (default: 31)
            objective (str): The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)
            parallelism (str): Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)
            predictionCol (str): prediction column name (default: prediction)
            timeout (double): Timeout in seconds (default: 120.0)

        Returns:
            The result of ``self._set(**kwargs)``.
        """
        if hasattr(self, "_input_kwargs"):
            kwargs = self._input_kwargs
        else:
            # When the kwargs live on the decorated function, they must be read
            # from *this* method's wrapper. Reading ``__init__``'s wrapper would
            # re-apply the constructor's stale arguments on a direct call.
            kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)

    def setAlpha(self, value):
        """
        Set ``alpha``.

        Args:
            value (double): parameter for Huber loss and Quantile regression (default: 0.9)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(alpha=value)
        return self

    def getAlpha(self):
        """
        Returns:
            double: parameter for Huber loss and Quantile regression (default: 0.9)
        """
        return self.getOrDefault(self.alpha)

    def setBaggingFraction(self, value):
        """
        Set ``baggingFraction``.

        Args:
            value (double): Bagging fraction (default: 1.0)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(baggingFraction=value)
        return self

    def getBaggingFraction(self):
        """
        Returns:
            double: Bagging fraction (default: 1.0)
        """
        return self.getOrDefault(self.baggingFraction)

    def setBaggingFreq(self, value):
        """
        Set ``baggingFreq``.

        Args:
            value (int): Bagging frequency (default: 0)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(baggingFreq=value)
        return self

    def getBaggingFreq(self):
        """
        Returns:
            int: Bagging frequency (default: 0)
        """
        return self.getOrDefault(self.baggingFreq)

    def setBaggingSeed(self, value):
        """
        Set ``baggingSeed``.

        Args:
            value (int): Bagging seed (default: 3)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(baggingSeed=value)
        return self

    def getBaggingSeed(self):
        """
        Returns:
            int: Bagging seed (default: 3)
        """
        return self.getOrDefault(self.baggingSeed)

    def setDefaultListenPort(self, value):
        """
        Set ``defaultListenPort``.

        Args:
            value (int): The default listen port on executors, used for testing (default: 12400)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(defaultListenPort=value)
        return self

    def getDefaultListenPort(self):
        """
        Returns:
            int: The default listen port on executors, used for testing (default: 12400)
        """
        return self.getOrDefault(self.defaultListenPort)

    def setEarlyStoppingRound(self, value):
        """
        Set ``earlyStoppingRound``.

        Args:
            value (int): Early stopping round (default: 0)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(earlyStoppingRound=value)
        return self

    def getEarlyStoppingRound(self):
        """
        Returns:
            int: Early stopping round (default: 0)
        """
        return self.getOrDefault(self.earlyStoppingRound)

    def setFeatureFraction(self, value):
        """
        Set ``featureFraction``.

        Args:
            value (double): Feature fraction (default: 1.0)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(featureFraction=value)
        return self

    def getFeatureFraction(self):
        """
        Returns:
            double: Feature fraction (default: 1.0)
        """
        return self.getOrDefault(self.featureFraction)

    def setFeaturesCol(self, value):
        """
        Set ``featuresCol``.

        Args:
            value (str): features column name (default: features)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(featuresCol=value)
        return self

    def getFeaturesCol(self):
        """
        Returns:
            str: features column name (default: features)
        """
        return self.getOrDefault(self.featuresCol)

    def setLabelCol(self, value):
        """
        Set ``labelCol``.

        Args:
            value (str): label column name (default: label)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(labelCol=value)
        return self

    def getLabelCol(self):
        """
        Returns:
            str: label column name (default: label)
        """
        return self.getOrDefault(self.labelCol)

    def setLearningRate(self, value):
        """
        Set ``learningRate``.

        Args:
            value (double): Learning rate or shrinkage rate (default: 0.1)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(learningRate=value)
        return self

    def getLearningRate(self):
        """
        Returns:
            double: Learning rate or shrinkage rate (default: 0.1)
        """
        return self.getOrDefault(self.learningRate)

    def setMaxBin(self, value):
        """
        Set ``maxBin``.

        Args:
            value (int): Max bin (default: 255)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(maxBin=value)
        return self

    def getMaxBin(self):
        """
        Returns:
            int: Max bin (default: 255)
        """
        return self.getOrDefault(self.maxBin)

    def setMaxDepth(self, value):
        """
        Set ``maxDepth``.

        Args:
            value (int): Max depth (default: -1)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(maxDepth=value)
        return self

    def getMaxDepth(self):
        """
        Returns:
            int: Max depth (default: -1)
        """
        return self.getOrDefault(self.maxDepth)

    def setMinSumHessianInLeaf(self, value):
        """
        Set ``minSumHessianInLeaf``.

        Args:
            value (double): Minimal sum hessian in one leaf (default: 0.001)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(minSumHessianInLeaf=value)
        return self

    def getMinSumHessianInLeaf(self):
        """
        Returns:
            double: Minimal sum hessian in one leaf (default: 0.001)
        """
        return self.getOrDefault(self.minSumHessianInLeaf)

    def setNumIterations(self, value):
        """
        Set ``numIterations``.

        Args:
            value (int): Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(numIterations=value)
        return self

    def getNumIterations(self):
        """
        Returns:
            int: Number of iterations, LightGBM constructs num_class * num_iterations trees (default: 100)
        """
        return self.getOrDefault(self.numIterations)

    def setNumLeaves(self, value):
        """
        Set ``numLeaves``.

        Args:
            value (int): Number of leaves (default: 31)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(numLeaves=value)
        return self

    def getNumLeaves(self):
        """
        Returns:
            int: Number of leaves (default: 31)
        """
        return self.getOrDefault(self.numLeaves)

    def setObjective(self, value):
        """
        Set ``objective``.

        Args:
            value (str): The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(objective=value)
        return self

    def getObjective(self):
        """
        Returns:
            str: The Objective. For regression applications, this can be: regression_l2, regression_l1, huber, fair, poisson, quantile, mape, gamma or tweedie. For classification applications, this can be: binary, multiclass, or multiclassova. (default: regression)
        """
        return self.getOrDefault(self.objective)

    def setParallelism(self, value):
        """
        Set ``parallelism``.

        Args:
            value (str): Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(parallelism=value)
        return self

    def getParallelism(self):
        """
        Returns:
            str: Tree learner parallelism, can be set to data_parallel or voting_parallel (default: data_parallel)
        """
        return self.getOrDefault(self.parallelism)

    def setPredictionCol(self, value):
        """
        Set ``predictionCol``.

        Args:
            value (str): prediction column name (default: prediction)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(predictionCol=value)
        return self

    def getPredictionCol(self):
        """
        Returns:
            str: prediction column name (default: prediction)
        """
        return self.getOrDefault(self.predictionCol)

    def setTimeout(self, value):
        """
        Set ``timeout``.

        Args:
            value (double): Timeout in seconds (default: 120.0)

        Returns:
            LightGBMRegressor: this instance.
        """
        self._set(timeout=value)
        return self

    def getTimeout(self):
        """
        Returns:
            double: Timeout in seconds (default: 120.0)
        """
        return self.getOrDefault(self.timeout)

    @classmethod
    def read(cls):
        """ Returns an MLReader instance for this class. """
        return JavaMMLReader(cls)

    @staticmethod
    def getJavaPackage():
        """ Returns package name String. """
        return "com.microsoft.ml.spark.LightGBMRegressor"

    @staticmethod
    def _from_java(java_stage):
        """
        Build the Python wrapper for ``java_stage`` via ``from_java``.

        The target name is this module's parent package joined with
        ``LightGBMRegressor``.
        """
        module_name = LightGBMRegressor.__module__
        module_name = module_name.rsplit(".", 1)[0] + ".LightGBMRegressor"
        return from_java(java_stage, module_name)

    def _create_model(self, java_model):
        """Wrap ``java_model`` in the Python model class :class:`M`."""
        return M(java_model)
class M(ComplexParamsMixin, JavaModel, JavaMLWritable, JavaMLReadable):
    """
    Model produced by fitting a :class:`LightGBMRegressor`.

    The class body is intentionally minimal; everything beyond the
    persistence hooks below is provided by the base classes.
    """

    @classmethod
    def read(cls):
        """ Returns an MLReader instance for this class. """
        reader = JavaMMLReader(cls)
        return reader

    @staticmethod
    def getJavaPackage():
        """ Returns package name String. """
        # NOTE(review): "M" is not a fully-qualified JVM class name and looks
        # like an unresolved placeholder from code generation; confirm against
        # the Scala side before relying on it.
        return "M"

    @staticmethod
    def _from_java(java_stage):
        """
        Build the Python wrapper for ``java_stage`` via ``from_java``.

        The target name is this module's parent package joined with ``M``.
        """
        parent_package = M.__module__.rsplit(".", 1)[0]
        qualified_name = parent_package + ".M"
        return from_java(java_stage, qualified_name)