Source code for mars.learn.contrib.xgboost.classifier

# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ..utils import make_import_error_func
from .core import xgboost, XGBScikitLearnBase


# Fallback binding used when xgboost cannot be imported; presumably a stub
# that reports the missing dependency when used -- confirm in ``..utils``.
XGBClassifier = make_import_error_func('xgboost')
if xgboost:
    from xgboost.sklearn import XGBClassifierBase

    from .... import tensor as mt
    from .dmatrix import MarsDMatrix
    from .core import evaluation_matrices
    from .train import train
    from .predict import predict

    class XGBClassifier(XGBScikitLearnBase, XGBClassifierBase):
        """
        Implementation of the scikit-learn API for XGBoost classification.
        """

        def fit(self, X, y, sample_weights=None, eval_set=None,
                sample_weight_eval_set=None, **kw):
            """
            Train the booster on ``X`` / ``y`` and store it on the estimator.

            Parameters
            ----------
            X
                Training data, handed to ``MarsDMatrix`` as the data argument.
            y
                Labels, handed to ``MarsDMatrix`` as ``label`` and to
                ``mt.unique`` to discover the classes.
            sample_weights
                Optional weights, handed to ``MarsDMatrix`` as ``weight``.
            eval_set, sample_weight_eval_set
                Forwarded unchanged to ``evaluation_matrices``.
            session
                Keyword-only (via ``**kw``); forwarded to every call that
                accepts a session.
            run_kwargs
                Keyword-only (via ``**kw``); extra execution arguments.
                ``None`` is treated the same as an empty dict.

            Returns
            -------
            self

            Raises
            ------
            TypeError
                If any keyword other than ``session`` / ``run_kwargs`` is
                passed.

            Notes
            -----
            Sets ``classes_``, ``n_classes_``, ``evals_result_`` and
            ``_Booster``. The ``objective`` entry of the xgboost params is
            always overwritten: ``multi:softprob`` (plus ``num_class``) when
            more than two classes are found, ``binary:logistic`` otherwise.
            """
            session = kw.pop('session', None)
            # An explicit ``run_kwargs=None`` must not reach ``**run_kwargs``.
            run_kwargs = kw.pop('run_kwargs', None) or dict()
            if kw:
                raise TypeError(f"fit got an unexpected keyword argument '{next(iter(kw))}'")

            dtrain = MarsDMatrix(X, label=y, weight=sample_weights,
                                 session=session, run_kwargs=run_kwargs)
            params = self.get_xgb_params()

            # The distinct labels are materialized eagerly because the
            # objective chosen below depends on how many there are.
            self.classes_ = mt.unique(y, aggregate_size=1).to_numpy(
                session=session, **run_kwargs)
            self.n_classes_ = len(self.classes_)

            if self.n_classes_ > 2:
                params['objective'] = 'multi:softprob'
                params['num_class'] = self.n_classes_
            else:
                params['objective'] = 'binary:logistic'

            evals = evaluation_matrices(eval_set, sample_weight_eval_set,
                                        session=session, run_kwargs=run_kwargs)
            self.evals_result_ = dict()
            result = train(params, dtrain,
                           num_boost_round=self.get_num_boosting_rounds(),
                           evals=evals, evals_result=self.evals_result_,
                           session=session, run_kwargs=run_kwargs)
            self._Booster = result
            return self

        def predict(self, data, **kw):
            """
            Predict a class index for every row of ``data``.

            Parameters
            ----------
            data
                Input forwarded to the module-level ``predict`` helper.
            session
                Keyword-only (via ``**kw``); used when executing the result.
            run_kwargs
                Keyword-only (via ``**kw``); extra execution arguments.
                ``None`` is treated the same as an empty dict.
            run
                Keyword-only (via ``**kw``), default ``True``. When false the
                returned tensor is not executed.
            **kw
                Any remaining keywords are forwarded to the ``predict``
                helper.

            Returns
            -------
            The prediction tensor: ``argmax`` over axis 1 when the
            probabilities have more than one dimension, otherwise
            ``prob > 0.5`` cast to ``int64``. The values are not mapped
            through ``self.classes_``.
            """
            session = kw.pop('session', None)
            # An explicit ``run_kwargs=None`` must not reach ``**run_kwargs``.
            run_kwargs = kw.pop('run_kwargs', None) or dict()
            run = kw.pop('run', True)

            # Probabilities are requested with run=False; execution, if any,
            # is triggered once below on the derived prediction.
            prob = predict(self.get_booster(), data, run=False, **kw)
            if prob.ndim > 1:
                prediction = mt.argmax(prob, axis=1)
            else:
                prediction = (prob > 0.5).astype(mt.int64)

            if run:
                prediction.execute(session=session, **run_kwargs)
            return prediction

        def predict_proba(self, data, ntree_limit=None, **kw):
            """
            Return the output of the ``predict`` helper for ``data``.

            Parameters
            ----------
            data
                Input forwarded to the module-level ``predict`` helper.
            ntree_limit
                Must be ``None``; any other value is rejected.
            **kw
                Forwarded unchanged to the ``predict`` helper.

            Raises
            ------
            NotImplementedError
                If ``ntree_limit`` is not ``None``.
            """
            if ntree_limit is not None:
                raise NotImplementedError('ntree_limit is not currently supported')
            return predict(self.get_booster(), data, **kw)