Source code for fedot.core.operations.evaluation.evaluation_interfaces

import warnings
from abc import abstractmethod
from typing import Optional

import numpy as np
from golem.core.log import default_log
from lightgbm.sklearn import LGBMClassifier, LGBMRegressor
from sklearn.cluster import KMeans as SklearnKmeans
from sklearn.ensemble import (
    AdaBoostRegressor,
    ExtraTreesRegressor,
    GradientBoostingRegressor,
    RandomForestClassifier,
    RandomForestRegressor
)
from sklearn.linear_model import (
    Lasso as SklearnLassoReg,
    LinearRegression as SklearnLinReg,
    LogisticRegression as SklearnLogReg,
    Ridge as SklearnRidgeReg,
    SGDRegressor as SklearnSGD
)
from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor
from sklearn.naive_bayes import BernoulliNB as SklearnBernoulliNB, MultinomialNB as SklearnMultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVR as SklearnSVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from xgboost import XGBClassifier, XGBRegressor

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operation_type_from_id
from fedot.core.repository.tasks import TaskTypesEnum
from fedot.utilities.custom_errors import AbstractMethodNotImplementError
from fedot.utilities.random import ImplementationRandomStateHandler

# Installed at import time: registers a process-wide "ignore" filter for every
# UserWarning (the filter is global, it is not limited to code in this module).
warnings.filterwarnings("ignore", category=UserWarning)


class EvaluationStrategy:
    """Common interface of the strategies that fit an operation and predict with it.

    A strategy wraps a concrete implementation (sklearn or any other object
    exposing fit/predict methods) that is selected by the operation identifier.

    Args:
        operation_type: ``str`` of the operation defined in operation repository
        params: hyperparameters to fit the operation with; a falsy value is
            replaced by a fresh, empty :obj:`OperationParameters`
    """

    def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
        self.operation_id = operation_type
        self.params_for_fit = params if params else OperationParameters()
        # Starts as ``False``; the prediction code compares it against string mode names
        self.output_mode = False

        self.log = default_log(self)

    @property
    def operation_type(self):
        """Operation type derived from the stored operation id."""
        return get_operation_type_from_id(self.operation_id)

    @abstractmethod
    def fit(self, train_data: InputData):
        """Train the operation on the given data.

        Args:
            train_data: data used for operation training

        Raises:
            AbstractMethodNotImplementError: always, the base class has no implementation
        """
        raise AbstractMethodNotImplementError(self.__class__)

    @abstractmethod
    def predict(self, trained_operation, predict_data: InputData) -> OutputData:
        """Predict the target data at the predict stage.

        Args:
            trained_operation: trained operation object
            predict_data: data to predict

        Returns:
            passed data with new predicted target

        Raises:
            AbstractMethodNotImplementError: always, the base class has no implementation
        """
        raise AbstractMethodNotImplementError

    def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputData:
        """Predict the target data at the fit stage.

        The default simply delegates to :meth:`predict`; override it when the
        fit stage of the graph needs a different prediction behaviour.

        Args:
            trained_operation: trained operation object
            predict_data: data to predict

        Returns:
            passed data with new predicted target
        """
        return self.predict(trained_operation, predict_data)

    def _convert_to_operation(self, operation_type: str):
        """Look up the implementation registered for ``operation_type``.

        The lookup table is read from ``self._operations_by_types``, which this
        base class does not define itself.

        Raises:
            ValueError: if ``operation_type`` is not a key of the lookup table
        """
        registry = self._operations_by_types
        if operation_type not in registry:
            raise ValueError(f'Impossible to obtain {self.__class__} strategy for {operation_type}')
        return registry[operation_type]

    @property
    def implementation_info(self) -> str:
        """String form of the implementation registered for the current operation type."""
        implementation = self._convert_to_operation(self.operation_type)
        return str(implementation)

    @staticmethod
    def _convert_to_output(prediction, predict_data: InputData,
                           output_data_type: DataTypesEnum = DataTypesEnum.table) -> OutputData:
        """Wrap a raw prediction into :obj:`OutputData` unless it already is one.

        Args:
            prediction: output from model implementation
            predict_data: :obj:`InputData` used for prediction
            output_data_type: :obj:`DataTypesEnum` for output

        Returns: prediction as :obj:`OutputData`
        """
        if isinstance(prediction, OutputData):
            # Already in the target type: hand it back untouched
            return prediction

        # Everything except the prediction itself is copied from the input data
        return OutputData(idx=predict_data.idx,
                          features=predict_data.features,
                          predict=prediction,
                          task=predict_data.task,
                          target=predict_data.target,
                          data_type=output_data_type,
                          supplementary_data=predict_data.supplementary_data)


class SkLearnEvaluationStrategy(EvaluationStrategy):
    """This class defines the certain operation implementation for the sklearn operations
    defined in operation repository

    Args:
        operation_type: ``str`` of the operation defined in operation or
            data operation repositories

            .. details:: possible operations:

                - ``xgbreg``-> XGBRegressor
                - ``adareg``-> AdaBoostRegressor
                - ``gbr``-> GradientBoostingRegressor
                - ``dtreg``-> DecisionTreeRegressor
                - ``treg``-> ExtraTreesRegressor
                - ``rfr``-> RandomForestRegressor
                - ``linear``-> SklearnLinReg
                - ``ridge``-> SklearnRidgeReg
                - ``lasso``-> SklearnLassoReg
                - ``svr``-> SklearnSVR
                - ``sgdr``-> SklearnSGD
                - ``lgbmreg``-> LGBMRegressor
                - ``xgboost``-> XGBClassifier
                - ``logit``-> SklearnLogReg
                - ``bernb``-> SklearnBernoulliNB
                - ``multinb``-> SklearnMultinomialNB
                - ``dt``-> DecisionTreeClassifier
                - ``rf``-> RandomForestClassifier
                - ``mlp``-> MLPClassifier
                - ``lgbm``-> LGBMClassifier
                - ``kmeans``-> SklearnKmeans

        params: hyperparameters to fit the operation with
    """

    # Operation name -> implementation class used by ``_convert_to_operation``
    _operations_by_types = {
        'xgbreg': XGBRegressor,
        'adareg': AdaBoostRegressor,
        'gbr': GradientBoostingRegressor,
        'dtreg': DecisionTreeRegressor,
        'treg': ExtraTreesRegressor,
        'rfr': RandomForestRegressor,
        'linear': SklearnLinReg,
        'ridge': SklearnRidgeReg,
        'lasso': SklearnLassoReg,
        'svr': SklearnSVR,
        'sgdr': SklearnSGD,
        'lgbmreg': LGBMRegressor,

        'xgboost': XGBClassifier,
        'logit': SklearnLogReg,
        'bernb': SklearnBernoulliNB,
        'multinb': SklearnMultinomialNB,
        'dt': DecisionTreeClassifier,
        'rf': RandomForestClassifier,
        'mlp': MLPClassifier,
        'lgbm': LGBMClassifier,

        'kmeans': SklearnKmeans,
    }

    def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
        # Resolved before the base initialisation, so an unknown operation
        # raises ValueError before any other attribute is set
        self.operation_impl = self._convert_to_operation(operation_type)
        super().__init__(operation_type, params)

    def fit(self, train_data: InputData):
        """This method is used for operation training with the data provided

        Args:
            train_data: data used for operation training

        Returns:
            trained Sklearn operation (wrapped into a multi-output model when
            the target has several columns and the operation is tagged ``non_multi``)
        """

        # NOTE: registers a global "ignore" filter for RuntimeWarning; it is not
        # removed again when fit returns
        warnings.filterwarnings("ignore", category=RuntimeWarning)

        operation_implementation = self.operation_impl(**self.params_for_fit.to_dict())

        # Does the repository tag this operation as 'non_multi' for the current task?
        current_task = train_data.task.task_type
        models_repo = OperationTypesRepository()
        non_multi_models = models_repo.suitable_operation(task_type=current_task,
                                                          tags=['non_multi'])
        is_model_not_support_multi = self.operation_type in non_multi_models

        # Multi-output task or not
        is_multi_target = is_multi_output_task(train_data)
        # Fitting runs inside the handler's context
        # (presumably it controls the random state of the implementation - confirm)
        with ImplementationRandomStateHandler(implementation=operation_implementation):
            if is_model_not_support_multi and is_multi_target:
                # Manually wrap the model into a multi-output one; the wrapper is fitted inside
                operation_implementation = convert_to_multivariate_model(operation_implementation,
                                                                         train_data)
            else:
                operation_implementation.fit(train_data.features, train_data.target)
        return operation_implementation

    @abstractmethod
    def predict(self, trained_operation, predict_data: InputData) -> OutputData:
        """This method used for prediction of the target data

        Args:
            trained_operation: operation object
            predict_data: data to predict

        Returns:
            passed data with new predicted target

        Raises:
            AbstractMethodNotImplementError: always, this class has no implementation
        """
        raise AbstractMethodNotImplementError

    def _find_operation_by_impl(self, impl):
        """Return the first operation name whose registered implementation equals ``impl``.

        Returns ``None`` when no registered implementation matches.
        """
        for operation, operation_impl in self._operations_by_types.items():
            if operation_impl == impl:
                return operation
        return None

    @property
    def implementation_info(self) -> str:
        """String form of the implementation registered for the current operation type."""
        return str(self._convert_to_operation(self.operation_type))

    def _sklearn_compatible_prediction(self, trained_operation, features):
        """Predict labels or class probabilities depending on ``self.output_mode``.

        Args:
            trained_operation: fitted model; ``predict`` is used for ``'labels'``,
                ``classes_`` and ``predict_proba`` for the probability modes
            features: features to predict on

        Returns:
            - ``'labels'``: result of ``predict``
            - ``'probs'`` / ``'default'``: result of ``predict_proba``; for two
              classes only the column with index 1 is kept (one such column per
              output for a multi-output model)
            - ``'full_probs'``: result of ``predict_proba`` unchanged

        Raises:
            ValueError: if the output mode is not supported or the model knows
                fewer than two classes
        """
        if self.output_mode == 'labels':
            # Labels need no class bookkeeping, so ``classes_`` is not required here
            return trained_operation.predict(features)

        if self.output_mode not in ('probs', 'full_probs', 'default'):
            raise ValueError(f'Output model {self.output_mode} is not supported')

        classes = trained_operation.classes_
        # A list of per-output class arrays marks a multi-output (multilabel) model
        is_multi_output_target = isinstance(classes, list)
        # NOTE(review): for multi-output models only the first output's class count
        # is inspected - confirm that all outputs share the same number of classes
        n_classes = len(classes[0]) if is_multi_output_target else len(classes)
        if n_classes < 2:
            # Checked before predict_proba so no work is spent on unusable data
            raise ValueError('Data set contain only 1 target class. Please reformat your data.')

        prediction = trained_operation.predict_proba(features)
        if n_classes == 2 and self.output_mode != 'full_probs':
            if is_multi_output_target:
                # One probability matrix per output -> (n_samples, n_outputs)
                prediction = np.stack([pred[:, 1] for pred in prediction]).T
            else:
                prediction = prediction[:, 1]

        return prediction


def convert_to_multivariate_model(sklearn_model, train_data: InputData):
    """Wrap a model into a sklearn multi-output wrapper and fit it.

    Intended for models that do not handle several target columns on their
    own: classification tasks use ``MultiOutputClassifier``, regression and
    time series forecasting tasks use ``MultiOutputRegressor``.

    Args:
        sklearn_model: :obj:`Sklearn model` to train
        train_data: data used for model training
    Returns:
        wrapped :obj:`Sklearn model`, already fitted on ``train_data``
    Raises:
        ValueError: if the task type is none of the three listed above
    """
    wrappers_by_task = {
        TaskTypesEnum.classification: MultiOutputClassifier,
        TaskTypesEnum.regression: MultiOutputRegressor,
        TaskTypesEnum.ts_forecasting: MultiOutputRegressor,
    }

    wrapper_cls = wrappers_by_task.get(train_data.task.task_type)
    if wrapper_cls is None:
        raise ValueError(f"For task type '{train_data.task.task_type}' MultiOutput wrapper is not supported")

    # Apply MultiOutput
    wrapped_model = wrapper_cls(sklearn_model)
    wrapped_model.fit(train_data.features, train_data.target)
    return wrapped_model


def is_multi_output_task(train_data) -> bool:
    """Tell whether the target of ``train_data`` has more than one column.

    Args:
        train_data: object whose ``target`` attribute is either ``None`` or
            exposes a ``shape`` tuple

    Returns:
        ``True`` only if the target has at least two dimensions and its second
        dimension is larger than one; ``False`` otherwise, including when the
        target is ``None``
    """
    target = train_data.target
    if target is None:
        # Without a target there is nothing to be multi-output about
        return False

    target_shape = target.shape
    return len(target_shape) > 1 and target_shape[1] > 1