# Source code for eflow.data_pipeline_segments.feature_transformer

from eflow._hidden.parent_objects import DataPipelineSegment
from eflow.utils.pandas_utils import check_if_feature_exists
from eflow.utils.misc_utils import get_parameters

import copy

__author__ = "Eric Cacciavillani"
__copyright__ = "Copyright 2019, eFlow"
__credits__ = ["Eric Cacciavillani"]
__license__ = "MIT"
__maintainer__ = "EricCacciavillani"
__email__ = "eric.cacciavillani@gmail.com"

# Template
# def METHOD_NAME(self,
#                 df,
#                 df_features,
#                 '''ALL YOUR OTHER ARGS'''
#                 _add_to_que=True):

#
#     '''
#     YOUR CUSTOM CODE HERE
#     '''
#
#
#     # Add to the given pipeline segment
#     if _add_to_que:
#         params_dict = locals()
#         parameters = get_parameters(self.METHOD_NAME)
#         self._DataPipelineSegment__add_function_to_que("METHOD_NAME",
#                                                        parameters,
#                                                        params_dict)


class FeatureTransformer(DataPipelineSegment):
    """
        Combines, removes, scales, etc features of a pandas dataframe.

        Public methods change the dataframe they are handed and, when
        '_add_to_que' is left as True, register the call with the parent
        DataPipelineSegment.
    """
    def __init__(self,
                 segment_id=None,
                 create_file=True):
        """
        Args:
            segment_id:
                Reference id to past segments of this object.

            create_file:
                Passed through unchanged to DataPipelineSegment.__init__.
                NOTE(review): presumably controls whether the segment's
                backing file gets created; confirm against the parent class.

        Note/Caveats:
            When creating any public function that will be part of the pipeline's
            structure it is important to follow this given template. Also,
            try not to set _add_to_que yourself; doing so can defeat the
            entire purpose of this project.
        """
        # The segment is identified to the parent by this class's own name.
        DataPipelineSegment.__init__(self,
                                     object_type=self.__class__.__name__,
                                     segment_id=segment_id,
                                     create_file=create_file)

    def remove_features(self,
                        df,
                        df_features,
                        feature_names,
                        _add_to_que=True):
        """

            Removes unwanted features from the dataframe and saves the call
            to the pipeline segment structure if _add_to_que is set to True.

            The dataframe and df_features are both modified in place;
            nothing is returned. A feature that raises a KeyError while
            being removed is skipped.

        Args:
            df:
                Pandas Dataframe to update.

            df_features:
                DataFrameTypes object to update.

            feature_names:
                Features to remove. Either a single feature name (str) or
                an iterable of feature names.

            _add_to_que:
                Pushes the function to pipeline segment parent if set to 'True'.
        """

        # A lone feature name is treated as a one element list.
        if isinstance(feature_names, str):
            feature_names = [feature_names]

        # Walk a snapshot so that removing entries from df_features can
        # never invalidate the iteration, even if the caller handed in a
        # container that df_features also holds on to.
        for feature_n in tuple(feature_names):

            try:

                if feature_n in df_features.all_features():
                    df_features.remove_feature(feature_n)

                # NOTE(review): check_if_feature_exists presumably raises a
                # KeyError when the column is missing; confirm in
                # eflow.utils.pandas_utils.
                check_if_feature_exists(df,
                                        feature_n)
                df.drop(columns=[feature_n],
                        inplace=True)

            except KeyError:
                # Deliberate best effort: a feature that cannot be removed
                # is skipped and the remaining features are still processed.
                continue

        # Add to the given pipeline segment.
        # NOTE: locals() captures every local bound at this point (the
        # arguments plus 'feature_n' once the loop has run), so avoid
        # introducing extra local variables above this line.
        if _add_to_que:
            params_dict = locals()
            parameters = get_parameters(self.remove_features)
            # The parent's double-underscore method has to be spelled with
            # its mangled name to be reachable from this subclass.
            self._DataPipelineSegment__add_function_to_que("remove_features",
                                                           parameters,
                                                           params_dict)