# Source code for eflow.data_pipeline_segments.feature_transformer

from eflow._hidden.parent_objects import DataPipelineSegment
from eflow.utils.pandas_utils import check_if_feature_exists
from eflow.utils.misc_utils import get_parameters

import copy

__author__ = "Eric Cacciavillani"
__copyright__ = "Copyright 2019, eFlow"
__credits__ = ["Eric Cacciavillani"]
__license__ = "MIT"
__maintainer__ = "EricCacciavillani"
__email__ = "eric.cacciavillani@gmail.com"

# Template
# def METHOD_NAME(self,
#                 df,
#                 df_features,
#                 '''ALL YOUR OTHER ARGS'''
#                 _add_to_que=True):

#
#     '''
#     YOUR CUSTOM CODE HERE
#     '''
#
#
#     # Add to the given pipeline segment
#     if _add_to_que:
#         params_dict = locals()
#         parameters = get_parameters(self.METHOD_NAME)
#         self._DataPipelineSegment__add_function_to_que("METHOD_NAME",
#                                                        parameters,
#                                                        params_dict)


class FeatureTransformer(DataPipelineSegment):
    """
    Combines, removes, scales, etc features of a pandas dataframe.
    """

    def __init__(self,
                 segment_id=None,
                 create_file=True):
        """
        Args:
            segment_id:
                Reference id to past segments of this object.

            create_file:
                Forwarded unchanged to DataPipelineSegment.__init__.
                NOTE(review): presumably controls whether the segment's
                backing file is written -- confirm against the parent class.

        Note/Caveats:
            When creating any public function that will be part of the
            pipeline's structure it is important to follow the given
            template. Also, try not to use _add_to_que. Can ruin the entire
            purpose of this project.
        """
        # The concrete class name is passed as the segment's object type.
        DataPipelineSegment.__init__(self,
                                     object_type=self.__class__.__name__,
                                     segment_id=segment_id,
                                     create_file=create_file)

    def remove_features(self,
                        df,
                        df_features,
                        feature_names,
                        _add_to_que=True):
        """
        Removes unwanted features from the dataframe and saves them to the
        pipeline segment structure if _add_to_que is set to True.

        Args:
            df:
                Pandas Dataframe to update. Columns are dropped in place.

            df_features:
                DataFrameTypes object to update.

            feature_names:
                Features to remove. Either a single feature name (str) or
                an iterable of feature names.

            _add_to_que:
                Pushes the function to pipeline segment parent if set to
                'True'.
        """
        # A lone feature name is shorthand for a one-element list.
        if isinstance(feature_names, str):
            feature_names = [feature_names]

        for feature_n in feature_names:
            try:
                # Keep the feature bookkeeping object in step with the
                # dataframe, but only touch it when it tracks this feature.
                if feature_n in df_features.all_features():
                    df_features.remove_feature(feature_n)

                # NOTE(review): presumably raises KeyError when the column
                # is absent from df -- confirm against pandas_utils.
                check_if_feature_exists(df,
                                        feature_n)
                df.drop(columns=[feature_n],
                        inplace=True)
            except KeyError:
                # Deliberate best effort: a KeyError for one feature is
                # ignored so the remaining features are still processed.
                pass

        # Add to the given pipeline segment
        if _add_to_que:
            # Captured after the normalisation above, so a lone string has
            # already been wrapped in a list by this point.
            params_dict = locals()
            parameters = get_parameters(self.remove_features)

            # The parent's method is private (double underscore), so from a
            # subclass it has to be addressed by its mangled name.
            self._DataPipelineSegment__add_function_to_que("remove_features",
                                                           parameters,
                                                           params_dict)