This class will group all EDA techniques, so as to make dataset preparation more efficient.
include:
``import pandas as pd
class EDA(object):
    """Collection of exploratory data analysis (EDA) helpers for pandas dataframes.

    The scope of this class is to gather several useful methods for EDA on
    machine learning projects, so that they can be imported and used in a
    more convenient way. Every helper is a static method, so no instance is
    needed to call them.
    """

    def __init__(self):
        pass

    @staticmethod
    def describe_dataframe(df=None):
        """Print descriptive stats of a dataframe.

        The report covers shape, column names and dtypes, missing-value
        information, ``df.info()``, ``df.describe()`` and the first five rows.

        Args:
            df (dataframe): the dataframe to be analyzed. When omitted, an
                empty dataframe is described.

        Returns:
            None
        """
        # Build the default per call instead of sharing one instance that was
        # created at function-definition time.
        if df is None:
            df = pd.DataFrame()

        # One boolean per row: True when the row holds at least one missing
        # value. ``Series.nonzero()`` no longer exists in current pandas, so
        # the positional row numbers are taken from the underlying array.
        rows_with_missing = df.isnull().any(axis=1)
        missing_positions = rows_with_missing.values.nonzero()[0].tolist()

        print("\n\n")
        print("*" * 30)
        print("About the Data")
        print("*" * 30)

        print("Number of rows::", df.shape[0])
        print("Number of columns::", df.shape[1])
        print("\n")

        print("Column Names::", df.columns.values.tolist())
        print("\n")

        print("Column Data Types::\n", df.dtypes)
        print("\n")

        print("Columns with Missing Values::", df.columns[df.isnull().any()].tolist())
        print("\n")

        print("Number of rows with Missing Values::", len(missing_positions))
        print("\n")

        print("Sample Indices with missing data::", missing_positions[0:5])
        print("\n")

        print("General Stats::")
        # ``info()`` writes its report itself and returns None; wrapping it in
        # print() would emit a stray "None" line.
        df.info()
        print("\n")

        print("Summary Stats::")
        if df.shape[1]:
            print(df.describe())
        else:
            # ``describe()`` raises on a dataframe without columns.
            print("No columns to summarise")
        print("\n")

        print("Dataframe Sample Rows::")
        # ``display`` is only available when running under IPython/Jupyter;
        # fall back to a plain print everywhere else.
        try:
            show = display  # noqa: F821
        except NameError:
            show = print
        show(df.head(5))

    @staticmethod
    def cleanup_column_names(df, rename_dict=None, do_inplace=True):
        """Rename the columns of a pandas dataframe.

        If ``rename_dict`` is not passed (or is empty), every string column
        name is lower-cased and its spaces are replaced by underscores.
        Non-string column labels are left unchanged.

        Args:
            df (dataframe): the dataframe whose columns are renamed
            rename_dict (dict): keys represent old column names and values
                point to newer ones
            do_inplace (bool): flag to update existing dataframe or return a
                new one

        Returns:
            pandas dataframe if do_inplace is set to False, None otherwise
        """
        if rename_dict:
            return df.rename(columns=rename_dict, inplace=do_inplace)

        # Only strings support lower()/replace(); other labels (e.g. integer
        # column names) are mapped to themselves.
        normalised = {
            col: col.lower().replace(' ', '_') if isinstance(col, str) else col
            for col in df.columns.values.tolist()
        }
        return df.rename(columns=normalised, inplace=do_inplace)
Add auto_eda class
This class will group all EDA techniques, so as to make dataset preparation more efficient.
include:
``import pandas as pd
class EDA(object):
    """
    The scope of this class is to gather several useful methods for EDA on machine learning projects, in order to
    be able to import and use them in a more convenient way.
    """
``