# Source code for utils.classes.pandas_dataframe
import pandas as pd
# [docs]
class PandasDataframe:
    """
    Class responsible for the standardization and manipulation of Pandas DataFrames.

    Most methods replace ``self.df`` with the result of the operation and
    return ``None``; only the ``query_date_and_element``, ``get_column_in_list``
    and ``find_*`` methods return a value.

    Attributes:
        path (str):
            Path of the CSV file read by ``csv_to_df``.
        df (pandas.DataFrame):
            Data stored in the dataframe.
        list (list, optional):
            List of dataframes used for concatenation.
        dict (dict, optional):
            Dictionary used to create a dataframe.
    """

    def __init__(self, path, df, **kwargs):
        """
        Stores the CSV path, the dataframe and the optional build inputs.

        Args:
            path (str):
                Path of the CSV file read by ``csv_to_df``.
            df (pandas.DataFrame):
                Initial dataframe.
            **kwargs:
                Optional ``list`` (dataframes to concatenate) and ``dict``
                (data used to build a dataframe).
        """
        self.path = path
        self.df = df
        # Missing keys and falsy values (e.g. an empty list/dict) both end up as None.
        self.list = kwargs.get('list') or None
        self.dict = kwargs.get('dict') or None

    def csv_to_df(self):
        """
        Reads the ';'-separated, UTF-8 CSV file at ``self.path`` into ``self.df``.
        """
        self.df = pd.read_csv(self.path, sep=';', encoding='utf-8', index_col=None)

    def df_to_csv(self, path):
        """
        Exports the current dataframe to a ';'-separated CSV file without the index.

        Args:
            path (str):
                Destination path for the CSV file.
        """
        self.df.to_csv(path, sep=';', index=False)

    def list_to_df(self):
        """
        Concatenates ``self.list`` into a single dataframe with a fresh index.
        """
        self.df = pd.concat(self.list, ignore_index=True)

    def dict_to_df(self):
        """
        Converts ``self.dict`` into a dataframe stored in ``self.df``.
        """
        self.df = pd.DataFrame(self.dict)

    def drop_column(self, column, direction):
        """
        Drops rows or columns from the dataframe.

        Args:
            column (str or list):
                Column name(s) or row label(s) to drop.
            direction (int):
                Axis to drop from (0 for rows, 1 for columns).
        """
        self.df = self.df.drop(column, axis=direction)

    def query_date(self, start_date, end_date, column_name):
        """
        Keeps only the rows whose date lies between two dates (both inclusive).

        The date column of the resulting dataframe is converted to datetime.
        The dataframe previously held in ``self.df`` is not modified.

        Args:
            start_date (str or datetime):
                Start date for filtering.
            end_date (str or datetime):
                End date for filtering.
            column_name (str):
                Name of the date column.
        """
        dates = pd.to_datetime(self.df[column_name])
        in_range = dates.between(start_date, end_date)
        # Work on an explicit copy so the assignment below never targets a
        # slice of another frame (avoids SettingWithCopyWarning).
        filtered = self.df.loc[in_range].copy()
        filtered[column_name] = dates.loc[in_range]
        self.df = filtered

    def query_element_in(self, column, collection):
        """
        Keeps only the rows whose column value belongs to a given collection.

        Args:
            column (str):
                Column name to filter.
            collection (list or set):
                Collection of values to match.
        """
        self.df = self.df[self.df[column].isin(collection)]

    def query_date_and_element(self, start_date, end_date, date_column_name, investment_cnpj, investment_column_cnpj):
        """
        Filters rows based on both a date range and a specific element.

        ``self.df`` is left untouched; the filtered data is returned instead.

        Args:
            start_date (str or datetime):
                Start date for filtering (inclusive).
            end_date (str or datetime):
                End date for filtering (inclusive).
            date_column_name (str):
                Name of the date column.
            investment_cnpj (str):
                Value to filter in the investment column.
            investment_column_cnpj (str):
                Column name containing the investment identifier.

        Returns:
            pandas.DataFrame:
                Filtered dataframe, with the date column converted to datetime.
        """
        matches = self.df[investment_column_cnpj].isin([investment_cnpj])
        # Copy before assigning the converted column: writing into a filtered
        # slice raises SettingWithCopyWarning on pandas < 3.
        selected = self.df[matches].copy()
        selected[date_column_name] = pd.to_datetime(selected[date_column_name])
        in_range = selected[date_column_name].between(start_date, end_date)
        return selected.loc[in_range]

    def reset_index(self):
        """
        Resets the dataframe index; the previous index is kept as a column.
        """
        self.df = self.df.reset_index()

    def get_column_in_list(self, column):
        """
        Returns a dataframe column as a Python list.

        Args:
            column (str):
                Column name.

        Returns:
            list:
                Column values as a list.
        """
        return self.df[column].tolist()

    def sort_elements_list(self, sort_list):
        """
        Sorts the dataframe by the specified columns.

        Args:
            sort_list (list):
                List of column names to sort by.
        """
        self.df = self.df.sort_values(by=sort_list)

    def order_columns(self, order_list):
        """
        Reorders the dataframe columns; columns not listed are dropped.

        Args:
            order_list (list):
                Desired column order.
        """
        self.df = self.df[order_list]

    def group_element(self, group_element):
        """
        Groups the dataframe by the specified column(s).

        NOTE: after this call ``self.df`` holds the pandas GroupBy object
        returned by ``DataFrame.groupby``, not a DataFrame.

        Args:
            group_element (str or list):
                Column(s) to group by.
        """
        self.df = self.df.groupby(group_element)

    def find_row_date_greater_or_equals_than_indicated(self, date_str, column_name='DT_COMPTC') -> tuple[bool, int]:
        """
        Finds the first row where the date is greater than or equal to the given date.

        Args:
            date_str (str or datetime):
                Reference date.
            column_name (str, optional):
                Name of the date column. Defaults to ``'DT_COMPTC'``.

        Returns:
            tuple:
                (True, index) if found, otherwise (False, 0). ``index`` is the
                index *label* of the row (``Series.idxmax``), which equals its
                position only while the dataframe has a default index.
        """
        dates = pd.to_datetime(self.df[column_name])
        mask = dates >= date_str
        if not mask.any():
            return False, 0
        # idxmax on a boolean mask yields the label of the first True value.
        return True, mask.idxmax()

    def find_row_data(self, row):
        """
        Retrieves a row from the dataframe by integer position.

        Args:
            row (int):
                Row position (used with ``iloc``).

        Returns:
            pandas.Series:
                Row data.
        """
        return self.df.iloc[row]
# Public names exported by a star-import of this module.
__all__ = ["PandasDataframe"]