jerry-git / learn-python3

Jupyter notebooks for teaching/learning Python 3
MIT License
6.43k stars 1.77k forks source link

93992136 #43

Closed mpmtrucks closed 3 weeks ago

mpmtrucks commented 3 weeks ago

import pandas as pd

# Sample data: make sure every row has the same number of columns as the header.

# Bank-statement sample: the first row is the header, every later row is one
# transaction laid out in the same column order.
data = [
    ["DATE", "WITHDRAWALS", "DEPOSITS", "BALANCE", "PARTICULARS", "MODE", "TYPE"],  # Header
    ["11-06-24", "50000", "200000", "150000", "IMPS/HDFC/416321586123/ABU DHABI COMMERCIAL/Ensure", "IMPS", "Transfers"],
    # NOTE(review): this row has only 6 of the 7 fields, so pandas pads the
    # last column (TYPE) with a missing value and "MOB" lands in PARTICULARS.
    # Confirm which column was dropped and fill it in.
    ["11-06-24", "50000", "50000", "", "MOB", "Transfers"],
    # Add more transactions as needed
    ["29-07-24", "", "130000", "155897.62", "BY CASH BR:CHERKALA", "BY CASH", "Cash Deposits"],
]

# Columns that hold amounts and must be converted from text to numbers.
numeric_columns = ['WITHDRAWALS', 'DEPOSITS', 'BALANCE']

# Known transaction-type markers (matched against the start of PARTICULARS)
# and the reporting category each one belongs to.
transaction_categories = {
    'IMPS': 'Transfers',
    'MOB': 'Transfers',
    'UPI': 'Transfers',
    'NEFT': 'Transfers',
    'BY CASH': 'Cash Deposits',
    'ATM': 'ATM Transactions',
    'Minimum Balance Charges': 'Fees',
    'Unrecovered': 'Fees',
    'NACHDR RETN CHRG': 'Fees',
    'GST FOR NACHDR RETURN': 'Fees',
    'CASH HANDLING CHARGE': 'Fees',
    'Shopping': 'Expenses',
    'NACH_DR': 'Other Debits',
    'Int.Pd': 'Interest'
}


def detect_transaction_type(particulars, known_types):
    """Return the transaction-type marker for one PARTICULARS string.

    Args:
        particulars: The narration text of a transaction.
        known_types: Candidate markers, ordered so that the preferred match
            comes first (callers pass them longest-first).

    Returns:
        The first marker that the narration starts with. If none matches,
        the text before the first ':' is returned, so unknown narrations
        still get a readable TRANSACTION_TYPE.
    """
    text = particulars.strip()
    for known in known_types:
        # Narrations look like "IMPS/HDFC/..." or "BY CASH BR:CHERKALA", so an
        # exact comparison would never match; a prefix test does.
        if text.startswith(known):
            return known
    return particulars.split(':')[0]


def categorize_transactions(rows, categories=None):
    """Build a cleaned DataFrame with TRANSACTION_TYPE and CATEGORY columns.

    Args:
        rows: List of rows; ``rows[0]`` is the header, the rest are records.
        categories: Mapping of type marker to category name. Defaults to the
            module-level ``transaction_categories``.

    Returns:
        A DataFrame with numeric amount columns, a parsed DATE column
        (unparseable dates become NaT) and the two derived columns.

    Raises:
        ValueError: If ``rows`` is empty, i.e. there is not even a header.
    """
    if not rows:
        raise ValueError("data must contain at least a header row")
    if categories is None:
        categories = transaction_categories

    df = pd.DataFrame(rows[1:], columns=rows[0])  # Skip the header row in data

    for col in numeric_columns:
        # Drop thousands separators and stray characters but keep '-' so that
        # negative amounts are not silently turned positive. Anything that
        # still fails to parse (including blanks) becomes 0.
        cleaned = df[col].astype(str).str.replace(r'[^0-9.\-]', '', regex=True)
        df[col] = pd.to_numeric(cleaned, errors='coerce').fillna(0)

    # Convert the date column to a proper datetime object, handling errors
    df['DATE'] = pd.to_datetime(df['DATE'], format='%d-%m-%y', errors='coerce')

    # Longest markers first so a more specific marker wins over a shorter one
    # that happens to share the same prefix.
    known_types = sorted(categories, key=len, reverse=True)
    df['TRANSACTION_TYPE'] = df['PARTICULARS'].astype(str).map(
        lambda text: detect_transaction_type(text, known_types),
        na_action='ignore',  # leave missing narrations missing
    )

    # Map transaction types to categories, handling uncategorized entries
    df['CATEGORY'] = df['TRANSACTION_TYPE'].map(categories).fillna('Uncategorized')
    return df


def split_by_category(df):
    """Return one sub-table per category, keyed in order of first appearance."""
    return {
        category: df[df['CATEGORY'] == category]
        for category in df['CATEGORY'].unique()
    }


try:
    df = categorize_transactions(data)

    # Create separate tables for each category
    tables = split_by_category(df)

    # Combine all tables into one DataFrame. With no transactions there is
    # nothing to concatenate, so fall back to the (empty) frame itself.
    if tables:
        result_df = pd.concat(list(tables.values()), ignore_index=True)
    else:
        result_df = df

    # Display the combined DataFrame
    print(result_df.to_string())

except Exception as e:
    print(f"An error occurred during processing: {e}")
    print("Please check your data format and try again.")