nikhilhtks / LLM-Bio

0 stars 0 forks source link

issue 1423 #3

Open nikhilhtks opened 4 months ago

nikhilhtks commented 4 months ago

Apps installation not working

nikhilhtks commented 4 months ago

test

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Sample data

# Synthetic sample: 90 consecutive daily rows starting 2024-02-01, assigned to
# three agreements ('A', 'B', 'C') of 30 rows each, with random delta values.
data = {
    'CAL_DATE': pd.date_range(start='2024-02-01', periods=90, freq='D'),
    # List repetition needs an explicit `*`; without it the expression is a
    # syntax error. The three runs together must total 90 labels so that every
    # column has the same length.
    'AGREEMENTNAME': ['A'] * 30 + ['B'] * 30 + ['C'] * 30,
    'CALLA_DELTA': np.random.randn(90),
    'NETEXPOS_DELTA': np.random.randn(90),
}

df = pd.DataFrame(data)

# Function to get the relevant date

def get_relevant_date(yesterday):
    """Return ``yesterday``, moved back to the preceding Friday on weekends.

    Args:
        yesterday: A ``datetime`` (or ``date``) to adjust. Any time-of-day
            component is carried through unchanged.

    Returns:
        ``yesterday`` minus one day when it is a Saturday, minus two days when
        it is a Sunday, and ``yesterday`` itself for Monday through Friday.
    """
    weekday = yesterday.weekday()  # Monday == 0 ... Sunday == 6
    if weekday == 5:  # Saturday
        return yesterday - timedelta(days=1)
    if weekday == 6:  # Sunday
        return yesterday - timedelta(days=2)
    return yesterday

# Calculate the relevant date

# Reference day for the look-back window: yesterday, as adjusted by
# get_relevant_date(). Both values keep the current time of day because they
# are derived from datetime.now().
yesterday = datetime.now() - timedelta(days=1)
relevant_date = get_relevant_date(yesterday)

# Get the mean and std dev for each agreement for the last 30 days

# Per-agreement baseline statistics, keyed by agreement name.
results = {}
agreements = df['AGREEMENTNAME'].unique()

# The date window is the same for every agreement, so build the mask once:
# rows dated after (relevant_date - 30 days) and up to relevant_date inclusive.
in_window = (
    (df['CAL_DATE'] <= relevant_date)
    & (df['CAL_DATE'] > relevant_date - timedelta(days=30))
)

for agreement in agreements:
    last_30_days = df[in_window & (df['AGREEMENTNAME'] == agreement)]
    mean_calla_delta = last_30_days['CALLA_DELTA'].mean()
    std_calla_delta = last_30_days['CALLA_DELTA'].std()
    mean_netexpos_delta = last_30_days['NETEXPOS_DELTA'].mean()
    std_netexpos_delta = last_30_days['NETEXPOS_DELTA'].std()

    # This assignment must stay inside the loop; outside it, only the last
    # agreement would be recorded.
    results[agreement] = {
        'mean_calla_delta': mean_calla_delta,
        'std_calla_delta': std_calla_delta,
        'mean_netexpos_delta': mean_netexpos_delta,
        'std_netexpos_delta': std_netexpos_delta,
    }

# Calculate z-scores and identify anomalies for today's data

# A row is flagged when either z-score lies more than this many standard
# deviations from the agreement's baseline mean.
Z_SCORE_THRESHOLD = 8

today = datetime.now().date()
# CAL_DATE is built as a pandas datetime column, so compare it with a
# Timestamp rather than a plain datetime.date: pandas treats the two as
# non-comparable, and an equality test against a date selects no rows.
# normalize() drops any time-of-day so the match is on the calendar day.
today_data = df[df['CAL_DATE'].dt.normalize() == pd.Timestamp(today)]

anomalies = []

for _, row in today_data.iterrows():
    agreement = row['AGREEMENTNAME']
    mean_calla_delta = results[agreement]['mean_calla_delta']
    std_calla_delta = results[agreement]['std_calla_delta']
    mean_netexpos_delta = results[agreement]['mean_netexpos_delta']
    std_netexpos_delta = results[agreement]['std_netexpos_delta']

    # NOTE(review): a baseline std of NaN or 0 makes these z-scores NaN or
    # infinite; NaN never exceeds the threshold, so such rows are silently
    # not flagged. Confirm that this is the desired handling.
    z_score_calla = (row['CALLA_DELTA'] - mean_calla_delta) / std_calla_delta
    z_score_netexpos = (row['NETEXPOS_DELTA'] - mean_netexpos_delta) / std_netexpos_delta

    if abs(z_score_calla) > Z_SCORE_THRESHOLD or abs(z_score_netexpos) > Z_SCORE_THRESHOLD:
        anomalies.append({
            'AGREEMENTNAME': agreement,
            'CALLA_DELTA': row['CALLA_DELTA'],
            'NETEXPOS_DELTA': row['NETEXPOS_DELTA'],
        })

# Collect the flagged rows into a DataFrame, one row per appended anomaly dict.
anomalies_df = pd.DataFrame(anomalies)

# NOTE(review): ace_tools is imported here rather than with the other imports;
# presumably it is a helper that exists only in the environment this snippet
# was written for. Confirm it is installed before running elsewhere.
import ace_tools as tools; tools.display_dataframe_to_user(name="Anomalies", dataframe=anomalies_df)

nikhilhtks commented 4 months ago

test1