SelwynChan / Misc

0 stars 0 forks source link

Val #2

Open SelwynChan opened 8 months ago

SelwynChan commented 8 months ago

import os
import re

import pandas as pd

# Validation functions

def is_integer(x):
    """Return True if *x* represents a whole number, False otherwise.

    Accepted values are anything ``int()`` can convert without loss:
    ints, strings such as ``"42"``, and floats whose fractional part is
    zero (``3.0``). Floats with a fractional part, NaN, infinity, ``None``
    and unparsable strings all yield False instead of raising.
    """
    if isinstance(x, float):
        # int() silently truncates floats (int(3.7) == 3) and raises
        # OverflowError on infinity; float.is_integer() is False for
        # fractional values, NaN and +/-inf.
        return x.is_integer()
    try:
        int(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None and other non-convertible objects.
        return False
    return True

def is_number(x):
    """Return True if ``float(x)`` succeeds, False otherwise.

    Values that ``float()`` cannot handle at all (``None``, lists, ...)
    raise TypeError rather than ValueError; both are treated as
    "not a number" so a single bad cell cannot abort a whole validation run.

    Note: anything ``float()`` accepts counts as a number, including the
    strings ``"nan"`` and ``"inf"``.
    """
    try:
        float(x)
    except (TypeError, ValueError):
        return False
    return True

def validate_string(df, field, length):
    """Flag which entries of ``df[field]`` are strings of at most *length* characters.

    Returns a Series aligned with ``df[field]``; an entry is True only when
    the value is a ``str`` and ``len(value) <= length``.
    """
    def _fits(value):
        if not isinstance(value, str):
            return False
        return len(value) <= length

    column = df[field]
    return column.apply(_fits)

def validate_number(df, field):
    """Apply ``is_number`` to every entry of ``df[field]`` and return the resulting Series."""
    column = df[field]
    return column.apply(is_number)

def validate_integer(df, field):
    """Apply ``is_integer`` to every entry of ``df[field]`` and return the resulting Series."""
    column = df[field]
    return column.apply(is_integer)

def validate_boolean(df, field):
    """Flag which entries of ``df[field]`` hold a boolean value.

    An entry passes when it is the string ``'true'`` or ``'false'`` or an
    actual boolean. Real booleans must be accepted because
    ``pandas.read_csv`` converts columns consisting of true/false tokens to
    boolean values by default, so the literal strings never reach this
    function for such columns.

    Returns a boolean Series aligned with ``df[field]``.
    """
    accepted_tokens = ('true', 'false')
    is_bool = pd.api.types.is_bool  # True for Python and NumPy booleans

    def _is_boolean(value):
        if is_bool(value):
            return True
        # The isinstance guard keeps numbers such as 1/0 from being accepted.
        return isinstance(value, str) and value in accepted_tokens

    # astype(bool) keeps the result dtype boolean even for an empty column.
    return df[field].apply(_is_boolean).astype(bool)

def validate_timestamp(df, field):
    """Flag which entries of ``df[field]`` look like ``YYYYMMDD-HH:MM:SS.fffffffff``.

    Each value is converted with ``str()`` and must match, in full: eight
    digits, a dash, ``HH:MM:SS`` as two-digit groups, a literal dot, and
    one to nine fractional digits. Only the shape is checked, not whether
    the digits form a real date or time.

    Returns a boolean Series aligned with ``df[field]``.
    """
    # The dot before the fraction is escaped; unescaped it would match any
    # character (e.g. "12:30:45X123" would pass).
    pattern = re.compile(r'\d{8}-\d{2}:\d{2}:\d{2}\.\d{1,9}')
    return df[field].apply(lambda value: bool(pattern.fullmatch(str(value))))

def validate_date(df, field):
    """Flag which entries of ``df[field]`` consist of exactly eight digits.

    Values are stringified with ``str()`` before matching. Only the shape
    is checked; the digits are not verified to form a real calendar date.
    """
    eight_digits = re.compile(r'\d{8}')

    def _looks_like_date(value):
        return eight_digits.fullmatch(str(value)) is not None

    return df[field].apply(_looks_like_date)

def validate_currency(df, field):
    """Flag which entries of ``df[field]`` are strings of exactly three characters.

    No check is made that the three characters form a known currency code.
    """
    def _is_three_char_string(value):
        if not isinstance(value, str):
            return False
        return len(value) == 3

    return df[field].apply(_is_three_char_string)

# Validation rules

def _string_rule(max_length):
    """Build a ``rule(df, field)`` that defers to ``validate_string`` with *max_length*."""
    def rule(df, field):
        return validate_string(df, field, max_length)

    return rule


# Rule name -> callable taking (df, field) and returning a per-row Series
# of pass/fail flags. The three string rules differ only in maximum length.
RULES = {
    'boolean': validate_boolean,
    'currency': validate_currency,
    'date': validate_date,
    'integer': validate_integer,
    'number': validate_number,
    'string-long': _string_rule(256),
    'string-short': _string_rule(64),
    'string-extra': _string_rule(2048),
    'timestamp': validate_timestamp,
}

# Event types with their mandatory and optional fields

# Event type code -> the field names that must be present ('mandatory')
# and those that may be present ('optional') for that event.
EVENT_TYPES = {
    'ONEW': {
        'mandatory': [
            'event',
            'eventDateTime',
            'eventSequence',
            'logicalOrderID',
            'orderID',
            'securityID',
            'securitySource',
            'securityType',
            'accountID',
            'clientID',
            'clientType',
            'orderCapacity',
            'orderQty',
            'orderType',
            'side',
        ],
        'optional': [
            'eventRespDateTime',
            'eventRespText',
            'origOrderID',
            'receivedOrderID',
            'orderChannel',
            'orderPrice',
            'timeInForce',
            'shortLocateReq',
            'currency',
            'aggregatedOrders',
            'collectionID',
            'algoStrategyID',
            'algoAttributes',
            'altLiquidityInd',
            'automatedSplit',
            'businessFlow',
            'crossForbidden',
            'directedOrder',
            'executionVenue',
            'expireDateTime',
            'freeText',
            'systemID',
            'salesID',
            'traderID',
            'initiator',
            'massCancelled',
            'nDayOrderQty',
            'solicitationType',
        ],
    },
    # define other event types here...
}

# Main validation function

def validate_files(directory, field_types=None):
    """Validate every ``.csv`` file in *directory* and report the failures.

    For each file the checks run in this order, stopping at the first
    problem found:

    1. the file can be parsed and has an ``event`` column with at least
       one row;
    2. the event type in the first row is a key of ``EVENT_TYPES``;
    3. every mandatory field of that event type is present as a column;
    4. every column listed in *field_types* passes its rule from ``RULES``.

    Args:
        directory: Path of the directory to scan (not recursive).
        field_types: Optional mapping of column name -> rule name (a key
            of ``RULES``). When omitted, each ``RULES`` key is itself
            treated as a column name, which is how this function has
            always behaved.

    Returns:
        A dict mapping file name -> message describing the first problem
        found. Files that pass every check are absent from the dict.

    Raises:
        KeyError: If *field_types* names a rule that is not in ``RULES``.
        OSError: If *directory* cannot be listed.
    """
    if field_types is None:
        # NOTE(review): RULES is keyed by rule name ('boolean', 'date', ...),
        # not by column name, so this default only validates columns that
        # happen to be named after a rule. Pass field_types to validate real
        # columns; no column -> type schema is defined in this module.
        field_types = {rule_name: rule_name for rule_name in RULES}

    # Sorted so the order of the report does not depend on the filesystem.
    csv_files = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))
    validation_results = {}

    for file in csv_files:
        try:
            df = pd.read_csv(os.path.join(directory, file))
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            # One unreadable file should not abort the whole run.
            validation_results[file] = f'Could not read file: {exc}'
            continue

        if 'event' not in df.columns or df.empty:
            validation_results[file] = 'Missing event type'
            continue

        # Only the first row decides the event type for the file.
        event_type = df['event'].iloc[0]
        if event_type not in EVENT_TYPES:
            validation_results[file] = f'Unknown event type: {event_type}'
            continue

        missing = [
            name
            for name in EVENT_TYPES[event_type]['mandatory']
            if name not in df.columns
        ]
        if missing:
            validation_results[file] = (
                f"Missing mandatory fields: {', '.join(missing)}"
            )
            continue

        for field, rule_name in field_types.items():
            if field not in df.columns:
                continue
            if not RULES[rule_name](df, field).all():
                validation_results[file] = f'Field {field} failed validation'
                break

    return validation_results

# Call the function with the path to your directory

# Guarded so that importing this module does not scan the filesystem;
# running it as a script behaves as before.
if __name__ == '__main__':
    directory = '/path/to/your/directory'
    result = validate_files(directory)
    print(result)