def validate_files(directory):
    """Validate every ``.csv`` file located directly inside *directory*.

    For each file the event type is read from the ``event`` column of the
    first data row and checked against ``EVENT_TYPES``. Then every rule in
    ``RULES`` whose key is also a column name is applied to that column.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            searched).

    Returns:
        dict mapping a file name to a message describing the first problem
        found in that file. Files that pass every check have no entry.

    Raises:
        OSError: If *directory* cannot be listed.
    """
    validation_results = {}
    for file in os.listdir(directory):
        path = os.path.join(directory, file)
        # Skip non-CSV entries and directories that merely end in ".csv".
        if not file.endswith('.csv') or not os.path.isfile(path):
            continue
        try:
            # dtype=str keeps the raw cell text. Without it pandas infers
            # types before the rules run (e.g. "true" -> True, "00123" ->
            # 123), which makes the string and boolean rules reject valid
            # data.
            df = pd.read_csv(path, dtype=str)
        except pd.errors.EmptyDataError:
            validation_results[file] = 'File is empty'
            continue
        except pd.errors.ParserError as exc:
            validation_results[file] = f'Could not parse file: {exc}'
            continue

        if 'event' not in df.columns:
            validation_results[file] = 'Missing required column: event'
            continue
        if df.empty:
            validation_results[file] = 'File contains no data rows'
            continue

        # Only the first row decides the event type of the whole file.
        event_type = df['event'].iloc[0]
        if event_type not in EVENT_TYPES:
            validation_results[file] = f'Unknown event type: {event_type}'
            continue

        # NOTE(review): RULES is looked up by column name, and the
        # mandatory/optional lists in EVENT_TYPES are not consulted here --
        # confirm that is intended.
        for field, rule in RULES.items():
            if field not in df.columns:
                continue
            if not rule(df, field).all():
                # Report only the first failing field per file.
                validation_results[file] = f'Field {field} failed validation'
                break
    return validation_results
# Call the function with the path to your directory
if __name__ == "__main__":
    # Guarded so that importing this module does not try to scan the
    # placeholder path below.
    # NOTE(review): the visible `import` line and the RULES / EVENT_TYPES
    # tables appear further down the file than this call -- confirm they are
    # defined before this runs.
    directory = '/path/to/your/directory'
    result = validate_files(directory)
    print(result)
import os
import re

import pandas as pd
# Validation functions
def is_integer(x):
    """Return True if *x* can be interpreted as a whole number.

    Accepts ints, anything ``int()`` can parse (e.g. ``"42"``) and floats
    without a fractional part. Returns False for ``None``, NaN, infinities,
    non-integral floats and every other value ``int()`` rejects.
    """
    # int() silently truncates floats (int(3.7) == 3), so fractional, NaN and
    # infinite floats have to be rejected explicitly.
    if isinstance(x, float) and not x.is_integer():
        return False
    try:
        int(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None and other objects int() cannot convert.
        return False
    return True
def is_number(x):
    """Return True if ``float(x)`` succeeds, False otherwise.

    Values that ``float()`` cannot convert -- unparsable strings, ``None``,
    containers, or integers too large for a float -- yield False instead of
    raising.
    """
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / lists / other objects; OverflowError: huge ints.
        return False
    return True
def validate_string(df, field, length):
    """Check each value of column *field* is a ``str`` of at most *length* characters.

    Returns the Series obtained by applying that check element-wise to
    ``df[field]``.
    """
    def _fits(value):
        # Non-strings fail outright; strings are checked against the limit.
        if not isinstance(value, str):
            return False
        return len(value) <= length

    return df[field].apply(_fits)
def validate_number(df, field):
    """Apply ``is_number`` element-wise to column *field* of *df*.

    Returns the resulting Series, one entry per row.
    """
    column = df[field]
    return column.apply(is_number)
def validate_integer(df, field):
    """Apply ``is_integer`` element-wise to column *field* of *df*.

    Returns the resulting Series, one entry per row.
    """
    column = df[field]
    return column.apply(is_integer)
def validate_boolean(df, field):
    """Check that each value of column *field* is a boolean.

    A value passes when it is the text ``'true'`` or ``'false'`` or an actual
    ``bool``. Real booleans must be accepted because ``pandas.read_csv``
    converts a column consisting only of true/false text into bool dtype, in
    which case a plain ``isin(['true', 'false'])`` rejects every row.

    Returns a boolean Series aligned with ``df[field]``.
    """
    column = df[field]
    if pd.api.types.is_bool_dtype(column.dtype):
        # The whole column was already parsed as booleans.
        return pd.Series(True, index=column.index, dtype=bool)

    def _is_boolean(value):
        if isinstance(value, bool):
            return True
        return isinstance(value, str) and value in ('true', 'false')

    return column.apply(_is_boolean).astype(bool)
def validate_timestamp(df, field):
    """Check each value of column *field* against the timestamp layout.

    The accepted layout is eight digits, a dash, three colon-separated
    two-digit groups, a literal dot and one to nine fractional digits
    (presumably ``YYYYMMDD-HH:MM:SS.fraction`` -- only the digit layout is
    checked, not calendar validity). Values are converted with ``str()``
    before matching.

    Returns a Series of booleans, one per row.
    """
    # The dot is escaped so that only "." is accepted as the fraction
    # separator; unescaped it would match any character.
    pattern = re.compile(r'\d{8}-\d{2}:\d{2}:\d{2}\.\d{1,9}')
    return df[field].apply(lambda x: bool(pattern.fullmatch(str(x))))
def validate_date(df, field):
    """Check each value of column *field* is exactly eight digits.

    Values are converted with ``str()`` before matching. Returns a Series of
    booleans, one per row.
    """
    eight_digits = re.compile(r'\d{8}')

    def _matches(value):
        return eight_digits.fullmatch(str(value)) is not None

    return df[field].apply(_matches)
def validate_currency(df, field):
    """Check each value of column *field* is a ``str`` of exactly three characters.

    Returns a Series of booleans, one per row.
    """
    def _is_three_char_text(value):
        if not isinstance(value, str):
            return False
        return len(value) == 3

    return df[field].apply(_is_three_char_text)
# Validation rules
def _string_rule(max_length):
    """Build a ``(df, field)`` rule that caps string length at *max_length*."""
    return lambda df, field: validate_string(df, field, max_length)


# Maps a rule name to a callable taking ``(df, field)`` and returning a
# per-row boolean Series.
RULES = {
    'boolean': validate_boolean,
    'currency': validate_currency,
    'date': validate_date,
    'integer': validate_integer,
    'number': validate_number,
    'string-long': _string_rule(256),
    'string-short': _string_rule(64),
    'string-extra': _string_rule(2048),
    'timestamp': validate_timestamp,
}
# Event types with their mandatory and optional fields
# Maps an event type code to the column names that are mandatory and
# optional for that event.
EVENT_TYPES = {
    'ONEW': {
        'mandatory': [
            'event',
            'eventDateTime',
            'eventSequence',
            'logicalOrderID',
            'orderID',
            'securityID',
            'securitySource',
            'securityType',
            'accountID',
            'clientID',
            'clientType',
            'orderCapacity',
            'orderQty',
            'orderType',
            'side',
        ],
        'optional': [
            'eventRespDateTime',
            'eventRespText',
            'origOrderID',
            'receivedOrderID',
            'orderChannel',
            'orderPrice',
            'timeInForce',
            'shortLocateReq',
            'currency',
            'aggregatedOrders',
            'collectionID',
            'algoStrategyID',
            'algoAttributes',
            'altLiquidityInd',
            'automatedSplit',
            'businessFlow',
            'crossForbidden',
            'directedOrder',
            'executionVenue',
            'expireDateTime',
            'freeText',
            'systemID',
            'salesID',
            'traderID',
            'initiator',
            'massCancelled',
            'nDayOrderQty',
            'solicitationType',
        ],
    },
    # define other event types here...
}
# Main validation function
def validate_files(directory):
    """Validate every ``.csv`` file located directly inside *directory*.

    For each file the event type is read from the ``event`` column of the
    first data row and checked against ``EVENT_TYPES``. Then every rule in
    ``RULES`` whose key is also a column name is applied to that column.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            searched).

    Returns:
        dict mapping a file name to a message describing the first problem
        found in that file. Files that pass every check have no entry.

    Raises:
        OSError: If *directory* cannot be listed.
    """
    validation_results = {}
    for file in os.listdir(directory):
        path = os.path.join(directory, file)
        # Skip non-CSV entries and directories that merely end in ".csv".
        if not file.endswith('.csv') or not os.path.isfile(path):
            continue
        try:
            # dtype=str keeps the raw cell text. Without it pandas infers
            # types before the rules run (e.g. "true" -> True, "00123" ->
            # 123), which makes the string and boolean rules reject valid
            # data.
            df = pd.read_csv(path, dtype=str)
        except pd.errors.EmptyDataError:
            validation_results[file] = 'File is empty'
            continue
        except pd.errors.ParserError as exc:
            validation_results[file] = f'Could not parse file: {exc}'
            continue

        if 'event' not in df.columns:
            validation_results[file] = 'Missing required column: event'
            continue
        if df.empty:
            validation_results[file] = 'File contains no data rows'
            continue

        # Only the first row decides the event type of the whole file.
        event_type = df['event'].iloc[0]
        if event_type not in EVENT_TYPES:
            validation_results[file] = f'Unknown event type: {event_type}'
            continue

        # NOTE(review): RULES is looked up by column name, and the
        # mandatory/optional lists in EVENT_TYPES are not consulted here --
        # confirm that is intended.
        for field, rule in RULES.items():
            if field not in df.columns:
                continue
            if not rule(df, field).all():
                # Report only the first failing field per file.
                validation_results[file] = f'Field {field} failed validation'
                break
    return validation_results
# Call the function with the path to your directory
if __name__ == "__main__":
    # Guarded so that importing this module does not try to scan the
    # placeholder path below.
    directory = '/path/to/your/directory'
    result = validate_files(directory)
    print(result)