Closed Starnite closed 7 years ago
Here's some code that you can start with for test_data_integrity.py:
test_data_integrity.py
"""Data integrity tests for the CSV files that live next to this script.

Each ``.csv`` file in this directory is hashed with MD5 and compared against
the hash recorded for it in ``hash.log``.
"""
import ast
import hashlib
import os

import pandas as pd  # noqa: F401  (used by the optional row-count test below)

# Work relative to this script so the relative paths used below resolve no
# matter which directory the test runner was started from.
os.chdir(os.path.dirname(os.path.realpath(__file__)))


def md5(fname):
    """Compute the MD5 hash of a file.

    The file is read in 4096-byte chunks so it is never loaded into memory
    all at once.

    Args:
        fname: Path of the file to hash.

    Returns:
        The MD5 digest of the file contents as a hexadecimal string.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


def load_hash_log(path="hash.log"):
    """Read the recorded hashes from ``path``.

    The file content is parsed with ``ast.literal_eval``; the tests below
    index the result by file name, so it is expected to be a dict literal
    mapping file names to MD5 hex digests.

    Args:
        path: Location of the hash log file.

    Returns:
        The parsed content of the log, or an empty dict when the log file
        does not exist (every data file is then reported as having no
        recorded hash instead of aborting test collection).
    """
    try:
        with open(path, "r") as f:
            return ast.literal_eval(f.read())
    except FileNotFoundError:
        return {}


# Sorted so files are always checked (and failures reported) in the same order.
files = sorted(f for f in os.listdir() if f.endswith('.csv'))
masterfile_rows = 0  # placeholder for the optional row-count test below

hash_log = load_hash_log()


def error_file_changed(fname):
    """Return the failure message for a file whose hash no longer matches."""
    return "File {0} has been changed.".format(fname)


def error_file_not_logged(fname):
    """Return the failure message for a file with no entry in the hash log."""
    return "File {0} has no hash recorded in hash.log.".format(fname)


def test_md5_hash():
    """Check that every CSV file still matches its recorded MD5 hash."""
    for f in files:
        # Fail with a readable message rather than a bare KeyError when a
        # data file was never added to the hash log.
        assert f in hash_log, error_file_not_logged(f)
        # The file is intact only when the hashes are EQUAL; a mismatch means
        # the file content changed since the hash was recorded.
        assert md5(f) == hash_log[f], error_file_changed(f)


# Optional starter for a row-count test; fill in the expected total and the
# error message, then uncomment.
#
# def test_total_rows_correct():
#     total_rows = 0
#
#     for f in files:
#         df = pd.read_csv(f)
#         total_rows += len(df.index)
#
#     assert total_rows == __________, "error message here."
Merge now would be great, thanks!
All merged! As usual, I'll leave it to you (the branch creator) to delete the branch.
Here's some code that you can start with for test_data_integrity.py: