Considering the other existing `check-foo` file-format hooks that check for basic consistency, and being aware of the question in https://github.com/pre-commit/pre-commit-hooks/issues/880, I do think that there is value in a generic CSV checker, e.g. one that checks the text encoding and that the column count is consistent. So I created a simple hook:
import argparse
import csv
import io
import locale
import typing
def check_csv(filename: str, encoding: str, delimiters: str) -> bool:
    """Check that a file is decodable, parseable CSV with a uniform column count.

    A one-line diagnostic prefixed with the filename is printed for the first
    problem found.

    Args:
        filename: Path of the file to check.
        encoding: Text encoding used to decode the file.
        delimiters: Candidate delimiter characters handed to
            ``csv.Sniffer.sniff`` for dialect detection.

    Returns:
        ``True`` if the file decodes, a dialect is detected, every row parses
        and all rows have the same number of columns; ``False`` otherwise.
    """
    try:
        with open(filename, encoding=encoding) as f:
            data = f.read()
    except UnicodeDecodeError:
        print(f'{filename}: failed to decode csv text file using {encoding!r}')
        return False

    # NOTE(review): dialect detection appears to reject some inconsistent
    # inputs on its own (unverified); the explicit column check below does
    # not rely on that.
    try:
        dialect = csv.Sniffer().sniff(data, delimiters)
    except csv.Error:
        print(f'{filename}: failed to detect dialect')
        return False

    reader = csv.reader(io.StringIO(data), dialect)
    try:
        column_counts = {len(row) for row in reader}
    except csv.Error as exc:
        # The reader itself can fail (e.g. a field exceeding the size limit);
        # report that as a failed check instead of crashing with a traceback.
        print(f'{filename}: failed to parse csv: {exc}')
        return False

    if len(column_counts) != 1:
        print(f'{filename}: contains different numbers of columns')
        return False
    return True
def main(argv: typing.Sequence[str] | None = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--encoding', dest='encoding', default=locale.getpreferredencoding())
parser.add_argument('-d', '--delimiters', dest='delimiters', default=',\t; :') # CPython Sniffer defaults.
parser.add_argument('filenames', nargs='*', help='Filenames to check.')
args = parser.parse_args(argv)
retval = 0
for filename in args.filenames:
if not check_csv(filename, encoding=args.encoding, delimiters=args.delimiters):
retval = 1
return retval
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
If you’re interested, I’m happy to discuss a few details and open a PR. Thanks!
Considering the other existing `check-foo` file format hooks that check for basic consistencies, and knowing of question https://github.com/pre-commit/pre-commit-hooks/issues/880, I do think that there is value in a generic CSV checker, e.g. to check for text encoding or consistent column count. So I created a simple hook: If you’re interested, I’m happy to discuss a few details and open a PR. Thanks!