In [1]: import pathlib
...:
...: import pandas as pd
...:
...: DIR = pathlib.Path('~/projects/cldf/autotyp-data/data').expanduser()
...: DATA = sorted(p for p in DIR.glob('*.csv') if p.name != 'Register.csv')
...:
...: lf = pd.read_csv(DIR / 'Register.csv', encoding='utf-8', index_col='LID')
...:
...: for d in DATA:
...: df = pd.read_csv(d, encoding='utf-8')
...: missing = df.loc[~df['LID'].isin(lf.index), ['LID']]
...: if not missing.empty:
...: print(d.name)
...: print(missing)
Grammatical_markers.csv
LID
2854 2915
4685 2915
NP_per_language.csv
LID
479 3000
NP_structure.csv
LID
922 3000
1029 3000
NP_structure_presence.csv
LID
1012 3000
1013 3000