def _decode(field):
    """Return the text of a dataset field.

    Calls ``field.numpy()`` and decodes the result as UTF-8.
    NOTE(review): assumes ``.numpy()`` yields ``bytes`` for every field used
    below -- confirm against the dataset's feature spec.
    """
    return field.numpy().decode('utf-8')


# Command-line options: the TFDS storage directory and the release directory.
parser = ArgumentParser()
parser.add_argument(
    '--dataset_dir',
    default='/shares/volk.cl.uzh/zifjia/tensorflow_datasets_1',
    help='which dir to store the tensorflow dataset?',
)
parser.add_argument(
    '--data_dir',
    default='/shares/easier.volk.cl.uzh/WMT_23/signsuisse',
    help='which dir to store the final dataset to be released?',
)
args = parser.parse_args()

# Request videos and holistic poses; process_video=False is passed so the
# 'video' / 'exampleVideo' fields can be decoded as strings below.
config = SignDatasetConfig(
    name="firestore-7",
    version="1.0.0",
    include_video=True,
    include_pose='holistic',
    process_video=False,
)
dataset = tfds.load(
    name='sign_suisse',
    builder_kwargs={"config": config},
    data_dir=args.dataset_dir,
)

# Flatten every training example into a plain dict of Python strings.
data = []
for datum in dataset["train"]:
    # for datum in itertools.islice(dataset["train"], 0, 10):
    current = {
        'id': _decode(datum['id']),
        'name': _decode(datum['name']),
        'spokenLanguage': _decode(datum['spokenLanguage']),
        # The raw signed-language value is mapped through the lookup table.
        'signedLanguage': sign_language_lookup_table[_decode(datum['signedLanguage'])],
        # FIXME:https://github.com/sign-language-processing/datasets/issues/36
        # 'category': _decode(datum['category']),
        'definition': _decode(datum['definition']),
        'paraphrase': _decode(datum['paraphrase']),
        'example': _decode(datum['exampleText']),
        # FIXME:https://github.com/sign-language-processing/datasets/issues/36
        # 'url': _decode(datum['url']),
        'video': _decode(datum['video']),
        'poseMediapipe': _decode(datum['pose']['path']),
        'exampleVideo': _decode(datum['exampleVideo']),
        'examplePoseMediapipe': _decode(datum['examplePose']['path']),
    }
    data.append(current)
    # Print the id next to the example-video value so each entry can be inspected.
    print(f"{current['id']}: {current['exampleVideo']}")
The code is shown above, and the log is attached as loader_log.txt.
Not sure what the cause is (perhaps a download error?), so not sure whether it reproduces in different environments.
But it is fine for all the `datum['video']` values (i.e., all of them are local paths).