Open yibeichan opened 1 year ago
For reference, here is my Python script (with directory info removed):
# Load the cleaned item table and keep only the rows of one assessment (ASR).
df = pd.read_csv('hbn/item-names-cleaned.csv')
# choose one assessment
# Reassign instead of calling dropna(inplace=True) on a filtered slice of
# `df`: in-place edits on a slice trigger SettingWithCopyWarning and are
# unreliable.
asr_df = df[df["datadic"] == "ASR"].dropna(subset=["keys"])
# Keep only plain item keys such as "ASR_12" or "ASR_56a".
sub_asr_df = asr_df[asr_df["keys"].str.match(r'^ASR_\d+[a-z]?$')].copy()
# fill nan as 999
# Assign the result back to the column: `df[col].fillna(..., inplace=True)`
# is chained assignment, which is deprecated and has no effect once pandas
# copy-on-write is enabled, so the 999 sentinel would silently go missing.
sub_asr_df["value"] = sub_asr_df["value"].fillna(999)
# create json file for each item
# Writes one reproschema Field document per row of `sub_asr_df` into
# `items_dir`, using the item key as the file name.
import re  # imported here because this section is the only user of it

# Leading item number such as "12. " at the start of a question text.
leading_number = re.compile(r'^\d+\.\s')

for index, row in sub_asr_df.iterrows():
    var = row["keys"]
    dom = row["domains"].replace('_', ' ')
    # NOTE(review): `index` is the row label carried over from the CSV, not
    # the position of the item within the ASR subset -- confirm that this
    # is the question number intended in the description.
    desc = f"Q{index+1} of the {dom}"
    # str.replace() matches its first argument literally, so the original
    # call never removed the leading "<number>. " prefix; a regex
    # substitution is required for that.
    question = leading_number.sub('', row["questions"])
    # 999 is the placeholder written into missing `value` cells earlier in
    # the script: items with listed values become radio questions, items
    # without become free-text questions.
    if row["value"] != 999:
        itype = "radio"
        # Relative reference; presumably a shared response-options file
        # one level above the items directory -- verify it exists.
        resopt = "../valueConstraints"
    else:
        itype = "text"
        resopt = {
            "valueType": "xsd:string"
        }
    item_json = {
        "@context": "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic",
        "@type": "reproschema:Field",
        "@id": var,
        "prefLabel": var,
        "description": desc,
        "schemaVersion": "1.0.0-rc4",
        "version": "0.0.1",
        "question": {
            "en": question
        },
        "ui": {
            "inputType": itype
        },
        "responseOptions": resopt
    }
    with open(os.path.join(items_dir, f"{var}"), 'w', encoding='utf-8') as jsonfile:
        json.dump(item_json, jsonfile, indent=4)
# create the main json file
# `item_keys` replaces the former name `vars`, which shadowed the builtin
# vars(); a plain list also makes the two passes below explicit.
item_keys = sub_asr_df["keys"].tolist()
# Build the JSON-LD object for the main file
# The context and schemaVersion use 1.0.0-rc4 so that the activity agrees
# with the item files it references, which are written with 1.0.0-rc4;
# the original mixed 1.0.0-rc1 here with rc4 items.
jsonld_obj = {
    "@context": "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic",
    "@type": "reproschema:Activity",
    "@id": "hbn_asr",
    "prefLabel": "HBN Adult Self Report (ASR)",
    "altLabel": "hbn_asr",
    "schemaVersion": "1.0.0-rc4",
    "version": "0.0.1",
    "ui": {
        # Items are presented in the order they appear in the table.
        "order": [f"items/{key}" for key in item_keys],
        "shuffle": False,
        "addProperties": [
            {"variableName": key, "isAbout": f"items/{key}"}
            for key in item_keys
        ],
    },
}
# Write the JSON-LD object to a file
with open(os.path.join(output_dir, "hbn_asr_schema"), 'w', encoding='utf-8') as jsonfile:
    json.dump(jsonld_obj, jsonfile, indent=4)
Is `item-names-cleaned.csv` available somewhere online?
@djarecka can you run the workflow again?
I converted ASR (one questionnaire from HBN) as my first attempt at reproschema. It would be great if @sooyounga or @djarecka could take a look at it (especially `items/ASR_126`). (By the way, @djarecka, I went through the LinkML tutorials, but I think it's easier to write a hard-coded Python script for this first conversion since I'm not that familiar with the schema yet.) Also, based on my understanding of this questionnaire, our final output should be structured similarly to this one, where items are grouped into sub-categories (e.g., ASR_SC, ASR_AB, ASR_WD, etc.). I don't have detailed information about which items should be grouped into which category at the moment, but I'll update the structure once I have more info on the questionnaire.