Closed marcolarosa closed 1 year ago
// Add all Glottolog and all Austlang languages to a single dataset and
// cross-reference the entries that appear in both (matched by `name`).
const fs = require("fs");
const { writeJson, readJson } = require("fs-extra");
const allLanguages = "./languages-main-data-pack.json";
const { groupBy } = require("lodash");
const glottologOut = "./Glottolog-language-data/glottolog-language-data-pack.json";
const austlangOut = "./Austlang-language-data/austlang-language-data-pack.json";

// Surface failures instead of leaving the promise returned by main() floating.
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});

/**
 * Adds `sameAs` links from each entry to the first same-named entry of every
 * other dataset.
 *
 * Entries are mutated in place: a missing `sameAs` is initialised to an empty
 * array, and a link is only appended when that `@id` is not already present,
 * so running the script again over its own output does not pile up duplicates.
 *
 * @param {Array<Object>} entries - Language entries to annotate.
 * @param {Array<Object>} otherDatasetsGroupedByName - Lookups shaped like
 *     `{ 'german': [{ ... }] }` (the result of `groupBy(dataset, "name")`).
 * @returns {Array<Object>} The same `entries` array, for convenience.
 */
function addCrossReferences(entries, otherDatasetsGroupedByName) {
    for (const entry of entries) {
        if (!entry.sameAs) entry.sameAs = [];

        for (const groupedByName of otherDatasetsGroupedByName) {
            // Array.isArray also guards against inherited properties such as
            // "constructor" being mistaken for a group of matches.
            const matches = groupedByName[entry.name];
            if (!Array.isArray(matches) || matches.length === 0) continue;

            const id = matches[0]["@id"];
            const alreadyLinked = entry.sameAs.some((ref) => ref["@id"] === id);
            if (!alreadyLinked) entry.sameAs.push({ "@id": id });
        }
    }
    return entries;
}

/**
 * Reads the Austlang and Glottolog data packs, cross-references entries that
 * share a name, rewrites both individual packs and writes the combined pack
 * (Glottolog entries followed by Austlang entries).
 *
 * Assumes both data packs are JSON arrays of objects carrying `name` and
 * `@id` properties — TODO confirm against the pack generators.
 *
 * @returns {Promise<void>} Resolves once all three files have been written.
 */
async function main() {
    // No Ethnologue dataset is loaded by this script; an empty lookup keeps
    // the cross-referencing uniform until one is wired in.
    const ethnologueGroupedByName = {};

    const [austlang, glottolog] = await Promise.all([
        readJson("./Austlang-language-data/austlang-language-data-pack.json"),
        readJson("./Glottolog-language-data/glottolog-language-data-pack.json"),
    ]);

    // austlangGroupedByName looks like
    // {
    //     'german': [{ ... }]
    // }
    const austlangGroupedByName = groupBy(austlang, "name");
    const glottologGroupedByName = groupBy(glottolog, "name");

    addCrossReferences(glottolog, [austlangGroupedByName, ethnologueGroupedByName]);
    addCrossReferences(austlang, [glottologGroupedByName, ethnologueGroupedByName]);

    // NOTE: an earlier, commented-out draft only matched Glottolog entries
    // whose "containedInPlace" was "Australia" (in case some other language
    // shares its name with an Australian language) and copied the first
    // existing Glottolog `sameAs` link onto the matching Austlang entry.
    // That filtering is not applied here.

    // write the individual packages
    await Promise.all([
        writeJson(glottologOut, glottolog, { spaces: 4 }),
        writeJson(austlangOut, austlang, { spaces: 4 }),
    ]);

    // write the combined package
    await writeJson(allLanguages, [...glottolog, ...austlang], { spaces: 4 });
}
resolved by 809c305dca519160314921680ec8c3ed2181c957