Arkisto-Platform / describo-data-packs

https://arkisto-platform.github.io/describo-data-packs/
GNU General Public License v3.0
0 stars 0 forks source link

Revise languages_crossref_glotto_austlang #12

Closed marcolarosa closed 1 year ago

marcolarosa commented 2 years ago
// Add all Glottolog and all Austlang languages to a single dataset,
// and cross-reference the entries that appear in both.

const fs = require("fs");
const { writeJson, readJson } = require("fs-extra");
const { groupBy } = require("lodash");

// Path of the combined data pack written at the end of main().
const allLanguages = "./languages-main-data-pack.json";

// Paths of the individual data packs written by main().
const glottologOut = "./Glottolog-language-data/glottolog-language-data-pack.json";
const austlangOut = "./Austlang-language-data/austlang-language-data-pack.json";

// main() is async: handle its rejection explicitly so a failed read or write
// is reported and the process exits with a non-zero status.
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});

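// austlangGroupedByName looks like this: an object keyed by language name,
// where each value is the array of entries that share that name.
// {
//     'german': [{ ... }]
// }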

/**
 * Cross-reference the Glottolog and Austlang language data packs by language
 * name and write the results.
 *
 * Steps:
 *  1. Read both data packs.
 *  2. For every entry whose `name` also occurs in the other pack, add a
 *     `sameAs` reference (`{ "@id": ... }`) to the first same-named entry of
 *     the other pack. Every entry ends up with a `sameAs` array.
 *  3. Write each updated pack to `glottologOut` / `austlangOut`.
 *  4. Write the combined list (Glottolog entries followed by Austlang
 *     entries) to `allLanguages`.
 *
 * Matching is by name only, so two unrelated languages that happen to share
 * a name are cross-referenced as well. Ethnologue references are not added:
 * this script does not load any Ethnologue data.
 *
 * @returns {Promise<void>} Resolves once all three files have been written;
 *     rejects if reading or writing any of the files fails.
 */
async function main() {
    const [austlang, glottolog] = await Promise.all([
        readJson("./Austlang-language-data/austlang-language-data-pack.json"),
        readJson("./Glottolog-language-data/glottolog-language-data-pack.json"),
    ]);

    // Build both lookups up front; they are keyed by language name and each
    // value is the array of entries sharing that name.
    const austlangGroupedByName = groupBy(austlang, "name");
    const glottologGroupedByName = groupBy(glottolog, "name");

    addSameAsByName(glottolog, austlangGroupedByName);
    addSameAsByName(austlang, glottologGroupedByName);

    // Write the individual packages; await them so write errors are not lost.
    await Promise.all([
        writeJson(glottologOut, glottolog, { spaces: 4 }),
        writeJson(austlangOut, austlang, { spaces: 4 }),
    ]);

    // Write the combined package without mutating either source list.
    await writeJson(allLanguages, [...glottolog, ...austlang], { spaces: 4 });
}

/**
 * Ensure every entry has a `sameAs` array and, when another dataset has an
 * entry with the same name, reference the first such entry by its "@id".
 *
 * @param {Array<Object>} entries - Entries to update in place.
 * @param {Object} othersGroupedByName - Entries of the other dataset, grouped
 *     by name (name -> array of entries).
 * @returns {Array<Object>} The same `entries` array, for convenience.
 */
function addSameAsByName(entries, othersGroupedByName) {
    for (const entry of entries) {
        if (!entry.sameAs) entry.sameAs = [];

        // Array check (not truthiness) so names such as "constructor", which
        // resolve to inherited properties of a plain object, never match.
        const matches = othersGroupedByName[entry.name];
        if (!Array.isArray(matches) || matches.length === 0) continue;

        // The packs are read from and written to the same files, so skip
        // references that an earlier run has already added.
        const id = matches[0]["@id"];
        const alreadyLinked = entry.sameAs.some((ref) => ref && ref["@id"] === id);
        if (!alreadyLinked) entry.sameAs.push({ "@id": id });
    }
    return entries;
}
marcolarosa commented 1 year ago

resolved by 809c305dca519160314921680ec8c3ed2181c957