Open alvinwmtan opened 1 year ago
Adding more unilemmas and odd categories discovered during SwadeshCDI work:
mutate(category = case_when(
  # case_when() takes the first matching clause, so the category-driven rules
  # stay ahead of the lemma-driven ones below.

  # --- Fold odd categories into the standard ones ---
  category %in% c("descriptive_words (adjectives)",
                  "descriptive_words (adverbs)") ~ "descriptive_words",
  category %in% c("outside_places", "places") ~ "outside", # combine outside and places
  category == "articles" ~ "quantifiers",
  category == "hold" ~ "household",
  category == "states" ~ "action_words",
  category == "prepositions" ~ "locations",
  category == "negation_words" ~ "games_routines", # Arabic: 'finished', 'don't want', 'not mine'...

  # --- "mental" is reassigned item by item ---
  category == "mental" & uni_lemma %in% c("understand", "remember") ~ "action_words",
  category == "mental" & uni_lemma == "mad" ~ "descriptive_words",

  # --- Items with a missing category ---
  is.na(category) & uni_lemma == "although" ~ "connecting_words",
  is.na(category) & uni_lemma == "accident" ~ "other",
  is.na(category) & uni_lemma %in% c("expensive", "bored", "circle", "deep") ~ "descriptive_words",
  is.na(category) & uni_lemma == "material" ~ "household",
  is.na(category) & uni_lemma == "microscope" ~ "toys",

  # --- Items moved out of one specific category ---
  category == "toys" & uni_lemma == "album" ~ "household",
  category == "games_routines" & uni_lemma == "allowed" ~ "helping_verbs",

  # --- Items assigned by lemma alone (any category not handled above) ---
  uni_lemma %in% c("backpack", "fishtank") ~ "household",
  uni_lemma %in% c("only", "every") ~ "quantifiers",
  uni_lemma %in% c("mall", "gas") ~ "outside",
  uni_lemma %in% c("our", "I", "he") ~ "pronouns",
  uni_lemma %in% c("blouse", "bead") ~ "clothing",
  uni_lemma == "today" ~ "time_words",
  uni_lemma == "bounce" ~ "action_words",
  uni_lemma == "yet" ~ "descriptive_words",
  uni_lemma == "might" ~ "helping_verbs",

  # Anything else keeps its existing category.
  .default = category
)) |>
  # The category tests below see the categories as rewritten by the step above.
  mutate(uni_lemma = case_when(
    # Keyed on the item definition rather than the lemma, so it must stay first.
    definition == "haber (hay)" ~ "have",

    # --- Spelling / casing fixes ---
    uni_lemma == "Pencil" ~ "pencil", # Catalan typo
    # NOTE(review): "aound" could also be a typo for "around" -- confirm "round" is intended.
    uni_lemma == "aound" ~ "round", # Irish typo
    uni_lemma == "forger" ~ "forget", # apparent typo

    # --- Pronouns mapped to person/number glosses ---
    uni_lemma == "I" ~ "1SG", # Japanese (boku, watashi)
    uni_lemma == "he" ~ "3SG", # Mandarin (Taiwanese)
    uni_lemma == "her" ~ "3SG.POSS", # Dutch
    uni_lemma == "our" ~ "1PL.POSS",
    uni_lemma == "self" & category == "pronouns" ~ "1SG", # Estonian

    # --- Lemmas disambiguated by their category ---
    uni_lemma == "self" & category == "vehicles" ~ "car", # Spanish (Chilean)
    uni_lemma == "back" & category == "body_parts" ~ "back (body part)",
    uni_lemma == "nail" & category == "body_parts" ~ "fingernail",
    uni_lemma == "baker" & category == "outside" ~ "bakery",
    uni_lemma == "bat" & category == "toys" ~ "bat (object)",
    uni_lemma == "chicken" & category == "animals" ~ "chicken (animal)",

    # --- Plain renames ---
    uni_lemma == "allowed" ~ "allow",
    uni_lemma == "mop" ~ "mop (object)",
    uni_lemma == "fishtank" ~ "fish tank",
    uni_lemma == "aggrieved" ~ "upset",
    uni_lemma == "baby chair" ~ "high chair",
    uni_lemma == "bead" ~ "beads",
    uni_lemma == "chewing gum" ~ "gum",
    uni_lemma == "clothing" ~ "clothes",
    uni_lemma == "cock-a-doodle-doo" ~ "cockadoodledoo",
    uni_lemma == "child's name" ~ "child's own name",
    uni_lemma == "fireman" ~ "firefighter",
    uni_lemma == "fridge/freezer" ~ "fridge",
    uni_lemma == "fries" ~ "french fries",

    # Anything else keeps its existing uni_lemma.
    .default = uni_lemma
  ))
Starting a new thread for unilemma consistency updates. Rather than changing these individually as they are found, we intend to apply them in batches once enough have accumulated.
Current list:
New items (check whether existing forms already contain these):