Closed FabianKaiser closed 2 years ago
So I changed more tags that differ between English and German. However, I feel like some of the tags cannot be translated directly into corresponding tags.
# Lookup tables of word forms, POS tags, dependency labels and entity labels.
# The active values are the German replacements; each "English:" comment
# records the original English values that the table was swapped in for.

# English: ["i", "me", "my", "you", "your"]
NO_COREF_LIST = [
    "ich", "mein", "meins",
    "du", "dein", "deins",
]

# Mention type name -> integer id, and the inverse mapping id -> name.
MENTION_TYPE = {
    "PRONOMINAL": 0,
    "NOMINAL": 1,
    "PROPER": 2,
    "LIST": 3,
}
MENTION_LABEL = {type_id: type_name for type_name, type_id in MENTION_TYPE.items()}

# English: ["NN", "NNS", "NNP", "NNPS"]
CONTENT_TAGS = ["NE", "NN"]

# English: ["PRP", "PRP$"]
PRP_TAGS = [
    "PDAT", "PDS",
    "PIAT", "PIS",
    "PPER", "PPOSAT",
    "PRELS", "PRF",
    "PWAT",
]

# English: ["NN", "NNP", "NNPS", "NNS", "PRP", "PRP$", "DT", "IN"]
# Same contents and order as before: the content tags, then the pronoun tags,
# then three further tags.
KEEP_TAGS = CONTENT_TAGS + PRP_TAGS + ["APPR", "APPRART", "KOUS"]

# English: ["CC", ","]
CONJ_TAGS = ["KON", ","]

# English: ["NNP", "NNPS"]
PROPER_TAGS = ["NE"]

# English: ["nsubj", "dep"]
NSUBJ_OR_DEP = ["sb", "sbp", "sp"]

# English: ["conj", "prep"]
CONJ_OR_PREP = ["cj", "mo"]

# English: ["det", "compound", "appos"]
LEAVE_DEP = ["app"]

# English: ["nsubj", "dobj", "iobj", "pobj"]
KEEP_DEP = [
    "nk",
    "sb", "sbp", "sp",
    "oa", "oa2",
    "oc", "og", "op",
]

# The three tables below were not changed for German.
REMOVE_POS = ["CCONJ", "INTJ", "ADP"]
LOWER_NOT_END = ["'s", ",", ".", "!", "?", ":", ";"]
PUNCTS = [".", "!", "?"]

# English: ["PERSON", "NORP", "FACILITY", "ORG", "GPE", "LOC", "PRODUCT",
#           "EVENT", "WORK_OF_ART", "LANGUAGE"]
ACCEPTED_ENTS = ["PER", "LOC", "ORG", "MISC"]
##########################################################
##### UTILITIES TO CONVERT STRINGS IN SPACY HASHES #######
cdef set_hashes_list(Hashes* hashes, py_list, StringStore store, Pool mem):
    # Fill `hashes` with one hash per string of `py_list`.
    #
    # hashes:  struct to populate; its `length` and `arr` fields are overwritten.
    # py_list: the Python strings to convert.
    # store:   StringStore each string is added to; the value returned by
    #          `store.add` is what gets written into the array.
    # mem:     Pool the array is allocated from.
    hashes.length = len(py_list)
    hashes.arr = <hash_t*>mem.alloc(hashes.length, sizeof(hash_t))
    # Write the hashes in the same order as the strings appear in py_list.
    slot = 0
    for text in py_list:
        hashes.arr[slot] = store.add(text)
        slot += 1
cdef HashesList get_hash_lookups(StringStore store, Pool mem):
    # Build the HashesList holding the hashed form of every module-level
    # lookup table, plus a handful of single hashed marker strings.
    #
    # store: StringStore every string is added to.
    # mem:   Pool that the per-table hash arrays are allocated from.
    # Returns the populated HashesList by value.
    cdef HashesList hashes

    # One hashed array per lookup table.
    set_hashes_list(&hashes.no_coref_list, NO_COREF_LIST, store, mem)
    set_hashes_list(&hashes.keep_tags, KEEP_TAGS, store, mem)
    set_hashes_list(&hashes.PRP_tags, PRP_TAGS, store, mem)
    set_hashes_list(&hashes.leave_dep, LEAVE_DEP, store, mem)
    set_hashes_list(&hashes.keep_dep, KEEP_DEP, store, mem)
    set_hashes_list(&hashes.nsubj_or_dep, NSUBJ_OR_DEP, store, mem)
    set_hashes_list(&hashes.conj_or_prep, CONJ_OR_PREP, store, mem)
    set_hashes_list(&hashes.remove_pos, REMOVE_POS, store, mem)
    set_hashes_list(&hashes.lower_not_end, LOWER_NOT_END, store, mem)
    set_hashes_list(&hashes.conj_tags, CONJ_TAGS, store, mem)
    # proper_tags used to be filled twice in a row with the same list; the
    # second call only allocated another array from `mem` and orphaned the
    # first one, so it is filled exactly once here.
    # NOTE(review): CONTENT_TAGS is never hashed in this function -- confirm
    # whether the removed duplicate call was meant to cover it.
    set_hashes_list(&hashes.proper_tags, PROPER_TAGS, store, mem)
    set_hashes_list(&hashes.puncts, PUNCTS, store, mem)

    # Single hashed markers. Where a value was replaced for German, the
    # English original is given in the trailing comment.
    hashes.POSSESSIVE_MARK = store.add("'s")
    hashes.NSUBJ_MARK = store.add("nk")    # English: "nsubj"
    hashes.IN_TAG = store.add('KOUS')      # English: 'IN'
    # NOTE(review): "mark" was not replaced for German, unlike the labels
    # above -- TODO confirm the German parser emits this dependency label.
    hashes.MARK_DEP = store.add("mark")
    hashes.unknown_word = store.add("UNKNOWN")
    hashes.missing_word = store.add("missing")
    hashes.digit_word = store.add("0")
    return hashes
This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.
Hello,
I am currently trying to train a German neuralcoref model (with the dataset supplied in other issues). I got the training process started and am able to load the model afterwards. However, while the model finds about 2/5 of the mentions in the first step, that share decreases a lot during training. Meanwhile, the F1_conll score is increasing. I had some issues with differences between the German and English POS tags, but I think I included all relevant ones. What am I missing?
Edit: I am aware that there might also be issues with dependency tags etc., but it would be nice if somebody could tell me which tags are needed for what.