Skip to content

Commit

Permalink
Update text_augment.py
Browse files: browse the repository at this point in the history
  • Loading branch information
huu4ontocord authored Mar 9, 2022
1 parent b6d658e commit 2e6f1cf
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions text_augment.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1747,17 +1747,17 @@ def process_ner_chunks_with_trans(self,
for doc in docs.values():
doc[target_ner_key] = ner = doc.get(target_ner_key, {})
if True:
chunk2ner = detect_in_dictionary(doc[target_text_key])
ner_tuples = detect_in_dictionary(doc[target_text_key])
onto_items = []
for c, label in chunk2ner.items():
for c, start, end, label in ner_tuples:
if label not in ("PUBLIC_FIGURE",): continue # hard coded to only do famous people for now. we will depend on the other models to detect other NERs
ner_word = c[0].replace(" ", "").replace("_", "").replace("_", "") if cjk_detect(c[0]) else c[0].replace("_", " ").replace("_", " ").rstrip(strip_chars)
if ner_word.lower() not in stopwords2:
if not cjk_detect(ner_word) and label in ('PERSON', 'PUBLIC_FIGURE', 'ORG') and " " not in ner_word: continue
onto_items.append(((ner_word, c[1], c[1] + len(ner_word)), label))
for ner_mention, label in list(set(onto_items)):
aHash = ner.get(ner_mention, {})
aHash[(label, 'dict')] = aHash.get((label, 'dict'), 0) + dictionary_weight * TextAugment.onto_weights.get(target_lang, 0.5) * backtrans_weight
aHash[(label, 'dict')] = aHash.get((label, 'dict'), 0) + dictionary_weight * backtrans_weight
ner[ner_mention] = aHash

if do_spacy:
Expand Down

0 comments on commit 2e6f1cf

Please sign in to comment.