From 049359bc0518c4e2a0233f1b8836dafad84a765d Mon Sep 17 00:00:00 2001 From: ontocord Date: Sat, 12 Mar 2022 08:32:33 -0500 Subject: [PATCH] Update process.py --- process.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/process.py b/process.py index aa73dbb..79a0a54 100644 --- a/process.py +++ b/process.py @@ -127,7 +127,6 @@ parser.add_argument('-do_cleanup', dest='do_cleanup', type=int, help='Wether or not to cleanup NERs that are just stopwords or small number', default = 1) parser.add_argument('-do_marian_mt', dest='do_marian_mt', type=int, help='Wether or not to use marianMT for translation instead of M2M100', default = 0) parser.add_argument('-do_docs_trim_for_person', dest='do_docs_trim_for_person', type=int, help='Wether or not to filter out documents with no mentions of persons', default = 0) - parser.add_argument('-do_docs_filter', dest='do_docs_filter', type=int, help='Wether or not to filter out documents with high ratios of junk, or CSAM', default = 0) parser.add_argument('-do_kenlm', dest='do_kenlm', type=int, help='Wether or not to apply a KenLM model to decide if a name is a common person name', default = 1) parser.add_argument('-do_qg_rel', dest='do_qg_rel', type=int, help='Wether or not to infer a relationship between PII entities based an question generation (EXPERIMENTAL)', default = 0) parser.add_argument('-num_words_per_chunk', dest='num_words_per_chunk', type=int, help='number of words per chunk', default=70) @@ -258,7 +257,6 @@ regex_weight=args.regex_weight, backtrans_weight=args.backtrans_weight, do_docs_trim_for_person=args.do_docs_trim_for_person, - do_docs_filter=args.do_docs_filter, do_qg_rel=args.do_qg_rel, do_kenlm = args.do_kenlm, cutoff=cutoff, @@ -290,7 +288,6 @@ regex_weight=args.regex_weight, backtrans_weight=args.backtrans_weight, do_docs_trim_for_person=args.do_docs_trim_for_person, - do_docs_filter=args.do_docs_filter, do_qg_rel=args.do_qg_rel, do_kenlm = args.do_kenlm, cutoff=cutoff,