-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.tdl
234 lines (194 loc) · 9.83 KB
/
config.tdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
;;; files to load
;;; Four entries (singlish.tdl, semi_sg.vpm, parse-nodes_sg.tdl, Version.lsp)
;;; are named without a directory prefix; every other path goes through the
;;; parent directory (../).
;;; NOTE(review): presumably the parent directory holds the shared ERG
;;; resources that this grammar reuses -- confirm.
grammar-top := "singlish.tdl".
variable-property-mapping := "semi_sg.vpm".
maxent-model := "../redwoods.mem".
preprocessor := "../rpp/tokenizer.rpp".
;; NOTE(review): the values of preprocessor-modules (below) and irregular-forms
;; (further down) are unquoted, unlike the other paths in this section --
;; confirm this is intentional.
preprocessor-modules := ../rpp/xml.rpp ../rpp/ascii.rpp ../rpp/lgt.rpp ../rpp/quotes.rpp ../rpp/wiki.rpp ../rpp/gml.rpp ../rpp/html.rpp.
generation-ignore-lexemes := "../lkb/nogen-lex.set".
generation-ignore-rules := "../lkb/nogen-rules.set".
parse-node-labels := "parse-nodes_sg.tdl".
generation-trigger-rules := "../trigger.mtr".
version := "Version.lsp".
semantic-interface-2016 := "../etc/erg.smi".
semantic-interface-top-type := top.
idiom-rules := "../idioms.mtr".
non-idiom-root := root_non_idiom.
irregular-forms := ../irregs.tab.
quickcheck-code := "../ace/ace-erg-qc.txt".
post-model-path := "../ace/english-postagger.hmm".
;;; grammar internal properties
;; Feature paths, written as space-separated feature names.
orth-path := ORTH.
semantics-path := SYNSEM LOCAL CONT.
lex-rels-path := SYNSEM LOCAL CONT RELS.
lex-carg-path := SYNSEM LKEYS KEYREL CARG.
lex-pred-path := SYNSEM LKEYS KEYREL PRED.
rule-rels-path := C-CONT RELS.
;; Parsing roots.  Exactly one parsing-roots line is active (the one under
;; "good:"); the remaining lines are commented-out alternatives.
; parsing-roots := root_informal root_frag.
; good:
parsing-roots := root_strict root_informal root_lepak root_frag root_inffrag root_robust root_robust_frag.
;parsing-roots := root_strict root_informal root_frag root_inffrag root_bridge.
;parsing-roots := root_strict root_informal root_frag root_inffrag root_robust.
; demo:
;parsing-roots := root_strict root_informal.
;; Generation roots.  The two lines directly below are commented-out
;; alternatives; the active setting follows them.
;generation-roots := root_strict root_frag.
;generation-roots := root_strict.
;; Use this one to exclude focus movement (non-WH topicalization)
; CSY 22-11-2020: added root_gen_lepak to allow more generation
generation-roots := root_gen_nofocus root_gen_lepak.
;; Type names for the semantic-argument, list, cons, null and diff-list types.
semarg-type := semarg.
list-type := *list*.
cons-type := *cons*.
null-type := *null*.
diff-list-type := *diff-list*.
;; Each line of the value holds a pair of quoted feature paths.
;; NOTE(review): presumably the first path of each pair names a requirement
;; that must be satisfied by the second path on another item in the chart --
;; confirm against the ACE documentation.
chart-dependencies :=
"SYNSEM LKEYS --+COMPKEY" "SYNSEM LOCAL CAT HEAD MINORS MIN"
"SYNSEM LKEYS --+OCOMPKEY" "SYNSEM LOCAL CAT HEAD MINORS MIN"
"SYNSEM LKEYS --+ARGIND" "SYNSEM LOCAL CONT HOOK INDEX"
.
;; Consider restricting this in ERG...
;; (e.g. the completive 'up' as in |I should remind myself up of that idea| :)
process-chart-dependencies-before-lexical-parsing := no.
;; Daughter features named for deletion.
deleted-daughters :=
ARGS HD-DTR NH-DTR LCONJ-DTR RCONJ-DTR DTR DTR1 DTR2 DTRA DTRB.
;; Active packing restrictor for parsing.
parsing-packing-restrictor := RELS HCONS ICONS RNAME +TI +LL +TG.
;; Commented-out, much longer alternative restrictor.
;; NOTE(review): "+INIIAL" in the line below looks like a typo for "+INITIAL"
;; -- check before re-enabling it.
; parsing-packing-restrictor := HCONS RELS RNAME ORTH RCLSTR ONSET LTOP --SLTOP GENRE NEGPOL ARG-S TO KEY-ARG --TL FROM IDIOM FORM LKEYS ARG0 +STAG L-HNDL LBL DATIVE +ID IND KEYREL E DIALECT ARG3 HS-LEX GEN LARG CPNG +PRB +TAG +CASE PROG C-CONT TENSE CPUNCT PSF ND-AFF ASPECT INSTLOC CTX CFROM +INIIAL DTR2 HD-DTR CTO RCONJ-DTR DTR1 IFORM ALT2KEYREL SQPNCT --TPC ADVNGADD DTR CLEX ARG1 BODY RSTR PERF KCMP LNAME LCONJ-DTR PRF POSS L-INDEX RPAREN RFP ADVADD TOKENS +LAST META-PREFIX PASSIVE DTRA R-HNDL.
;; Packing restrictor for generation; it lists ONSET in addition to
;; RELS HCONS ICONS RNAME.
generation-packing-restrictor :=
ONSET RELS HCONS ICONS RNAME.
;; Generic lexical entries.  In generic-les-by-part-of-speech each quoted
;; string pairs an entry name with a one-letter code (a, n, v).
generic-les-for-semantic-index := generic_proper_ne generic_card_ne generic_ord_ne generic_dom_card_ne generic_dom_ord_ne generic_year_ne generic_date_ne generic_pl_noun_ne.
generic-les-by-part-of-speech := "generic_adj a" "generic_adverb a" "gen_generic_noun n" "gen_generic_verb v".
;; Roles named for deletion from MRS.
mrs-deleted-roles :=
IDIOMP LNK CFROM CTO --PSV
;; starting here, mrs deleted roles left over from old ACE config file
WLINK PARAMS.
;; NOTE(review): going by the setting name, these rules are presumably only
;; allowed to build edges that span the whole input -- confirm against the
;; ACE documentation.
spanning-only-rules :=
aj-hd_int-inv_c
aj-r_frg_c np-aj_frg_c np-aj_rorp-frg_c cl_rel-frg_c aj-np_int-frg_c
pp-aj_frg_c j-aj_frg_c np_nb-frg_c np-cl_numitem_c cl-cl_runon-cma_c
cl-np_runon_c cl-np_runon-prn_c cl_cp-frg_c.
; also vp_sl-frg_c if used
;; The fragment-only-rules setting below is entirely commented out.
;fragment-only-rules :=
; frag_np frag_nbar frag_pp_i
; frag_adj frag_int_adv frag_cl_adv frag_vp
; frag_l_mod_np frag_l_mod_pp frag_l_mod_adv frag_r_mod_np
; frag_r_mod_s_pp frag_r_mod_ap.
; Heuristic for deciding which rules are hyper-active:
; a rule should be hyperactive if the expected cost of processing
; with it as ordinary exceeds the expected cost of processing with it as
; hyperactive. Alternatively, for each rule there is a maximum number of times
; we should be willing to build it before copying.
;
; cost(reconstructing N times when our max unifies is K)
;   = K * unify + copy   if N >= K
;   = N * unify          if N < K
;
; Say we know P(need dag n times);
; then E(cost) = sum_n P(n) cost(n).
;
; We don't know P(n), but we do know E(n) per rule.
; Maxent principle: assume a Poisson distribution of recombinations;
; based on that assumption, we can compute the "correct" K given E(n).
;   never copy:  E(cost) = E(unify * n) = unify * E(n)
;   always copy: E(cost) = unify + copy
; When is unify * E(n) > unify + copy?
; Answer: when unify * (E(n) - 1) > copy,
;   i.e. E(n) > 1 + copy / unify,
;   i.e. when E(recombinations) > copy / unify.
; Answer, based on unify = 4.4us and copy = 9.8us (9.8 / 4.4 = 2.227):
;   if E(recomb) < 2.227, then K = \infty
;   else K = 1 (i.e. disable hyperactivity for this rule)
; Calculations show that (at least in this range) K = \infty is always better
; than K > 1.
hyper-active-rules :=
hdn-np_app-idf-p_c hdn-n_prnth_c n-n_num-seq_c
vppr-vppr_crd-m_c vp-vp_crd-nfin-m_c j-n_crd-t_c np-hdn_cty-cpd_c
hdn-aj_rc-pr_c jpr-jpr_crd-m_c n-j_crd-t_c v-v_crd-fin-ncj_c
np-np_crd-i-t_c np-np_crd-nc-t_c cl-cl_runon_c pp-pp_crd-m_c
np-hdn_nme-cpd_c np-aj_j-frg_c ppr-ppr_crd-m_c n-n_crd-nc-m_c
hdn-aj_redrel-pr_c hdn-np_app-nbr_c np-aj_frg_c cl-np_runon_c
hd-aj_vmod-s_c sp-hd_hc_c jpr-vpr_crd-m_c np-hdn_ttl-cpd_c
cl-cl_runon-cma_c hdn-np_app-idf_c n-hdn_cpd_c
aj-np_frg_c n-j_j-t-cpd_c n-n_crd-m_c np-np_crd-nc-m_c
n-hdn_j-n-cpd_c n-j_j-cpd_c aj-pp_frg_c aj-r_frg_c
j-j_crd-prd-m_c flr-hd_nwh_c aj-hd_int-inv_c hd-hd_rnr-nv_c
ppr-ppr_crd-im_c np-hdn_cpd_c ppr-ppr_crd-t_c aj-np_int-frg_c
jpr-vpr_crd-im_c pp-aj_frg_c hd-aj_vmod_c np-hdn_num-cpd_c
j-j_crd-prd-im_c n-n_crd-3-t_c jpr-jpr_crd-im_c nb-aj_frg_c
jpr-jpr_crd-t_c vp-vp_crd-nfin-im_c pp-pp_crd-t_c jpr-vpr_crd-t_c
pp-pp_crd-im_c j-j_crd-prd-t_c np-np_crd-m_c vp-vp_crd-nfin-t_c
np-np_crd-i3-t_c flr-hd_wh-nmc-inf_c n-n_crd-t_c np-aj_rorp-frg_c
j-aj_frg_c sp-hd_n_c n-v_j-cpd_c aj-hdn_adjn_c
hd-aj_scp-pr_c n-n_crd-im_c n-n_crd-asym2-t_c
j-j_crd-att-t_c hdn-aj_rc_c flr-hd_nwh-nc_c aj-hdn_norm_c
flr-hd_wh-nmc-fin_c num-n_mnp_c hdn-aj_redrel_c cl-cl_crd-m_c
np-np_crd-i2-t_c hd-hd_rnr_c vppr-vppr_crd-im_c vppr-vppr_crd-t_c
mrk-nh_n_c flr-hd_wh-mc_c hdn-np_app-pr_c hdn-np_app_c
sb-hd_q_c vp-vp_crd-fin-im_c hdn-cl_dsh_c vp-vp_crd-fin-m_c
cl-cl_crd-im_c sb-hd_mc_c np-prdp_vpmod_c cl-cl_crd-rc-t_c
sb-hd_nmc_c n-n_crd-asym-t_c flr-hd_wh-mc-sb_c hd-cmp_u_c
flr-hd_rel-fin_c cl-cl_crd-int-t_c vp-vp_crd-fin-t_c cl-np_runon-prn_c
hd-cl_fr-rel_c np-np_crd-im_c cl-cl_crd-t_c.
;; The rule names in this list follow a different naming scheme from the
;; ones in hyper-active-rules.
;; NOTE(review): presumably a legacy list kept only for reference and not
;; read by the parser -- confirm before relying on or removing it.
old-hyper-active-rules :=
adv_coord_mid num_seq np_city_state adv_coord_top
measure_np hspechc fillhead_wh_nr_i adj_pred_coord_top adj_pred_coord_mid
fillhead_wh_r np_n_cmpnd frag_r_mod_np adj_attr_coord_top
frag_l_mod_adv frag_l_mod_pp frag_r_mod_s_pp frag_l_mod_np np_name_cmpnd
hspec hmarkatom noun_n_cmpnd meas_np_symb
fillhead_rel n_ttl_cmpnd nadj_rc_pr nadj_rc fillhead_wh_nr_f
v_coord_nonfin_top p_coord_top appos_npr
frag_r_mod_ap v_coord_nonfin_mid adj_adjn p_coord_mid fillhead_non_wh.
;; Reduce storage requirements for the compiled lexicon when possible.
simplify-lexicon := enabled.
;; Faster generation.
index-accessibility-filtering := enabled.
;; Keep an extra copy of *ocons* and a couple of other types around, so we
;; don't have to copy them when we use them as a glb constraint (the first
;; time, anyway).
extra-erg-dag-stash := enabled.
;; Part-of-speech tagging.
english-pos-tagger := enabled.
;; ERG peculiarity: after generating a passive edge, set its top-level type
;; to 'sign' during forest creation. This results in improved packing at
;; comparatively little cost in unpacking failures.
generalize-edge-top-types := enabled.
;; token settings
token-mapping := enabled.
;; Paths to the token list and to the last token.
lexicon-tokens-path := TOKENS +LIST.
lexicon-last-token-path := TOKENS +LAST.
token-type := token.
;; Feature paths of the individual token properties; the trailing comments
;; mark which are required and which are optional.
token-form-path := +FORM. ; [required] string for lexical lookup
token-id-path := +ID. ; [optional] list of external ids
token-from-path := +FROM. ; [optional] surface start position
token-to-path := +TO. ; [optional] surface end position
token-postags-path := +TNT +TAGS. ; [optional] list of POS tags
token-posprobs-path := +TNT +PRBS. ; [optional] list of POS probabilities
;; lattice mapping settings
;; Feature paths for the input, output, context and position parts of a
;; lattice-mapping rule, as named by the keys.
lattice-mapping-input-path := +INPUT.
lattice-mapping-output-path := +OUTPUT.
lattice-mapping-context-path := +CONTEXT.
lattice-mapping-position-path := +POSITION.
; What follows is not technically a setting: it includes types that ACE needs
; to import but that PET (owner of english.tdl) doesn't need.
; NOTE(review): the comment names english.tdl, while grammar-top in this file
; is singlish.tdl -- confirm which file is meant.
:begin :type.
:include "../mtr".
:end :type.
; icons
;; Switches on ICONS and names the ICONS list path and the features holding
;; the left and right arguments.
enable-icons := yes.
mrs-icons-list := ICONS LIST.
icons-left := IARG1.
icons-right := IARG2.
;; Übertagging models.  All übertag-* settings below are currently commented
;; out, so none of them is active in this file.
;; Standard model
;; DPF 2019-11-20 - These next four should be uncommented for release
;übertag-emission-path := "../ut/nanc_wsj_redwoods_noaffix.ex.gz".
;übertag-transition-path := "../ut/nanc_wsj_redwoods_noaffix.tx.gz".
;übertag-generic-map-path := "../ut/generics.cfg".
;übertag-whitelist-path := "../ut/whitelist.cfg".
;;; Model trained on just trunk Redwoods
;;übertag-emission-path := "../ut/redwoods-train.ex.gz".
;;übertag-transition-path := "../ut/redwoods-train.tx.gz".
;;; Model trained on trunk Redwoods (gold) and NANC (not gold)
;;übertag-emission-path := "../ut/nanc-redwoods-train.ex.gz".
;;übertag-transition-path := "../ut/nanc-redwoods-train.tx.gz".
;; Size in megabytes, going by the key name.
freezer-megabytes := 512.
;; For PCFG parsing, to stamp [GENRE robust] on edges that fail unification
robustness-marker-path := GENRE.
robustness-marker-type := robust.
;; For improved unknown word handling in generation
;; NOTE(review): this setting uses "true" whereas the other switches in this
;; file use "enabled", "yes" or "no" -- confirm the value is accepted.
generics-overwrite-orth := true.