-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtext_alignment_process.py
29 lines (24 loc) · 999 Bytes
/
text_alignment_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
## Clean the raw text descriptions
import argparse
from collections import defaultdict
import json
# Initial placeholder; this name is rebound to each parsed JSON record when
# the alignment output file is read further down.
alignment_text = defaultdict(str)

# Command-line interface: --dataset names the sub-directory of "dataset/"
# whose alignment files are processed.
parser = argparse.ArgumentParser()
# required=True: the value is concatenated into file paths, so leaving it out
# would otherwise fail later with a TypeError on None instead of a clear
# usage message.
parser.add_argument(
    "--dataset",
    type=str,
    required=True,
    help="name of the directory under dataset/ to process",
)
args = parser.parse_args()
def parse_description(str):
    """Extract and normalise a description string.

    If the text contains "means", only the part after its first occurrence
    is kept, with surrounding whitespace removed. Every capital "A" is then
    replaced by the placeholder "[T]" and every capital "B" by "[H]", and
    leading/trailing double quotes are stripped.

    Args:
        str: Raw description text. The name shadows the builtin; it is kept
            unchanged so existing callers are not affected.

    Returns:
        The cleaned description.
    """
    text = str
    # partition() cuts at the first "means" only, so a description that
    # itself contains the word "means" is kept whole instead of truncated.
    _, marker, tail = text.partition("means")
    description = tail.strip() if marker else text
    # NOTE(review): this replaces every capital A/B, including ones inside
    # words; presumably descriptions use them only as placeholders - confirm.
    description = description.replace("A", "[T]").replace("B", "[H]")
    return description.strip('"')
# NOTE(review): the stdlib json module would likely suffice for parsing here;
# simplejson is kept to preserve existing behaviour.
import simplejson

# Maps each record's "Raw" value to its cleaned "Description".
relation_text_clean = defaultdict(str)
alignment_dir = "dataset/" + args.dataset + "/alignment/"

# The input is parsed as one JSON object per line.
with open(alignment_dir + "alignment_output.txt", "r", encoding="utf-8") as f:
    # Iterate the file lazily rather than loading every line up front.
    for line in f:
        # Skip blank lines (e.g. a trailing empty line), which would
        # otherwise make the JSON parser raise.
        if not line.strip():
            continue
        alignment_text = simplejson.loads(line)
        relation_text_clean[alignment_text["Raw"]] = parse_description(
            alignment_text["Description"]
        )

# Write the whole mapping as a single indented JSON document.
with open(alignment_dir + "alignment_clean.txt", "w", encoding="utf-8") as f:
    f.write(json.dumps(relation_text_clean, indent=1))