-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeid.py
68 lines (45 loc) · 1.79 KB
/
deid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import tempfile
from pathlib import Path
from deid_app.robust_app import RobustDeID
# Relative directory name (resolved against the current working directory)
# that this script uses for temporary files.
temdir = '.tmp'
# Make the tempfile module use that same directory as its global default.
# NOTE(review): nothing visible in this file creates the directory - confirm
# it exists before any tempfile call is made.
tempfile.tempdir = temdir
def delete_files(path: Path):
    """
    Remove every regular file located directly inside ``path``.

    Only the immediate children of ``path`` are examined (the glob pattern
    is not recursive); entries for which ``is_file()`` is false, such as
    subdirectories, are left in place.
    """
    for entry in filter(Path.is_file, path.glob('*')):
        entry.unlink()
def deid(texts: list, app: RobustDeID):
    """
    Run the de-identification pipeline of ``app`` over ``texts``.

    Each text is wrapped in a note dict carrying a positional ``note_id``
    (``note_0``, ``note_1``, ...), the fixed ``patient_id`` ``"patient_1"``
    and an empty ``spans`` list. The notes are then passed through the
    app's NER dataset builder, its prediction generator and its text
    replacement step, in that order.

    Returns a list with one entry per result produced by the replacement
    step; each entry is the list of items yielded by
    ``RobustDeID._get_highlights`` for that result's ``'deid_text'``.
    """
    notes = [
        {
            "text": body,
            "meta": {"note_id": f"note_{position}", "patient_id": "patient_1"},
            "spans": [],
        }
        for position, body in enumerate(texts)
    ]

    ner_dataset = app.get_ner_dataset_from_json_list(notes)
    # Materialize the predictions before handing them to the replacement step.
    predicted = list(app.get_predictions_from_generator(ner_dataset))
    replaced = list(app.get_deid_text_replaced_from_values(notes, predicted))

    # Pull out the de-identified text of every result first, then compute
    # the highlights for each of them.
    deid_texts = [record['deid_text'] for record in replaced]
    return [list(RobustDeID._get_highlights(one_text)) for one_text in deid_texts]
if __name__ == "__main__":
    # Sample input: a single-line string (the trailing backslashes join the
    # source lines) of DICOM-style "Private Creator" / "Private tag data" fields.
    sample_text = """\
Private Creator: GEIIS, Private Creator: CTP, Private tag data: Pseudo-PHI-DICOM-Data, Private tag data: 87009668, \
Private Creator: SIEMENS CSA HEADER, Private Creator: SIEMENS MEDCOM HEADER, Private Creator: SIEMENS MEDCOM OOG, \
Private Creator: GEIIS PACS, Private Creator: GEIIS, Private Creator: GEIIS\
"""

    # Build the de-identification app from a model name and a threshold label.
    deid_app = RobustDeID("OBI-RoBERTa De-ID", "No threshold")

    # Logger names; this list is not used anywhere below.
    loggers = ['robust_deid.sequence_tagging.sequence_tagger']

    # Process five copies of the same sample text in one call.
    batch = [sample_text for _ in range(5)]
    results = deid(batch, deid_app)
    for note_highlights in results:
        print(note_highlights)

    # Cleanup of the temporary directory is currently disabled:
    # delete_files(Path(temdir))