-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9259bcd
commit 2d6b12d
Showing
7 changed files
with
847 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
001-139 | ||
001-009 | ||
010-018 | ||
020-027 | ||
030-041 | ||
042-042 | ||
045-049 | ||
050-059 | ||
060-066 | ||
070-079 | ||
080-088 | ||
090-099 | ||
100-104 | ||
110-118 | ||
120-129 | ||
130-136 | ||
137-139 | ||
140-239 | ||
140-149 | ||
150-159 | ||
160-165 | ||
170-176 | ||
179-189 | ||
190-199 | ||
200-209 | ||
210-229 | ||
230-234 | ||
235-238 | ||
239-239 | ||
240-279 | ||
240-246 | ||
249-259 | ||
260-269 | ||
270-279 | ||
280-289 | ||
280 | ||
281 | ||
282 | ||
283 | ||
284 | ||
285 | ||
286 | ||
287 | ||
288 | ||
289 | ||
290-319 | ||
290-294 | ||
295-299 | ||
300-316 | ||
317-319 | ||
320-389 | ||
320-327 | ||
330-337 | ||
338-338 | ||
339-339 | ||
340-349 | ||
350-359 | ||
360-379 | ||
380-389 | ||
390-459 | ||
390-392 | ||
393-398 | ||
401-405 | ||
410-414 | ||
415-417 | ||
420-429 | ||
430-438 | ||
440-449 | ||
451-459 | ||
460-519 | ||
460-466 | ||
470-478 | ||
480-488 | ||
490-496 | ||
500-508 | ||
510-519 | ||
520-579 | ||
520-529 | ||
530-539 | ||
540-543 | ||
550-553 | ||
555-558 | ||
560-569 | ||
570-579 | ||
580-629 | ||
580-589 | ||
590-599 | ||
600-608 | ||
610-612 | ||
614-616 | ||
617-629 | ||
630-679 | ||
630-639 | ||
640-649 | ||
650-659 | ||
660-669 | ||
670-677 | ||
678-679 | ||
680-709 | ||
680-686 | ||
690-698 | ||
700-709 | ||
710-739 | ||
710-719 | ||
720-724 | ||
725-729 | ||
730-739 | ||
740-759 | ||
740 | ||
741 | ||
742 | ||
743 | ||
744 | ||
745 | ||
746 | ||
747 | ||
748 | ||
749 | ||
750 | ||
751 | ||
752 | ||
753 | ||
754 | ||
755 | ||
756 | ||
757 | ||
758 | ||
759 | ||
760-779 | ||
760-763 | ||
764-779 | ||
780-799 | ||
780-789 | ||
790-796 | ||
797-799 | ||
800-999 | ||
800-804 | ||
805-809 | ||
810-819 | ||
820-829 | ||
830-839 | ||
840-848 | ||
850-854 | ||
860-869 | ||
870-879 | ||
880-887 | ||
890-897 | ||
900-904 | ||
905-909 | ||
910-919 | ||
920-924 | ||
925-929 | ||
930-939 | ||
940-949 | ||
950-957 | ||
958-959 | ||
960-979 | ||
980-989 | ||
990-995 | ||
996-999 | ||
V01-V91 | ||
V01-V09 | ||
V10-V19 | ||
V20-V29 | ||
V30-V39 | ||
V40-V49 | ||
V50-V59 | ||
V60-V69 | ||
V70-V82 | ||
V83-V84 | ||
V85-V85 | ||
V86-V86 | ||
V87-V87 | ||
V88-V88 | ||
V89-V89 | ||
V90-V90 | ||
V91-V91 | ||
E000-E999 | ||
E000-E000 | ||
E001-E030 | ||
E800-E807 | ||
E810-E819 | ||
E820-E825 | ||
E826-E829 | ||
E830-E838 | ||
E840-E845 | ||
E846-E849 | ||
E850-E858 | ||
E860-E869 | ||
E870-E876 | ||
E878-E879 | ||
E880-E888 | ||
E890-E899 | ||
E900-E909 | ||
E910-E915 | ||
E916-E928 | ||
E929-E929 | ||
E930-E949 | ||
E950-E959 | ||
E960-E969 | ||
E970-E979 | ||
E980-E989 | ||
E990-E999 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
import os | ||
|
||
import numpy as np | ||
|
||
|
||
def parse_icd9_range(range_: str) -> (str, str, int, int):
    """Parse one ICD-9 range line such as ``' 001-009'``, ``' V01-V09'`` or ``' 280'``.

    Args:
        range_: A line holding either ``START-END`` or a single code. Codes may
            carry a ``V`` or ``E`` prefix; surrounding whitespace is ignored.

    Returns:
        A tuple ``(prefix, format_, start, end)`` where ``prefix`` is ``'V'``,
        ``'E'`` or ``''``, ``format_`` is the printf-style pattern used to
        zero-pad the numeric part (``'%02d'`` for ``V`` codes, ``'%03d'``
        otherwise), and ``start``/``end`` are the numeric bounds. Both bounds
        are inclusive; for a single code ``end == start``.

    Raises:
        IndexError: If ``range_`` is empty or whitespace only.
        ValueError: If a bound is not numeric after removing the prefix.
    """
    bounds = range_.strip().split('-')
    first = bounds[0]
    if first[0] in ('V', 'E'):
        prefix = first[0]
        format_ = '%02d' if prefix == 'V' else '%03d'
    else:
        prefix = ''
        format_ = '%03d'
    skip = len(prefix)
    start = int(first[skip:])
    # A single code is a one-element range. The end bound is inclusive (as it
    # is for 'START-END' lines), so it must equal start rather than start + 1.
    end = int(bounds[1][skip:]) if len(bounds) > 1 else start
    return prefix, format_, start, end
|
||
|
||
def generate_code_levels(path, code_map: dict) -> np.ndarray:
    """Assign a four-level hierarchy id vector to every code in ``code_map``.

    The hierarchy is read from ``<path>/icd9.txt``. Lines that start with a
    space are parsed as code ranges and receive level ids; any other non-empty
    line only acts as a separator that may advance the first-level counter.

    Args:
        path: Directory containing ``icd9.txt``.
        code_map: Mapping from code string (the part before ``'.'`` is the
            three-character category) to an integer row index.
            NOTE(review): presumably indices start at 1, leaving row 0 as
            padding - confirm against the caller.

    Returns:
        An integer array of shape ``(len(code_map) + 1, 4)`` whose row ``cid``
        holds ``[level1, level2, level3, level4]`` for the code mapped to
        ``cid``. Codes whose category is not found in the range file are
        printed and get ``[0, 0, 0, 0]``.
    """
    print('generating code levels ...')
    three_level_code_set = set(code.split('.')[0] for code in code_map)
    icd9_path = os.path.join(path, 'icd9.txt')
    # Context manager so the file handle is released deterministically.
    with open(icd9_path, 'r', encoding='utf-8') as icd9_file:
        icd9_range = icd9_file.readlines()

    three_level_dict = dict()
    level1, level2, level3 = (1, 1, 1)
    level1_can_add = False
    for range_ in icd9_range:
        range_ = range_.rstrip()
        if not range_:
            # Blank lines carry no information; indexing them would fail.
            continue
        if range_[0] == ' ':
            prefix, format_, start, end = parse_icd9_range(range_)
            matched_any = False
            for i in range(start, end + 1):
                code = prefix + format_ % i
                if code in three_level_code_set:
                    three_level_dict[code] = [level1, level2, level3]
                    level3 += 1
                    level1_can_add = True
                    matched_any = True
            # Only consume a second-level id when this range contained at
            # least one code that is actually used.
            if matched_any:
                level2 += 1
        elif level1_can_add:
            # A non-indented line after at least one matched code starts a new
            # first-level group.
            level1 += 1
            level1_can_add = False

    level4 = 1
    code_level = dict()
    for code in code_map:
        three_level_code = code.split('.')[0]
        if three_level_code in three_level_dict:
            code_level[code] = three_level_dict[three_level_code] + [level4]
            level4 += 1
        else:
            # Category absent from the range file: report it and use zeros.
            print(three_level_code)
            code_level[code] = [0, 0, 0, 0]

    code_level_matrix = np.zeros((len(code_map) + 1, 4), dtype=int)
    for code, cid in code_map.items():
        code_level_matrix[cid] = code_level[code]

    return code_level_matrix
|
||
|
||
def generate_patient_code_adjacent(code_x: np.ndarray, code_num: int) -> np.ndarray:
    """Build a binary patient-by-code indicator matrix.

    Args:
        code_x: One row of integer code ids per patient; entries that are not
            greater than zero are ignored.
        code_num: Largest code id; the result has ``code_num + 1`` columns.

    Returns:
        An integer array of shape ``(len(code_x), code_num + 1)`` with a 1 in
        column ``c`` of row ``i`` when ``c`` appears (as a positive id) in
        ``code_x[i]``.
    """
    print('generating patient code adjacent matrix ...')
    patient_count = len(code_x)
    adjacency = np.zeros((patient_count, code_num + 1), dtype=int)
    for patient_row, patient_codes in zip(adjacency, code_x):
        # patient_row is a view into adjacency, so the assignment writes through.
        positive_codes = patient_codes[patient_codes > 0]
        patient_row[positive_codes] = 1
    return adjacency
|
||
|
||
def generate_code_code_adjacent(code_num: int, code_level_matrix: np.ndarray) -> np.ndarray:
    """Score every ordered pair of distinct codes by their deepest shared level.

    Args:
        code_num: Largest code id; ids ``1..code_num`` are compared.
        code_level_matrix: Array indexed by code id whose rows hold four level
            ids (columns 0-3).

    Returns:
        An integer array of shape ``(code_num + 1, code_num + 1)``. For
        ``i != j`` (both >= 1) the entry is ``d + 1`` where ``d`` is the
        highest 1-based column at which the two rows hold equal values,
        searching from column 4 down to 1, or 0 if no column matches. Row 0,
        column 0 and the diagonal stay 0.
    """
    print('generating code code adjacent matrix ...')
    size = code_num + 1
    adjacency = np.zeros((size, size), dtype=int)
    for row in range(1, size):
        print('\r\t%d / %d' % (row, size), end='')
        for col in range(1, size):
            if row == col:
                continue
            levels_row = code_level_matrix[row]
            levels_col = code_level_matrix[col]
            # Walk from the finest level towards the coarsest until they agree.
            depth = 4
            while depth > 0 and levels_row[depth - 1] != levels_col[depth - 1]:
                depth -= 1
            adjacency[row, col] = depth + 1
    print('\r\t%d / %d' % (size, size))
    return adjacency
|
||
|
||
def co_occur(pids: np.ndarray,
             patient_admission: dict,
             admission_codes_encoded: dict,
             code_num: int) -> np.ndarray:
    """Build a symmetric binary code co-occurrence matrix.

    Two codes co-occur when they appear at different positions in the code
    list of the same admission. For every patient, all admissions except the
    last one in ``patient_admission[pid]`` are considered.

    Args:
        pids: Patient ids to process.
        patient_admission: Maps a patient id to a sequence of admission
            records, each a mapping with an ``'admission_id'`` key.
        admission_codes_encoded: Maps an admission id to its sequence of
            integer code ids.
        code_num: Largest code id; the result is ``(code_num + 1)`` square.

    Returns:
        A float array ``x`` of shape ``(code_num + 1, code_num + 1)`` with
        ``x[a, b] == x[b, a] == 1`` for each co-occurring pair and 0 elsewhere.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import combinations

    print('calculating co-occurrence ...')
    x = np.zeros((code_num + 1, code_num + 1), dtype=float)
    total = len(pids)
    for i, pid in enumerate(pids):
        print('\r\t%d / %d' % (i + 1, total), end='')
        # The last admission of each patient is deliberately left out.
        for admission in patient_admission[pid][:-1]:
            codes = admission_codes_encoded[admission['admission_id']]
            # combinations() pairs positions m < n, matching a nested index loop.
            for c_i, c_j in combinations(codes, 2):
                x[c_i, c_j] = 1
                x[c_j, c_i] = 1
    print('\r\t%d / %d' % (total, total))
    return x
Oops, something went wrong.