From 2f425df5df65d0ca5b1c7d113aa9de5ed5d29222 Mon Sep 17 00:00:00 2001 From: zinwang <32264884+zinwang@users.noreply.github.com> Date: Thu, 27 Jul 2023 13:34:58 +0800 Subject: [PATCH] Add showcase for computing t-closeness (#28) --- README.md | 39 +++++++++++++++++++ data/patient_anonymized.csv | 10 +++++ .../patient_hierarchy_Age.csv | 9 +++++ .../patient_hierarchy_Disease.csv | 6 +++ .../patient_hierarchy_ZIPCode.csv | 9 +++++ 5 files changed, 73 insertions(+) create mode 100644 data/patient_anonymized.csv create mode 100644 data/patient_hierarchy/patient_hierarchy_Age.csv create mode 100644 data/patient_hierarchy/patient_hierarchy_Disease.csv create mode 100644 data/patient_hierarchy/patient_hierarchy_ZIPCode.csv diff --git a/README.md b/README.md index 08ed971..e1417fa 100644 --- a/README.md +++ b/README.md @@ -334,6 +334,45 @@ $ python3 l-diversity.py } ``` +#### Compute the t-closeness + +```python +from PETWorks import PETValidation, report +from PETWorks.attributetypes import ( + SENSITIVE_ATTRIBUTE, + QUASI_IDENTIFIER, +) + +anonymized = "data/patient_anonymized.csv" +dataHierarchy = "data/patient_hierarchy" + +attributeTypes = { + "ZIPCode": QUASI_IDENTIFIER, + "Age": QUASI_IDENTIFIER, + "Disease": SENSITIVE_ATTRIBUTE, +} + +result = PETValidation( + None, + anonymized, + "t-closeness", + dataHierarchy=dataHierarchy, + attributeTypes=attributeTypes, + tLimit=0.376, +) +report(result, "json") +``` + +Execution Result + +``` +$ python3 t-closeness.py +{ + "t": 0.376, + "fulfill t-closeness": true +} +``` + #### Anonymize with the k-anonymity ```python diff --git a/data/patient_anonymized.csv b/data/patient_anonymized.csv new file mode 100644 index 0000000..7c68f1f --- /dev/null +++ b/data/patient_anonymized.csv @@ -0,0 +1,10 @@ +ZIPCode;Age;Salary;Disease +4767*;<=40;3;gastric ulcer +4760*;<=40;4;gastritis +4767*;<=40;5;stomach cancer +4790*;>40;6;gastritis +4790*;>40;11;flu +4790*;>40;8;bronchitis +4760*;<=40;7;bronchitis +4767*;<=40;9;pneumonia +4760*;<=40;10;stomach cancer diff --git a/data/patient_hierarchy/patient_hierarchy_Age.csv b/data/patient_hierarchy/patient_hierarchy_Age.csv new file mode 100644 index 0000000..256fd5e --- /dev/null +++ b/data/patient_hierarchy/patient_hierarchy_Age.csv @@ -0,0 +1,9 @@ +29;[20,30[;* +22;[20,30[;* +27;[20,30[;* +43;>=40;* +52;>=40;* +47;>=40;* +30;[30,40[;* +36;[30,40[;* +32;[30,40[;* diff --git a/data/patient_hierarchy/patient_hierarchy_Disease.csv b/data/patient_hierarchy/patient_hierarchy_Disease.csv new file mode 100644 index 0000000..7ddaa27 --- /dev/null +++ b/data/patient_hierarchy/patient_hierarchy_Disease.csv @@ -0,0 +1,6 @@ +gastric ulcer;stomach disease;digestive system disease;respiratory&digestive disease +gastritis;stomach disease;digestive system disease;respiratory&digestive disease +flu;respiratory infection;vascular lung disease;respiratory&digestive disease +bronchitis;respiratory infection;vascular lung disease;respiratory&digestive disease +pneumonia;respiratory infection;vascular lung disease;respiratory&digestive disease +stomach cancer;stomach disease;digestive system disease;respiratory&digestive disease diff --git a/data/patient_hierarchy/patient_hierarchy_ZIPCode.csv b/data/patient_hierarchy/patient_hierarchy_ZIPCode.csv new file mode 100644 index 0000000..6eba0c0 --- /dev/null +++ b/data/patient_hierarchy/patient_hierarchy_ZIPCode.csv @@ -0,0 +1,9 @@ +47677;4767*;476**;47***;4****;***** +47602;4760*;476**;47***;4****;***** +47678;4767*;476**;47***;4****;***** +47905;4790*;479**;47***;4****;***** +47909;4790*;479**;47***;4****;***** +47906;4790*;479**;47***;4****;***** +47605;4760*;476**;47***;4****;***** +47673;4767*;476**;47***;4****;***** +47607;4760*;476**;47***;4****;*****