# OpWT_population_structure.py
#%% import modules
import numpy as np
import pandas as pd
import scipy.stats as stats
import pickle
import ast
#%% load data
# Test sets without and with the intervention. read_table() splits on tabs by
# default.
# NOTE(review): both files carry a .csv extension -- confirm they really are
# tab-delimited, otherwise each row is read as a single column.
df_test = pd.read_table("test.csv")
df_test_intervention = pd.read_table("test_intervention.csv")
age_prop = pd.read_csv("prop.table.csv")

# Results table of the trained models.
rskf_results = pd.read_csv("./results/rskf_results.csv")

# Rank the rows by accuracy, best first, and take the cell at column
# position 3 of the top row. That cell is expected to be the text form of a
# bytes literal: literal_eval() turns it back into bytes and pickle rebuilds
# the classifier from them.
_ranked_results = rskf_results.sort_values(by="Accuracy", ascending=False)
_best_clf_literal = _ranked_results.iloc[0, 3]
# SECURITY: pickle.loads() can execute arbitrary code. Only run this against
# a results file you produced yourself.
best_clf = pickle.loads(ast.literal_eval(_best_clf_literal))

# Predictions for the pre- and post-intervention test sets.
age_prop_pred = best_clf.predict(df_test)
age_prop_int_pred = best_clf.predict(df_test_intervention)
# Reconstruct the age structure of the population and compare distributions.
#
# Three comparisons are run, each the same way:
#   1. a two-sample Kolmogorov-Smirnov test on the raw values, and
#   2. a half-logistic fit to each sample, followed by a two-sample KS test
#      on the two fitted parameter tuples.
# Every result is bound to its own name so that none is discarded or
# overwritten by a later comparison.
#
# NOTE(review): halflogistic.fit() returns the fitted parameters (loc, scale),
# so step 2 runs the KS test on two 2-element tuples rather than on samples.
# Confirm this is intended; a goodness-of-fit test of each sample against its
# fitted distribution may be what was meant.

# --- Post intervention: observed vs. predicted -----------------------------
# NOTE(review): the observed values are taken from the DataFrame index. If the
# file is read with a default RangeIndex these are just row numbers -- confirm
# that the index really carries the observed data.
true = df_test_intervention.index.values
pred = age_prop_int_pred
ks_fit_int = stats.ks_2samp(true, pred)
# NOTE(review): chisquare() expects f_obs and f_exp to have matching totals
# and non-zero expected frequencies -- confirm that holds for these inputs.
chisq_fit_int = stats.chisquare(f_obs=pred, f_exp=true)
# Test of half-logistic fits
hl_true_int = stats.halflogistic.fit(true)
hl_pred_int = stats.halflogistic.fit(pred)
hl_fit_true_pred_int = stats.ks_2samp(hl_true_int, hl_pred_int)

# --- Observed values: pre vs. post intervention ----------------------------
true_pre_int = df_test.index.values
true_post_int = df_test_intervention.index.values
ks_true_pre_post = stats.ks_2samp(true_pre_int, true_post_int)
# Test of half-logistic fits
hl_true_pre = stats.halflogistic.fit(true_pre_int)
hl_true_post = stats.halflogistic.fit(true_post_int)
hl_fit_true_pre_post = stats.ks_2samp(hl_true_pre, hl_true_post)

# --- Predicted values: pre vs. post intervention ---------------------------
pred_pre_int = age_prop_pred
pred_post_int = age_prop_int_pred
ks_pred_pre_post = stats.ks_2samp(pred_pre_int, pred_post_int)
# Test of half-logistic fits
hl_pred_pre = stats.halflogistic.fit(pred_pre_int)
hl_pred_post = stats.halflogistic.fit(pred_post_int)
hl_fit_pred_pre_post = stats.ks_2samp(hl_pred_pre, hl_pred_post)

# Backward compatibility: these three names used to be reused by every
# comparison and therefore ended up holding the predicted pre/post results.
# Keep them bound to the same values; prefer the specific names above.
hl_true, hl_pred, hl_fit_int = hl_pred_pre, hl_pred_post, hl_fit_pred_pre_post