# similarity_functions.py
import properties
import numpy as np
import pandas as pd


# Convert every value of a dict to its string representation so that all
# attributes can be rendered uniformly in the UI.
def convert_to_string(data_dict):
    temp_dict = {}
    for key, val in data_dict.items():
        temp_dict[key] = str(val)
    return temp_dict
# Obtain the scores produced by the RBDA algorithm. Note that the algorithm is
# not run via the UI; it is run offline and its scores are saved to disk.
def get_outlier_scores(user_id, flag=False):
    # Load the outlier scores so they can be shown in the UI.
    import pickle
    from pathlib import Path
    if flag:
        os_location = properties.user_os_dynname + ".pckl"
    else:
        os_location = properties.user_os_name + ".pckl"
    p = Path(os_location)
    if p.exists() and p.is_file():
        with open(os_location, "rb") as f:
            outlier_scores = pickle.load(f)
        # The file holds (user_id, score) pairs; pick the score for this user.
        score = [users[1] for users in outlier_scores if users[0] == user_id]
        if score:
            return score[0]
    return None
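
# For illustration: a minimal sketch of the score file get_outlier_scores()
# reads. Judging from the lookup above, it is a pickled list of (user_id, score)
# pairs produced by the offline RBDA run; the path and the example pairs here
# are hypothetical, not part of the original pipeline.
def _example_save_outlier_scores(path="example_outlier_scores.pckl"):
    import pickle
    example_scores = [(8, 1.92), (18, 0.47)]  # hypothetical (user_id, score) pairs
    with open(path, "wb") as f:
        pickle.dump(example_scores, f)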
# Determine dissimilarity. The threshold (tau) is computed over the distance
# differences from the nearest neighbor: tau = mean + alpha * std.
def dissimilarity_output(scores, alpha=1.0):
    close_score = scores[0]
    difference_scores = np.asarray([(scores[i] - close_score)
                                    for i in range(1, len(scores))])
    difference_scores_mean = np.mean(difference_scores)
    difference_scores_std = np.std(difference_scores)
    # 75th-percentile cut-off: anything greater is dissimilar within the
    # neighboring group (not used).
    # threshold = np.percentile(difference_scores, 75, interpolation="midpoint")
    threshold = difference_scores_mean + alpha * difference_scores_std
    score_indexes = np.where(difference_scores > threshold)
    return [score_index + 1 for score_index in score_indexes[0]], round(threshold, 7)
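
# A worked example of the threshold, assuming five sorted neighbor distances
# (made-up numbers): for scores [0.10, 0.12, 0.13, 0.45, 0.50] the differences
# from the closest neighbor are [0.02, 0.03, 0.35, 0.40], whose mean is 0.20
# and std is about 0.176, giving tau ~= 0.376. Only the last difference (0.40)
# exceeds tau, so the neighbor at position 4 is flagged as dissimilar.
def _example_dissimilarity_output():
    indexes, tau = dissimilarity_output([0.10, 0.12, 0.13, 0.45, 0.50], alpha=1.0)
    return indexes, tau  # -> [4], ~0.376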
# Create the distance table shown in the UI. The table option that displays
# distances is populated here.
def create_distance_table(query_id, links_list, scores_index, threshold):
    table_row = '<table class="table table-sm table-bordered "><tbody>'
    temp_row = '<tr><td>--</td>'
    dist_row = '<tr><td>' + str(query_id) + '</td>'
    for index, element in enumerate(links_list):
        if element["close"] == 1:
            # Closest neighbor: rendered in bold.
            temp_row += '<td><b>' + element["target"] + '</b></td>'
            dist_row += '<td><b>' + element["score"] + '</b></td>'
        else:
            if index in scores_index:
                # Dissimilar neighbor: highlighted in pink.
                temp_row += '<td bgcolor="pink">' + element["target"] + '</td>'
                dist_row += '<td bgcolor="pink">' + element["score"] + '</td>'
            else:
                temp_row += '<td>' + element["target"] + '</td>'
                dist_row += '<td>' + element["score"] + '</td>'
        if index == len(links_list) - 1:
            temp_row += "</tr>"
            dist_row += "</tr>"
    table_row += temp_row + dist_row + '</tbody></table>'
    p_row = '<p>Distance differences from the closest neighbor greater than the ' \
            '<span style="background-color:pink;">' + str(threshold) + '</span> threshold value are highlighted</p>'
    table_row += p_row
    return table_row
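
# A minimal usage sketch with made-up links: the first entry is the closest
# neighbor (rendered bold) and the second is flagged as dissimilar (its index,
# 1, is in scores_index, so it is highlighted in pink).
def _example_distance_table():
    links = [
        {"target": "18", "score": "0.1", "close": 1},
        {"target": "23", "score": "0.6", "close": 0},
    ]
    return create_distance_table("8", links, scores_index=[1], threshold=0.376)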
# Generate the nodes and links required for the force-directed graph.
def present_json(query_id, quest_cmb="all", k=5, simulate=False):
    import properties
    import utility
    import static_sim_functions as smf
    from sklearn.metrics.pairwise import cosine_distances
    # The HEOM import may be needed so the pickled kNN model with its custom
    # metric can be loaded.
    from HEOM import HEOM

    pres_dict = {}
    p_node_list = []
    p_link_list = []
    # Read the necessary model definitions. (Note: the names should ideally be
    # moved to a JSON properties file.)
    if simulate:
        exp_df = utility.load_model("/simulate/" + quest_cmb + "/" + quest_cmb + "_train_stat_q_data")
        train_sim_df = utility.load_model("/simulate/" + quest_cmb + "/" + "train_sim_data.pckl")
        encoding_dm_model = "/simulate/" + quest_cmb + "/" + quest_cmb + "_stat_q_data_oe_model"
        knn_model = "/simulate/" + quest_cmb + "/" + "knn_static"
    else:
        exp_df = utility.load_model(quest_cmb + "/" + quest_cmb + "_train_stat_q_data")
        train_sim_df = utility.load_model(quest_cmb + "/" + "train_sim_data.pckl")
        encoding_dm_model = quest_cmb + "/" + quest_cmb + "_stat_q_data_oe_model"
        knn_model = quest_cmb + "/" + "knn_static"

    # exp_df.reset_index(inplace=True, drop=True)
    query_data = smf.get_user_data(query_id, simulate=simulate)

    # Drop columns based on the question combination.
    drop_cols = ["tschq01", "tschq04-1", "tschq04-2", "tschq07-2", "tschq13", "tschq25"]
    if quest_cmb not in ["all", "overall"]:
        filtered_cols = [x for x in properties.quest_comb[quest_cmb] if x not in drop_cols]
        if quest_cmb == "bg_tinnitus_history":
            filtered_query_data = query_data[filtered_cols + ["tschq04"]]
        else:
            filtered_query_data = query_data[filtered_cols]
    else:
        filtered_query_data = query_data

    X = smf.preprocess(filtered_query_data, quest_cmb, age_bin=False,
                       process_model_name=encoding_dm_model,
                       prediction=True, save_model=False)

    # Note: this is the HEOM-based model.
    model = utility.load_model(knn_model)
    dist, idx = model.kneighbors(X.to_numpy()[:, 1:], n_neighbors=k)
    dist = dist.flatten()
    idx = idx.flatten()

    # Indexes of the scores to be highlighted.
    scores_index, threshold = dissimilarity_output(dist, alpha=1.0)

    # Construct the graph data structure.
    q_data_dict = query_data.to_dict("records")[0]
    p_init_dict = convert_to_string(q_data_dict)
    p_init_dict["query"] = 0
    p_init_dict["zscore"] = get_outlier_scores(int(query_id))
    p_node_list.append(p_init_dict)

    min_dist = min(dist)
    for i, val in enumerate(idx):
        user_id = train_sim_df.iloc[val]["user_id"]
        p_dict = exp_df[exp_df["user_id"] == int(user_id)].to_dict("records")[0]
        # Convert all values to strings.
        p_dict = convert_to_string(p_dict)
        p_dict["query"] = 1
        p_dict["zscore"] = get_outlier_scores(int(user_id))
        p_dict["score"] = str(dist[i])
        '''
        scales_arr = []
        for qval in ["tschq12", "tschq16", "tschq17"]:
            scale_dict = {}
            scale_dict["name"] = qval
            scale_dict["val"] = p_dict[qval]
            scales_arr.append(scale_dict)
        p_dict["pie_scales"] = scales_arr
        '''
        p_node_list.append(p_dict)

        p_link_dict = {}
        # The source is simply the query_id.
        p_link_dict["source"] = str(query_id)
        # All targets link to the query.
        p_link_dict["target"] = str(int(train_sim_df.iloc[val]["user_id"]))
        # Their distances.
        p_link_dict["score"] = str(round(dist[i], 7))
        p_link_dict["zscore"] = get_outlier_scores(int(user_id))
        if dist[i] > min_dist:
            p_link_dict["close"] = 0
        else:
            p_link_dict["close"] = 1
        p_link_list.append(p_link_dict)

    pres_dict["nodes"] = p_node_list
    pres_dict["links"] = p_link_list
    # Create the distance table.
    pres_dict["distance_table"] = create_distance_table(query_id, p_link_list, scores_index, threshold)
    return pres_dict
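
# A minimal call sketch, assuming the pickled models referenced above exist on
# disk and that "8" is a valid query user id (both assumptions for
# illustration):
#
#     graph = present_json("8", quest_cmb="all", k=5)
#     graph["nodes"], graph["links"], graph["distance_table"]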
# Get the heatmap data for visualization.
def get_patient_information(quest_cmb, query_id, nearest_pid, simulate=False):
    # Row-label-encode the query and load the encoded data, assuming that most
    # attributes are categorical.
    import utility
    import static_sim_functions as smf

    if simulate:
        encoded_data = utility.load_model("/simulate/" + quest_cmb + "/" + quest_cmb + "_stat_q_data_encoded")
        exp_df = utility.load_model("/simulate/" + quest_cmb + "/" + quest_cmb + "_stat_q_data")
        encoded_data_model = "/simulate/" + quest_cmb + "/" + quest_cmb + "_stat_q_data_oe_model"
    else:
        encoded_data = utility.load_model(quest_cmb + "/" + quest_cmb + "_stat_q_data_encoded")
        exp_df = utility.load_model(quest_cmb + "/" + quest_cmb + "_stat_q_data")
        encoded_data_model = quest_cmb + "/" + quest_cmb + "_stat_q_data_oe_model"

    # Load models according to the combination. (Could be moved to a helper.)
    query_data = smf.get_user_data(query_id, simulate=simulate)

    # Before preprocessing, remove the columns from the dataset by combination.
    drop_cols = ["tschq01", "tschq04-1", "tschq04-2", "tschq07-2", "tschq13", "tschq25"]
    if quest_cmb not in ["all", "overall"]:
        filtered_cols = [x for x in properties.quest_comb[quest_cmb] if x not in drop_cols]
        if quest_cmb == "bg_tinnitus_history":
            filtered_query_data = query_data[filtered_cols + ["tschq04"]]
        else:
            filtered_query_data = query_data[filtered_cols]
    else:
        filtered_query_data = query_data

    query_id_row_encoded = smf.preprocess(filtered_query_data, quest_cmb, age_bin=False,
                                          process_model_name=encoded_data_model,
                                          prediction=True)
    nearest_pid_row = encoded_data[encoded_data["user_id"] == int(nearest_pid)]

    # Keep only numeric data for visualization, or ordinals, which can all be
    # normalized.
    nearest_pid_row_selected = nearest_pid_row
    query_id_row_selected = query_id_row_encoded
    combined_p_df = pd.concat([query_id_row_selected, nearest_pid_row_selected])
    combined_p_df.set_index("user_id", inplace=True)

    import patient
    import patient_view

    parent_data_objs = []
    for index in combined_p_df.index:
        row_dict = convert_to_string(pd.Series.to_dict(combined_p_df.loc[index]))
        # TODO: get the actual attributes for the heatmap; revisit later.
        for key, val in row_dict.items():
            obj = patient.Patient(index, key, val)
            parent_data_objs.append(obj)

    import jsonpickle
    data = patient_view.PatientView(parent_data_objs)
    json_data = jsonpickle.encode(data, unpicklable=False)
    return json_data
def create_patient_objs(key, series, var_type="tinnitus_distress"):
    import patient

    patient_data_list = []
    for row in series:
        if var_type == "tinnitus_distress":
            pObj = patient.TemporalPatient(str(round(row[1], 3)), str(round(row[3], 3)))
        else:
            pObj = patient.TemporalPatient(str(round(row[1], 3)), str(round(row[2], 3)))
        patient_data_list.append(pObj)
    p_list_obj = patient.TemporalPatientList(key, patient_data_list)
    return p_list_obj
# Visualize the time series by realigning them.
def get_patient_ts(query_id, nearest_id, tsuser_avg_grp, var_type):
    import patient_view
    import jsonpickle

    patients_temporal_list = []
    query_id = int(query_id)
    nearest_id = int(nearest_id)
    id_list = [query_id, nearest_id]
    for ids in id_list:
        val = tsuser_avg_grp.get_user_mday_ts_visualize(ids)
        if val is not None:
            patients_temporal_list.append(create_patient_objs(ids, val, var_type))
    temporal_patient_list = patient_view.TemporalPatientView(patients_temporal_list)
    json_output = jsonpickle.encode(temporal_patient_list, unpicklable=False)
    return json_output
# Get the time series of the query patient for making point predictions.
def get_query_ts(query_id, tsg_usr_data):
    import patient_view
    import jsonpickle

    patients_temporal_list = []
    val = tsg_usr_data.get_usr_mday_ts_predict(int(query_id))
    if val is not None:
        patients_temporal_list.append(create_patient_objs(int(query_id), val, "tinnitus_distress"))
    temporal_patient_list = patient_view.TemporalPatientView(patients_temporal_list)
    json_output = jsonpickle.encode(temporal_patient_list, unpicklable=False)
    return json_output
def weighted_average(distress_list):
    # Note: despite the name, this currently computes a plain (unweighted) mean
    # of the neighbors' distress values.
    average = np.asarray(distress_list, dtype=float).mean()
    return average
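
# The comment above compute_linear_regression() below notes that the averaging
# can be modified to weight neighbors by distance. A minimal sketch of such a
# variant, assuming `distances` holds each neighbor's distance to the query
# (this helper is hypothetical and not used by the module):
def _distance_weighted_average_sketch(values, distances, eps=1e-8):
    # Closer neighbors receive larger weights via inverse distance.
    weights = 1.0 / (np.asarray(distances, dtype=float) + eps)
    return float(np.average(np.asarray(values, dtype=float), weights=weights))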
def compute_weighted_avg(user_id, stress, nearest_n, prediction_at, ts_method_obj):
    import patient
    import patient_view
    import jsonpickle

    user_ts = ts_method_obj.get_usr_mday_ts_predict(int(user_id))
    user_ts_idx = user_ts[:, 1]
    pred_idx = int(np.where(user_ts_idx == prediction_at)[0][0])
    if pred_idx - (len(user_ts_idx) - 1) == 0:
        # Last observed point: there is no ground truth, so only forecast the
        # next three day indexes.
        ref_pred_at = prediction_at
        prediction_at_list = list()
        for i in range(0, 3):
            ref_pred_at += 1
            prediction_at_list.append(ref_pred_at)
    else:
        # Otherwise use the reference points that are actually available.
        # Note: this is our assumption and can be changed here.
        prediction_at_list = user_ts_idx[pred_idx + 1:pred_idx + 4].tolist()
        if len(prediction_at_list) < 3:
            # Pad with consecutive day indexes until three points exist.
            len_p_list = len(prediction_at_list)
            day_prop = 1
            for _ in range(len_p_list, 3):
                prev_day_idx_val = prediction_at_list[len(prediction_at_list) - 1]
                prediction_at_list.append(prev_day_idx_val + day_prop)

    preds = list()
    # Predictions for three time points.
    for val in prediction_at_list:
        distress_list = list()
        for n_user_id in nearest_n:
            # For each neighbor, get the time series and find the observation
            # closest in time to the target day index.
            user_ts = ts_method_obj.get_usr_mday_ts_predict(int(n_user_id))
            diff_arr = np.abs(np.subtract(val, user_ts[:, 1]))
            diff_near_idx = np.where(diff_arr == diff_arr.min())
            usr_vals = np.array([user_ts[:, 3][n_idx] for n_idx in diff_near_idx])
            if len(usr_vals) > 0:
                # Average when several observations are equally near in time.
                value = np.average(usr_vals)
                distress_list.append(value)
        preds.append((val, weighted_average(distress_list)))

    patient_data_list = []
    pObj = patient.TemporalPatient(str(prediction_at), str(stress))
    patient_data_list.append(pObj)
    for time, p_stress in preds:
        pObj = patient.TemporalPatient(str(time), str(round(p_stress, 3)))
        patient_data_list.append(pObj)
    temporal_patient_list = patient_view.TemporalPatientView(patient_data_list)
    json_output = jsonpickle.encode(temporal_patient_list, unpicklable=False)
    return json_output
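
# A worked example of the nearest-time-point lookup above, with made-up data:
# for a target day index of 5 and a neighbor observed at day indexes [1, 4, 7],
# the absolute differences are [4, 1, 2], so the observation at position 1
# (day index 4) supplies that neighbor's value.
def _example_nearest_time_match():
    neighbor_days = np.asarray([1, 4, 7])
    diff_arr = np.abs(np.subtract(5, neighbor_days))
    return np.where(diff_arr == diff_arr.min())  # -> (array([1]),)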
# A neighbor-averaging approach using linear regression. This can be modified
# to weight neighbors by distance instead of averaging them uniformly.
def compute_linear_regression(user_id, stress, nearest_n, prediction_at, ts_method_obj):
    import patient
    import patient_view
    import jsonpickle
    from sklearn.linear_model import LinearRegression

    preds = list()
    # Predictions for three time points.
    for i in range(int(prediction_at) + 1, int(prediction_at) + 4):
        intercepts_list = list()
        coeff_list = list()
        for n_u_id in nearest_n:
            # For each neighbor, get the time series.
            user_ts = ts_method_obj.get_usr_mday_ts_predict(int(n_u_id))
            # Take the series up to the time point and fit a linear regression.
            diff_arr = np.abs(np.subtract(i, user_ts[:, 1]))
            diff_near_idx = np.where(diff_arr == diff_arr.min())
            print("minimum to the time point is at -- ", diff_near_idx)
            # Nearest index; handles differing series lengths between users.
            usr_idx = diff_near_idx[0][0]
            user_ts_p = user_ts[:usr_idx]
            user_ts_df = pd.DataFrame(user_ts_p, columns=["day", "day_sess_index",
                                                          "s02", "s03", "s04",
                                                          "s05", "s06", "s07"])
            X = user_ts_df[["day_sess_index"]]
            # We show tinnitus distress (s03); this can be extended to the
            # other physiological variables as well.
            y = user_ts_df[["s03"]]
            # Fit with X as time and y as the s03 value to predict. (The
            # deprecated `normalize=True` argument has been dropped; scale the
            # features beforehand if normalization is needed.)
            reg_fit = LinearRegression()
            reg_fit.fit(X, y)
            # Collect the line's coefficient and intercept.
            intercepts_list.append(reg_fit.intercept_)
            coeff_list.append(reg_fit.coef_)
        print("Predicting the value of s03 from the averaged slopes and "
              "intercepts of the neighbors' observations")
        # y = mx + c, where m is the average slope of the neighbors and c is
        # the average intercept obtained.
        print("The equation to estimate s03 for the user is {}".format(
            str(np.asarray(coeff_list).mean()) + " * time_index + " + str(np.asarray(intercepts_list).mean())))
        y = np.multiply(np.asarray(coeff_list).mean(), i) + np.asarray(intercepts_list).mean()
        preds.append((i, y))

    patient_data_list = []
    pObj = patient.TemporalPatient(str(prediction_at), str(stress))
    patient_data_list.append(pObj)
    for time, p_stress in preds:
        pObj = patient.TemporalPatient(str(time), str(round(p_stress, 3)))
        patient_data_list.append(pObj)
    temporal_patient_list = patient_view.TemporalPatientView(patient_data_list)
    json_output = jsonpickle.encode(temporal_patient_list, unpicklable=False)
    return json_output
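
# A worked example of the slope/intercept pooling above, with made-up numbers:
# two neighbor fits y = 0.5x + 2 and y = 1.5x + 4 average to y = 1.0x + 3, so
# the estimate at time index x = 10 is 13.0.
def _example_pooled_line(x=10):
    coeffs = np.asarray([0.5, 1.5])
    intercepts = np.asarray([2.0, 4.0])
    return coeffs.mean() * x + intercepts.mean()  # -> 13.0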
### Dynamic functions to present data.
def present_json_ts(query_id, k=5):
    # Load the train and test data.
    import utility
    import properties

    p_link_list = []
    p_node_list = []
    train_data = utility.load_model("dynamic_ts/" + "dynamic_ts_train_data")
    test_data = utility.load_model("dynamic_ts/" + "dynamic_ts_test_data")
    knn_ts = utility.load_model("dynamic_ts/" + "dynamic_ts_knn")
    query_data = test_data[test_data["user_id"] == int(query_id)]
    exp_df = utility.load_model("overall" + "/" + "overall" + "_stat_q_data")

    dist, idx = knn_ts.kneighbors(query_data[train_data.index], n_neighbors=k)
    dist = dist.flatten()
    idx = idx.flatten()
    # Indexes of the scores to be highlighted.
    scores_index, threshold = dissimilarity_output(dist, alpha=1.0)

    query_data = exp_df[exp_df["user_id"] == int(query_id)].to_dict("records")[0]
    p_init_dict = convert_to_string(query_data)
    p_init_dict["query"] = 0
    p_init_dict["zscore"] = get_outlier_scores(int(query_id))
    p_node_list.append(p_init_dict)

    min_dist = min(dist)
    train_data_users = train_data["user_id"].to_numpy()
    nn_dict = {}
    for i, val in enumerate(idx):
        user_id = train_data_users[val]
        node = exp_df[exp_df["user_id"] == int(user_id)].to_dict("records")[0]
        node_dict = convert_to_string(node)
        node_dict["query"] = 1
        node_dict["score"] = str(dist[i])
        node_dict["zscore"] = get_outlier_scores(int(user_id))
        p_node_list.append(node_dict)

        link = {}
        link["source"] = str(query_id)
        link["target"] = str(int(train_data.iloc[val]["user_id"]))
        link["score"] = str(round(dist[i], 7))
        link["zscore"] = get_outlier_scores(int(user_id))
        if dist[i] > min_dist:
            link["close"] = 0
        else:
            link["close"] = 1
        p_link_list.append(link)

    nn_dict["nodes"] = p_node_list
    nn_dict["links"] = p_link_list
    # Create the distance table.
    nn_dict["distance_table"] = create_distance_table(query_id, p_link_list, scores_index, threshold)
    return nn_dict
'''
This plot compares the distribution of the patients' time series recordings by
hour of day, binned from Morning to Night via feature creation (see Explore.py),
and shows the resulting distributions side by side for comparison.
Hypothesis: the nearest neighbor's and the query's time series recordings may be
similar during parts of the day (this can change).
'''
def call_box_plot(user_data, var_type, col_dict, query_id):
    def custom_sort(x):
        # Map each session label to its position within the day for sorting.
        pos_session = {
            "Early Morning": 0,
            "Morning": 1,
            "Afternoon": 2,
            "Evening": 3,
            "Night": 4,
            "Late Night": 5
        }
        return str(pos_session[x])

    import plotly.graph_objs as go
    import numpy as np

    # Prefix each session with its position so string sorting yields day order.
    sorted_hour_bins = []
    for sess in user_data["hour_bins"].unique():
        sorted_hour_bins.append(custom_sort(sess) + "-" + sess)
    sorted_hour_bins = np.sort(sorted_hour_bins)
    col_list = [col_dict["box"] for i in range(len(sorted_hour_bins))]
    creation_date_list = np.sort(user_data["created_at"].unique())
    var_dict = {
        "s02": "Tinnitus Loudness",
        "s03": "Tinnitus Distress",
        "s04": "Wellness of hearing",
        "s05": "Limited by hearing ability",
        "s06": "Level of stress",
        "s07": "Level of Exhaustion"
    }
    fig = go.Figure()
    for creation_date, hour_bin, col in zip(creation_date_list, sorted_hour_bins, col_list):
        hour_sess = hour_bin.split("-")[1]
        fig.add_trace(go.Box(
            y=user_data[user_data["hour_bins"] == hour_sess][str(var_type)].to_numpy(),
            name=str(hour_sess),
            hovertext=["(Date:" + str(d).split("T")[0] + "), (Time:" + str(d).split("T")[1].split(".")[0] + ")"
                       for d in
                       user_data[user_data["hour_bins"] == hour_sess]["created_at"].to_numpy()],
            jitter=0.3,
            pointpos=0,
            boxmean=True,
            boxpoints='all',  # show all points
            marker=dict(
                color=col_dict["marker"],
                size=1.5,
                line=dict(
                    color=col_dict["line"],
                    width=1
                )),
            fillcolor=str(col),
            width=0.6
        ))
    fig.update_layout(
        width=505,
        height=400,
        boxgap=0.05,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        title_text="(User - {}, Feature - {})"
                   .format(str(query_id), str(var_dict[str(var_type)])))
    fig.update_xaxes(title_text="Session")
    fig.update_yaxes(title_text=str(var_dict[str(var_type)]))
    fig.update_layout(showlegend=False)
    return fig.to_dict()
def create_boxplot_compare(query_list, var_type, simulate=False):
    # query_list: the first element is the query id and the second is the
    # clicked nearest neighbor (e.g. [8, 18]).
    # var_type: the time series variable used for the visualization (e.g. "s03").
    import json
    import plotly
    from exploration import Explore

    query_color_dict = {"box": "rgb(255,159,0)", "marker": "rgb(200, 90, 50)", "line": "rgb(21,38,53)"}
    nn_col_dict = {"box": "rgb(0, 91, 131)", "marker": "rgb(50, 0, 200)", "line": "rgb(255, 102, 44)"}
    explore_ts = Explore(simulate)
    ts_feature_df = explore_ts.features_df
    # Create the required plot per user and add it.
    graph_data = list()
    for idx, user_id in enumerate(query_list):
        user_id_data = ts_feature_df[ts_feature_df["user_id"] == int(user_id)]
        if idx == 0:
            figure_dict = call_box_plot(user_id_data, var_type, query_color_dict, user_id)
        else:
            figure_dict = call_box_plot(user_id_data, var_type, nn_col_dict, user_id)
        graph_data.append(figure_dict)
    graph_data = json.dumps(graph_data, cls=plotly.utils.PlotlyJSONEncoder)
    return graph_data
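
# A minimal call sketch, mirroring the example values noted in the parameter
# comments above (query user 8, clicked nearest neighbor 18, tinnitus distress
# "s03"); it assumes the Explore feature data is available:
#
#     graphs_json = create_boxplot_compare([8, 18], "s03")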