-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper.py
82 lines (66 loc) · 2.44 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
import numpy as np
import os
# Data visualization
import matplotlib.pyplot as plt
def pre_process_df(df):
    """
    Clean the raw ratings table and return the processed frame.

    Remove rows without ratings.
    Remove feature columns that have nulls after aggregating information per artist.
    Reset user ids to [0-n_unique_users].

    Args:
        df: pandas DataFrame holding at least the columns 'User', 'Artist',
            'LIKE_ARTIST', 'HEARD_OF' and 'OWN_ARTIST_MUSIC'.

    Returns:
        A new DataFrame containing only the rated rows, with 'User' replaced
        by its 0-based rank among the sorted unique user values and with the
        unusable feature columns removed. The input frame is not modified.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # Columns whose name contains 'Unnamed' are discarded (presumably stray
    # index columns from a CSV round-trip -- confirm against the data source).
    df = df.loc[:, ~df.columns.str.contains('Unnamed')]

    # Keep only rows that carry a rating. The explicit copy makes the writes
    # below safe (no SettingWithCopyWarning) and leaves the caller's frame alone.
    rated = df[df['LIKE_ARTIST'].notnull()].copy()
    print("Dataset Without Null Ratings Size: {:,}\n".format(len(rated)))

    # Reset user ids to 0..n_users-1: every value in 'User' is present in the
    # sorted unique array, so searchsorted returns exactly its sorted index.
    # This is vectorised, unlike a per-row lookup, and also works on an empty frame.
    sorted_users = np.sort(np.asarray(rated['User'].unique()))
    rated['User'] = np.searchsorted(sorted_users, rated['User'].to_numpy())

    # Drop feature columns that have nulls after aggregating per artist.
    # numeric_only=True skips text columns, which cannot be averaged and would
    # raise on recent pandas versions. The aggregation runs on the rated rows
    # so that it describes the frame that is actually returned.
    drop = ['HEARD_OF', 'OWN_ARTIST_MUSIC']
    artist_means = rated.groupby('Artist').mean(numeric_only=True)
    has_null = artist_means.isnull().any()
    drop.extend(col for col in artist_means.columns[has_null] if col not in drop)

    return rated.drop(columns=drop)
def plot_histogram(distribution, bins_num, xlabel, ylabel, title):
    """
    Print the mean of a distribution and display its histogram.

    Args:
        distribution: values to summarise and bin.
        bins_num: passed to matplotlib as the `bins` argument.
        xlabel: x-axis label; also used as the name in the printed summary.
        ylabel: y-axis label.
        title: figure title.
    """
    average = np.mean(distribution)
    print('{} avg. value: {:.2f}'.format(xlabel, average))

    plt.hist(distribution, bins=bins_num)
    # Decorate the axes once the bars are drawn, then render.
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()
def plot_xygraph(yvals, title, xlabel, ylabel):
    """
    Display a line plot of `yvals` against their positional index.

    Args:
        yvals: sequence of y values; matplotlib supplies the x positions.
        title: figure title.
        xlabel: x-axis label.
        ylabel: y-axis label.
    """
    plt.plot(yvals)
    # Axis decoration is independent of the drawn line.
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()
def plot_training(loss, accuracy, num_epochs):
    """
    Display per-epoch loss and accuracy curves on a shared axis.

    Args:
        loss: sequence of loss values, one per epoch (drawn in blue).
        accuracy: sequence of accuracy values, one per epoch (drawn in green).
        num_epochs: number of epochs; used to label the x ticks 1..num_epochs.
    """
    plt.title("Training Evaluation per Epoch")
    plt.xlabel("Epoch")
    plt.ylabel("Metrics")
    # Fix the colour order so the legend entries below match the curves.
    plt.gca().set_prop_cycle(color=['blue', 'green'])
    # Set x axis: plotted positions are 0-based, tick labels are 1-based epochs.
    plt.xticks(range(num_epochs), range(1, num_epochs + 1))
    plt.plot(loss)
    plt.plot(accuracy)
    plt.legend(['val_loss', 'val_accuracy'], loc='lower left')
    plt.show()
def plot_results(content, content_embed, collaborative, user, top):
    """
    Display a scatter plot comparing the recommendations of three models.

    Args:
        content: values for the feature-based content model, one per rank.
        content_embed: values for the embedding-based content model, one per rank.
        collaborative: values for the collaborative model, one per rank.
        user: user identifier shown in the title.
        top: number of ranks plotted; each input must hold `top` values.
    """
    # The title reflects the actual number of ranks instead of a fixed 5.
    plt.title("Top {} Recommendation for user {}".format(top, user))
    plt.xlabel("Rank")
    plt.ylabel("model")
    # Fix the colour order so the legend entries below match the series.
    plt.gca().set_prop_cycle(color=['blue', 'yellow', 'red'])
    # Set x axis: ranks are 1-based.
    ranks = range(1, top + 1)
    marker_size = 70
    transparency = 0.7
    for series in (content, content_embed, collaborative):
        plt.scatter(ranks, series, s=marker_size, alpha=transparency)
    plt.legend(['features content', 'embedding content', 'collaborative'])
    plt.show()