-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgraph_alg_country_gender_final.py
265 lines (227 loc) · 12.8 KB
/
graph_alg_country_gender_final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
import networkx as nx
import bellmanford as bf
import numpy as np
import miqp_country_gender as mcg
import scipy.stats as sp
import random
import matplotlib.pyplot as plt
def transfer_multiple_worker(cycle, G, matr_team_country_gender, wmatr_team_country, lambd1, lambd2):
new_matr=np.copy(matr_team_country_gender)
for i in range(len(cycle)-1):
snode = cycle[i]
dnode = cycle[i+1]
t_snode = int(snode[1 : snode.find('C')])
t_dnode = int(dnode[1 : dnode.find('C')])
c_snode = int(snode[snode.find('C')+1 : snode.find('G')])
c_dnode = int(dnode[dnode.find('C')+1 : dnode.find('G')])
g_snode = int(snode[snode.find('G')+1 : len(snode)-3])
g_dnode = int(dnode[dnode.find('G')+1 : len(dnode)-3])
if((t_snode != t_dnode) and (c_snode == c_dnode) and g_snode == g_dnode):
if((snode[-3:] == 'out')):
new_matr[t_snode][c_snode][g_snode] -= 1
new_matr[t_dnode][c_dnode][g_dnode] += 1
#the following case should not happen
elif((dnode[-3:] == 'out')):
new_matr[t_snode][c_snode][g_snode] += 1
new_matr[t_dnode][c_dnode][g_dnode] -= 1
matt_diff = new_matr - matr_team_country_gender
while True:
temp_matr = new_matr + matt_diff
if (temp_matr >= 0).all():
new_fitness = objective_function(temp_matr, temp_matr.shape[0], temp_matr.shape[1], temp_matr.shape[2], wmatr_team_country, lambd1, lambd2)
old_fitness = objective_function(new_matr, new_matr.shape[0], new_matr.shape[1], new_matr.shape[2], wmatr_team_country, lambd1, lambd2)
if new_fitness >= old_fitness:
break
else:
new_matr = temp_matr
else:
break
return new_matr
def worker_country_gender(matr_team_country_gender, wmatr_team_country, num_teams,num_countries, num_genders,lambd1, lambd2):
st = 'T0C0G0inp'
ed = 'T0C0G0out'
while(True):
G = find_graph_weighted_gender(matr_team_country_gender, wmatr_team_country, num_teams,num_countries, num_genders,lambd1, lambd2)
length, cycle, negative_cycle = bf.bellman_ford(G, st, ed, weight = 'weight')
if negative_cycle is False:
break
matr_team_country_gender = transfer_multiple_worker(cycle, G, matr_team_country_gender, wmatr_team_country, lambd1, lambd2)
if((length > -1e-10) & (negative_cycle==True)):
print ("Small value", length)
break
return matr_team_country_gender
def find_graph_weighted_gender(matr_team_country_gender, wmatr_team_country, num_teams,num_countries, num_genders,lambd1, lambd2):
G = nx.DiGraph()
labels=find_node_labels_gated_gender(num_teams,num_countries,num_genders)
G.add_nodes_from(labels)
for i in range(num_teams):
for j in range(num_countries):
for k in range(num_genders):
src ='T'+str(i)+'C'+str(j)+'G'+str(k)+'inp'
sink ='T'+str(i)+'C'+str(j)+'G'+str(k)+'out'
if(i==0):
G.add_edge(src, sink, weight = 0)
else:
G.add_edge(src, sink, weight = -2*lambd1-2*lambd2)
#Loop over nodes within same team, same country but other genders
other_genders=[indx for indx in range(num_genders)]
other_genders.remove(k)
for l in other_genders:
sink_other_gender ='T'+str(i)+'C'+str(j)+'G'+str(l)+'out'
if (i==0):
G.add_edge(src, sink_other_gender, weight = 0)
else:
G.add_edge(src, sink_other_gender, weight = -2*lambd1)
#Loop over nodes within same team, same gender, but different country
other_countries = [indx for indx in range(num_countries)]
other_countries.remove(j)
for l in other_countries:
sink_other_countrie ='T'+str(i)+'C'+str(l)+'G'+str(k)+'out'
if (i==0):
G.add_edge(src, sink_other_countrie, weight = 0)
else:
G.add_edge(src, sink_other_countrie, weight = -2*lambd2)
#Loop over nodes within same team, different gender, and different country
for p in range(num_countries):
for q in range(num_genders):
if p == j or q == k:
continue
else:
G.add_edge(src, 'T'+str(i)+'C'+str(p)+'G'+str(q)+'out', weight = 0)
#Loop over nodes within same country and gender but other teams
other_teams = [indx for indx in range(num_teams)]
other_teams.remove(i)
for i2 in other_teams:
sink_other_team = 'T'+str(i2)+'C'+str(j)+'G'+str(k)+'inp'
weight = 0
#if sink_other_team == 'T1C2G1inp' and sink == 'T2C2G1out':
if(matr_team_country_gender[i][j][k]>0):
num_i_j = sum(matr_team_country_gender[i][j][x] for x in range(num_genders))
num_i_k = sum(matr_team_country_gender[i][x][k] for x in range(num_countries))
num_i2_j = sum(matr_team_country_gender[i2][j][x] for x in range(num_genders))
num_i2_k = sum(matr_team_country_gender[i2][x][k] for x in range(num_countries))
if (i2 != 0 ):
weight += (wmatr_team_country[i2][j])+lambd1*(1+2*num_i2_j) + lambd2*(1+2*num_i2_k)
if (i != 0):
weight += (-wmatr_team_country[i][j])+lambd1*(1-2*num_i_j) + lambd2*(1-2*num_i_k)
else:
weight=1000000
G.add_edge(sink, sink_other_team, weight=weight)
return G
def find_node_labels_gated_gender(num_teams,num_countries,num_genders):
prestring='T'
labels=[0 for i in range(2*num_teams*num_countries*num_genders)]
for i in range(num_teams):
for j in range(num_countries):
for k in range(num_genders):
nodelabel=prestring+str(i)+'C'+str(j)+'G'+str(k)+'inp'
labels[2*i*num_countries*num_genders+2*j*num_genders+2*k]=nodelabel
nodelabel=prestring+str(i)+'C'+str(j)+'G'+str(k)+'out'
labels[2*i*num_countries*num_genders+2*j*num_genders+2*k+1]=nodelabel
return labels
def objective_function(matr_team_country_gender, num_teams_plus_one, num_countries, num_genders, weights, lambd1, lambd2):
team_country = [[0 for c in range(num_countries)] for t in range(num_teams_plus_one)]
for t in range(num_teams_plus_one):
for c in range(num_countries):
team_country[t][c] = sum(matr_team_country_gender[t][c][g] for g in range(num_genders))
team_gender = [[0 for g in range(num_genders)] for t in range(num_teams_plus_one)]
for t in range(num_teams_plus_one):
for g in range(num_genders):
team_gender[t][g] = sum(matr_team_country_gender[t][c][g] for c in range(num_countries))
A = sum(weights[t][c]*team_country[t][c] for t in range(1, num_teams_plus_one) for c in range(num_countries))
B = (sum(team_country[t][c]*lambd1*team_country[t][c] for t in range(1, num_teams_plus_one) for c in range(num_countries)))
C = (sum(team_gender[t][g]*lambd2*team_gender[t][g] for t in range(1, num_teams_plus_one) for g in range(num_genders)))
return A + B + C
def initial_solution(country_capacities, team_demands, num_teams_plus_one, num_countries, num_genders):
initial_matr_team_country_gender = [[[0 for k in range(num_genders)] for j in range(num_countries)] for i in range(num_teams_plus_one)]
current_country, current_gender = 0, 0
for i in range(1, num_teams_plus_one):
remaining_demand_current_team = team_demands[i - 1]
while remaining_demand_current_team > 0:
if remaining_demand_current_team >= country_capacities[current_country][current_gender]:
initial_matr_team_country_gender[i][current_country][current_gender] += country_capacities[current_country][current_gender]
remaining_demand_current_team -= country_capacities[current_country][current_gender]
country_capacities[current_country][current_gender] = 0
if current_gender == (num_genders - 1):
current_gender = 0
current_country += 1
else:
current_gender += 1
else:
initial_matr_team_country_gender[i][current_country][current_gender] += remaining_demand_current_team
country_capacities[current_country][current_gender] -= remaining_demand_current_team
remaining_demand_current_team = 0
for k in range(current_gender, num_genders):
if current_country < num_countries:
initial_matr_team_country_gender[0][current_country][k] += country_capacities[current_country][k]
for j in range(current_country+1, num_countries):
for k in range(num_genders):
initial_matr_team_country_gender[0][j][k] += country_capacities[j][k]
return initial_matr_team_country_gender
def match_entropy(res):
#Function to calculate entropy of a matching for given clusters
num_items,m=np.shape(res)
ent=np.zeros((num_items,))
for i in range(num_items):
a=res[i,:]
ent[i]=sp.entropy(a)
return np.mean(ent)
if __name__ == '__main__':
np.random.seed(1)
"""Read UIUC numpy weights from data folder and implement
1) Auxiliary graph Matching
2) Diverse MIQP Matching"""
#Load W1 and lab1
G = nx.DiGraph()
W1=np.load('RevieweData/W1.npy')
#Allocate labels to all reviewers
lab1=np.load('RevieweData/lab1.npy')
#Allocate gender labels to every reviewer
lab2=np.random.randint(0,2, [len(lab1),])
#Make two copies of each person, to meet the demands
W1=np.concatenate((W1,W1))
lab1=np.concatenate((lab1,lab1))
lab2=np.concatenate((lab2,lab2))
num_countries = 1+max(lab1)
num_genders = 1+max(lab2)
num_reviewers = 378 #authors
num_papers = 13 #papers
min_worker = 4 #minimum number of nodes that must match to each team
min_country = 0 # minimum number of nodes that must match to each country
max_worker = num_reviewers # maximum number of nodes that can match to each worker
Wmat = W1[0:num_reviewers,0:num_papers]
lab = lab1[0:num_reviewers] #every movie has a cluster label
W2 = np.zeros((num_countries,num_papers))
clus_capac = np.zeros(num_countries,)
for j in range(num_countries):
idx=np.where(lab == j)[0]
clus_capac[j] = len(idx)
for i in range(num_papers):
#We average the weights of all people from a country to find the weight of country to team
W2[j,i] = np.median(Wmat[idx,i])
W = list(np.ravel(W2))
#find the maximum number of people from each country/gender
clus_gen_matr = np.zeros((num_countries, num_genders))
for i in range(num_countries):
find_clusidx=np.where(lab == i)[0]
clus_gen = lab2[find_clusidx]
for j in range(num_genders):
clus_gen_matr[i,j] = len(np.where(clus_gen == j)[0])
lambd1, lambd2 = 1.0, 1.0
num_teams_plus_one = num_papers+1
num_teams = num_papers
W2 = np.transpose(W2)
wmatr_team_country_miqp = [list(i) for i in list(W2)]
#Add zero weights to T0, which has unassigned people
wmatr_team_country = [[0.0]*num_countries] + wmatr_team_country_miqp
team_demands = [min_worker]*num_papers
country_capacities = []
c_capac = []
for i in range(num_countries):
country_capacities.append(list(clus_gen_matr[i,:]))
c_capac.append(list(clus_gen_matr[i,:]))
matr_team_country_gender = initial_solution(country_capacities, team_demands, num_teams_plus_one, num_countries, num_genders)
find_graph_weighted_gender(matr_team_country_gender, wmatr_team_country, num_teams_plus_one,num_countries, num_genders,lambd1, lambd2)
final_matr_tcg = worker_country_gender(matr_team_country_gender, wmatr_team_country, num_teams_plus_one,num_countries, num_genders,lambd1, lambd2)
print ("Our algorithm objective function value: "+ objective_function(final_matr_tcg, num_teams_plus_one, num_countries, num_genders, wmatr_team_country, lambd1, lambd2))
mcg.tradeoff_matching_gender(num_teams, num_countries, 2, wmatr_team_country_miqp, team_demands,c_capac, 1.0, 1.0, 60)