forked from Networks-Learning/strategic-decisions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: fair.py
67 lines (56 loc) · 2.26 KB
/
fair.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import numpy as np
import pandas as pd
import click
from lib.greedy_deter import compute_gd
from lib.greedy_fair import compute_gf
@click.command()
@click.option('--data', required=True)
@click.option('--gamma', type=float, required=True)
@click.option('--alpha', type=float, required=True)
@click.option('--k', type=int, required=True)
@click.option('--njobs', default=1, help="number of parallel threads")
@click.option('--seed', default=1, help="random seed")
@click.option('--output', required=True, help="output")
def experiment(data, output, gamma, seed, alpha, k, njobs):
    """
    Executes the greedy fair algorithm (compute_gf) on real data, with a
    partition matroid constraint over age groups.

    Parameters
    ----------
    data : string
        data directory prefix (e.g., data/processed/fico)
    gamma : float
        gamma parameter value
    alpha : float
        alpha parameter value (scales the cost matrix)
    k : int
        maximum number of explanations
    seed : int
        random seed for reproducibility
    njobs : int
        number of parallel threads to be used
    output : string
        output directory prefix (e.g., outputs/exec1_)
    """
    # Read outcomes. NOTE: np.float was deprecated in NumPy 1.20 and removed
    # in 1.24 -- the builtin float is the documented replacement and behaves
    # identically (both mean float64 for pandas dtypes).
    u = pd.read_csv(data+'_pyx.csv', index_col=0, names=["ID", "Probability"], header=0, dtype={'Probability': float})
    u.sort_values(by=["Probability"], inplace=True, ascending=False)
    # Remember the row order (sorted by descending probability); all other
    # inputs below are re-aligned to this ordering.
    indexing = u.index.values.flatten().tolist()
    u = u.values.flatten() - gamma
    # Read costs and align both axes to the sorted ordering.
    cost = pd.read_csv(data+'_cost.csv', index_col=0, header=0)
    cost.columns = cost.columns.astype(int)
    cost = cost[indexing]
    c = cost.reindex(indexing).to_numpy()
    c = c * alpha  # scaling
    # Read population probabilities, aligned to the same ordering.
    px_df = pd.read_csv(data+'_px.csv', index_col=0, header=0)
    px = px_df.reindex(indexing).to_numpy().flatten()
    # Read natural vectors and set the partition matroid: each individual's
    # 'Age group' defines the partition it belongs to.
    natural_vectors = pd.read_csv(data+'_vectors.csv', index_col=0, header=0)
    partitions = natural_vectors['Age group'].reindex(indexing).astype(int).to_numpy().flatten()
    num_of_partitions = len(np.unique(partitions))
    # Compute the fair greedy solution and write results under `output`.
    compute_gf(num_of_partitions=num_of_partitions, partitions=partitions, output=output, C=c, U=u, Px=px, k=k,
               seed=seed, alpha=alpha, indexing=indexing, njobs=njobs)
# Script entry point: click parses the command-line options and invokes
# experiment() with them.
if __name__ == '__main__':
    experiment()