-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhpo.py
62 lines (49 loc) · 1.76 KB
/
hpo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import pickle
import click
import mlflow
import optuna
from optuna.samplers import TPESampler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
# Module-level MLflow configuration: both calls execute when this module is
# imported or run, before any function below is called.
# Send tracking data to the MLflow server expected at 127.0.0.1:5000.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
# Select the experiment that runs started later in this module are filed under.
# NOTE(review): the name says "hyperopt" but the search below uses Optuna --
# confirm the name is intentional before renaming, other tooling may query it.
mlflow.set_experiment("random-forest-hyperopt")
def load_pickle(filename):
    """Return the object stored in the pickle file at *filename*.

    The file is opened in binary mode and closed again before returning.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
    file. Only call this on files produced by a trusted step of this
    pipeline, never on data obtained from an untrusted source.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
        pickle.UnpicklingError: if the content is not a valid pickle.
    """
    with open(filename, "rb") as f_in:
        return pickle.load(f_in)
@click.command()
@click.option(
    "--data_path",
    default="../output",
    help="Location where the processed NYC taxi trip data was saved"
)
@click.option(
    "--num_trials",
    default=10,
    help="The number of parameter evaluations for the optimizer to explore"
)
def run_optimization(data_path: str, num_trials: int):
    """Search RandomForestRegressor hyperparameters with Optuna, logging to MLflow.

    Loads ``train.pkl`` and ``val.pkl`` from *data_path* (each is unpacked as
    a ``(features, target)`` pair), then runs *num_trials* Optuna trials that
    minimize the RMSE on the validation pair. Every trial is recorded as its
    own MLflow run containing the parameters used and the resulting ``RMSE``.

    Args:
        data_path: Directory holding ``train.pkl`` and ``val.pkl``.
        num_trials: Number of trials the Optuna study evaluates.
    """
    X_train, y_train = load_pickle(os.path.join(data_path, "train.pkl"))
    X_val, y_val = load_pickle(os.path.join(data_path, "val.pkl"))

    def objective(trial):
        """Fit one forest with the trial's suggested parameters; return its RMSE."""
        # One MLflow run per trial so each parameter set can be compared.
        with mlflow.start_run():
            # `step` is left at its default of 1: passing it positionally is
            # deprecated in Optuna, where it is a keyword-only argument.
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 10, 50),
                'max_depth': trial.suggest_int('max_depth', 1, 20),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
                'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 4),
                'random_state': 42,
                'n_jobs': -1
            }

            rf = RandomForestRegressor(**params)
            rf.fit(X_train, y_train)
            y_pred = rf.predict(X_val)

            # Take the square root explicitly instead of `squared=False`:
            # that keyword was deprecated in scikit-learn 1.4 and removed in
            # 1.6, while this form works on every version.
            # NOTE(review): identical to the old result for a single-column
            # target; confirm y_val is 1-D (multi-output averaging differs).
            rmse = float(mean_squared_error(y_val, y_pred) ** 0.5)

            mlflow.log_params(params)
            mlflow.log_metric('RMSE', rmse)

        return rmse

    # Fixed sampler seed so the sequence of suggested parameters is repeatable.
    sampler = TPESampler(seed=42)
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(objective, n_trials=num_trials)
if __name__ == '__main__':
    # Script entry point: click reads --data_path / --num_trials from the
    # command line and passes them to the command.
    run_optimization()