Merge pull request #135 from automl/development
[WIP] prepare release
mfeurer authored Dec 9, 2016
2 parents d7aa29b + 65a75d6 commit 7430205
Showing 17 changed files with 208 additions and 149 deletions.
18 changes: 17 additions & 1 deletion changelog.md
@@ -1,3 +1,19 @@
# 0.2.2

* FIX 124: SMAC could crash if the number of instances was less than seven
* FIX 126: Memory limit was not correctly passed to the target algorithm
evaluator
* Local search is now started from the configurations with the highest EI,
  drawn by random sampling
* Reduce the number of trees to 10 to allow faster predictions (as in SMAC2)
* Do an adaptive number of stochastic local search iterations instead of a fixed
  number (a5914a1d97eed2267ae82f22bd53246c92fe1e2c)
* FIX a bug which prevented SMAC from running at least two configurations per
  call to intensify
* ADD a more efficient data structure to update the cost of a configuration
* FIX only count a challenger as run if it actually was run
  (and not merely considered) (a993c29abdec98c114fc7d456ded1425a6902ce3)

# 0.2.1

* CI: travis-ci continuous integration on OSX
@@ -61,4 +77,4 @@

# 0.0.1

* initial release
* initial release
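The 0.2.2 entry "ADD a more efficient data structure to update the cost of a configuration" corresponds to the incremental bookkeeping visible in the smac/runhistory/runhistory.py diff further down. A minimal standalone sketch of the idea — a running mean per configuration, updated in O(1) per new run; the class and attribute names here are illustrative, not SMAC's API:

```python
class RunningCost:
    """Incrementally maintained average cost per configuration id."""

    def __init__(self):
        self.cost_per_config = {}   # config_id -> mean cost over its runs
        self.runs_per_config = {}   # config_id -> number of runs seen so far

    def incremental_update_cost(self, config_id, cost):
        n_runs = self.runs_per_config.get(config_id, 0)
        old_mean = self.cost_per_config.get(config_id, 0.0)
        # update the running mean without re-aggregating all earlier runs
        self.cost_per_config[config_id] = old_mean + (cost - old_mean) / (n_runs + 1)
        self.runs_per_config[config_id] = n_runs + 1


tracker = RunningCost()
for cost in (0.8, 0.6, 0.7):
    tracker.incremental_update_cost(config_id=1, cost=cost)
print(tracker.cost_per_config[1])   # ~0.7, the mean of the three observed costs
```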
1 change: 1 addition & 0 deletions examples/branin/branin_scenario.txt
@@ -2,3 +2,4 @@ algo = python branin.py
paramfile = branin_pcs.pcs
run_obj = quality
runcount_limit = 500
deterministic = 1
19 changes: 9 additions & 10 deletions examples/rf.py
@@ -109,22 +109,21 @@ def rfr(cfg, seed):
"deterministic": "true",
"memory_limit": 1024,
})

# register function to be optimize
taf = ExecuteTAFuncDict(rfr)


# Optimize
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
tae_runner=rfr)

# example call of the function
# it returns: Status, Cost, Runtime, Additional Infos
def_value = taf.run(cs.get_default_configuration())[1]
def_value = smac.solver.intensifier.tae_runner.run(
cs.get_default_configuration(), 1)[1]
print("Default Value: %.2f" % (def_value))

# Optimize
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
tae_runner=taf)

try:
incumbent = smac.optimize()
finally:
incumbent = smac.solver.incumbent

inc_value = taf.run(incumbent)[1]
inc_value = smac.solver.intensifier.tae_runner.run(incumbent, 1)[1]
print("Optimized Value: %.2f" % (inc_value))
4 changes: 2 additions & 2 deletions smac/__init__.py
@@ -1,3 +1,3 @@
VERSION = '0.2.1'
VERSION = '0.2.2'
AUTHORS = "Marius Lindauer, Matthias Feurer, Katharina Eggensperger, " \
"Aaron Klein, Stefan Falkner and Frank Hutter"
"Aaron Klein, Stefan Falkner and Frank Hutter"
2 changes: 1 addition & 1 deletion smac/epm/rf_with_instances.py
@@ -53,7 +53,7 @@ class RandomForestWithInstances(AbstractEPM):

def __init__(self, types,
instance_features=None,
num_trees=30,
num_trees=10,
do_bootstrapping=True,
n_points_per_tree=0,
ratio_features=5. / 6.,
1 change: 1 addition & 0 deletions smac/facade/smac_facade.py
@@ -143,6 +143,7 @@ def __init__(self,
tae_runner = ExecuteTAFuncDict(ta=tae_runner,
stats=self.stats,
run_obj=scenario.run_obj,
memory_limit=scenario.memory_limit,
runhistory=runhistory,
par_factor=scenario.par_factor)
# Third case, if it is an ExecuteTaRun we can simply use the
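This facade change forwards scenario.memory_limit to ExecuteTAFuncDict, closing FIX 126 from the changelog. As a rough standalone illustration of what enforcing a memory cap on a target-function call means — not SMAC's actual mechanism, and using only the POSIX standard library with illustrative names:

```python
import resource  # POSIX-only


def run_with_memory_limit(func, memory_limit_mb, *args, **kwargs):
    """Call func under an address-space cap; translate a memory-out into None."""
    soft, hard = resource.getrlimit(resource.RLIMIT_AS)
    resource.setrlimit(resource.RLIMIT_AS, (memory_limit_mb * 1024 * 1024, hard))
    try:
        return func(*args, **kwargs)
    except MemoryError:
        # a real evaluator would record a MEMOUT/CRASHED status here
        return None
    finally:
        # restore the previous limit so the rest of the process is unaffected
        resource.setrlimit(resource.RLIMIT_AS, (soft, hard))


def waste_memory():
    return [0] * (1024 * 1024 * 1024)  # roughly 8 GB of list elements


print(run_with_memory_limit(waste_memory, memory_limit_mb=512))  # -> None
```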
6 changes: 3 additions & 3 deletions smac/initial_design/multi_config_initial_design.py
@@ -95,9 +95,9 @@ def run(self) -> Configuration:
# (also not on the incumbent)
# therefore, at least two different configurations have to be in <configs>
inc, inc_perf = self.intensifier.intensify(challengers=set(configs[1:]),
incumbent=configs[0],
run_history=self.runhistory,
aggregate_func=self.aggregate_func)
incumbent=configs[0],
run_history=self.runhistory,
aggregate_func=self.aggregate_func)

else:
self.logger.debug("All initial challengers are identical")
46 changes: 30 additions & 16 deletions smac/intensification/intensification.py
@@ -116,13 +116,15 @@ def intensify(self, challengers, incumbent, run_history, aggregate_func,
raise ValueError("time_bound must be >= 0.01")

num_run = 0
chall_indx = 0

# Line 1 + 2
for chall_indx, challenger in enumerate(challengers):
for challenger in challengers:
if challenger == incumbent:
self.logger.warning(
"Challenger was the same as the current incumbent; Skipping challenger")
continue

self.logger.debug("Intensify on %s", challenger)
if hasattr(challenger, 'origin'):
self.logger.debug(
@@ -163,11 +165,12 @@ def intensify(self, challengers, incumbent, run_history, aggregate_func,
next_instance = self.rs.choice(list(available_insts))
# Line 7
self.logger.debug("Add run of incumbent")
status, cost, dur, res = self.tae_runner.start(config=incumbent,
instance=next_instance,
seed=next_seed,
cutoff=self.cutoff,
instance_specific=self.instance_specifics.get(next_instance, "0"))
status, cost, dur, res = self.tae_runner.start(
config=incumbent,
instance=next_instance,
seed=next_seed,
cutoff=self.cutoff,
instance_specific=self.instance_specifics.get(next_instance, "0"))

num_run += 1
else:
@@ -179,6 +182,10 @@ def intensify(self, challengers, incumbent, run_history, aggregate_func,

inc_inst_seeds = set(run_history.get_runs_for_config(incumbent))
inc_perf = aggregate_func(incumbent, run_history, inc_inst_seeds)

# at least one run of challenger
# to increase chall_indx counter
first_run = False

# Line 9
while True:
@@ -187,7 +194,7 @@ def intensify(self, challengers, incumbent, run_history, aggregate_func,

# Line 10
missing_runs = list(inc_inst_seeds - chall_inst_seeds)

# Line 11
self.rs.shuffle(missing_runs)
to_run = missing_runs[:min(N, len(missing_runs))]
@@ -219,13 +226,18 @@ def intensify(self, challengers, incumbent, run_history, aggregate_func,

else:
cutoff = self.cutoff


if not first_run:
first_run = True
chall_indx += 1

self.logger.debug("Add run of challenger")
status, cost, dur, res = self.tae_runner.start(config=challenger,
instance=instance,
seed=seed,
cutoff=cutoff,
instance_specific=self.instance_specifics.get(instance, "0"))
status, cost, dur, res = self.tae_runner.start(
config=challenger,
instance=instance,
seed=seed,
cutoff=cutoff,
instance_specific=self.instance_specifics.get(instance, "0"))
num_run += 1

# we cannot use inst_seed_pairs here since we could have less runs
@@ -253,7 +265,7 @@ def intensify(self, challengers, incumbent, run_history, aggregate_func,

n_samples = len(inst_seed_pairs)
self.logger.info("Challenger (%.4f) is better than incumbent (%.4f) on %d runs." % (
chal_perf / n_samples, inc_perf / n_samples, n_samples))
chal_perf, inc_perf, n_samples))
self.logger.info(
"Changing incumbent to challenger: %s" % (challenger))
incumbent = challenger
@@ -268,11 +280,11 @@ def intensify(self, challengers, incumbent, run_history, aggregate_func,
# challenger is not worse, continue
N = 2 * N

if chall_indx >= 1 and num_run > self.run_limit:
if chall_indx > 1 and num_run > self.run_limit:
self.logger.debug(
"Maximum #runs for intensification reached")
break
elif chall_indx >= 1 and time.time() - self.start_time - time_bound >= 0:
elif chall_indx > 1 and time.time() - self.start_time - time_bound >= 0:
self.logger.debug("Timelimit for intensification reached ("
"used: %f sec, available: %f sec)" % (
time.time() - self.start_time, time_bound))
@@ -283,5 +295,7 @@ def intensify(self, challengers, incumbent, run_history, aggregate_func,
inc_perf = aggregate_func(incumbent, run_history, inc_runs)
self.logger.info("Updated estimated performance of incumbent on %d runs: %.4f" % (
len(inc_runs), inc_perf))

self.stats.update_average_configs_per_intensify(n_configs=chall_indx)

return incumbent, inc_perf
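Two of the intensification changes above work together: a challenger now only increments chall_indx once it has actually been run (via the first_run flag), and the run/time budget is only checked after more than one challenger has been counted, which relates to the changelog fix about running at least two configurations per call to intensify. A toy sketch of that counting pattern, with illustrative names outside of SMAC:

```python
def race(challengers, run_limit, should_run):
    """Count only challengers that were actually run at least once."""
    num_run = 0
    chall_indx = 0
    for challenger in challengers:
        first_run = False
        for instance in ("inst1", "inst2"):
            if not should_run(challenger, instance):
                continue
            if not first_run:
                # the challenger now counts towards the intensification budget
                first_run = True
                chall_indx += 1
            num_run += 1
        # budget checks only apply once more than one challenger was counted
        if chall_indx > 1 and num_run > run_limit:
            break
    return chall_indx, num_run


print(race(["a", "b", "c"], run_limit=3, should_run=lambda c, i: c != "b"))
# -> (2, 4): "b" is never run, so it is not counted as a challenger
```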
47 changes: 32 additions & 15 deletions smac/runhistory/runhistory.py
@@ -18,6 +21,9 @@
RunKey = collections.namedtuple(
'RunKey', ['config_id', 'instance_id', 'seed'])

InstSeedKey = collections.namedtuple(
'InstSeedKey', ['instance', 'seed'])

RunValue = collections.namedtuple(
'RunValue', ['cost', 'time', 'status', 'additional_info'])

@@ -41,6 +44,10 @@ def __init__(self, aggregate_func):
# order as it was added.
self.data = collections.OrderedDict()

# for fast access, we have also an unordered data structure
# to get all instance seed pairs of a configuration
self._configid_to_inst_seed = {}

self.config_ids = {} # config -> id
self.ids_config = {} # id -> config
self._n_id = 0
@@ -88,8 +95,14 @@ def add(self, config, cost, time,

k = RunKey(config_id, instance_id, seed)
v = RunValue(cost, time, status, additional_info)

self.data[k] = v

# also add to fast data structure
is_k = InstSeedKey(instance_id, seed)
self._configid_to_inst_seed[
config_id] = self._configid_to_inst_seed.get(config_id, [])
self._configid_to_inst_seed[config_id].append(is_k)

# assumes an average across runs as cost function
self.incremental_update_cost(config, cost)

@@ -156,6 +169,10 @@ def incremental_update_cost(self, config: Configuration, cost: float):
self.runs_per_config[config_id] = n_runs + 1

def get_cost(self, config):
'''
returns empirical cost for a configuration;
uses self.cost_per_config
'''
config_id = self.config_ids[config]
return self.cost_per_config[config_id]

@@ -170,18 +187,12 @@ def get_runs_for_config(self, config):
----------
list: tuples of instance, seed
"""
InstanceSeedPair = collections.namedtuple("InstanceSeedPair",
["instance", "seed"])
config_id = self.config_ids.get(config)
list_ = []
for k in self.data:
# TA will return ABORT if config. budget was exhausted and
# we don't want to collect such runs to compute the cost of a
# configuration
if config_id == k.config_id and self.data[k].status not in [StatusType.ABORT]:
ist = InstanceSeedPair(k.instance_id, k.seed)
list_.append(ist)
return list_
is_list = self._configid_to_inst_seed.get(config_id)
if is_list is None:
return []
else:
return is_list

def empty(self):
"""
@@ -240,9 +251,15 @@ def load_json(self, fn, cs):

self._n_id = len(self.config_ids)

self.data = {RunKey(int(k[0]), k[1], int(k[2])):
RunValue(float(v[0]), float(v[1]), v[2], v[3])
for k, v in all_data["data"]}
# important to use add method to use all data structure correctly
for k, v in all_data["data"]:
self.add(config=self.ids_config[int(k[0])],
cost=float(v[0]),
time=float(v[1]),
status=v[2],
instance_id=k[1],
seed=int(k[2]),
additional_info=v[3])

def update_from_json(self, fn, cs):
"""Update the current runhistory by adding new runs from a json file.
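The runhistory change above replaces a linear scan over all stored runs in get_runs_for_config with a per-configuration index of (instance, seed) pairs that is maintained inside add(). A minimal standalone sketch of that lookup structure — illustrative names, not the SMAC classes themselves:

```python
import collections

InstSeedKey = collections.namedtuple('InstSeedKey', ['instance', 'seed'])


class RunIndex:
    """Map each config id to the (instance, seed) pairs it has been run on."""

    def __init__(self):
        self._configid_to_inst_seed = {}

    def add(self, config_id, instance, seed):
        # O(1) append at insertion time instead of scanning all runs later
        self._configid_to_inst_seed.setdefault(config_id, []).append(
            InstSeedKey(instance, seed))

    def get_runs_for_config(self, config_id):
        # configurations without runs simply return an empty list
        return self._configid_to_inst_seed.get(config_id, [])


index = RunIndex()
index.add(1, "inst_a", 42)
index.add(1, "inst_b", 42)
print(index.get_runs_for_config(1))
# [InstSeedKey(instance='inst_a', seed=42), InstSeedKey(instance='inst_b', seed=42)]
print(index.get_runs_for_config(2))  # []
```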
2 changes: 1 addition & 1 deletion smac/scenario/scenario.py
@@ -317,7 +317,7 @@ def extract_instance_specific(instance_list):
#PCA
pca = PCA(n_components=self.PCA_DIM)
self.feature_array = pca.fit_transform(X)
self.n_features = self.PCA_DIM
self.n_features = self.feature_array.shape[1]
# update feature dictionary
for feat, inst_ in zip(self.feature_array, self.train_insts):
self.feature_dict[inst_] = feat
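The scenario change above stops assuming that the transformed feature array has exactly PCA_DIM columns and instead reads the dimensionality from the array itself. A small standalone illustration of the rationale, under the (hypothetical, not taken from SMAC's code) assumption that the number of components can be capped by the available raw features:

```python
import numpy as np
from sklearn.decomposition import PCA

PCA_DIM = 7                              # requested number of components
X = np.random.rand(50, 4)                # 50 instances, but only 4 raw features

pca = PCA(n_components=min(PCA_DIM, X.shape[1]))
feature_array = pca.fit_transform(X)

# the hard-coded constant and the actual array dimensionality now disagree,
# so downstream code should trust the array, not PCA_DIM
print(PCA_DIM, feature_array.shape[1])   # 7 vs. 4
```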