在这篇文章中,我将介绍如何组合使用多种工具来加速超参数优化任务。文中给出的是 Ubuntu 下的操作说明,但稍作调整即可应用于任何 *nix 系统。
# Point PYENV_ROOT at ~/.pyenv for the commands that follow
export PYENV_ROOT="$HOME/.pyenv"
# Download the pyenv installer script and run it with bash
curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash
# Append pyenv's PATH entry and shell initialisation to ~/.bash_profile
echo 'export PATH="$HOME/.pyenv/bin:$PATH"' >> ~/.bash_profile
echo 'eval "$(pyenv init -)"' >> ~/.bash_profile
# Load the updated profile into the current shell
source ~/.bash_profile
# Install system packages (presumably the toolchain and headers needed to compile CPython -- confirm against the pyenv wiki)
sudo apt install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev xz-utils tk-dev
# Build Python 3.6.5 with --enable-shared, passing "-j 8" to make via MAKEOPTS
env PYTHON_CONFIGURE_OPTS="--enable-shared" MAKEOPTS="-j 8" pyenv install 3.6.5
# Select Python 3.6.5 for the current directory
pyenv local 3.6.5
# Install the Python packages; hyperopt and hyperas are taken straight from GitHub
pip install tensorflow git+https://github.com/hyperopt/hyperopt git+https://github.com/maxpumperla/hyperas keras pssh matplotlib h5py pymongo
from hyperas import optim
from hyperas.distributions import quniform, uniform
from hyperopt import STATUS_OK, tpe, mongoexp
import keras
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.datasets import mnist
import tempfile
from datetime import datetime
def data():
    """Load MNIST and prepare it for a dense network.

    Images are flattened to 784-element float32 vectors and divided by 255;
    labels are one-hot encoded over 10 classes.

    Gives back the tuple (x_train, y_train, x_test, y_test).

    NOTE(review): hyperas appears to re-process this function's source text,
    so the four local names are kept identical to create_model's parameter
    names -- confirm against the hyperas docs before renaming anything.
    """
    num_classes = 10
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Flatten each 28x28 image, convert to float32 and scale in one expression.
    x_train = x_train.reshape(60000, 784).astype('float32') / 255
    x_test = x_test.reshape(10000, 784).astype('float32') / 255

    # Integer class labels -> one-hot rows.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    return x_train, y_train, x_test, y_test
def create_model(x_train, y_train, x_test, y_test):
    """
    Train and score one single-hidden-layer classifier for a hyperas trial.

    The two templated assignments below are the search space: the hidden
    layer width (quniform 12..256, step 4) and its dropout rate
    (uniform 0.001..0.7).

    The model is fitted for 10 epochs with batch size 128, using the test
    split as validation data, then evaluated on the test split.

    Returns a result dict with:
      loss         -- negative test accuracy (the value being minimised)
      score        -- test loss reported by model.evaluate
      status       -- hyperopt STATUS_OK
      duration     -- seconds spent in fit + evaluate
      ho_params    -- the sampled hyper-parameters
      model_config -- model.get_config()
      model_serial -- raw bytes of the saved .h5 model file
    """
    l1_size = {{quniform(12, 256, 4)}}
    l1_dropout = {{uniform(0.001, 0.7)}}
    params = {
        'l1_size': l1_size,
        'l1_dropout': l1_dropout
    }
    num_classes = 10

    model = Sequential()
    # quniform samples floats, so the layer width is cast to int.
    model.add(Dense(int(l1_size), activation='relu'))
    model.add(Dropout(l1_dropout))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])

    start = datetime.now()
    model.fit(x_train, y_train, batch_size=128, epochs=10, validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, verbose=0)
    out = {
        'loss': -acc,  # negated so that minimising the loss maximises accuracy
        'score': score,
        'status': STATUS_OK,
        'duration': (datetime.now() - start).total_seconds(),
        'ho_params': params,
        'model_config': model.get_config()
    }

    # Optionally store a dump of the model so it can be fetched from the
    # database later. TemporaryDirectory provides a collision-free location
    # through public API and is removed when the block exits, so repeated
    # trials do not leave .h5 files behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_name = tmp_dir + '/model.h5'
        model.save(temp_name)
        with open(temp_name, 'rb') as infile:
            out['model_serial'] = infile.read()
    return out
if __name__ == "__main__":
    # Trial state is kept in MongoDB under the 'mnist_test' experiment key.
    trials = mongoexp.MongoTrials(
        'mongo://username:pass@mongodb.host:27017/jobs/jobs',
        exp_key='mnist_test',
    )

    search_settings = dict(
        model=create_model,
        data=data,
        algo=tpe.suggest,
        max_evals=10,
        trials=trials,
        keep_temp=True,  # this last bit is important
    )
    best_run, best_model = optim.minimize(**search_settings)

    print("Best performing model chosen hyper-parameters:", best_run, sep="\n")
# Run the optimisation script (presumably the hyperas script shown earlier, saved as optimise_task.py)
python optimise_task.py
# Create the job directory and an empty, executable job.sh inside it
mkdir hyperopt_job
touch hyperopt_job/job.sh
chmod +x hyperopt_job/job.sh
#!/bin/bash
# Set up pyenv explicitly in this script's shell
export PYENV_ROOT="$HOME/.pyenv"
export PATH="$PYENV_ROOT/bin:$PATH"
eval "$(pyenv init -)"
# Put the job directory on Python's import path
export PYTHONPATH=~/hyperopt_job/
# Work from the job directory and select Python 3.6.5 there
cd ~/hyperopt_job
pyenv local 3.6.5
# Start a hyperopt worker for the mnist_test experiment key
hyperopt-mongo-worker --mongo="mongo://username:password@mongodb.host:27017/jobs" --exp-key=mnist_test
如果.pyenv文件夹还不存在,您还可以让该脚本从URL中获取压缩版本的.pyenv文件夹,方法是在脚本前加上如下内容:
# Bootstrap: when ~/.pyenv does not exist yet, fetch a zipped copy and unpack it.
if [ ! -d "$HOME/.pyenv" ]; then
    # -O pins the local file name so it always matches the unzip call below
    # (the original downloaded "mypenv.zip" but tried to unzip "mypyenv.zip").
    wget -O mypyenv.zip https://url.to/mypyenv.zip
    unzip mypyenv.zip
fi
另外,您也可以将其解压到 tmp 目录,并相应地设置 PYENV_ROOT 环境变量。
pssh -h hosts.txt bash -c "nohup ~/hyperopt_job/job.sh &"
from pymongo import MongoClient, ASCENDING
from keras.models import load_model
import tempfile
c = MongoClient('mongodb://username:pass@mongodb.host:27017/jobs')

# Fetch the successful trial with the lowest loss for this experiment key.
best_model = c['jobs']['jobs'].find_one(
    {'exp_key': 'mnist_test', 'result.status': 'ok'},
    sort=[('result.loss', ASCENDING)],
)
if best_model is None:
    # find_one gives None when nothing matches; fail with a clear message
    # instead of a TypeError on the subscript below.
    raise RuntimeError("no successful trial found for exp_key 'mnist_test'")

# load_model reads from a path, so the stored bytes are written to a real
# file first. NamedTemporaryFile creates a unique file through public API;
# delete=False keeps it on disk so temp_name stays valid after the block.
with tempfile.NamedTemporaryFile(suffix='.h5', delete=False) as outfile:
    outfile.write(best_model['result']['model_serial'])
    temp_name = outfile.name

model = load_model(temp_name)
# do things with your model here
model.summary()
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from pymongo import MongoClient
import numpy as np
if __name__ == "__main__":
    # Importing Axes3D registers the '3d' projection; matplotlib releases
    # before 3.2 reject projection='3d' without this import.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    # get the data
    jobs = MongoClient('mongodb://username:pass@mongodb.host:27017/jobs')['jobs']['jobs']
    cursor = jobs.find({'exp_key': 'mnist_test', 'result.status': 'ok'})

    # results[tid] -> {'x': [loss], 'y': [l1_size], 'z': [l1_dropout]}
    results = defaultdict(lambda: defaultdict(list))

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(90)

    for row in cursor:
        cc = row['tid']
        results[cc]['x'].append(row['result']['loss'])
        results[cc]['y'].append(row['result']['ho_params']['l1_size'])
        results[cc]['z'].append(row['result']['ho_params']['l1_dropout'])

    # One rainbow colour per trial id.
    colors = cm.rainbow(np.linspace(0, 1, len(results)))
    for (k, v), colour in zip(results.items(), colors):
        ax.scatter(v['x'], v['y'], v['z'], label=k, color=colour)

    # Axis labels name what is actually plotted: the stored loss (the
    # training script records negative accuracy), the hidden layer size and
    # the dropout rate.
    ax.set_xlabel('Loss (negative accuracy)')
    ax.set_ylabel('L1 Size')
    ax.set_zlabel('L1 Dropout')
    plt.title("Hyperparameter Optimisation Results")
    plt.show()