Source code for whetlab

import os
import tempfile
import ConfigParser
import collections
import numpy as np
import server
import time
import functools
import requests

def catch_exception(f):
    @functools.wraps(f)
    def func(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except requests.exceptions.ConnectionError:
            raise RuntimeError('Unable to reach the server. Either the server is experiencing difficulties or your internet connection is down.')
        except server.error.ClientError as e:
            if e.message == "Unable to understand the content type of response returned by request responsible for error":
                raise RuntimeError('The server is currently busy, please try again shortly.')
            else:
                raise e
    return func


INF_PAGE_SIZE = 1000000

DEFAULT_API_URL = 'http://whetlab-server.elasticbeanstalk.com'

supported_properties = set(['min','max','size','scale','units','type','options'])
required_properties = {
    'float':set(['min','max']),
    'integer':set(['min','max']),
    'enum':set(['options'])
}
default_values = {
    'float':{
        'size':1,
        'scale':'linear',
        'units':'Reals',
    },
    'integer':{
        'size':1,
        'scale':'linear',
        'units':'Integers',
    },
    'enum':{
        'size':1
    },
    'type':'float'
}
legal_values = {
    'size':set([1]),
    'scale':set(['linear','log']),
    'type':set(['float', 'integer', 'enum'])
}

python_types = {'float':float,'integer':int,'enum':str}

outcome_supported_properties = set(['units','type','name'])
outcome_required_properties = set(['name'])
outcome_default_values = {'min':-100.,
              'max':100.,
              'size':1,
              'scale':'linear',
              'units':'Reals',
              'type':'float'}
outcome_legal_values = {'size':set([1]),
            'scale':set(['linear']),
            'type':set(['float'])}

@catch_exception
def _reformat_float(rest_param):
    """
    Convert float parameter description in REST-server format to internal client format.

    :param rest_param: Parameter description obtained from REST server.
    :type rest_param: dict
    :return: Parameter description in interal client format.
    :rtype: dict
    """

    type  = rest_param['type']
    min   = rest_param['min']
    max   = rest_param['max']
    size  = rest_param['size']
    units = rest_param['units']
    scale = rest_param['scale']

    return {'type':type,'min':min,'max':max,
            'size':size,'units':units,'scale':scale}

@catch_exception
def _reformat_integer(rest_param):
    """
    Convert integer parameter description in REST-server format to internal client format.

    :param rest_param: Parameter description obtained from REST server.
    :type rest_param: dict
    :return: Parameter description in interal client format.
    :rtype: dict
    """

    type  = rest_param['type']
    min   = rest_param['min']
    max   = rest_param['max']
    size  = rest_param['size']
    units = rest_param['units']
    scale = rest_param['scale']

    return {'type':type,'min':min,'max':max,
            'size':size,'units':units,'scale':scale}

@catch_exception
def _reformat_enum(rest_param):
    """
    Convert enum parameter description in REST-server format to internal client format.

    :param rest_param: Parameter description obtained from REST server.
    :type rest_param: dict
    :return: Parameter description in interal client format.
    :rtype: dict
    """

    type       = rest_param['type']
    options    = rest_param['options']
    size       = rest_param['size']

    return {'type':type,'options':options,'size':size}

@catch_exception
def _validate_integer(name, properties):
    """
    Validate that integer parameter description is valid.

    :param name: Name of parameter
    :type name: str
    :param name: Properties of the parameter
    :type name: dict
    """

    # Check if required properties are present
    for property in required_properties['integer']:
        if property not in properties:
            raise ValueError("Parameter '" +name+ "': property '" + property + "' must be defined")

    # Add default parameters if not present
    for property, default in default_values['integer'].iteritems():
        if property not in properties:
            properties[property] = default

    # Check compatibility of properties
    if properties['min'] >= properties['max']:
        raise ValueError("Parameter '" + name + "': 'min' should be smaller than 'max'")

    if np.mod(properties['min'],1) != 0 : raise ValueError("Parameter '" + name + "': 'min' should be an integer")
    if np.mod(properties['max'],1) != 0 : raise ValueError("Parameter '" + name + "': 'max' should be an integer")

    for property, legals in legal_values.iteritems():
        if properties[property] not in legals:
            raise ValueError("Parameter '" +name+ "': invalid value for property '" + property+"'")

@catch_exception
def _validate_float(name, properties):
    """
    Validate that float parameter description is valid.

    :param name: Name of parameter
    :type name: str
    :param name: Properties of the parameter
    :type name: dict
    """

    # Check if required properties are present
    for property in required_properties['float']:
        if property not in properties:
            raise ValueError("Parameter '" +name+ "': property '" + property + "' must be defined")

    # Add default parameters if not present
    for property, default in default_values['float'].iteritems():
        if property not in properties:
            properties[property] = default

    # Check compatibility of properties
    if properties['min'] >= properties['max']:
        raise ValueError("Parameter '" + name + "': 'min' should be smaller than 'max'")

    for property, legals in legal_values.iteritems():
        if properties[property] not in legals:
            raise ValueError("Parameter '" +name+ "': invalid value for property '" + property+"'")


@catch_exception
def _validate_enum(name, properties):
    """
    Validate that enum parameter description is valid.

    :param name: Name of parameter
    :type name: str
    :param name: Properties of the parameter
    :type name: dict
    """

    # Check if required properties are present
    for property in required_properties['enum']:
        if property not in properties:
            raise ValueError("Parameter '" +name+ "': property '" + property + "' must be defined")

    # Add default parameters if not present
    for property, default in default_values['enum'].iteritems():
        if property not in properties:
            properties[property] = default

    # Check compatibility of properties
    if len(properties['options']) < 3:
        raise ValueError("Parameter '%s': must give at least 3 options." % name)

    if not all([isinstance(c,python_types['enum']) for c in properties['options']]):
        raise ValueError("Parameter '%s': options must be of type %s." % name, python_types['enum'])

reformat_from_rest = {'integer': _reformat_integer,
                      'float'  : _reformat_float,
                      'enum'   : _reformat_enum}

validate = {'integer': _validate_integer,
            'float'  : _validate_float,
            'enum'   : _validate_enum}


@catch_exception
[docs]def delete_experiment(access_token, name): """ Delete the experiment with the given name. Important, this cancels the experiment and removes all saved results! :param access_token: User access token :type access_token: str :param name: Experiment name :type name: str """ try: scientist = Experiment(access_token, name, resume=True) except ValueError: raise ValueError('Could not delete experiment \''+name+'\' (either it doesn\'t exist or access token is invalid)') scientist._delete()
@catch_exception def load_config(): filename = '.whetlab' search_path = ['.', os.path.expanduser('~')] for dir in search_path: full_path = os.path.join(dir, filename) if os.path.exists(full_path): config = ConfigParser.RawConfigParser() config.read(full_path) config_dict = {} if config.has_option('whetlab', 'access_token'): config_dict['access_token'] = config.get('whetlab', 'access_token') if config.has_option('whetlab', 'api_url'): config_dict['api_url'] = config.get('whetlab', 'api_url') return config_dict return {}
[docs]class Experiment: """ A Whetlab tuning experiment. A ``name`` and ``description`` for the experiment must be specified. A Whetlab access token must also be provided. The parameters to tune in the experiment are specified by ``parameters``. It should be a ``dict``, where keys are the parameters (``str``) and values are ``dict`` that provide information about these parameters. Each of these ``dict`` should contain the appropriate keys to properly describe the parameter: * ``'min'``: minimum value of the parameter * ``'max'``: maximum value of the parameter * ``'scale'``: scale to use when exploring parameter values (default: ``'linear'``) * ``'units'``: units (``str``) in which the parameter is measured (default: ``''``) * ``'type'``: type of the parameter (default: ``'float'``) * ``'size'``: size of parameter (default: ``1``) ``outcome`` should also be a ``dict``, describing the outcome. It should have the keys: * ``'name'``: name (``str``) for the outcome being optimized * ``'type'``: type of the parameter, either ``'float'``, ``'int'`` or ``'enum'`` (default: ``'float'``) * ``'units'``: units (``str``) in which the parameter is measured (default: ``''``) If ``name`` and ``description`` match a previously created experiment, that experiment will be resumed (in this case, ``parameters`` and ``outcoume`` are ignored). This behavior can be avoided by setting the argument ``resume`` to ``False`` (in which case an error will be raised is an experiment with the same name and description is found). :param access_token: Access token for your Whetlab account. :type access_token: str :param name: Name of the experiment. :type name: str :param description: Description of the experiment. :type description: str :param parameters: Parameters to be tuned during the experiment. :type parameters: dict :param outcome: Description of the outcome to maximize. :type outcome: dict :param resume: Whether to allow the resuming of a previously executed experiment. :type resume: bool A Whetlab experiment instance will have the following variables: :ivar parameters: Parameters to be tuned during the experiment. :type parameters: dict :ivar outcome: Description of the outcome to maximize. :type outcome: dict :ivar experiment_id: ID of the experiment (useful for resuming). :type experiment_id: int """ @catch_exception def __init__(self, access_token=None, name='Default name', description='Default description', parameters=None, outcome=None, resume = True, url=None): # These are for the client to keep track of things without always # querying the REST server ... # ... From result IDs to client parameter values self._ids_to_param_values = {} # ... From result IDs to outcome values self._ids_to_outcome_values = {} # ... From a parameter name to the setting IDs self._param_names_to_setting_ids = {} config = load_config() if url is None: if config.has_key('api_url'): url = config['api_url'] else: url = DEFAULT_API_URL if access_token is None: if config.has_key('access_token'): access_token = config['access_token'] else: raise Exception("No access token specified in dotfile or via constructor.") # Create REST server client options = ({'headers' : {'Authorization':'Bearer ' + access_token}, 'user_agent':'whetlab_python_client', 'api_version':'api', 'base': url}) self._client = server.Client({},options) # Make a few obvious asserts if name == '' or type(name) not in [str,unicode]: raise ValueError('Name of experiment must be a non-empty string') if type(description) not in [str,unicode]: raise ValueError('Description of experiment must be a string') self.experiment = name self.experiment_description = description self.experiment_id = self._find_experiment(self.experiment) if self.experiment_id is not None and resume: # Sync all the internals with the REST server self._sync_with_server() else: if type(parameters) != dict or len(parameters) == 0: raise ValueError('Parameters of experiment must be a non-empty dictionary') if type(outcome) != dict or len(outcome) == 0: raise ValueError('Outcome of experiment must be a non-empty dictionary') if 'name' not in outcome: raise ValueError('Argument outcome should have key \'name\'') self.outcome_name = outcome['name'] # Add specification of parameters to experiment. settings = [] #settings = {} for key in parameters.keys(): param = {} param.update(parameters[key]) for property in param.iterkeys(): if property not in supported_properties: raise ValueError("Parameter '" +key+ "': property '" + property + "' not supported") ptype = param['type'] if param.has_key('type') else default_values['type'] if ptype not in validate: raise ValueError("Parameter '%s' uses unsupported type '%s'." % (key, ptype)) # Check whether description of parameter is valid validate[ptype](key,param) param['isOutput'] = False param['name'] = key settings += [param] #settings['name'] = param # Add the outcome variable param = {} param.update(outcome) # Check outcome doesn't have the same name as any of the parameters if outcome['name'] in parameters: raise ValueError("Outcome name should not match any of the parameter names") # Check if all properties are supported for property in param.iterkeys(): if property not in outcome_supported_properties : raise ValueError("Parameter '" +key+ "': property '" + property + "' not supported") # Check if required properties are present for property in outcome_required_properties: if property not in param : raise ValueError("Parameter '" +key+ "': property '" + property + "' must be defined") # Add default parameters if not present for property, default in outcome_default_values.iteritems(): if property not in param: param[property] = default # Check compatibility of properties for property, legals in outcome_legal_values.iteritems(): if param[property] not in legals : raise ValueError("Parameter '" +key+ "': invalid value for property '" + property+"'") param['isOutput'] = True settings += [param] #settings[outcome['name']] = param # Create experiment. try: res = self._client.experiments().create(name=self.experiment, description=self.experiment_description, settings=settings) self.experiment_id = res.body['id'] except Exception as inst: # If experiment creation doesn't work, then retry resuming the experiment. # This is for cases where two processes are starting an experiment, and # one gets to create it first while the other should be resuming it. self.experiment_id = self._find_experiment(self.experiment) if not resume or self.experiment_id is None : raise inst # Call _sync_with_server in order to fill-in the state of the object # (e.g. fetching the setting ids) self._sync_with_server() pending = self.pending() if len(pending) > 0: print "INFO: this experiment currently has "+str(len(pending))+" jobs (results) that are pending." def _find_experiment(self, name): """ Look for experiment matching name and return its ID. :param name: Experiment's name :type name: str :return: Experiment's ID. :rtype: int """ # Search one page at a time page = 1 more_pages = True while more_pages: rest_exps = self._client.experiments().get({'query':{'page':page}}).body # Check if more pages to come more_pages = rest_exps['next'] is not None page += 1 # Find in current page whether we find the experiment we are looking for rest_exps = rest_exps['results'] for exp in rest_exps: if cmp(exp['name'],name) == 0: return exp['id'] return None @catch_exception def _sync_with_server(self): """ Synchronize the client's internals with the REST server. """ res = self._client.experiments().get({'query':{'id':self.experiment_id}}).body['results'][0] self.experiment = res['name'] self.experiment_description = res['description'] # Reset internals self._ids_to_param_values = {} self._ids_to_outcome_values = {} self._param_names_to_setting_ids = {} # Get settings for this experiment, to get the parameter and outcome names rest_parameters = self._client.settings().get(str(self.experiment_id),{'query':{'page_size':INF_PAGE_SIZE}}).body rest_parameters = rest_parameters['results'] self.parameters = {} for rest_param in rest_parameters: rest_param id = rest_param['id'] name = rest_param['name'] type = rest_param['type'] isOutput = rest_param['isOutput'] self._param_names_to_setting_ids[name] = id if isOutput: self.outcome_name = name else: self.parameters[name] = reformat_from_rest[type](rest_param) # Get results generated so far for this experiment rest_results = self._client.results().get({'query': {'experiment':self.experiment_id,'page_size':INF_PAGE_SIZE}}).body['results'] # Construct things needed by client internally, to keep track of # all the results for res in rest_results: res_id = res['id'] variables = res['variables'] # Construct _ids__param_values dict and ids_to_outcome_values self._ids_to_param_values[res_id] = {} for v in variables: id = v['id'] name = v['name'] if cmp(name,self.outcome_name) == 0 : self._ids_to_outcome_values[res_id] = v['value'] else: self._ids_to_param_values[res_id][v['name']] = v['value'] @catch_exception
[docs] def suggest(self): """ Suggest a new job. :return: Values to assign to the parameters in the suggested job. :rtype: dict """ res = self._client.suggest(str(self.experiment_id)).go() result_id = res.body['id'] # Poll the server for the actual variable values in the suggestion. variables = res.body['variables'] while not variables: time.sleep(2) result = self._client.result(str(result_id)).get() variables = result.body['variables'] # Put in nicer format next = {} for var in variables: # Don't return the outcome variable if cmp(var['name'],self.outcome_name) != 0: next[var['name']] = python_types[self.parameters[var['name']]['type']](var['value']) # Keep track of id / param_values relationship self._ids_to_param_values[result_id] = next return next
@catch_exception def _get_id(self,param_values): """ Return the result ID corresponding to the given ``param_values``. If no result matches, return ``None``. :param param_values: Values of parameters. :type param_values: dict :return: ID of the corresponding result. If not match, None is returned. :rtype: int or None """ # Sync with the REST server self._sync_with_server() id = None for k,v in self._ids_to_param_values.iteritems(): if cmp(v,param_values) == 0: id = k return id @catch_exception
[docs] def update(self, param_values, outcome_val): """ Update the experiment with the outcome value associated with some parameter values. :param param_values: Values of parameters. :type param_values: dict :param outcome_val: Value of the outcome. :type outcome_val: type defined for outcome """ if outcome_val is not None: outcome_val = float(outcome_val) # Check if param_values is compatible for param,value in param_values.iteritems(): if param not in self.parameters: raise ValueError("Parameter '" +param+ "' not valid") if self.parameters[param]['type'] == 'float' or self.parameters[param]['type'] == 'integer': if value < self.parameters[param]['min'] or value > self.parameters[param]['max']: raise ValueError("Parameter '" +param+ "' should have value between "+str(self.parameters[param]['min']) +" and " + str(self.parameters[param]['max'])) if type(value) != python_types[self.parameters[param]['type']]: raise TypeError("Parameter '" +param+ "' should be of type " + self.parameters[param]['type']) # Check is all parameter values are specified for param in self.parameters.keys(): if param not in param_values: raise ValueError("Parameter '" +param+ "' not specified") # Check whether this param_values has a results ID result_id = self._get_id(param_values) if result_id is None: # If not, then this is a result that was not suggested, # must add it to the server ## Get a time stamp for this submitted result #import datetime #import json # #dthandler = lambda obj: ( # obj.isoformat() # if isinstance(obj, datetime.datetime) # or isinstance(obj, datetime.date) # else None) #date = json.loads(json.dumps(datetime.datetime.now(), default=dthandler)) # Create variables for new result variables = [] for name, setting_id in self._param_names_to_setting_ids.iteritems(): if name in param_values: value = param_values[name] elif name == self.outcome_name: value = outcome_val else: raise ValueError('Failed to update with non-suggested experiment') variables += [{'setting':setting_id, 'result':result_id, 'name':name, 'value':value}] res = self._client.results().add(variables, self.experiment_id, True, self.experiment_description) result_id = res.body['id'] self._ids_to_param_values[result_id] = param_values else: # Fill in result with the given outcome value result = self._client.result(str(result_id)).get().body for var in result['variables']: if var['name'] == self.outcome_name: var['value'] = outcome_val self._ids_to_outcome_values[result_id] = var break # Assume only one outcome per experiment! res = self._client.result(str(result_id)).update(**result) self._ids_to_outcome_values[result_id] = outcome_val
@catch_exception
[docs] def cancel(self,param_values): """ Cancel a job, by removing it from the jobs recorded so far in the experiment. :param param_values: Values of the parameters for the job to cancel. :type param_values: dict """ # Check whether this param_values has a results ID id = self._get_id(param_values) if id is not None: # Delete from internals del self._ids_to_param_values[id] if id in self._ids_to_outcome_values: del self._ids_to_outcome_values[id] # Delete from server self._client.result(str(id)).delete() else: print 'Did not find experiment with the provided parameters'
@catch_exception def _delete(self): """ Delete the experiment with the given name and description. Important, this cancels the experiment and removes all saved results! """ res = self._client.experiment(str(self.experiment_id)).delete() print 'Experiment has been deleted' @catch_exception
[docs] def pending(self): """ Return the list of jobs which have been suggested, but for which no result has been provided yet. :return: List of parameter values. :rtype: list """ # Sync with the REST server self._sync_with_server() # Find IDs of results with value None and append parameters to returned list ret = [] for key,val in self._ids_to_outcome_values.iteritems(): if val is None: ret.append(self._ids_to_param_values[key]) return list(ret)
@catch_exception
[docs] def clear_pending(self): """ Cancel jobs (results) that are marked as pending. """ p = self.pending() for job in p: self.cancel(job)
@catch_exception
[docs] def best(self): """ Return job with best outcome found so far. :return: Parameter values with best outcome. :rtype: dict """ # Sync with the REST server self._sync_with_server() # Find ID of result with best outcome ids = np.array(self._ids_to_outcome_values.keys()) outcomes = [self._ids_to_outcome_values[i] for i in ids] # Change Nones with infs outcomes = np.array(map(lambda x: x if x is not None else np.inf, outcomes)) result_id = ids[outcomes.argmax()] return self._ids_to_param_values[result_id]
@catch_exception
[docs] def report(self): """ Plot a visual report of the progress made so far in the experiment. """ # Sync with the REST server self._sync_with_server() # Report historical progress and results assumed pending import matplotlib.pyplot as plt # Get outcome values and put them in order of their IDs, # which should be equivalent to chronological order (of suggestion time) ids = np.array(self._ids_to_outcome_values.keys()) outcomes_values = np.array(self._ids_to_outcome_values.values()) # Change Nones with infs outcomes_values = np.array(map(lambda x: x if x is not None else np.inf, outcomes_values)) s = ids.argsort() ids = ids[s] outcome_values = outcomes_values[s] if outcome_values.size == 0 or np.all(np.isinf(outcome_values)): print 'There are no completed results to report' return # Plot progression plt.figure(1) plt.clf() y = outcome_values best_so_far = [ np.max(y[:(i+1)]) for i in range(len(y)) ] plt.scatter(range(len(y)),y,marker='x',color='k',label='Outcomes') plt.plot(range(len(y)),best_so_far,color='k',label='Best so far') plt.xlabel('Result #') plt.ylabel(self.outcome_name) plt.title('Results progression') plt.legend(loc=3) plt.draw() plt.ion() plt.show() # Plot table of results plt.figure(2) param_names = list(np.sort(self.parameters.keys())) col_names = ['Result #'] + param_names + [self.outcome_name] cell_text = [] for nb,id in enumerate(ids): # Get paramater values, put in correct order and add to # table with corresponding outcome value params, values = zip(*self._ids_to_param_values[id].iteritems()) s = np.argsort(params) values = np.array(values)[s] outcome = self._ids_to_outcome_values[id] cell_text.append([str(nb+1)] + [str(v) for v in values] + [str(outcome)]) if len(cell_text) > 20: cell_text = cell_text[-20:] the_table = plt.table(cellText = cell_text, colLabels=col_names, loc='center') ## change cell properties table_props=the_table.properties() table_cells=table_props['child_artists'] for cell in table_cells: cell.set_fontsize(8) plt.axis('off') plt.title('Table of results') plt.draw() plt.ion() plt.show()