From bb6c6f2187e451abb738fe86e2ca36d944155bed Mon Sep 17 00:00:00 2001
From: Johannes Freischuetz
Date: Fri, 28 Jun 2024 16:43:22 -0500
Subject: [PATCH] Require explicit arguments for mlos_core optimizers (#760)

This is a simple PR that makes all arguments explicit for optimizer-related
function calls, in preparation for adding additional arguments in #751, and to
make that change easier to review.

---------

Co-authored-by: Brian Kroth
Co-authored-by: Brian Kroth
---
 .../optimizers/mlos_core_optimizer.py         |  4 +-
 .../optimizers/toy_optimization_loop_test.py  |  2 +-
 .../bayesian_optimizers/bayesian_optimizer.py | 12 ++--
 .../bayesian_optimizers/smac_optimizer.py     | 48 +++++++--------
 .../mlos_core/optimizers/flaml_optimizer.py   | 18 +++---
 mlos_core/mlos_core/optimizers/optimizer.py   | 58 +++++++++----------
 .../mlos_core/optimizers/random_optimizer.py  | 18 +++---
 .../optimizers/bayesian_optimizers_test.py    |  4 +-
 .../tests/optimizers/one_hot_test.py          | 16 ++---
 .../optimizers/optimizer_multiobj_test.py     |  2 +-
 .../tests/optimizers/optimizer_test.py        | 16 ++---
 11 files changed, 99 insertions(+), 99 deletions(-)

diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
index 7747035c134..8e7c75a0d51 100644
--- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
+++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -117,7 +117,7 @@ def bulk_register(self,
         # TODO: Specify (in the config) which metrics to pass to the optimizer.
         # Issue: https://github.com/microsoft/MLOS/issues/745
-        self._opt.register(df_configs, df_scores[opt_targets].astype(float))
+        self._opt.register(configs=df_configs, scores=df_scores[opt_targets].astype(float))
         if _LOG.isEnabledFor(logging.DEBUG):
             (score, _) = self.get_best_observation()
@@ -195,7 +195,7 @@ def register(self, tunables: TunableGroups, status: Status,
             _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
         # TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745 - self._opt.register(df_config, pd.DataFrame([registered_score], dtype=float)) + self._opt.register(configs=df_config, scores=pd.DataFrame([registered_score], dtype=float)) return registered_score def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]: diff --git a/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py b/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py index 2307bcd646d..183db1dc620 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py +++ b/mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py @@ -50,7 +50,7 @@ def _optimize(env: Environment, opt: Optimizer) -> Tuple[float, TunableGroups]: config_df = config_to_dataframe(config) logger("config: %s", str(config)) try: - logger("prediction: %s", opt._opt.surrogate_predict(config_df)) + logger("prediction: %s", opt._opt.surrogate_predict(configs=config_df)) except RuntimeError: pass diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py index f066be1fb91..2de01637f89 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py @@ -20,14 +20,14 @@ class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta): """Abstract base class defining the interface for Bayesian optimization.""" @abstractmethod - def surrogate_predict(self, configurations: pd.DataFrame, + def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: """Obtain a prediction from this Bayesian optimizer's surrogate model for the given configuration(s). Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. context : pd.DataFrame Not Yet Implemented. @@ -35,14 +35,14 @@ def surrogate_predict(self, configurations: pd.DataFrame, pass # pylint: disable=unnecessary-pass # pragma: no cover @abstractmethod - def acquisition_function(self, configurations: pd.DataFrame, + def acquisition_function(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: """Invokes the acquisition function from this Bayesian optimizer for the given configuration. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. context : pd.DataFrame Not Yet Implemented. 
diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 8a433218fa5..43803b7dbb6 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -80,10 +80,10 @@ def __init__(self, *, # pylint: disable=too-many-locals,too-many-arguments See Also: mlos_bench.optimizer.bulk_register max_ratio : Optional[int] - Maximum ratio of max_trials to be random configurations to be evaluated + Maximum ratio of max_trials to be random configs to be evaluated at start to bootstrap the optimizer. Useful if you want to explicitly control the number of random - configurations evaluated at start. + configs evaluated at start. use_default_config: bool Whether to use the default config for the first trial after random initialization. @@ -168,7 +168,7 @@ def __init__(self, *, # pylint: disable=too-many-locals,too-many-arguments initial_design_args['n_configs'] = n_random_init if n_random_init > 0.25 * max_trials and max_ratio is None: warning( - 'Number of random initial configurations (%d) is ' + + 'Number of random initial configs (%d) is ' + 'greater than 25%% of max_trials (%d). ' + 'Consider setting max_ratio to avoid SMAC overriding n_random_init.', n_random_init, @@ -241,17 +241,17 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None # -- this planned to be fixed in some future release: https://github.com/automl/SMAC3/issues/946 raise RuntimeError('This function should never be called.') - def _register(self, configurations: pd.DataFrame, + def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations and scores. + """Registers the given configs and scores. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : pd.DataFrame Not Yet Implemented. @@ -262,7 +262,7 @@ def _register(self, configurations: pd.DataFrame, warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) # Register each trial (one-by-one) - for (config, (_i, score)) in zip(self._to_configspace_configs(configurations), scores.iterrows()): + for (config, (_i, score)) in zip(self._to_configspace_configs(configs=configs), scores.iterrows()): # Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance info: TrialInfo = self.trial_info_map.get( config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed)) @@ -272,7 +272,7 @@ def _register(self, configurations: pd.DataFrame, # Save optimizer once we register all configs self.base_optimizer.optimizer.save() - def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. 
Parameters @@ -299,10 +299,10 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())) return config_df - def register_pending(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() - def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: + def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: from smac.utils.configspace import convert_configurations_to_array # pylint: disable=import-outside-toplevel if context is not None: @@ -318,11 +318,11 @@ def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.D if self.base_optimizer._config_selector._model is None: raise RuntimeError('Surrogate model is not yet trained') - configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configurations)) - mean_predictions, _ = self.base_optimizer._config_selector._model.predict(configs) + config_array: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configs=configs)) + mean_predictions, _ = self.base_optimizer._config_selector._model.predict(config_array) return mean_predictions.reshape(-1,) - def acquisition_function(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: + def acquisition_function(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) if self._space_adapter: @@ -332,28 +332,28 @@ def acquisition_function(self, configurations: pd.DataFrame, context: Optional[p if self.base_optimizer._config_selector._acquisition_function is None: raise RuntimeError('Acquisition function is not yet initialized') - configs: list = self._to_configspace_configs(configurations) - return self.base_optimizer._config_selector._acquisition_function(configs).reshape(-1,) + cs_configs: list = self._to_configspace_configs(configs=configs) + return self.base_optimizer._config_selector._acquisition_function(cs_configs).reshape(-1,) def cleanup(self) -> None: if self._temp_output_directory is not None: self._temp_output_directory.cleanup() self._temp_output_directory = None - def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSpace.Configuration]: - """Convert a dataframe of configurations to a list of ConfigSpace configurations. + def _to_configspace_configs(self, *, configs: pd.DataFrame) -> List[ConfigSpace.Configuration]: + """Convert a dataframe of configs to a list of ConfigSpace configs. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. Returns ------- - configurations : list - List of ConfigSpace configurations. + configs : list + List of ConfigSpace configs. 
""" return [ ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict()) - for (_, config) in configurations.astype('O').iterrows() + for (_, config) in configs.astype('O').iterrows() ] diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index 0ad3c2da29c..a58e74af021 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -85,24 +85,24 @@ def __init__(self, *, # pylint: disable=too-many-arguments self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {} self._suggested_config: Optional[dict] - def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations and scores. + """Registers the given configs and scores. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : None Not Yet Implemented. """ if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - for (_, config), (_, score) in zip(configurations.astype('O').iterrows(), scores.iterrows()): + for (_, config), (_, score) in zip(configs.astype('O').iterrows(), scores.iterrows()): cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration( self.optimizer_parameter_space, values=config.to_dict()) if cs_config in self.evaluated_samples: @@ -112,7 +112,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, score=float(np.average(score.astype(float), weights=self._objective_weights)), ) - def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. Sampled at random using ConfigSpace. @@ -132,7 +132,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: config: dict = self._get_next_config() return pd.DataFrame(config, index=[0]) - def register_pending(self, configurations: pd.DataFrame, + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() @@ -165,7 +165,7 @@ def _get_next_config(self) -> dict: Since FLAML does not provide an ask-and-tell interface, we need to create a new instance of FLAML each time we get asked for a new suggestion. This is suboptimal performance-wise, but works. - To do so, we use any previously evaluated configurations to bootstrap FLAML (i.e., warm-start). + To do so, we use any previously evaluated configs to bootstrap FLAML (i.e., warm-start). 
For more info: https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function#warm-start Returns diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index f1cedb85dcc..a72a4e1eb80 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -68,16 +68,16 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]: """Get the space adapter instance (if any).""" return self._space_adapter - def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, + def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Wrapper method, which employs the space adapter (if any), before registering the configurations and scores. + """Wrapper method, which employs the space adapter (if any), before registering the configs and scores. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : pd.DataFrame Not Yet Implemented. @@ -87,40 +87,40 @@ def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, "Mismatched optimization targets." assert self._has_context is None or self._has_context ^ (context is None), \ "Context must always be added or never be added." - assert len(configurations) == len(scores), \ - "Mismatched number of configurations and scores." + assert len(configs) == len(scores), \ + "Mismatched number of configs and scores." if context is not None: - assert len(configurations) == len(context), \ - "Mismatched number of configurations and context." - assert configurations.shape[1] == len(self.parameter_space.values()), \ + assert len(configs) == len(context), \ + "Mismatched number of configs and context." + assert configs.shape[1] == len(self.parameter_space.values()), \ "Mismatched configuration shape." - self._observations.append((configurations, scores, context)) + self._observations.append((configs, scores, context)) self._has_context = context is not None if self._space_adapter: - configurations = self._space_adapter.inverse_transform(configurations) - assert configurations.shape[1] == len(self.optimizer_parameter_space.values()), \ + configs = self._space_adapter.inverse_transform(configs) + assert configs.shape[1] == len(self.optimizer_parameter_space.values()), \ "Mismatched configuration shape after inverse transform." - return self._register(configurations, scores, context) + return self._register(configs=configs, scores=scores, context=context) @abstractmethod - def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations and scores. + """Registers the given configs and scores. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. 
scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : pd.DataFrame Not Yet Implemented. """ pass # pylint: disable=unnecessary-pass # pragma: no cover - def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame: + def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame: """ Wrapper method, which employs the space adapter (if any), after suggesting a new configuration. @@ -142,7 +142,7 @@ def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False if self.space_adapter is not None: configuration = self.space_adapter.inverse_transform(configuration) else: - configuration = self._suggest(context) + configuration = self._suggest(context=context) assert len(configuration) == 1, \ "Suggest must return a single configuration." assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \ @@ -154,7 +154,7 @@ def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False return configuration @abstractmethod - def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. Parameters @@ -170,16 +170,16 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: pass # pylint: disable=unnecessary-pass # pragma: no cover @abstractmethod - def register_pending(self, configurations: pd.DataFrame, + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations as "pending". + """Registers the given configs as "pending". That is it say, it has been suggested by the optimizer, and an experiment trial has been started. This can be useful for executing multiple trials in parallel, retry logic, etc. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. context : pd.DataFrame Not Yet Implemented. """ @@ -202,7 +202,7 @@ def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.Data for _, _, context in self._observations]).reset_index(drop=True) return (configs, scores, contexts if len(contexts.columns) > 0 else None) - def get_best_observations(self, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: + def get_best_observations(self, *, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: """ Get the N best observations so far as a triplet of DataFrames (config, score, context). Default is N=1. The columns are ordered in ASCENDING order of the optimization targets. @@ -231,7 +231,7 @@ def cleanup(self) -> None: Redefine this method in optimizers that require cleanup. """ - def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: + def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame: """ Convert numpy array from one-hot encoding to a DataFrame with categoricals and ints in proper columns. 
@@ -254,7 +254,7 @@ def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: j += 1 return pd.DataFrame(df_dict) - def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: + def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: """ Convert pandas DataFrame to one-hot-encoded numpy array. """ diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index f81092a65d8..8893b456ac1 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -24,19 +24,19 @@ class RandomOptimizer(BaseOptimizer): The parameter space to optimize. """ - def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, + def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: - """Registers the given configurations and scores. + """Registers the given configs and scores. - Doesn't do anything on the RandomOptimizer except storing configurations for logging. + Doesn't do anything on the RandomOptimizer except storing configs for logging. Parameters ---------- - configurations : pd.DataFrame - Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + configs : pd.DataFrame + Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. scores : pd.DataFrame - Scores from running the configurations. The index is the same as the index of the configurations. + Scores from running the configs. The index is the same as the index of the configs. context : None Not Yet Implemented. @@ -45,7 +45,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) # should we pop them from self.pending_observations? - def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: """Suggests a new configuration. Sampled at random using ConfigSpace. 
@@ -65,7 +65,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]) - def register_pending(self, configurations: pd.DataFrame, + def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() - # self._pending_observations.append((configurations, context)) + # self._pending_observations.append((configs, context)) diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py index 69ce4f8dffb..037e85ef739 100644 --- a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py +++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py @@ -39,11 +39,11 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp context = pd.DataFrame([["something"]]) with pytest.raises(UserWarning): - optimizer.register(suggestion, scores, context=context) + optimizer.register(configs=suggestion, scores=scores, context=context) with pytest.raises(UserWarning): optimizer.suggest(context=context) if isinstance(optimizer, BaseBayesianOptimizer): with pytest.raises(UserWarning): - optimizer.surrogate_predict(suggestion, context=context) + optimizer.surrogate_predict(configs=suggestion, context=context) diff --git a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py index 0a9a6ed3c51..8e10afa3023 100644 --- a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py +++ b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py @@ -85,7 +85,7 @@ def test_to_1hot_data_frame(optimizer: BaseOptimizer, """ Toy problem to test one-hot encoding of dataframe. """ - assert optimizer._to_1hot(data_frame) == pytest.approx(one_hot_data_frame) + assert optimizer._to_1hot(config=data_frame) == pytest.approx(one_hot_data_frame) def test_to_1hot_series(optimizer: BaseOptimizer, @@ -93,7 +93,7 @@ def test_to_1hot_series(optimizer: BaseOptimizer, """ Toy problem to test one-hot encoding of series. """ - assert optimizer._to_1hot(series) == pytest.approx(one_hot_series) + assert optimizer._to_1hot(config=series) == pytest.approx(one_hot_series) def test_from_1hot_data_frame(optimizer: BaseOptimizer, @@ -102,7 +102,7 @@ def test_from_1hot_data_frame(optimizer: BaseOptimizer, """ Toy problem to test one-hot decoding of dataframe. """ - assert optimizer._from_1hot(one_hot_data_frame).to_dict() == data_frame.to_dict() + assert optimizer._from_1hot(config=one_hot_data_frame).to_dict() == data_frame.to_dict() def test_from_1hot_series(optimizer: BaseOptimizer, @@ -111,7 +111,7 @@ def test_from_1hot_series(optimizer: BaseOptimizer, """ Toy problem to test one-hot decoding of series. """ - one_hot_df = optimizer._from_1hot(one_hot_series) + one_hot_df = optimizer._from_1hot(config=one_hot_series) assert one_hot_df.shape[0] == 1, f"Unexpected number of rows ({one_hot_df.shape[0]} != 1)" assert one_hot_df.iloc[0].to_dict() == series.to_dict() @@ -120,7 +120,7 @@ def test_round_trip_data_frame(optimizer: BaseOptimizer, data_frame: pd.DataFram """ Round-trip test for one-hot-encoding and then decoding a data frame. 
""" - df_round_trip = optimizer._from_1hot(optimizer._to_1hot(data_frame)) + df_round_trip = optimizer._from_1hot(config=optimizer._to_1hot(config=data_frame)) assert df_round_trip.x.to_numpy() == pytest.approx(data_frame.x) assert (df_round_trip.y == data_frame.y).all() assert (df_round_trip.z == data_frame.z).all() @@ -130,7 +130,7 @@ def test_round_trip_series(optimizer: BaseOptimizer, series: pd.DataFrame) -> No """ Round-trip test for one-hot-encoding and then decoding a series. """ - series_round_trip = optimizer._from_1hot(optimizer._to_1hot(series)) + series_round_trip = optimizer._from_1hot(config=optimizer._to_1hot(config=series)) assert series_round_trip.x.to_numpy() == pytest.approx(series.x) assert (series_round_trip.y == series.y).all() assert (series_round_trip.z == series.z).all() @@ -141,7 +141,7 @@ def test_round_trip_reverse_data_frame(optimizer: BaseOptimizer, """ Round-trip test for one-hot-decoding and then encoding of a numpy array. """ - round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_data_frame)) + round_trip = optimizer._to_1hot(config=optimizer._from_1hot(config=one_hot_data_frame)) assert round_trip == pytest.approx(one_hot_data_frame) @@ -150,5 +150,5 @@ def test_round_trip_reverse_series(optimizer: BaseOptimizer, """ Round-trip test for one-hot-decoding and then encoding of a numpy array. """ - round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_series)) + round_trip = optimizer._to_1hot(config=optimizer._from_1hot(config=one_hot_series)) assert round_trip == pytest.approx(one_hot_series) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py index 4ff5f157f74..e3c053fa5b4 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py @@ -99,7 +99,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) assert set(observation.columns) == {'main_score', 'other_score'} - optimizer.register(suggestion, observation) + optimizer.register(configs=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 67c7eddf3b2..49ff6916358 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -56,7 +56,7 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace # pending not implemented with pytest.raises(NotImplementedError): - optimizer.register_pending(suggestion) + optimizer.register_pending(configs=suggestion) @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ @@ -103,7 +103,7 @@ def objective(x: pd.Series) -> pd.DataFrame: configuration.is_valid_configuration() observation = objective(suggestion['x']) assert isinstance(observation, pd.DataFrame) - optimizer.register(suggestion, observation) + optimizer.register(configs=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) @@ -126,10 +126,10 @@ def objective(x: pd.Series) -> pd.DataFrame: # It would be better to put this into bayesian_optimizer_test but then we'd have to refit the model if isinstance(optimizer, 
BaseBayesianOptimizer): - pred_best = optimizer.surrogate_predict(best_config) + pred_best = optimizer.surrogate_predict(configs=best_config) assert pred_best.shape == (1,) - pred_all = optimizer.surrogate_predict(all_configs) + pred_all = optimizer.surrogate_predict(configs=all_configs) assert pred_all.shape == (20,) @@ -270,14 +270,14 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # loop for optimizer suggestion = optimizer.suggest() observation = objective(suggestion) - optimizer.register(suggestion, observation) + optimizer.register(configs=suggestion, scores=observation) # loop for llamatune-optimizer suggestion = llamatune_optimizer.suggest() _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0] assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3) # optimizer explores 1-dimensional space observation = objective(suggestion) - llamatune_optimizer.register(suggestion, observation) + llamatune_optimizer.register(configs=suggestion, scores=observation) # Retrieve best observations best_observation = optimizer.get_best_observations() @@ -311,7 +311,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # .surrogate_predict method not currently implemented if space adapter is employed if isinstance(llamatune_optimizer, BaseBayesianOptimizer): with pytest.raises(NotImplementedError): - llamatune_optimizer.surrogate_predict(llamatune_best_config) + llamatune_optimizer.surrogate_predict(configs=llamatune_best_config) # Dynamically determine all of the optimizers we have implemented. @@ -388,7 +388,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # Test registering the suggested configuration with a score. observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) - optimizer.register(suggestion, observation) + optimizer.register(configs=suggestion, scores=observation) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame)
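The patch above is the change record itself; as a quick illustration (not part of the patch), here is a minimal, self-contained sketch of the keyword-only calling convention it enforces. Only the `register(self, *, configs, scores, context=None)` and `suggest(self, *, context=None, defaults=False)` signature shapes are taken from the diff; `ToyOptimizer`, its trivial suggestion, and the toy objective are hypothetical stand-ins for illustration.

```python
"""Minimal sketch (not part of the patch) of the keyword-only convention enforced above."""
from typing import Optional

import pandas as pd


class ToyOptimizer:
    """Hypothetical stand-in mirroring the register()/suggest() signatures after this change."""

    def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame:
        # Return a single-row config dataframe, as the real optimizers do.
        return pd.DataFrame({"x": [0.5]})

    def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
                 context: Optional[pd.DataFrame] = None) -> None:
        # The bare `*` makes configs/scores/context keyword-only arguments.
        assert len(configs) == len(scores), "Mismatched number of configs and scores."


opt = ToyOptimizer()
suggestion = opt.suggest()                             # keyword-only args; no positional context
observation = pd.DataFrame({"score": [suggestion["x"].iloc[0] ** 2]})
opt.register(configs=suggestion, scores=observation)   # OK: explicit keywords, as required after this PR
# opt.register(suggestion, observation)                # would raise TypeError: positional args no longer accepted
```

The design intent, per the commit message, is that call sites name every argument so that new parameters (such as those planned in #751) can be added without silently shifting positional arguments at existing call sites.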