From 24d62eadc2ef0f24d28381902e27f1d042363422 Mon Sep 17 00:00:00 2001
From: Max Pumperla
Date: Wed, 20 Mar 2019 16:07:17 +0100
Subject: [PATCH] remove optimizers

---
 examples/Spark_ML_Pipeline.ipynb | 7 +------
 examples/ml_pipeline_otto.py     | 5 +----
 examples/mnist_mlp_spark.py      | 1 -
 tests/ml/test_params.py          | 7 -------
 tests/test_optimizers.py         | 0
 5 files changed, 2 insertions(+), 18 deletions(-)
 delete mode 100644 tests/test_optimizers.py

diff --git a/examples/Spark_ML_Pipeline.ipynb b/examples/Spark_ML_Pipeline.ipynb
index baeb53f..04c54f5 100644
--- a/examples/Spark_ML_Pipeline.ipynb
+++ b/examples/Spark_ML_Pipeline.ipynb
@@ -332,7 +332,7 @@
     "\n",
     "To lift the above Keras ```model``` to Spark, we define an ```Estimator``` on top of it. An ```Estimator``` is Spark's incarnation of a model that still has to be trained. It essentially only comes with only a single (required) method, namely ```fit```. Once we call ```fit``` on a data frame, we get back a ```Model```, which is a trained model with a ```transform``` method to predict labels.\n",
     "\n",
-    "We do this by initializing an ```ElephasEstimator``` and setting a few properties. As by now our input data frame will have many columns, we have to tell the model where to find features and labels by column name. Then we provide serialized versions of Keras model and Elephas optimizer. We can not plug in keras models into the ```Estimator``` directly, as Spark will have to serialize them anyway for communication with workers, so it's better to provide the serialization ourselves. In fact, while pyspark knows how to serialize ```model```, it is extremely inefficient and can break if models become too large. Spark ML is especially picky (and rightly so) about parameters and more or less prohibits you from providing non-atomic types and arrays of the latter. Most of the remaining parameters are optional and rather self explainatory. Plus, many of them you know if you have ever run a keras model before. We just include them here to show the full set of training configuration."
+    "We do this by initializing an ```ElephasEstimator``` and setting a few properties. Since by now our input data frame has many columns, we have to tell the model where to find features and labels by column name. Then we provide a serialized version of our Keras model. We cannot plug Keras models into the ```Estimator``` directly, as Spark has to serialize them anyway for communication with workers, so it's better to provide the serialization ourselves. In fact, while pyspark knows how to serialize ```model```, it is extremely inefficient and can break if models become too large. Spark ML is especially picky (and rightly so) about parameters and more or less prohibits you from providing non-atomic types and arrays of the latter. Most of the remaining parameters are optional and rather self-explanatory. Plus, many of them will be familiar if you have ever run a Keras model before. We just include them here to show the full set of training configuration options."
    ]
   },
   {
@@ -355,17 +355,12 @@
    ],
    "source": [
     "from elephas.ml_model import ElephasEstimator\n",
-    "from elephas import optimizers as elephas_optimizers\n",
-    "\n",
-    "# Define elephas optimizer (which tells the model how to aggregate updates on the Spark master)\n",
-    "adadelta = elephas_optimizers.Adadelta()\n",
     "\n",
     "# Initialize SparkML Estimator and set all relevant properties\n",
     "estimator = ElephasEstimator()\n",
     "estimator.setFeaturesCol(\"scaled_features\") # These two come directly from pyspark,\n",
     "estimator.setLabelCol(\"index_category\") # hence the camel case. Sorry :)\n",
     "estimator.set_keras_model_config(model.to_yaml()) # Provide serialized Keras model\n",
-    "estimator.set_optimizer_config(adadelta.get_config()) # Provide serialized Elephas optimizer\n",
     "estimator.set_categorical_labels(True)\n",
     "estimator.set_nb_classes(nb_classes)\n",
     "estimator.set_num_workers(1) # We just use one worker here. Feel free to adapt it.\n",
diff --git a/examples/ml_pipeline_otto.py b/examples/ml_pipeline_otto.py
index c515c6b..3341915 100644
--- a/examples/ml_pipeline_otto.py
+++ b/examples/ml_pipeline_otto.py
@@ -15,7 +15,7 @@
 from keras.layers import Dense, Dropout, Activation
 
 from elephas.ml_model import ElephasEstimator
-from elephas import optimizers as elephas_optimizers
+
 
 data_path = "../"
 
@@ -77,8 +77,6 @@ def load_data_rdd(csv_file, shuffle=True, train=True):
 sgd_conf = optimizers.serialize(sgd)
 
 # Initialize Elephas Spark ML Estimator
-adadelta = elephas_optimizers.Adadelta()
-
 estimator = ElephasEstimator()
 estimator.set_keras_model_config(model.to_yaml())
 estimator.set_optimizer_config(sgd_conf)
@@ -87,7 +85,6 @@ def load_data_rdd(csv_file, shuffle=True, train=True):
 estimator.set_metrics(['acc'])
 estimator.setFeaturesCol("scaled_features")
 estimator.setLabelCol("index_category")
-estimator.set_elephas_optimizer_config(adadelta.get_config())
 estimator.set_epochs(10)
 estimator.set_batch_size(128)
 estimator.set_num_workers(1)
diff --git a/examples/mnist_mlp_spark.py b/examples/mnist_mlp_spark.py
index 4975257..95bfbcb 100644
--- a/examples/mnist_mlp_spark.py
+++ b/examples/mnist_mlp_spark.py
@@ -9,7 +9,6 @@
 
 from elephas.spark_model import SparkModel
 from elephas.utils.rdd_utils import to_simple_rdd
-from elephas import optimizers as elephas_optimizers
 
 from pyspark import SparkContext, SparkConf
 
diff --git a/tests/ml/test_params.py b/tests/ml/test_params.py
index ea37619..bbc6a94 100644
--- a/tests/ml/test_params.py
+++ b/tests/ml/test_params.py
@@ -8,13 +8,6 @@ def test_has_keras_model_config():
     assert conf == param.get_keras_model_config()
 
 
-def test_has_elephas_optimizer_config():
-    param = HasElephasOptimizerConfig()
-    conf = {"foo": "bar"}
-    param.set_elephas_optimizer_config(conf)
-    assert conf == param.get_elephas_optimizer_config()
-
-
 def test_has_optimizer_config():
     param = HasKerasOptimizerConfig()
     conf = {"foo": "bar"}
diff --git a/tests/test_optimizers.py b/tests/test_optimizers.py
deleted file mode 100644
index e69de29..0000000
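
For context, a minimal sketch of what the estimator setup looks like once this patch is applied: only the serialized Keras model and Keras optimizer are handed to `ElephasEstimator`; there is no separate Elephas optimizer config anymore. The toy model architecture, `input_dim`, `nb_classes`, and the training DataFrame `df` are illustrative assumptions, not part of the patch; the estimator calls themselves are the ones used in the updated examples.

```python
# Sketch of the Spark ML workflow after this patch: the Elephas optimizer is gone,
# and only the serialized Keras optimizer is passed to the estimator.
# Assumes a SparkSession, a DataFrame `df` with "scaled_features"/"index_category"
# columns, and `input_dim`/`nb_classes` matching your data.
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Activation

from elephas.ml_model import ElephasEstimator

input_dim = 93   # illustrative, e.g. number of features in the Otto data
nb_classes = 9   # illustrative, e.g. number of target classes

# Plain Keras model, defined on the driver
model = Sequential()
model.add(Dense(128, input_dim=input_dim))
model.add(Activation('relu'))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# Serialize the Keras optimizer; no elephas.optimizers involved anymore
sgd = optimizers.SGD(lr=0.01)
sgd_conf = optimizers.serialize(sgd)

# Configure the estimator as in the updated examples
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())  # serialized Keras model
estimator.set_optimizer_config(sgd_conf)           # serialized Keras optimizer
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)
estimator.set_metrics(['acc'])
estimator.setFeaturesCol("scaled_features")        # pyspark-style setters
estimator.setLabelCol("index_category")
estimator.set_epochs(10)
estimator.set_batch_size(128)
estimator.set_num_workers(1)

# fitted_model = estimator.fit(df)                  # returns a trained Model with .transform()
```

Calling `fit` on a data frame then returns a trained model whose `transform` method predicts labels, as described in the notebook text above.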