Commit

remove optimizers
maxpumperla committed Mar 20, 2019
1 parent 6c5a74e commit 24d62ea
Showing 5 changed files with 2 additions and 18 deletions.
7 changes: 1 addition & 6 deletions examples/Spark_ML_Pipeline.ipynb
@@ -332,7 +332,7 @@
"\n",
"To lift the above Keras ```model``` to Spark, we define an ```Estimator``` on top of it. An ```Estimator``` is Spark's incarnation of a model that still has to be trained. It essentially only comes with only a single (required) method, namely ```fit```. Once we call ```fit``` on a data frame, we get back a ```Model```, which is a trained model with a ```transform``` method to predict labels.\n",
"\n",
"We do this by initializing an ```ElephasEstimator``` and setting a few properties. As by now our input data frame will have many columns, we have to tell the model where to find features and labels by column name. Then we provide serialized versions of Keras model and Elephas optimizer. We can not plug in keras models into the ```Estimator``` directly, as Spark will have to serialize them anyway for communication with workers, so it's better to provide the serialization ourselves. In fact, while pyspark knows how to serialize ```model```, it is extremely inefficient and can break if models become too large. Spark ML is especially picky (and rightly so) about parameters and more or less prohibits you from providing non-atomic types and arrays of the latter. Most of the remaining parameters are optional and rather self explainatory. Plus, many of them you know if you have ever run a keras model before. We just include them here to show the full set of training configuration."
"We do this by initializing an ```ElephasEstimator``` and setting a few properties. As by now our input data frame will have many columns, we have to tell the model where to find features and labels by column name. Then we provide serialized versions of our Keras model. We can not plug in keras models into the ```Estimator``` directly, as Spark will have to serialize them anyway for communication with workers, so it's better to provide the serialization ourselves. In fact, while pyspark knows how to serialize ```model```, it is extremely inefficient and can break if models become too large. Spark ML is especially picky (and rightly so) about parameters and more or less prohibits you from providing non-atomic types and arrays of the latter. Most of the remaining parameters are optional and rather self explainatory. Plus, many of them you know if you have ever run a keras model before. We just include them here to show the full set of training configuration."
]
},
{
@@ -355,17 +355,12 @@
],
"source": [
"from elephas.ml_model import ElephasEstimator\n",
"from elephas import optimizers as elephas_optimizers\n",
"\n",
"# Define elephas optimizer (which tells the model how to aggregate updates on the Spark master)\n",
"adadelta = elephas_optimizers.Adadelta()\n",
"\n",
"# Initialize SparkML Estimator and set all relevant properties\n",
"estimator = ElephasEstimator()\n",
"estimator.setFeaturesCol(\"scaled_features\") # These two come directly from pyspark,\n",
"estimator.setLabelCol(\"index_category\") # hence the camel case. Sorry :)\n",
"estimator.set_keras_model_config(model.to_yaml()) # Provide serialized Keras model\n",
"estimator.set_optimizer_config(adadelta.get_config()) # Provide serialized Elephas optimizer\n",
"estimator.set_categorical_labels(True)\n",
"estimator.set_nb_classes(nb_classes)\n",
"estimator.set_num_workers(1) # We just use one worker here. Feel free to adapt it.\n",
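For reference, a minimal sketch of what the updated notebook cell amounts to after this commit: the Elephas optimizer is gone and a serialized Keras optimizer is passed instead. Here `df`, `model` and `nb_classes` are assumed to be the training data frame, compiled Keras model and class count defined earlier in the notebook, and the optimizer choice is illustrative.

```python
from keras import optimizers
from elephas.ml_model import ElephasEstimator

# A plain Keras optimizer, serialized to a config; this replaces the removed Elephas optimizer.
sgd_conf = optimizers.serialize(optimizers.SGD(lr=0.01))

estimator = ElephasEstimator()
estimator.setFeaturesCol("scaled_features")         # pyspark-style setters, hence the camel case
estimator.setLabelCol("index_category")
estimator.set_keras_model_config(model.to_yaml())   # serialized Keras model
estimator.set_optimizer_config(sgd_conf)            # serialized Keras optimizer config
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)
estimator.set_num_workers(1)
estimator.set_epochs(10)
estimator.set_batch_size(128)

fitted_model = estimator.fit(df)            # Estimator.fit returns a trained Model ...
predictions = fitted_model.transform(df)    # ... whose transform method predicts labels
```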
5 changes: 1 addition & 4 deletions examples/ml_pipeline_otto.py
@@ -15,7 +15,7 @@
from keras.layers import Dense, Dropout, Activation

from elephas.ml_model import ElephasEstimator
from elephas import optimizers as elephas_optimizers


data_path = "../"

@@ -77,8 +77,6 @@ def load_data_rdd(csv_file, shuffle=True, train=True):
sgd_conf = optimizers.serialize(sgd)

# Initialize Elephas Spark ML Estimator
adadelta = elephas_optimizers.Adadelta()

estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(sgd_conf)
@@ -87,7 +85,6 @@ def load_data_rdd(csv_file, shuffle=True, train=True):
estimator.set_metrics(['acc'])
estimator.setFeaturesCol("scaled_features")
estimator.setLabelCol("index_category")
estimator.set_elephas_optimizer_config(adadelta.get_config())
estimator.set_epochs(10)
estimator.set_batch_size(128)
estimator.set_num_workers(1)
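The script-side change is the same substitution: the Keras optimizer is serialized once (the `sgd_conf` already present above) and becomes the only optimizer configuration the estimator receives. A sketch of the Keras-side round trip this relies on, with an illustrative SGD configuration:

```python
from keras import optimizers

# A Keras optimizer serializes to a plain config structure, which is the form
# set_optimizer_config expects; deserialize rebuilds an equivalent optimizer from it.
sgd = optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)
sgd_conf = optimizers.serialize(sgd)          # e.g. {'class_name': 'SGD', 'config': {...}}
restored = optimizers.deserialize(sgd_conf)   # reconstructs an equivalent SGD optimizer
```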
1 change: 0 additions & 1 deletion examples/mnist_mlp_spark.py
@@ -9,7 +9,6 @@

from elephas.spark_model import SparkModel
from elephas.utils.rdd_utils import to_simple_rdd
from elephas import optimizers as elephas_optimizers

from pyspark import SparkContext, SparkConf

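With the unused import gone, the RDD-based MNIST example relies entirely on the optimizer the Keras model was compiled with. A minimal sketch of the surrounding usage, assuming `model`, `x_train` and `y_train` come from the usual Keras MNIST MLP setup; the `SparkModel` arguments shown are illustrative rather than the exact values used in the script:

```python
from pyspark import SparkContext, SparkConf

from elephas.spark_model import SparkModel
from elephas.utils.rdd_utils import to_simple_rdd

conf = SparkConf().setAppName('mnist_mlp_spark').setMaster('local[2]')
sc = SparkContext(conf=conf)

# Turn the NumPy training data into an RDD of (features, label) pairs and train the
# compiled Keras model on it; no separate elephas optimizer is configured anymore.
rdd = to_simple_rdd(sc, x_train, y_train)
spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
spark_model.fit(rdd, epochs=20, batch_size=128, verbose=0, validation_split=0.1)
```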
7 changes: 0 additions & 7 deletions tests/ml/test_params.py
@@ -8,13 +8,6 @@ def test_has_keras_model_config():
    assert conf == param.get_keras_model_config()


def test_has_elephas_optimizer_config():
    param = HasElephasOptimizerConfig()
    conf = {"foo": "bar"}
    param.set_elephas_optimizer_config(conf)
    assert conf == param.get_elephas_optimizer_config()


def test_has_optimizer_config():
    param = HasKerasOptimizerConfig()
    conf = {"foo": "bar"}
Empty file removed tests/test_optimizers.py
