Merge pull request #323 from leondavi/hf_integration

[APISERVER] Help refactor + add ipywidgets
leondavi · May 12, 2024 · 3c4ece3 · 3c4ece3
2 parents e3958b9 + 2b351a8
commit 3c4ece3
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 54 deletions.
diff --git a/src_py/apiServer/apiServer.py b/src_py/apiServer/apiServer.py
@@ -17,6 +17,7 @@
 from logger import *
 from NerlComDB import *
 from events_sync import *
+from apiServerHelp import API_SERVER_HELP_STR
 
 class ApiServer():
     def __init__(self):
@@ -38,59 +39,7 @@ def reset(self):
         return self.__init__()
 
     def help(self):
-    #i) data saved as .csv, training file ends with "_Training.csv", prediction with "_Prediction.csv" (may change in future)
-        print(
-f"""
-__________NERLNET CHECKLIST__________
-0. Run this Jupyter in the folder of generated .py files!
-1. Nerlnet configuration files are located at config directory
-   Make sure data and jsons in correct folder, and jsons include the correct paths
-    * Data includes: labeled prediction csv, training file, prediction file
-    * Prediction CSVs need to be ordered the same!
-    * jsonsDir is set to {self.json_dir_parser.get_json_dir_path()}
-            
-____________API COMMANDS_____________
-==========Setting experiment========
-
--showJsons():                                           shows available arch / conn / exp layouts
--list_datasets():                                       reads `hf_repo_ids.json` and list of datasets and files of Nerlnet organizaion on https://huggingface.co/Nerlnet
--download_dataset(idx, dir):                            downloads dataset files from Huggingface to the specified directory (default is /tmp/nerlnet/data/NerlnetData-master/nerlnet)
--add_repo_to_datasets_list(repo, name , description):   adds a repository to the datasets list in `hf_repo_ids.json`
--printArchParams(Num)                                   print description of selected arch file
--selectJsons():                                         get input from user for arch / conn / exp selection
--setJsons(arch, conn, exp):                             set layout in code
--getUserJsons():                                        returns the selected arch / conn / exp
-
--initialization(experiment_name, dc, conn, exp_flow, custom_csv_path):  
-                                                        set up server for a NerlNet run
-                                                        dc - distributed configuration file (can be generated by Nerlplanner)
-                                                        conn - connection map file, graph of connections between entities
-                                                        exp - experiment flow file, defines the flow of the experiment demonstrated as experiment phases of training and prediction
-                                                        custom_csv_path - optional, path to custom csv file for the experiment, overrides the one in experiment flow file
-                                                        
--send_jsons_to_devices():                               send each NerlNet device the dc and conn jsons to init entities on it
--sendDataToSources(phase(,split)):                      phase := "training" | "prediction". split := 1 default (split) | 2 (whole file). send the experiment data to sources (currently happens in beggining of train/predict)
-
-======== Running experiment ==========
--experiment_phase_is_valid()        returns True if there are more experiment phases to run
--run_current_experiment_phase()     runs the current experiment phase
--next_experiment_phase()            moves to the next experiment phase
-
-======== Retrieving statistics ======
--get_experiment_flow(experiment_name).generate_stats()   returns statistics object (E.g., assigned to StatsInst) class for the current experiment phase
--StatsInst.get_communication_stats_workers()         returns communication statistics for workers
--StatsInst.get_communication_stats_sources()         returns communication statistics for sources
--StatsInst.get_communication_stats_clients()         returns communication statistics for clients
--StatsInst.get_communication_stats_routers()         returns communication statistics for routers
--StatsInst.get_communication_stats_main_server()     returns communication statistics for main server
--StatsInst.get_loss_ts()                             returns the loss over time
--StatsInst.get_min_loss()                            returns the minimum loss
--StatsInst.get_missed_batches()                      returns the missed batches
-
-======== Workers Model Metrics and Performance ========
--StatsInst.get_confusion_matrices()                  returns tuple of two types of confusion matrices ordered by sources and ordered by workers
--StatsInst.get_model_performence_stats(confusion_matrix_worker_dict, saveToFile) returns the model performance statistics for the workers
-""")
+        print(API_SERVER_HELP_STR)        
 
     def __new_experiment(self, experiment_name : str, json_path: str, batch_size: int, network_componenets: NetworkComponents, csv_path = ""):
         assert experiment_name not in self.experiments_dict, "experiment name exists!"

diff --git a/src_py/apiServer/apiServerHelp.py b/src_py/apiServer/apiServerHelp.py
@@ -0,0 +1,54 @@
+
+API_SERVER_HELP_STR = """
+__________NERLNET CHECKLIST__________
+Nerlnet configuration files are located at config directory.
+Make sure the data and jsons are in the correct folder, and that the jsons include the correct paths
+* Data includes: a single csv that includes all the data for the experiment (training and prediction phases)
+* Jsons include: - distributed configuration (dc_<name>.json)
+                 - connection map (conn_<name>.json)
+                 - experiment flow (exp_<name>.json)
+* Jsons directory: can be defined by changing the config file: config/jsonsDir.nerlconfig
+
+____________API COMMANDS_____________
+==========Setting experiment========
+
+-showJsons():                                           lists available json files in jsons directory (dc, conn, exp) to be used with setJsons and getUserJsons
+-list_datasets():                                       reads `hf_repo_ids.json` and lists the datasets and files of the Nerlnet organization on https://huggingface.co/Nerlnet
+-download_dataset(idx, dir):                            downloads dataset files from Huggingface to the specified directory (default is /tmp/nerlnet/data/NerlnetData-master/nerlnet)
+-add_repo_to_datasets_list(repo, name , description):   adds a repository to the datasets list in `hf_repo_ids.json`
+-printArchParams(Num)                                   print description of selected arch file
+
+-selectJsons():                                         get input from user for arch / conn / exp selection
+-setJsons(arch, conn, exp):                             set selected jsons to get their path by getUserJsons
+-getUserJsons():                                        return a tuple of 3 paths to dc, conn, exp jsons that is used for initialization
+
+-initialization(experiment_name, dc, conn, exp_flow, custom_csv_path):  
+                                                        setting up the api-server to communicate with main-server of Nerlnet cluster
+                                                        dc - path to distributed configuration file (can be generated by Nerlplanner)
+                                                        conn - path to connection map file, graph of connections between entities
+                                                        exp_flow - path to experiment flow file, defines the flow of the experiment demonstrated as experiment phases of training and prediction
+                                                        custom_csv_path - optional, path to custom csv file for the experiment, overrides the one in experiment flow file
+                                                        
+-send_jsons_to_devices():                               send each NerlNet device the dc and conn jsons to init entities on it
+-sendDataToSources(phase(,split)):                      phase := "training" | "prediction". split := 1 default (split) | 2 (whole file). send the experiment data to sources (currently happens at the beginning of train/predict)
+
+======== Running experiment ==========
+-experiment_phase_is_valid()        returns True if there are more experiment phases to run
+-run_current_experiment_phase()     runs the current experiment phase
+-next_experiment_phase()            moves to the next experiment phase
+
+======== Retrieving statistics ======
+-get_experiment_flow(experiment_name).generate_stats()   returns statistics object (E.g., assigned to StatsInst) class for the current experiment phase
+-StatsInst.get_communication_stats_workers()         returns communication statistics for workers
+-StatsInst.get_communication_stats_sources()         returns communication statistics for sources
+-StatsInst.get_communication_stats_clients()         returns communication statistics for clients
+-StatsInst.get_communication_stats_routers()         returns communication statistics for routers
+-StatsInst.get_communication_stats_main_server()     returns communication statistics for main server
+-StatsInst.get_loss_ts()                             returns the loss over time
+-StatsInst.get_min_loss()                            returns the minimum loss
+-StatsInst.get_missed_batches()                      returns the missed batches
+
+======== Workers Model Metrics and Performance ========
+-StatsInst.get_confusion_matrices()                  returns tuple of two types of confusion matrices ordered by sources and ordered by workers
+-StatsInst.get_model_performence_stats(confusion_matrix_worker_dict, saveToFile) returns the model performance statistics for the workers
+"""
diff --git a/src_py/requirements.txt b/src_py/requirements.txt
@@ -7,4 +7,5 @@ pandas
 Requests
 scikit_learn
 seaborn
-huggingface_hub
+huggingface_hub
+ipywidgets