1. Refactoring and correction for the first pypi push

eXascaleInfolab · Oct 14, 2024 · 1b5aef3 · 1b5aef3
1 parent c2593cf
commit 1b5aef3
Show file tree

Hide file tree

Showing 34 changed files with 143 additions and 150 deletions.
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/build/lib/imputegap/algorithms/cdrec.py b/build/lib/imputegap/algorithms/cdrec.py
@@ -1,9 +1,10 @@
 import ctypes
 import os
-import platform
 import time
 import ctypes as __native_c_types_import;
 import numpy as __numpy_import;
+import importlib.resources
+
 
 
 def __marshal_as_numpy_column(__ctype_container, __py_sizen, __py_sizem):
@@ -19,23 +20,21 @@ def __marshal_as_native_column(__py_matrix):
     return __ctype_marshal;
 
 
-def load_share_lib(name="lib_cdrec"):
+def load_share_lib(name="lib_cdrec", lib=True):
     """
     Determine the OS and load the correct shared library
     :param name: name of the library
     :return: the correct path to the library
     """
 
-    local_path_win = './algorithms/lib/' + name + '.dll'
-    local_path_lin = './algorithms/lib/' + name + '.so'
+    if lib:
+        lib_path = importlib.resources.files('imputegap.algorithms.lib').joinpath("./lib_cdrec.so")
+    else:
+        local_path_lin = './algorithms/lib/' + name + '.so'
 
-    if not os.path.exists(local_path_lin):
-        local_path_win = './imputegap/algorithms/lib/' + name + '.dll'
-        local_path_lin = './imputegap/algorithms/lib/' + name + '.so'
+        if not os.path.exists(local_path_lin):
+            local_path_lin = './imputegap/algorithms/lib/' + name + '.so'
 
-    if platform.system() == 'Windows':
-        lib_path = os.path.join(local_path_win)
-    else:
         lib_path = os.path.join(local_path_lin)
 
     return ctypes.CDLL(lib_path)
@@ -81,7 +80,7 @@ def native_cdrec(__py_matrix, __py_rank, __py_eps, __py_iters):
     return __py_recovered;
 
 
-def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True):
+def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
     """
     CDREC algorithm for imputation of missing data
     @author : Quentin Nater
@@ -92,6 +91,7 @@ def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True):
     :param iterations : number of iterations
 
     :param logs: print logs of time execution
+    :param lib_path: file to library
 
     :return: imputed_matrix, metrics : all time series with imputation data and their metrics
 

diff --git a/build/lib/imputegap/algorithms/stmvl.py b/build/lib/imputegap/algorithms/stmvl.py
@@ -1,7 +1,7 @@
 import ctypes
 import time
 import os
-import platform
+import importlib.resources
 import ctypes as __native_c_types_import;
 import numpy as __numpy_import;
 
@@ -17,25 +17,22 @@ def __marshal_as_native_column(__py_matrix):
     return __ctype_marshal;
 
 
-def load_share_lib(name = "lib_stmvl"):
+def load_share_lib(name = "lib_stmvl", lib=True):
     """
     Determine the OS and load the correct shared library
     :param name: name of the library
     :return: the correct path to the library
     """
 
-    local_path_win = './algorithms/lib/'+name+'.dll'
-    local_path_lin = './algorithms/lib/'+name+'.so'
+    if lib:
+        lib_path = importlib.resources.files('imputegap.algorithms.lib').joinpath("./lib_stmvl.so")
+    else:
+        local_path_lin = './algorithms/lib/'+name+'.so'
 
-    if not os.path.exists(local_path_lin):
-        local_path_win = './imputegap/algorithms/lib/'+name+'.dll'
-        local_path_lin = './imputegap/algorithms/lib/'+name+'.so'
+        if not os.path.exists(local_path_lin):
+            local_path_lin = './imputegap/algorithms/lib/'+name+'.so'
 
-    if platform.system() == 'Windows':
-        lib_path = os.path.join(local_path_win)
-    else:
         lib_path = os.path.join(local_path_lin)
-    #print("\n", lib_path, " has been loaded...")
 
     return ctypes.CDLL(lib_path)
 

diff --git a/build/lib/imputegap/assets/shap/eeg_iim_DTL_Beeswarm.png b/build/lib/imputegap/assets/shap/eeg_iim_DTL_Beeswarm.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_DTL_Waterfall.png b/build/lib/imputegap/assets/shap/eeg_iim_DTL_Waterfall.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_results.txt b/build/lib/imputegap/assets/shap/eeg_iim_results.txt
@@ -0,0 +1,22 @@
+Feature : 17    iim        with a score of 24.43      Trend              Entropy of successive pairs in symbolized series                  SB_MotifThree_quantile_hh
+Feature : 0     iim        with a score of 14.07      Geometry           5-bin histogram mode                                              DN_HistogramMode_5
+Feature : 10    iim        with a score of 9.91       Geometry           Goodness of exponential fit to embedding distance distribution    CO_Embed2_Dist_tau_d_expfit_meandiff
+Feature : 4     iim        with a score of 9.7        Correlation        Histogram-based automutual information (lag 2, 5 bins)            CO_HistogramAMI_even_2_5
+Feature : 14    iim        with a score of 9.52       Geometry           Negative outlier timing                                           DN_OutlierInclude_n_001_mdrmd
+Feature : 15    iim        with a score of 9.39       Transformation     Power in the lowest 20% of frequencies                            SP_Summaries_welch_rect_area_5_1
+Feature : 6     iim        with a score of 7.59       Geometry           Proportion of high incremental changes in the series              MD_hrv_classic_pnn40
+Feature : 1     iim        with a score of 3.46       Geometry           10-bin histogram mode                                             DN_HistogramMode_10
+Feature : 5     iim        with a score of 3.31       Correlation        Time reversibility                                                CO_trev_1_num
+Feature : 21    iim        with a score of 2.34       Trend              Error of 3-point rolling mean forecast                            FC_LocalSimple_mean3_stderr
+Feature : 8     iim        with a score of 2.25       Geometry           Transition matrix column variance                                 SB_TransitionMatrix_3ac_sumdiagcov
+Feature : 13    iim        with a score of 2.0        Geometry           Positive outlier timing                                           DN_OutlierInclude_p_001_mdrmd
+Feature : 18    iim        with a score of 1.76       Geometry           Rescaled range fluctuation analysis (low-scale scaling)           SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1
+Feature : 20    iim        with a score of 0.19       Transformation     Centroid frequency                                                SP_Summaries_welch_rect_centroid
+Feature : 2     iim        with a score of 0.07       Correlation        First 1/e crossing of the ACF                                     CO_f1ecac
+Feature : 3     iim        with a score of 0.0        Correlation        First minimum of the ACF                                          CO_FirstMin_ac
+Feature : 7     iim        with a score of 0.0        Geometry           Longest stretch of above-mean values                              SB_BinaryStats_mean_longstretch1
+Feature : 9     iim        with a score of 0.0        Trend              Wangs periodicity metric                                          PD_PeriodicityWang_th0_01
+Feature : 11    iim        with a score of 0.0        Correlation        First minimum of the AMI function                                 IN_AutoMutualInfoStats_40_gaussian_fmmi
+Feature : 12    iim        with a score of 0.0        Correlation        Change in autocorrelation timescale after incremental differencing FC_LocalSimple_mean1_tauresrat
+Feature : 16    iim        with a score of 0.0        Geometry           Longest stretch of decreasing values                              SB_BinaryStats_diff_longstretch0
+Feature : 19    iim        with a score of 0.0        Geometry           Detrended fluctuation analysis (low-scale scaling)                SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_shap_aggregate_plot.png b/build/lib/imputegap/assets/shap/eeg_iim_shap_aggregate_plot.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_shap_aggregate_reverse_plot.png b/build/lib/imputegap/assets/shap/eeg_iim_shap_aggregate_reverse_plot.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_shap_correlation_plot.png b/build/lib/imputegap/assets/shap/eeg_iim_shap_correlation_plot.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_shap_geometry_plot.png b/build/lib/imputegap/assets/shap/eeg_iim_shap_geometry_plot.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_shap_plot.png b/build/lib/imputegap/assets/shap/eeg_iim_shap_plot.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_shap_reverse_plot.png b/build/lib/imputegap/assets/shap/eeg_iim_shap_reverse_plot.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_shap_transformation_plot.png b/build/lib/imputegap/assets/shap/eeg_iim_shap_transformation_plot.png
diff --git a/build/lib/imputegap/assets/shap/eeg_iim_shap_trend_plot.png b/build/lib/imputegap/assets/shap/eeg_iim_shap_trend_plot.png
diff --git a/build/lib/imputegap/explainer/explainer.py b/build/lib/imputegap/explainer/explainer.py
@@ -1,5 +1,6 @@
 import math
 import os
+import time
 import importlib.resources
 
 
@@ -147,17 +148,14 @@ def convert_results(tmp, file, algo, descriptions, features, categories, mean_fe
 
         result_display = sorted(result_display, key=lambda tup: (tup[1], tup[2]), reverse=True)
 
-        for tup in result_display:
-            print(tup[2], end=",")
-
         with open(to_save + "_results.txt", 'w') as file_output:
             for (x, algo, rate, description, feature, category, mean_features) in result_display:
                 file_output.write(f"Feature : {x:<5} {algo:<10} with a score of {rate:<10} {category:<18} {description:<65} {feature}\n")
                 result_shap.append([file, algo, rate, description, feature, category, mean_features])
 
         return result_shap
 
-    def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, splitter=10, display=False):
+    def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, splitter=10, display=False, verbose=False):
         """
         Launch the SHAP model for explaining the features of the dataset
         @author : Quentin Nater
@@ -169,6 +167,7 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
         :param algorithm: algorithm used
         :param splitter: splitter from data training and testing
         :param display: display or not plots
+        :param verbose: display or not the prints
         :return: results of the explainer model
         """
 
@@ -207,12 +206,13 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
         print("\t SHAP_MODEL >> y_train shape:", y_train.shape)
         print("\t SHAP_MODEL >> x_test shape:", x_test.shape)
         print("\t SHAP_MODEL >> y_test shape:", y_test.shape, "\n")
-        print("\t SHAP_MODEL >> features shape:", x_features.shape)
-        print("\t SHAP_MODEL >> categories shape:", x_categories.shape)
-        print("\t SHAP_MODEL >> descriptions shape:", x_descriptions.shape, "\n")
-        print("\t SHAP_MODEL >> features OK:", np.all(np.all(x_features == x_features[0, :], axis=1)))
-        print("\t SHAP_MODEL >> categories OK:", np.all(np.all(x_categories == x_categories[0, :], axis=1)))
-        print("\t SHAP_MODEL >> descriptions OK:", np.all(np.all(x_descriptions==x_descriptions[0, :], axis=1)), "\n\n")
+        if verbose:
+            print("\t SHAP_MODEL >> features shape:", x_features.shape)
+            print("\t SHAP_MODEL >> categories shape:", x_categories.shape)
+            print("\t SHAP_MODEL >> descriptions shape:", x_descriptions.shape, "\n")
+            print("\t SHAP_MODEL >> features OK:", np.all(np.all(x_features == x_features[0, :], axis=1)))
+            print("\t SHAP_MODEL >> categories OK:", np.all(np.all(x_categories == x_categories[0, :], axis=1)))
+            print("\t SHAP_MODEL >> descriptions OK:", np.all(np.all(x_descriptions==x_descriptions[0, :], axis=1)), "\n\n")
 
         model = RandomForestRegressor()
         model.fit(x_train, y_train)
@@ -221,8 +221,6 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
         shval = exp.shap_values(x_test)
         shap_values = exp(x_train)
 
-        #print("\t\tSHAP VALUES : ", np.array(shval).shape, " with : \n\t", *shval)
-
         optimal_display = []
         for desc, group in zip(x_descriptions[0], x_categories[0]):
             optimal_display.append(desc + " (" + group + ")")
@@ -371,12 +369,13 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
         plt.close()
         print("\t\t\tGRAPH has benn computed : ", alpha, "\n\n")
 
-        print("\t\tSHAP Families details : \n")
-        print("\t\t\tgeometry:", geometry.shape)
-        print("\t\t\ttransformation:", transformation.shape)
-        print("\t\t\tcorrelation:", correlation.shape)
-        print("\t\t\ttrend':", trend.shape)
-        print("\t\t\tmean_features:", mean_features.shape, "\n\n")
+        if verbose:
+            print("\t\tSHAP Families details :")
+            print("\t\t\tgeometry:", geometry.shape)
+            print("\t\t\ttransformation:", transformation.shape)
+            print("\t\t\tcorrelation:", correlation.shape)
+            print("\t\t\ttrend':", trend.shape)
+            print("\t\t\tmean_features:", mean_features.shape, "\n\n")
 
         # Aggregate shapely values per element of X_test
         total_weights = [np.abs(shval.T[i]).mean(0) for i in range(len(shval[0]))]
@@ -393,7 +392,7 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
 
     def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar", missing_rate=0.4,
                        block_size=10, protection=0.1, use_seed=True, seed=42, limitation=15, splitter=0,
-                       file_name="ts", display=False):
+                       file_name="ts", display=False, verbose=False):
         """
         Handle parameters and set the variables to launch a model SHAP
         @author : Quentin Nater
@@ -410,33 +409,35 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar
         :param limitation: limitation of series for the model | default 15
         :param splitter: limitation of training series for the model | default 3/4 of limitation
         :param display: display or not the plots | default False
+        :param verbose: display or not the prints
 
         :return: ground_truth_matrixes, obfuscated_matrixes, output_metrics, input_params, shap_values
         """
 
+        start_time = time.time()  # Record start time
+
         if limitation > raw_data.shape[0]:
             limitation = int(raw_data.shape[0] * 0.75)
 
         if splitter == 0 or splitter >= limitation - 1:
             splitter = int(limitation * 0.60)
 
-        print("SHAP Explainer has been called\n\t",
-              "missing_values (", missing_rate * 100, "%)\n\t",
-              "for a contamination (", contamination, "), \n\t",
-              "imputated by (", algorithm, ") with params (", params, ")\n\t",
-              "with limitation and splitter after verification of (", limitation, ") and (", splitter, ") for ",
-              raw_data.shape, "...\n\n\tGeneration of the dataset with the time series...")
+        if verbose:
+            print("SHAP Explainer has been called\n\t",
+                  "missing_values (", missing_rate * 100, "%)\n\t",
+                  "for a contamination (", contamination, "), \n\t",
+                  "imputated by (", algorithm, ") with params (", params, ")\n\t",
+                  "with limitation and splitter after verification of (", limitation, ") and (", splitter, ") for ",
+                  raw_data.shape, "...\n\n\tGeneration of the dataset with the time series...")
 
         ground_truth_matrices, obfuscated_matrices = [], []
         output_metrics, output_rmse, input_params, input_params_full = [], [], [], []
 
         categories, features = Explainer.load_configuration()
 
-        print("categories", categories)
-        print("features", features)
-
         for current_series in range(0, limitation):
-            print("Generation ", current_series, "___________________________________________________________________")
+
+            print("Generation ", current_series, "/", limitation, "(", int((current_series/limitation)*100),"%)________________________________________________________")
             print("\tContamination ", current_series, "...")
 
             if contamination == "mcar":
@@ -461,6 +462,7 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar
             elif algorithm == "mrnn":
                 algo = Imputation.ML.MRNN(obfuscated_matrix)
 
+            algo.logs = False
             algo.impute(user_defined=True, params=params)
             algo.score(raw_data)
             imputation_results = algo.metrics
@@ -479,8 +481,12 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar
         for input, output in zip(input_params, output_metrics):
             shap_details.append((input, output["RMSE"]))
 
-        shap_values = Explainer.launch_shap_model(input_params, input_params_full, output_rmse, file_name, algorithm, splitter, display)
+        shap_values = Explainer.launch_shap_model(input_params, input_params_full, output_rmse, file_name, algorithm, splitter, display, verbose)
+
+
+        print("\n\nSHAP Explainer succeeded without fail, please find the results in : ./assets/shap/*\n")
 
-        print("\n\n\nSHAP Explainer succeeded without fail, please find the results in : ./assets/shap/*\n\n\n")
+        end_time = time.time()
+        print(f"\n\t\t> logs, shap explainer - Execution Time: {(end_time - start_time):.4f} seconds\n\n\n")
 
         return shap_values, shap_details
diff --git a/build/lib/imputegap/recovery/manager.py b/build/lib/imputegap/recovery/manager.py
@@ -4,6 +4,7 @@
 import matplotlib
 from scipy.stats import zscore
 from sklearn.preprocessing import MinMaxScaler
+import importlib.resources
 
 from imputegap.tools import utils
 
@@ -13,6 +14,7 @@
 from matplotlib import pyplot as plt  # type: ignore
 
 
+
 class TimeSeries:
 
     def __init__(self):
@@ -57,6 +59,10 @@ def load_timeseries(self, data=None, max_series=None, max_values=None):
         if data is not None:
             if isinstance(data, str):
                 print("\nThe time series has been loaded from " + str(data) + "\n")
+
+                if data in ["bafu.txt", "chlorine.txt", "climate.txt", "drift.txt", "eeg.txt", "meteo.txt", "test.txt", "test-large.txt"] :
+                    data = importlib.resources.files('imputegap.dataset').joinpath(data)
+
                 self.data = np.genfromtxt(data, delimiter=' ', max_rows=max_values)
 
                 if max_series is not None:

diff --git a/build/lib/imputegap/tools/utils.py b/build/lib/imputegap/tools/utils.py
@@ -17,15 +17,15 @@ def search_path(set_name="test"):
     :return: correct file paths
     """
 
-    if set_name is ["bafu", "chlorine", "climate", "drift", "eeg", "meteo", "test", "test-large"]:
-        filepath = importlib.resources.files('imputegap.dataset').joinpath(set_name + ".txt")
+    if set_name in ["bafu", "chlorine", "climate", "drift", "eeg", "meteo", "test", "test-large"]:
+        return set_name + ".txt"
     else:
         filepath = "../imputegap/dataset/" + set_name + ".txt"
 
         if not os.path.exists(filepath):
             filepath = filepath[1:]
 
-    return filepath
+        return filepath
 
 
 def get_save_path_asset():

diff --git a/dist/imputegap-0.1.0-py3-none-any.whl b/dist/imputegap-0.1.0-py3-none-any.whl
diff --git a/dist/imputegap-0.1.0.tar.gz b/dist/imputegap-0.1.0.tar.gz
diff --git a/dist/imputegap-0.1.1.tar.gz b/dist/imputegap-0.1.1.tar.gz
diff --git a/dist/imputegap-0.1.1-py3-none-any.whl → dist/imputegap-0.1.2-py3-none-any.whl b/dist/imputegap-0.1.1-py3-none-any.whl → dist/imputegap-0.1.2-py3-none-any.whl
diff --git a/dist/imputegap-0.1.2.tar.gz b/dist/imputegap-0.1.2.tar.gz
diff --git a/imputegap.egg-info/PKG-INFO b/imputegap.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imputegap
-Version: 0.1.1
+Version: 0.1.2
 Summary: A Library of Imputation Techniques for Time Series Data
 Home-page: https://github.com/eXascaleInfolab/ImputeGAP
 Author: Quentin Nater

diff --git a/imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc b/imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc
diff --git a/imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc b/imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc
diff --git a/imputegap/algorithms/cdrec.py b/imputegap/algorithms/cdrec.py
@@ -1,9 +1,10 @@
 import ctypes
 import os
-import platform
 import time
 import ctypes as __native_c_types_import;
 import numpy as __numpy_import;
+import importlib.resources
+
 
 
 def __marshal_as_numpy_column(__ctype_container, __py_sizen, __py_sizem):
@@ -19,23 +20,21 @@ def __marshal_as_native_column(__py_matrix):
     return __ctype_marshal;
 
 
-def load_share_lib(name="lib_cdrec"):
+def load_share_lib(name="lib_cdrec", lib=True):
     """
     Determine the OS and load the correct shared library
     :param name: name of the library
     :return: the correct path to the library
     """
 
-    local_path_win = './algorithms/lib/' + name + '.dll'
-    local_path_lin = './algorithms/lib/' + name + '.so'
+    if lib:
+        lib_path = importlib.resources.files('imputegap.algorithms.lib').joinpath("./lib_cdrec.so")
+    else:
+        local_path_lin = './algorithms/lib/' + name + '.so'
 
-    if not os.path.exists(local_path_lin):
-        local_path_win = './imputegap/algorithms/lib/' + name + '.dll'
-        local_path_lin = './imputegap/algorithms/lib/' + name + '.so'
+        if not os.path.exists(local_path_lin):
+            local_path_lin = './imputegap/algorithms/lib/' + name + '.so'
 
-    if platform.system() == 'Windows':
-        lib_path = os.path.join(local_path_win)
-    else:
         lib_path = os.path.join(local_path_lin)
 
     return ctypes.CDLL(lib_path)
@@ -81,7 +80,7 @@ def native_cdrec(__py_matrix, __py_rank, __py_eps, __py_iters):
     return __py_recovered;
 
 
-def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True):
+def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
     """
     CDREC algorithm for imputation of missing data
     @author : Quentin Nater
@@ -92,6 +91,7 @@ def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True):
     :param iterations : number of iterations
 
     :param logs: print logs of time execution
+    :param lib_path: file to library
 
     :return: imputed_matrix, metrics : all time series with imputation data and their metrics