Skip to content

Commit

Permalink
1. Refactoring and correction for the first pypi push
Browse files Browse the repository at this point in the history
  • Loading branch information
qnater committed Oct 14, 2024
1 parent c2593cf commit 1b5aef3
Show file tree
Hide file tree
Showing 34 changed files with 143 additions and 150 deletions.
83 changes: 17 additions & 66 deletions .idea/workspace.xml

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions build/lib/imputegap/algorithms/cdrec.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import ctypes
import os
import platform
import time
import ctypes as __native_c_types_import;
import numpy as __numpy_import;
import importlib.resources



def __marshal_as_numpy_column(__ctype_container, __py_sizen, __py_sizem):
Expand All @@ -19,23 +20,21 @@ def __marshal_as_native_column(__py_matrix):
return __ctype_marshal;


def load_share_lib(name="lib_cdrec"):
def load_share_lib(name="lib_cdrec", lib=True):
"""
Determine the OS and load the correct shared library
:param name: name of the library
:return: the correct path to the library
"""

local_path_win = './algorithms/lib/' + name + '.dll'
local_path_lin = './algorithms/lib/' + name + '.so'
if lib:
lib_path = importlib.resources.files('imputegap.algorithms.lib').joinpath("./lib_cdrec.so")
else:
local_path_lin = './algorithms/lib/' + name + '.so'

if not os.path.exists(local_path_lin):
local_path_win = './imputegap/algorithms/lib/' + name + '.dll'
local_path_lin = './imputegap/algorithms/lib/' + name + '.so'
if not os.path.exists(local_path_lin):
local_path_lin = './imputegap/algorithms/lib/' + name + '.so'

if platform.system() == 'Windows':
lib_path = os.path.join(local_path_win)
else:
lib_path = os.path.join(local_path_lin)

return ctypes.CDLL(lib_path)
Expand Down Expand Up @@ -81,7 +80,7 @@ def native_cdrec(__py_matrix, __py_rank, __py_eps, __py_iters):
return __py_recovered;


def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True):
def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
"""
CDREC algorithm for imputation of missing data
@author : Quentin Nater
Expand All @@ -92,6 +91,7 @@ def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True):
:param iterations : number of iterations
:param logs: print logs of time execution
:param lib_path: file to library
:return: imputed_matrix, metrics : all time series with imputation data and their metrics
Expand Down
19 changes: 8 additions & 11 deletions build/lib/imputegap/algorithms/stmvl.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import ctypes
import time
import os
import platform
import importlib.resources
import ctypes as __native_c_types_import;
import numpy as __numpy_import;

Expand All @@ -17,25 +17,22 @@ def __marshal_as_native_column(__py_matrix):
return __ctype_marshal;


def load_share_lib(name = "lib_stmvl"):
def load_share_lib(name = "lib_stmvl", lib=True):
"""
Determine the OS and load the correct shared library
:param name: name of the library
:return: the correct path to the library
"""

local_path_win = './algorithms/lib/'+name+'.dll'
local_path_lin = './algorithms/lib/'+name+'.so'
if lib:
lib_path = importlib.resources.files('imputegap.algorithms.lib').joinpath("./lib_stmvl.so")
else:
local_path_lin = './algorithms/lib/'+name+'.so'

if not os.path.exists(local_path_lin):
local_path_win = './imputegap/algorithms/lib/'+name+'.dll'
local_path_lin = './imputegap/algorithms/lib/'+name+'.so'
if not os.path.exists(local_path_lin):
local_path_lin = './imputegap/algorithms/lib/'+name+'.so'

if platform.system() == 'Windows':
lib_path = os.path.join(local_path_win)
else:
lib_path = os.path.join(local_path_lin)
#print("\n", lib_path, " has been loaded...")

return ctypes.CDLL(lib_path)

Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 22 additions & 0 deletions build/lib/imputegap/assets/shap/eeg_iim_results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Feature : 17 iim with a score of 24.43 Trend Entropy of successive pairs in symbolized series SB_MotifThree_quantile_hh
Feature : 0 iim with a score of 14.07 Geometry 5-bin histogram mode DN_HistogramMode_5
Feature : 10 iim with a score of 9.91 Geometry Goodness of exponential fit to embedding distance distribution CO_Embed2_Dist_tau_d_expfit_meandiff
Feature : 4 iim with a score of 9.7 Correlation Histogram-based automutual information (lag 2, 5 bins) CO_HistogramAMI_even_2_5
Feature : 14 iim with a score of 9.52 Geometry Negative outlier timing DN_OutlierInclude_n_001_mdrmd
Feature : 15 iim with a score of 9.39 Transformation Power in the lowest 20% of frequencies SP_Summaries_welch_rect_area_5_1
Feature : 6 iim with a score of 7.59 Geometry Proportion of high incremental changes in the series MD_hrv_classic_pnn40
Feature : 1 iim with a score of 3.46 Geometry 10-bin histogram mode DN_HistogramMode_10
Feature : 5 iim with a score of 3.31 Correlation Time reversibility CO_trev_1_num
Feature : 21 iim with a score of 2.34 Trend Error of 3-point rolling mean forecast FC_LocalSimple_mean3_stderr
Feature : 8 iim with a score of 2.25 Geometry Transition matrix column variance SB_TransitionMatrix_3ac_sumdiagcov
Feature : 13 iim with a score of 2.0 Geometry Positive outlier timing DN_OutlierInclude_p_001_mdrmd
Feature : 18 iim with a score of 1.76 Geometry Rescaled range fluctuation analysis (low-scale scaling) SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1
Feature : 20 iim with a score of 0.19 Transformation Centroid frequency SP_Summaries_welch_rect_centroid
Feature : 2 iim with a score of 0.07 Correlation First 1/e crossing of the ACF CO_f1ecac
Feature : 3 iim with a score of 0.0 Correlation First minimum of the ACF CO_FirstMin_ac
Feature : 7 iim with a score of 0.0 Geometry Longest stretch of above-mean values SB_BinaryStats_mean_longstretch1
Feature : 9 iim with a score of 0.0 Trend Wangs periodicity metric PD_PeriodicityWang_th0_01
Feature : 11 iim with a score of 0.0 Correlation First minimum of the AMI function IN_AutoMutualInfoStats_40_gaussian_fmmi
Feature : 12 iim with a score of 0.0 Correlation Change in autocorrelation timescale after incremental differencing FC_LocalSimple_mean1_tauresrat
Feature : 16 iim with a score of 0.0 Geometry Longest stretch of decreasing values SB_BinaryStats_diff_longstretch0
Feature : 19 iim with a score of 0.0 Geometry Detrended fluctuation analysis (low-scale scaling) SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
68 changes: 37 additions & 31 deletions build/lib/imputegap/explainer/explainer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import math
import os
import time
import importlib.resources


Expand Down Expand Up @@ -147,17 +148,14 @@ def convert_results(tmp, file, algo, descriptions, features, categories, mean_fe

result_display = sorted(result_display, key=lambda tup: (tup[1], tup[2]), reverse=True)

for tup in result_display:
print(tup[2], end=",")

with open(to_save + "_results.txt", 'w') as file_output:
for (x, algo, rate, description, feature, category, mean_features) in result_display:
file_output.write(f"Feature : {x:<5} {algo:<10} with a score of {rate:<10} {category:<18} {description:<65} {feature}\n")
result_shap.append([file, algo, rate, description, feature, category, mean_features])

return result_shap

def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, splitter=10, display=False):
def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, splitter=10, display=False, verbose=False):
"""
Launch the SHAP model for explaining the features of the dataset
@author : Quentin Nater
Expand All @@ -169,6 +167,7 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
:param algorithm: algorithm used
:param splitter: splitter from data training and testing
:param display: display or not plots
:param verbose: display or not the prints
:return: results of the explainer model
"""

Expand Down Expand Up @@ -207,12 +206,13 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
print("\t SHAP_MODEL >> y_train shape:", y_train.shape)
print("\t SHAP_MODEL >> x_test shape:", x_test.shape)
print("\t SHAP_MODEL >> y_test shape:", y_test.shape, "\n")
print("\t SHAP_MODEL >> features shape:", x_features.shape)
print("\t SHAP_MODEL >> categories shape:", x_categories.shape)
print("\t SHAP_MODEL >> descriptions shape:", x_descriptions.shape, "\n")
print("\t SHAP_MODEL >> features OK:", np.all(np.all(x_features == x_features[0, :], axis=1)))
print("\t SHAP_MODEL >> categories OK:", np.all(np.all(x_categories == x_categories[0, :], axis=1)))
print("\t SHAP_MODEL >> descriptions OK:", np.all(np.all(x_descriptions==x_descriptions[0, :], axis=1)), "\n\n")
if verbose:
print("\t SHAP_MODEL >> features shape:", x_features.shape)
print("\t SHAP_MODEL >> categories shape:", x_categories.shape)
print("\t SHAP_MODEL >> descriptions shape:", x_descriptions.shape, "\n")
print("\t SHAP_MODEL >> features OK:", np.all(np.all(x_features == x_features[0, :], axis=1)))
print("\t SHAP_MODEL >> categories OK:", np.all(np.all(x_categories == x_categories[0, :], axis=1)))
print("\t SHAP_MODEL >> descriptions OK:", np.all(np.all(x_descriptions==x_descriptions[0, :], axis=1)), "\n\n")

model = RandomForestRegressor()
model.fit(x_train, y_train)
Expand All @@ -221,8 +221,6 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
shval = exp.shap_values(x_test)
shap_values = exp(x_train)

#print("\t\tSHAP VALUES : ", np.array(shval).shape, " with : \n\t", *shval)

optimal_display = []
for desc, group in zip(x_descriptions[0], x_categories[0]):
optimal_display.append(desc + " (" + group + ")")
Expand Down Expand Up @@ -371,12 +369,13 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli
plt.close()
print("\t\t\tGRAPH has benn computed : ", alpha, "\n\n")

print("\t\tSHAP Families details : \n")
print("\t\t\tgeometry:", geometry.shape)
print("\t\t\ttransformation:", transformation.shape)
print("\t\t\tcorrelation:", correlation.shape)
print("\t\t\ttrend':", trend.shape)
print("\t\t\tmean_features:", mean_features.shape, "\n\n")
if verbose:
print("\t\tSHAP Families details :")
print("\t\t\tgeometry:", geometry.shape)
print("\t\t\ttransformation:", transformation.shape)
print("\t\t\tcorrelation:", correlation.shape)
print("\t\t\ttrend':", trend.shape)
print("\t\t\tmean_features:", mean_features.shape, "\n\n")

# Aggregate shapely values per element of X_test
total_weights = [np.abs(shval.T[i]).mean(0) for i in range(len(shval[0]))]
Expand All @@ -393,7 +392,7 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli

def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar", missing_rate=0.4,
block_size=10, protection=0.1, use_seed=True, seed=42, limitation=15, splitter=0,
file_name="ts", display=False):
file_name="ts", display=False, verbose=False):
"""
Handle parameters and set the variables to launch a model SHAP
@author : Quentin Nater
Expand All @@ -410,33 +409,35 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar
:param limitation: limitation of series for the model | default 15
:param splitter: limitation of training series for the model | default 3/4 of limitation
:param display: display or not the plots | default False
:param verbose: display or not the prints
:return: ground_truth_matrixes, obfuscated_matrixes, output_metrics, input_params, shap_values
"""

start_time = time.time() # Record start time

if limitation > raw_data.shape[0]:
limitation = int(raw_data.shape[0] * 0.75)

if splitter == 0 or splitter >= limitation - 1:
splitter = int(limitation * 0.60)

print("SHAP Explainer has been called\n\t",
"missing_values (", missing_rate * 100, "%)\n\t",
"for a contamination (", contamination, "), \n\t",
"imputated by (", algorithm, ") with params (", params, ")\n\t",
"with limitation and splitter after verification of (", limitation, ") and (", splitter, ") for ",
raw_data.shape, "...\n\n\tGeneration of the dataset with the time series...")
if verbose:
print("SHAP Explainer has been called\n\t",
"missing_values (", missing_rate * 100, "%)\n\t",
"for a contamination (", contamination, "), \n\t",
"imputated by (", algorithm, ") with params (", params, ")\n\t",
"with limitation and splitter after verification of (", limitation, ") and (", splitter, ") for ",
raw_data.shape, "...\n\n\tGeneration of the dataset with the time series...")

ground_truth_matrices, obfuscated_matrices = [], []
output_metrics, output_rmse, input_params, input_params_full = [], [], [], []

categories, features = Explainer.load_configuration()

print("categories", categories)
print("features", features)

for current_series in range(0, limitation):
print("Generation ", current_series, "___________________________________________________________________")

print("Generation ", current_series, "/", limitation, "(", int((current_series/limitation)*100),"%)________________________________________________________")
print("\tContamination ", current_series, "...")

if contamination == "mcar":
Expand All @@ -461,6 +462,7 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar
elif algorithm == "mrnn":
algo = Imputation.ML.MRNN(obfuscated_matrix)

algo.logs = False
algo.impute(user_defined=True, params=params)
algo.score(raw_data)
imputation_results = algo.metrics
Expand All @@ -479,8 +481,12 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar
for input, output in zip(input_params, output_metrics):
shap_details.append((input, output["RMSE"]))

shap_values = Explainer.launch_shap_model(input_params, input_params_full, output_rmse, file_name, algorithm, splitter, display)
shap_values = Explainer.launch_shap_model(input_params, input_params_full, output_rmse, file_name, algorithm, splitter, display, verbose)


print("\n\nSHAP Explainer succeeded without fail, please find the results in : ./assets/shap/*\n")

print("\n\n\nSHAP Explainer succeeded without fail, please find the results in : ./assets/shap/*\n\n\n")
end_time = time.time()
print(f"\n\t\t> logs, shap explainer - Execution Time: {(end_time - start_time):.4f} seconds\n\n\n")

return shap_values, shap_details
6 changes: 6 additions & 0 deletions build/lib/imputegap/recovery/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import matplotlib
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
import importlib.resources

from imputegap.tools import utils

Expand All @@ -13,6 +14,7 @@
from matplotlib import pyplot as plt # type: ignore



class TimeSeries:

def __init__(self):
Expand Down Expand Up @@ -57,6 +59,10 @@ def load_timeseries(self, data=None, max_series=None, max_values=None):
if data is not None:
if isinstance(data, str):
print("\nThe time series has been loaded from " + str(data) + "\n")

if data in ["bafu.txt", "chlorine.txt", "climate.txt", "drift.txt", "eeg.txt", "meteo.txt", "test.txt", "test-large.txt"] :
data = importlib.resources.files('imputegap.dataset').joinpath(data)

self.data = np.genfromtxt(data, delimiter=' ', max_rows=max_values)

if max_series is not None:
Expand Down
6 changes: 3 additions & 3 deletions build/lib/imputegap/tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ def search_path(set_name="test"):
:return: correct file paths
"""

if set_name is ["bafu", "chlorine", "climate", "drift", "eeg", "meteo", "test", "test-large"]:
filepath = importlib.resources.files('imputegap.dataset').joinpath(set_name + ".txt")
if set_name in ["bafu", "chlorine", "climate", "drift", "eeg", "meteo", "test", "test-large"]:
return set_name + ".txt"
else:
filepath = "../imputegap/dataset/" + set_name + ".txt"

if not os.path.exists(filepath):
filepath = filepath[1:]

return filepath
return filepath


def get_save_path_asset():
Expand Down
Binary file removed dist/imputegap-0.1.0-py3-none-any.whl
Binary file not shown.
Binary file removed dist/imputegap-0.1.0.tar.gz
Binary file not shown.
Binary file removed dist/imputegap-0.1.1.tar.gz
Binary file not shown.
Binary file not shown.
Binary file added dist/imputegap-0.1.2.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion imputegap.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: imputegap
Version: 0.1.1
Version: 0.1.2
Summary: A Library of Imputation Techniques for Time Series Data
Home-page: https://github.com/eXascaleInfolab/ImputeGAP
Author: Quentin Nater
Expand Down
Binary file modified imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc
Binary file not shown.
Binary file modified imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc
Binary file not shown.
22 changes: 11 additions & 11 deletions imputegap/algorithms/cdrec.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import ctypes
import os
import platform
import time
import ctypes as __native_c_types_import;
import numpy as __numpy_import;
import importlib.resources



def __marshal_as_numpy_column(__ctype_container, __py_sizen, __py_sizem):
Expand All @@ -19,23 +20,21 @@ def __marshal_as_native_column(__py_matrix):
return __ctype_marshal;


def load_share_lib(name="lib_cdrec"):
def load_share_lib(name="lib_cdrec", lib=True):
"""
Determine the OS and load the correct shared library
:param name: name of the library
:return: the correct path to the library
"""

local_path_win = './algorithms/lib/' + name + '.dll'
local_path_lin = './algorithms/lib/' + name + '.so'
if lib:
lib_path = importlib.resources.files('imputegap.algorithms.lib').joinpath("./lib_cdrec.so")
else:
local_path_lin = './algorithms/lib/' + name + '.so'

if not os.path.exists(local_path_lin):
local_path_win = './imputegap/algorithms/lib/' + name + '.dll'
local_path_lin = './imputegap/algorithms/lib/' + name + '.so'
if not os.path.exists(local_path_lin):
local_path_lin = './imputegap/algorithms/lib/' + name + '.so'

if platform.system() == 'Windows':
lib_path = os.path.join(local_path_win)
else:
lib_path = os.path.join(local_path_lin)

return ctypes.CDLL(lib_path)
Expand Down Expand Up @@ -81,7 +80,7 @@ def native_cdrec(__py_matrix, __py_rank, __py_eps, __py_iters):
return __py_recovered;


def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True):
def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
"""
CDREC algorithm for imputation of missing data
@author : Quentin Nater
Expand All @@ -92,6 +91,7 @@ def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True):
:param iterations : number of iterations
:param logs: print logs of time execution
:param lib_path: file to library
:return: imputed_matrix, metrics : all time series with imputation data and their metrics
Expand Down
Loading

0 comments on commit 1b5aef3

Please sign in to comment.