diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 177efb5..e38505d 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,19 +2,38 @@ + + - - - - - + + + + - - - - + + + + + + + + + + + + + + + + + + + + + + @@ -37,7 +56,7 @@ - + @@ -54,35 +73,35 @@ - { - "keyToString": { - "RunOnceActivity.OpenProjectViewOnStart": "true", - "RunOnceActivity.ShowReadmeOnStart": "true", - "WebServerToolWindowFactoryState": "false", - "last_opened_file_path": "C:/Users/nquen/switchdrive/MST_MasterThesis/imputegap/tests", - "nodejs_package_manager_path": "npm", - "settings.editor.selected.configurable": "com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable", - "vue.rearranger.settings.migration": "true" + +}]]> + - + + - - - - + + - + - + - + - + + + - - + + - - - + @@ -216,7 +242,7 @@ - + @@ -236,12 +262,13 @@ + - + - + \ No newline at end of file diff --git a/imputegap/assets/contamination/test_contamination.png b/imputegap/assets/contamination/test_contamination.png index e95ea06..7d2237e 100644 Binary files a/imputegap/assets/contamination/test_contamination.png and b/imputegap/assets/contamination/test_contamination.png differ diff --git a/imputegap/assets/ground_truth/test_ground_truth.png b/imputegap/assets/ground_truth/test_ground_truth.png deleted file mode 100644 index 61ef585..0000000 Binary files a/imputegap/assets/ground_truth/test_ground_truth.png and /dev/null differ diff --git a/imputegap/assets/ground_truth_normalized/.gitkeep b/imputegap/assets/ground_truth_normalized/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/imputegap/assets/ground_truth/.gitkeep b/imputegap/assets/gt/.gitkeep similarity index 100% rename from imputegap/assets/ground_truth/.gitkeep rename to imputegap/assets/gt/.gitkeep diff --git a/imputegap/assets/gt/test_gt.png b/imputegap/assets/gt/test_gt.png new file mode 100644 index 0000000..e57f74a Binary files /dev/null and b/imputegap/assets/gt/test_gt.png differ diff --git 
a/imputegap/assets/imputation/test_imputation.png b/imputegap/assets/imputation/test_imputation.png index 427b1e7..f770ec1 100644 Binary files a/imputegap/assets/imputation/test_imputation.png and b/imputegap/assets/imputation/test_imputation.png differ diff --git a/imputegap/contamination/__init__.py b/imputegap/contamination/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/imputegap/contamination/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/imputegap/contamination/__pycache__/__init__.cpython-311.pyc b/imputegap/contamination/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 44efba5..0000000 Binary files a/imputegap/contamination/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/imputegap/contamination/__pycache__/__init__.cpython-312.pyc b/imputegap/contamination/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index d937f9d..0000000 Binary files a/imputegap/contamination/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/imputegap/contamination/__pycache__/__init__.cpython-38.pyc b/imputegap/contamination/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index c2e1895..0000000 Binary files a/imputegap/contamination/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/imputegap/contamination/__pycache__/contamination.cpython-311.pyc b/imputegap/contamination/__pycache__/contamination.cpython-311.pyc deleted file mode 100644 index 6e35651..0000000 Binary files a/imputegap/contamination/__pycache__/contamination.cpython-311.pyc and /dev/null differ diff --git a/imputegap/contamination/__pycache__/contamination.cpython-312.pyc b/imputegap/contamination/__pycache__/contamination.cpython-312.pyc deleted file mode 100644 index 0323517..0000000 Binary files a/imputegap/contamination/__pycache__/contamination.cpython-312.pyc and /dev/null differ diff --git a/imputegap/contamination/__pycache__/contamination.cpython-38.pyc 
b/imputegap/contamination/__pycache__/contamination.cpython-38.pyc deleted file mode 100644 index 915356d..0000000 Binary files a/imputegap/contamination/__pycache__/contamination.cpython-38.pyc and /dev/null differ diff --git a/imputegap/evaluation/__pycache__/evaluation.cpython-38.pyc b/imputegap/evaluation/__pycache__/evaluation.cpython-38.pyc deleted file mode 100644 index 6cb2b22..0000000 Binary files a/imputegap/evaluation/__pycache__/evaluation.cpython-38.pyc and /dev/null differ diff --git a/imputegap/explainer/explainer.py b/imputegap/explainer/explainer.py index 9256774..2e1a82c 100644 --- a/imputegap/explainer/explainer.py +++ b/imputegap/explainer/explainer.py @@ -8,8 +8,8 @@ from matplotlib import pyplot as plt from sklearn.ensemble import RandomForestRegressor -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation class Explainer: diff --git a/imputegap/imputation/__pycache__/__init__.cpython-311.pyc b/imputegap/imputation/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 63b8e51..0000000 Binary files a/imputegap/imputation/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/imputegap/imputation/__pycache__/__init__.cpython-312.pyc b/imputegap/imputation/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index f20a4c1..0000000 Binary files a/imputegap/imputation/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/imputegap/imputation/__pycache__/__init__.cpython-38.pyc b/imputegap/imputation/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index eb08c15..0000000 Binary files a/imputegap/imputation/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/imputegap/imputation/__pycache__/imputation.cpython-311.pyc b/imputegap/imputation/__pycache__/imputation.cpython-311.pyc deleted 
file mode 100644 index b1d959e..0000000 Binary files a/imputegap/imputation/__pycache__/imputation.cpython-311.pyc and /dev/null differ diff --git a/imputegap/imputation/__pycache__/imputation.cpython-38.pyc b/imputegap/imputation/__pycache__/imputation.cpython-38.pyc deleted file mode 100644 index 6a73657..0000000 Binary files a/imputegap/imputation/__pycache__/imputation.cpython-38.pyc and /dev/null differ diff --git a/imputegap/manager/__pycache__/__init__.cpython-312.pyc b/imputegap/manager/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 40a97b5..0000000 Binary files a/imputegap/manager/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/imputegap/manager/__pycache__/__init__.cpython-38.pyc b/imputegap/manager/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 0d3099f..0000000 Binary files a/imputegap/manager/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/imputegap/manager/__pycache__/manager.cpython-38.pyc b/imputegap/manager/__pycache__/manager.cpython-38.pyc deleted file mode 100644 index c100c8d..0000000 Binary files a/imputegap/manager/__pycache__/manager.cpython-38.pyc and /dev/null differ diff --git a/imputegap/optimization/__init__.py b/imputegap/optimization/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/imputegap/optimization/__pycache__/__init__.cpython-312.pyc b/imputegap/optimization/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 9d50302..0000000 Binary files a/imputegap/optimization/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/imputegap/optimization/__pycache__/algorithm_parameters.cpython-312.pyc b/imputegap/optimization/__pycache__/algorithm_parameters.cpython-312.pyc deleted file mode 100644 index ab13558..0000000 Binary files a/imputegap/optimization/__pycache__/algorithm_parameters.cpython-312.pyc and /dev/null differ diff --git a/imputegap/contamination/README.md b/imputegap/recovery/README.md 
similarity index 63% rename from imputegap/contamination/README.md rename to imputegap/recovery/README.md index 4cbcb9f..597cc14 100644 --- a/imputegap/contamination/README.md +++ b/imputegap/recovery/README.md @@ -1,6 +1,7 @@ ![My Logo](../../assets/logo_imputegab.png) -# Scenarios +# CONTAMINATION +## Scenarios @@ -23,7 +24,7 @@ -
MNumber of time series
BBlock size
+
### MCAR MCAR selects random series and remove block at random positions until a total of W of all points of time series are missing. @@ -32,7 +33,7 @@ This scenario uses random number generator with fixed seed and will produce the Definition - + @@ -49,4 +50,31 @@ This scenario uses random number generator with fixed seed and will produce the -
N10 - 100%NMAX
MMAX
B2 - 20
\ No newline at end of file + + +
+ +### MISSING PERCENTAGE +MISSING PERCENTAGE selects a percentage of the series to contaminate, from the first to the last, and removes a desired percentage of values from each selected series. + + + Definition + + + + + + + + + + + + + + + + + + +
NMAX
MMAX
R1 - 100%
S1 - 100%
W(N-P) * R
BR

\ No newline at end of file diff --git a/imputegap/imputation/__init__.py b/imputegap/recovery/__init__.py similarity index 100% rename from imputegap/imputation/__init__.py rename to imputegap/recovery/__init__.py diff --git a/imputegap/recovery/__pycache__/__init__.cpython-312.pyc b/imputegap/recovery/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..d411536 Binary files /dev/null and b/imputegap/recovery/__pycache__/__init__.cpython-312.pyc differ diff --git a/imputegap/recovery/__pycache__/contamination.cpython-312.pyc b/imputegap/recovery/__pycache__/contamination.cpython-312.pyc new file mode 100644 index 0000000..0125b44 Binary files /dev/null and b/imputegap/recovery/__pycache__/contamination.cpython-312.pyc differ diff --git a/imputegap/imputation/__pycache__/imputation.cpython-312.pyc b/imputegap/recovery/__pycache__/imputation.cpython-312.pyc similarity index 77% rename from imputegap/imputation/__pycache__/imputation.cpython-312.pyc rename to imputegap/recovery/__pycache__/imputation.cpython-312.pyc index 2a81820..15a10c8 100644 Binary files a/imputegap/imputation/__pycache__/imputation.cpython-312.pyc and b/imputegap/recovery/__pycache__/imputation.cpython-312.pyc differ diff --git a/imputegap/manager/__pycache__/manager.cpython-312.pyc b/imputegap/recovery/__pycache__/manager.cpython-312.pyc similarity index 65% rename from imputegap/manager/__pycache__/manager.cpython-312.pyc rename to imputegap/recovery/__pycache__/manager.cpython-312.pyc index 4b8f77c..dfe2e2b 100644 Binary files a/imputegap/manager/__pycache__/manager.cpython-312.pyc and b/imputegap/recovery/__pycache__/manager.cpython-312.pyc differ diff --git a/imputegap/optimization/__pycache__/bayesian_optimization.cpython-312.pyc b/imputegap/recovery/__pycache__/optimization.cpython-312.pyc similarity index 76% rename from imputegap/optimization/__pycache__/bayesian_optimization.cpython-312.pyc rename to imputegap/recovery/__pycache__/optimization.cpython-312.pyc index 
c822100..145172b 100644 Binary files a/imputegap/optimization/__pycache__/bayesian_optimization.cpython-312.pyc and b/imputegap/recovery/__pycache__/optimization.cpython-312.pyc differ diff --git a/imputegap/contamination/contamination.py b/imputegap/recovery/contamination.py similarity index 83% rename from imputegap/contamination/contamination.py rename to imputegap/recovery/contamination.py index 8fd236c..d525d0c 100644 --- a/imputegap/contamination/contamination.py +++ b/imputegap/recovery/contamination.py @@ -1,9 +1,27 @@ -import math import numpy as np class Contamination: + def verification_limitation(percentage, low_limit=0.01, high_limit=1.0): + """ + Format the percentage given by the user. + :param percentage: The percentage to be checked. + :param low_limit: The lower limit of the acceptable percentage range. + :param high_limit: The upper limit of the acceptable percentage range. + :return: Adjusted percentage. + """ + if low_limit <= percentage <= high_limit: + return percentage # No modification needed + + elif 1 <= percentage <= 100: + print(f"The percentage {percentage} is between 1 and 100. 
Dividing by 100 to convert to a decimal.") + return percentage / 100 + + else: + print("The percentage", percentage, "is out of the acceptable range", low_limit, "-", high_limit, ".") + return percentage + def format_selection(ts, selection): """ Format the selection of series based on keywords @@ -59,6 +77,10 @@ def scenario_mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, prot if use_seed: np.random.seed(seed) + missing_rate = Contamination.verification_limitation(missing_rate) + series_impacted = Contamination.verification_limitation(series_impacted) + protection = Contamination.verification_limitation(protection) + ts_contaminated = ts.copy() M, _ = ts_contaminated.shape @@ -128,6 +150,10 @@ def scenario_missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, prote ts_contaminated = ts.copy() M, _ = ts_contaminated.shape + missing_rate = Contamination.verification_limitation(missing_rate) + series_impacted = Contamination.verification_limitation(series_impacted) + protection = Contamination.verification_limitation(protection) + nbr_series_impacted = int(np.ceil(M * series_impacted)) print("\n\nMISSING PERCENTAGE contamination has been called with :" diff --git a/imputegap/imputation/imputation.py b/imputegap/recovery/imputation.py similarity index 98% rename from imputegap/imputation/imputation.py rename to imputegap/recovery/imputation.py index d81e78c..037cdad 100644 --- a/imputegap/imputation/imputation.py +++ b/imputegap/recovery/imputation.py @@ -1,14 +1,12 @@ import re -import os -import toml from imputegap.algorithms.cdrec import cdrec from imputegap.algorithms.iim import iim from imputegap.algorithms.min_impute import min_impute from imputegap.algorithms.mrnn import mrnn from imputegap.algorithms.stmvl import stmvl from imputegap.algorithms.zero_impute import zero_impute -from imputegap.evaluation.evaluation import Evaluation -from imputegap.manager import utils +from imputegap.tools.evaluation import Evaluation +from imputegap.tools 
import utils class Imputation: diff --git a/imputegap/manager/manager.py b/imputegap/recovery/manager.py similarity index 91% rename from imputegap/manager/manager.py rename to imputegap/recovery/manager.py index 1a0acfd..6f5e29e 100644 --- a/imputegap/manager/manager.py +++ b/imputegap/recovery/manager.py @@ -145,11 +145,12 @@ def normalization_z_score(self, ts): return z_scores - def plot(self, ts_type="ground_truth", title='Time Series Data', save_path="", limitation=10, size=(16, 8), display=True, colors=['dimgrey', 'plum', 'lightblue', 'mediumseagreen', 'khaki']): + def plot(self, ts_type="gt", title='Time Series Data', save_path="", limitation=10, size=(16, 8), display=True, colors=['dimgrey', 'plum', 'lightblue', 'mediumseagreen', 'khaki']): """ Plot a chosen time series @author Quentin Nater + :param ts_type : ("gt", "contamination", "imputation"), type of time series to print | default "gt" :param title: title of the plot :param save_path : path to save locally the plot :param limitation: number of series displayed inside the plot @@ -165,7 +166,7 @@ def plot(self, ts_type="ground_truth", title='Time Series Data', save_path="", l if limitation == 0: limitation = self.ts.shape[0] - if ts_type == "ground_truth": + if ts_type == "gt": for i in range(self.ts.shape[0]): plt.plot(np.arange(self.ts.shape[1]), self.ts[i, :], label=f'Series {i + 1}') number_of_series += 1 @@ -173,21 +174,16 @@ def plot(self, ts_type="ground_truth", title='Time Series Data', save_path="", l if number_of_series == limitation: break - elif ts_type == "ground_truth_normalized": - for i in range(self.normalized_ts.shape[0]): - plt.plot(np.arange(self.normalized_ts.shape[1]), self.normalized_ts[i, :], label=f'Series {i + 1}') - number_of_series += 1 - - if number_of_series == limitation: - break elif ts_type == "contamination": for i in range(self.ts.shape[0]): color = colors[i % len(colors)] - plt.plot(np.arange(self.ts.shape[1]), self.ts[i, :], 'r--', label=f'Series {i + 1}-GT') + if 
np.isnan(self.ts_contaminate[i, :]).any(): + plt.plot(np.arange(self.ts.shape[1]), self.ts[i, :], 'r--', label=f'Series {i + 1}') + plt.plot(np.arange(self.ts_contaminate.shape[1]), self.ts_contaminate[i, :], linewidth=2.5, - color=color, linestyle='-', label=f'Series {i + 1}-MV') + color=color, linestyle='-', label=f'Series {i + 1}-CNT') number_of_series += 1 if number_of_series == limitation: diff --git a/imputegap/optimization/bayesian_optimization.py b/imputegap/recovery/optimization.py similarity index 96% rename from imputegap/optimization/bayesian_optimization.py rename to imputegap/recovery/optimization.py index e9daa78..34126d3 100644 --- a/imputegap/optimization/bayesian_optimization.py +++ b/imputegap/recovery/optimization.py @@ -1,13 +1,14 @@ import os - +import toml import numpy as np + + import skopt -import toml from skopt.space import Integer from skopt.utils import use_named_args -from imputegap.imputation.imputation import Imputation -from imputegap.optimization.algorithm_parameters import SEARCH_SPACES +from imputegap.recovery.imputation import Imputation +from imputegap.tools.algorithm_parameters import SEARCH_SPACES # Define the search space for each algorithm separately search_spaces = SEARCH_SPACES diff --git a/imputegap/runner_contamination.py b/imputegap/runner_contamination.py new file mode 100644 index 0000000..1e60ca3 --- /dev/null +++ b/imputegap/runner_contamination.py @@ -0,0 +1,32 @@ +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.recovery.manager import TimeSeries +from imputegap.recovery.optimization import Optimization +from imputegap.tools.utils import display_title +from imputegap.tools import utils +import os + + +def check_block_size(filename): + if "test" in filename: + return (2, 2) + else: + return (10, 10) + + +if __name__ == '__main__': + + display_title() + + filename = "eeg" + + file_path = os.path.join("./dataset/", filename + ".txt") + 
gap = TimeSeries(data=file_path, normalization="z_score", limitation_values=100) + + block_size, plot_limit = check_block_size(filename) + + gap.ts_contaminate = Contamination.scenario_missing_percentage(ts=gap.ts, series_impacted=0.1, missing_rate=0.4, protection=0.1, use_seed=True, seed=42) + gap.print(limitation=10) + gap.plot(ts_type="contamination", title="test", save_path="assets", limitation=plot_limit, display=True) + + print("\n", "_"*95, "end") \ No newline at end of file diff --git a/imputegap/runner_explainer.py b/imputegap/runner_explainer.py index f96bfff..13f9157 100644 --- a/imputegap/runner_explainer.py +++ b/imputegap/runner_explainer.py @@ -1,16 +1,8 @@ -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager.manager import TimeSeries +from imputegap.recovery.manager import TimeSeries from imputegap.explainer.explainer import Explainer -import os - +from imputegap.tools.utils import display_title -def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"): - print("=" * 100) - print(f"{title} : {aut}") - print("=" * 100) - print(f" {lib} - {university}") - print("=" * 100) +import os def check_block_size(filename): diff --git a/imputegap/runner_imputation.py b/imputegap/runner_imputation.py index 69cb009..770474e 100644 --- a/imputegap/runner_imputation.py +++ b/imputegap/runner_imputation.py @@ -1,19 +1,11 @@ -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.recovery.manager import TimeSeries +from imputegap.recovery.optimization import Optimization +from imputegap.tools.utils import display_title +from 
imputegap.tools import utils import os -from imputegap.optimization.bayesian_optimization import Optimization - - -def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"): - print("=" * 100) - print(f"{title} : {aut}") - print("=" * 100) - print(f" {lib} - {university}") - print("=" * 100) - def check_block_size(filename): if "test" in filename: @@ -36,7 +28,7 @@ def check_block_size(filename): block_size, plot_limit = check_block_size(filename) gap.print(limitation=5) - gap.plot(title="test", save_path="assets", limitation=0, display=False) + gap.plot(ts_type="gt", title="test", save_path="assets", limitation=0, display=False) gap.ts_contaminate = Contamination.scenario_mcar(ts=gap.ts, series_impacted=0.4, missing_rate=0.4, block_size=block_size, protection=0.1, use_seed=True, seed=42) gap.print(limitation=10) diff --git a/imputegap/runner_optimization.py b/imputegap/runner_optimization.py index cc519df..841d43a 100644 --- a/imputegap/runner_optimization.py +++ b/imputegap/runner_optimization.py @@ -1,16 +1,9 @@ -from imputegap.contamination.contamination import Contamination -from imputegap.manager.manager import TimeSeries -import os - -from imputegap.optimization.bayesian_optimization import Optimization +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.manager import TimeSeries +from imputegap.recovery.optimization import Optimization +from imputegap.tools.utils import display_title - -def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"): - print("=" * 100) - print(f"{title} : {aut}") - print("=" * 100) - print(f" {lib} - {university}") - print("=" * 100) +import os def check_block_size(filename): diff --git a/imputegap/manager/__init__.py b/imputegap/tools/__init__.py similarity index 100% rename from imputegap/manager/__init__.py rename to imputegap/tools/__init__.py diff 
--git a/imputegap/tools/__pycache__/__init__.cpython-312.pyc b/imputegap/tools/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..c7ebc89 Binary files /dev/null and b/imputegap/tools/__pycache__/__init__.cpython-312.pyc differ diff --git a/imputegap/tools/__pycache__/algorithm_parameters.cpython-312.pyc b/imputegap/tools/__pycache__/algorithm_parameters.cpython-312.pyc new file mode 100644 index 0000000..f729eb5 Binary files /dev/null and b/imputegap/tools/__pycache__/algorithm_parameters.cpython-312.pyc differ diff --git a/imputegap/evaluation/__pycache__/evaluation.cpython-312.pyc b/imputegap/tools/__pycache__/evaluation.cpython-312.pyc similarity index 92% rename from imputegap/evaluation/__pycache__/evaluation.cpython-312.pyc rename to imputegap/tools/__pycache__/evaluation.cpython-312.pyc index 28cb94e..ee176df 100644 Binary files a/imputegap/evaluation/__pycache__/evaluation.cpython-312.pyc and b/imputegap/tools/__pycache__/evaluation.cpython-312.pyc differ diff --git a/imputegap/manager/__pycache__/utils.cpython-312.pyc b/imputegap/tools/__pycache__/utils.cpython-312.pyc similarity index 50% rename from imputegap/manager/__pycache__/utils.cpython-312.pyc rename to imputegap/tools/__pycache__/utils.cpython-312.pyc index 6fa263b..204450c 100644 Binary files a/imputegap/manager/__pycache__/utils.cpython-312.pyc and b/imputegap/tools/__pycache__/utils.cpython-312.pyc differ diff --git a/imputegap/optimization/algorithm_parameters.py b/imputegap/tools/algorithm_parameters.py similarity index 87% rename from imputegap/optimization/algorithm_parameters.py rename to imputegap/tools/algorithm_parameters.py index a0a058c..28b08d9 100644 --- a/imputegap/optimization/algorithm_parameters.py +++ b/imputegap/tools/algorithm_parameters.py @@ -8,8 +8,6 @@ # IIM parameters IIM_LEARNING_NEIGHBOR_RANGE = [i for i in range(1, 100)] # Test up to 100 learning neighbors -# IIM_ADAPTIVE_RANGE = [True, False] # Test with and without adaptive learning -# 
IIM_METRIC_RANGE = ['euclidean', 'cosine'] # Test with euclidean and cosine distance, could be more # MRNN parameters MRNN_LEARNING_RATE_CHANGE = np.logspace(-6, 0, num=20) # log scale for learning rate @@ -45,11 +43,4 @@ 'iim': ['learning_neighbors'], 'mrnn': ['hidden_dim', 'learning_rate', 'iterations', 'seq_len' ], 'stmvl': ['window_size', 'gamma', 'alpha'] -} - -DEFAULT_PARAMS = { - 'cdrec': [1, 1e-6, 100], - 'iim': [10], - 'mrnn': [10, 0.01, 1000, 7], - 'stmvl': [2, 0.85, 7] } \ No newline at end of file diff --git a/imputegap/evaluation/evaluation.py b/imputegap/tools/evaluation.py similarity index 100% rename from imputegap/evaluation/evaluation.py rename to imputegap/tools/evaluation.py diff --git a/imputegap/manager/utils.py b/imputegap/tools/utils.py similarity index 90% rename from imputegap/manager/utils.py rename to imputegap/tools/utils.py index a0fc64e..3b456b8 100644 --- a/imputegap/manager/utils.py +++ b/imputegap/tools/utils.py @@ -1,8 +1,15 @@ import os - import toml +def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"): + print("=" * 100) + print(f"{title} : {aut}") + print("=" * 100) + print(f" {lib} - {university}") + print("=" * 100) + + def get_file_path_dataset(set_name="test"): """ Find the accurate path for loading files of tests diff --git a/tests/__pycache__/test_contamination_mp.cpython-312.pyc b/tests/__pycache__/test_contamination_mp.cpython-312.pyc index 86e73f3..27c9a2f 100644 Binary files a/tests/__pycache__/test_contamination_mp.cpython-312.pyc and b/tests/__pycache__/test_contamination_mp.cpython-312.pyc differ diff --git a/tests/__pycache__/test_imputation_cdrec.cpython-312.pyc b/tests/__pycache__/test_imputation_cdrec.cpython-312.pyc index 612d50a..5b631b2 100644 Binary files a/tests/__pycache__/test_imputation_cdrec.cpython-312.pyc and b/tests/__pycache__/test_imputation_cdrec.cpython-312.pyc differ diff --git a/tests/test_contamination_mcar.py 
b/tests/test_contamination_mcar.py index 776a5f4..7456a24 100644 --- a/tests/test_contamination_mcar.py +++ b/tests/test_contamination_mcar.py @@ -2,9 +2,9 @@ import unittest import numpy as np -from imputegap.contamination.contamination import Contamination -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.recovery.contamination import Contamination +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries class TestContamination(unittest.TestCase): diff --git a/tests/test_contamination_mp.py b/tests/test_contamination_mp.py index 34c079b..495d86c 100644 --- a/tests/test_contamination_mp.py +++ b/tests/test_contamination_mp.py @@ -1,11 +1,10 @@ -import os import unittest import numpy as np import math -from imputegap.contamination.contamination import Contamination -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.recovery.contamination import Contamination +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries class TestContamination(unittest.TestCase): @@ -14,42 +13,29 @@ def test_mp_selection(self): """ the goal is to test if only the selected values are contaminated """ - impute_gap = TimeSeries(utils.get_file_path_dataset("test")) + gap = TimeSeries(utils.get_file_path_dataset("test")) - series_impacted = [0.4] - missing_rates = [0.4] + series_impacted = [0.4, 1] + missing_rates = [0.4, 1] seeds_start, seeds_end = 42, 43 protection = 0.1 - - length_of_gap_ts = len(impute_gap.ts[0]) - len_expected = math.ceil(missing_rates[0] * length_of_gap_ts) - series_check = [str(i) for i in range(len_expected)] + M, N = gap.ts.shape for seed_value in range(seeds_start, seeds_end): - for series_sel in series_impacted: + for series_per in series_impacted: for missing_rate in missing_rates: - - - ts_contaminate = Contamination.scenario_missing_percentage(ts=impute_gap.ts, - series_impacted=series_sel, + 
ts_contaminate = Contamination.scenario_missing_percentage(ts=gap.ts, + series_impacted=series_per, missing_rate=missing_rate, protection=protection, use_seed=True, seed=seed_value) - check_nan_series = False - - for series, data in enumerate(ts_contaminate): - if str(series) in series_check: - if np.isnan(data).any(): - check_nan_series = True - else: - if np.isnan(data).any(): - check_nan_series = False - break - else: - check_nan_series = True + n_nan = np.isnan(ts_contaminate).sum() + expected_nan_series = math.ceil(series_per * M) + expected_nan_values = int((N - int(N * protection)) * missing_rate) + expected = expected_nan_series * expected_nan_values - self.assertTrue(check_nan_series, True) + self.assertEqual(n_nan, expected, f"Expected {expected} contaminated series but found {n_nan}") def test_mp_position(self): """ diff --git a/tests/test_explainer.py b/tests/test_explainer.py index c1135bb..cda85a1 100644 --- a/tests/test_explainer.py +++ b/tests/test_explainer.py @@ -1,11 +1,10 @@ -import os import unittest import numpy as np from imputegap.explainer.explainer import Explainer -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries class TestExplainer(unittest.TestCase): diff --git a/tests/test_imputation_cdrec.py b/tests/test_imputation_cdrec.py index 8497e0c..26596e6 100644 --- a/tests/test_imputation_cdrec.py +++ b/tests/test_imputation_cdrec.py @@ -2,10 +2,10 @@ import unittest import numpy as np -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries def resolve_path(local_path, 
github_actions_path): diff --git a/tests/test_imputation_iim.py b/tests/test_imputation_iim.py index bf3465e..f45e86d 100644 --- a/tests/test_imputation_iim.py +++ b/tests/test_imputation_iim.py @@ -1,11 +1,10 @@ -import os import unittest import numpy as np -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries class TestIIM(unittest.TestCase): diff --git a/tests/test_imputation_mrnn.py b/tests/test_imputation_mrnn.py index f51109c..be73389 100644 --- a/tests/test_imputation_mrnn.py +++ b/tests/test_imputation_mrnn.py @@ -1,11 +1,10 @@ -import os import unittest import numpy as np -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries class TestMRNN(unittest.TestCase): diff --git a/tests/test_imputation_stmvl.py b/tests/test_imputation_stmvl.py index 3531279..58998f5 100644 --- a/tests/test_imputation_stmvl.py +++ b/tests/test_imputation_stmvl.py @@ -1,11 +1,10 @@ -import os import unittest import numpy as np -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.tools import utils +from 
imputegap.recovery.manager import TimeSeries class TestSTMVL(unittest.TestCase): diff --git a/tests/test_loading.py b/tests/test_loading.py index 6fc43b9..790d002 100644 --- a/tests/test_loading.py +++ b/tests/test_loading.py @@ -5,8 +5,8 @@ from scipy.stats import zscore from sklearn.preprocessing import MinMaxScaler -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries class TestLoading(unittest.TestCase): @@ -36,7 +36,7 @@ def test_loading_plot(self): """ impute_gap = TimeSeries(utils.get_file_path_dataset("test")) to_save = utils.get_save_path_asset() - file_path = impute_gap.plot("ground_truth", "test", to_save, 5, (16, 8), False) + file_path = impute_gap.plot("gt", "test", to_save, 5, (16, 8), False) self.assertTrue(os.path.exists(file_path)) diff --git a/tests/test_opti_bayesian_cdrec.py b/tests/test_opti_bayesian_cdrec.py index 4f62553..502413a 100644 --- a/tests/test_opti_bayesian_cdrec.py +++ b/tests/test_opti_bayesian_cdrec.py @@ -1,12 +1,10 @@ -import os import unittest -import numpy as np -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries -from imputegap.optimization.bayesian_optimization import Optimization +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.recovery.optimization import Optimization +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries class TestOptiCDREC(unittest.TestCase): diff --git a/tests/test_opti_bayesian_iim.py b/tests/test_opti_bayesian_iim.py index ee870d9..5b8baa7 100644 --- a/tests/test_opti_bayesian_iim.py +++ b/tests/test_opti_bayesian_iim.py @@ -1,12 +1,10 @@ -import os import unittest -import numpy as np -from 
imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries -from imputegap.optimization.bayesian_optimization import Optimization +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.recovery.optimization import Optimization +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries diff --git a/tests/test_opti_bayesian_mrnn.py b/tests/test_opti_bayesian_mrnn.py index b5346f6..9c64da1 100644 --- a/tests/test_opti_bayesian_mrnn.py +++ b/tests/test_opti_bayesian_mrnn.py @@ -1,12 +1,10 @@ -import os import unittest -import numpy as np -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries -from imputegap.optimization.bayesian_optimization import Optimization +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from imputegap.recovery.optimization import Optimization +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries diff --git a/tests/test_opti_bayesian_stmvl.py b/tests/test_opti_bayesian_stmvl.py index 6a03662..9e0be9b 100644 --- a/tests/test_opti_bayesian_stmvl.py +++ b/tests/test_opti_bayesian_stmvl.py @@ -1,12 +1,10 @@ -import os import unittest -import numpy as np -from imputegap.contamination.contamination import Contamination -from imputegap.imputation.imputation import Imputation -from imputegap.manager import utils -from imputegap.manager.manager import TimeSeries -from imputegap.optimization.bayesian_optimization import Optimization +from imputegap.recovery.contamination import Contamination +from imputegap.recovery.imputation import Imputation +from 
imputegap.recovery.optimization import Optimization +from imputegap.tools import utils +from imputegap.recovery.manager import TimeSeries