diff --git a/.github/workflows/pytest_10.yml b/.github/workflows/pytest_10.yml deleted file mode 100644 index f8d45c2..0000000 --- a/.github/workflows/pytest_10.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Pytest - ImputeGAP - 10 - -on: - push: - pull_request: - -jobs: - test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - with: - lfs: true - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install libmlpack-dev - sudo apt-get install libopenblas-dev - sudo apt-get install python3-dev build-essential - pip install -r requirements.txt - pip install mypy - pip install pytest - - - name: Run pytest - run: python -m pytest ./tests/ \ No newline at end of file diff --git a/.github/workflows/pytest_contamination.yml b/.github/workflows/pytest_contamination.yml index 2d23d4e..a42d507 100644 --- a/.github/workflows/pytest_contamination.yml +++ b/.github/workflows/pytest_contamination.yml @@ -1,4 +1,4 @@ -name: Pytest - ImputeGAP - Contamination - MCAR - 8 +name: Pytest - ImputeGAP - Contamination - MCAR - 12 on: push: diff --git a/.github/workflows/pytest_imp_cdrec.yml b/.github/workflows/pytest_imp_cdrec.yml index 68329ba..d233ff9 100644 --- a/.github/workflows/pytest_imp_cdrec.yml +++ b/.github/workflows/pytest_imp_cdrec.yml @@ -1,5 +1,5 @@ -name: Pytest - ImputeGAP - CDREC - 8 +name: Pytest - ImputeGAP - CDREC - 12 on: push: diff --git a/.github/workflows/pytest_imp_iim.yml b/.github/workflows/pytest_imp_iim.yml index dd95f56..87c7172 100644 --- a/.github/workflows/pytest_imp_iim.yml +++ b/.github/workflows/pytest_imp_iim.yml @@ -1,5 +1,5 @@ -name: Pytest - ImputeGAP - IIM - 8 +name: Pytest - ImputeGAP - IIM - 12 on: push: diff --git a/.github/workflows/pytest_imp_mrnn.yml b/.github/workflows/pytest_imp_mrnn.yml index dfbbfc6..79ae0ea 100644 --- a/.github/workflows/pytest_imp_mrnn.yml +++ b/.github/workflows/pytest_imp_mrnn.yml @@ -1,5 +1,5 @@ -name: Pytest - ImputeGAP - MRNN - 8 +name: Pytest - ImputeGAP - MRNN - 12 on: push: diff --git a/.github/workflows/pytest_imp_stmvl.yml b/.github/workflows/pytest_imp_stmvl.yml index 4ca7b83..4bdf32c 100644 --- a/.github/workflows/pytest_imp_stmvl.yml +++ b/.github/workflows/pytest_imp_stmvl.yml @@ -1,5 +1,5 @@ -name: Pytest - ImputeGAP - ST-MVL - 8 +name: Pytest - ImputeGAP - ST-MVL - 12 on: push: diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 980afd4..ad82b3f 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,11 +2,23 @@ + + + + + + - + - + + + + + + + - + - + - + - + - + - - - - + + + + - - - - + + + + @@ -193,7 +205,7 @@ - + @@ -217,7 +229,7 @@ - + diff --git a/env/optimal_parameters_cdrec.toml b/env/optimal_parameters_cdrec.toml index 6681f27..76551d2 100644 --- a/env/optimal_parameters_cdrec.toml +++ b/env/optimal_parameters_cdrec.toml @@ -1,4 +1,4 @@ [cdrec] -rank = "9" -epsilon = 1.591301339858638e-5 -iteration = "100" +rank = "8" +epsilon = 0.00030828609249299515 +iteration = "449" diff --git a/imputegap/imputation/__pycache__/imputation.cpython-312.pyc b/imputegap/imputation/__pycache__/imputation.cpython-312.pyc index ab67289..cf00018 100644 Binary files a/imputegap/imputation/__pycache__/imputation.cpython-312.pyc and b/imputegap/imputation/__pycache__/imputation.cpython-312.pyc differ diff --git a/imputegap/imputation/imputation.py b/imputegap/imputation/imputation.py index 028da0b..0601618 100644 --- a/imputegap/imputation/imputation.py +++ b/imputegap/imputation/imputation.py @@ -8,57 +8,11 @@ from imputegap.algorithms.stmvl import stmvl from imputegap.algorithms.zero_impute import zero_impute from imputegap.evaluation.evaluation import Evaluation +from imputegap.manager import utils class Imputation: - def load_parameters(query: str = "default", algorithm: str = "cdrec"): - """ - Load default values of algorithms - - :param query : ('optimal' or 'default'), load default or optimal parameters for algorithms | default "default" - :param algorithm : algorithm parameters to load | default "cdrec" - - :return: tuples of optimal parameters and the config of default values - """ - - filepath = "" - if query == "default": - filepath = "../env/default_values.toml" - elif query == "optimal": - filepath = "../env/optimal_parameters_"+str(algorithm)+".toml" - else: - print("Query not found for this function ('optimal' or 'default')") - - if not os.path.exists(filepath): - filepath = filepath[1:] - - with open(filepath, "r") as _: - config = toml.load(filepath) - - if algorithm == "cdrec": - truncation_rank = int(config['cdrec']['rank']) - epsilon = config['cdrec']['epsilon'] - iterations = int(config['cdrec']['iteration']) - return (truncation_rank, epsilon, iterations) - elif algorithm == "stmvl": - window_size = int(config['stmvl']['window_size']) - gamma = float(config['stmvl']['gamma']) - alpha = int(config['stmvl']['alpha']) - return (window_size, gamma, alpha) - elif algorithm == "iim": - learning_neighbors = int(config['iim']['learning_neighbors']) - algo_code = config['iim']['algorithm_code'] - return (learning_neighbors, algo_code) - elif algorithm == "mrnn": - hidden_dim = int(config['mrnn']['hidden_dim']) - learning_rate = float(config['mrnn']['learning_rate']) - iterations = int(config['mrnn']['iterations']) - sequence_length = int(config['mrnn']['sequence_length']) - return (hidden_dim, learning_rate, iterations, sequence_length) - else : - print("Default/Optimal config not found for this algorithm") - return None def evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec"): """ @@ -81,8 +35,8 @@ def evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec alg_code = "iim " + re.sub(r'[\W_]', '', str(learning_neighbours)) imputation, error_measures = Imputation.Regression.iim_imputation(ground_truth, contamination, (learning_neighbours, alg_code)) elif algorithm == 'mrnn': - hidden_dim, learning_rate, iterations, keep_prob, seq_len = configuration - imputation, error_measures = Imputation.ML.mrnn_imputation(ground_truth, contamination, (hidden_dim, learning_rate, iterations, seq_len)) + hidden_dim, learning_rate, iterations = configuration + imputation, error_measures = Imputation.ML.mrnn_imputation(ground_truth, contamination, (hidden_dim, learning_rate, iterations, 7)) elif algorithm == 'stmvl': window_size, gamma, alpha = configuration imputation, error_measures = Imputation.Pattern.stmvl_imputation(ground_truth, contamination, (window_size, gamma, alpha)) @@ -106,7 +60,7 @@ def cdrec(ground_truth, contamination, params=None): if params is not None: truncation_rank, epsilon, iterations = params else: - truncation_rank, epsilon, iterations = Imputation.load_parameters(query="default", algorithm="cdrec") + truncation_rank, epsilon, iterations = utils.load_parameters(query="default", algorithm="cdrec") imputed_matrix = cdrec(contamination=contamination, truncation_rank=truncation_rank, iterations=iterations, epsilon=epsilon) @@ -163,7 +117,7 @@ def iim_imputation(ground_truth, contamination, params=None): if params is not None: neighbors, algo_code = params else: - neighbors, algo_code = Imputation.load_parameters(query="default", algorithm="iim") + neighbors, algo_code = utils.load_parameters(query="default", algorithm="iim") imputed_matrix = iim(contamination=contamination, number_neighbor=neighbors, algo_code=algo_code) @@ -186,7 +140,7 @@ def mrnn_imputation(ground_truth, contamination, params=None): if params is not None: hidden_dim, learning_rate, iterations, sequence_length = params else: - hidden_dim, learning_rate, iterations, sequence_length = Imputation.load_parameters(query="default", algorithm="mrnn") + hidden_dim, learning_rate, iterations, sequence_length = utils.load_parameters(query="default", algorithm="mrnn") imputed_matrix = mrnn(contamination=contamination, hidden_dim=hidden_dim, learning_rate=learning_rate, iterations=iterations, sequence_length=sequence_length) @@ -211,7 +165,7 @@ def stmvl_imputation(ground_truth, contamination, params=None): if params is not None: window_size, gamma, alpha = params else: - window_size, gamma, alpha = Imputation.load_parameters(query="default", algorithm="stmvl") + window_size, gamma, alpha = utils.load_parameters(query="default", algorithm="stmvl") imputed_matrix = stmvl(contamination=contamination, window_size=window_size, gamma=gamma, alpha=alpha) diff --git a/imputegap/manager/__pycache__/utils.cpython-312.pyc b/imputegap/manager/__pycache__/utils.cpython-312.pyc index e6a61c1..224a79f 100644 Binary files a/imputegap/manager/__pycache__/utils.cpython-312.pyc and b/imputegap/manager/__pycache__/utils.cpython-312.pyc differ diff --git a/imputegap/manager/utils.py b/imputegap/manager/utils.py index 38fbfcf..e02ba6c 100644 --- a/imputegap/manager/utils.py +++ b/imputegap/manager/utils.py @@ -1,5 +1,7 @@ import os +import toml + def get_file_path_dataset(set_name="test"): """ @@ -26,3 +28,52 @@ def get_save_path_asset(): filepath = filepath[1:] return filepath + + +def load_parameters(query: str = "default", algorithm: str = "cdrec"): + """ + Load default values of algorithms + + :param query : ('optimal' or 'default'), load default or optimal parameters for algorithms | default "default" + :param algorithm : algorithm parameters to load | default "cdrec" + + :return: tuples of optimal parameters and the config of default values + """ + + filepath = "" + if query == "default": + filepath = "../env/default_values.toml" + elif query == "optimal": + filepath = "../env/optimal_parameters_"+str(algorithm)+".toml" + else: + print("Query not found for this function ('optimal' or 'default')") + + if not os.path.exists(filepath): + filepath = filepath[1:] + + with open(filepath, "r") as _: + config = toml.load(filepath) + + if algorithm == "cdrec": + truncation_rank = int(config['cdrec']['rank']) + epsilon = config['cdrec']['epsilon'] + iterations = int(config['cdrec']['iteration']) + return (truncation_rank, epsilon, iterations) + elif algorithm == "stmvl": + window_size = int(config['stmvl']['window_size']) + gamma = float(config['stmvl']['gamma']) + alpha = int(config['stmvl']['alpha']) + return (window_size, gamma, alpha) + elif algorithm == "iim": + learning_neighbors = int(config['iim']['learning_neighbors']) + algo_code = config['iim']['algorithm_code'] + return (learning_neighbors, algo_code) + elif algorithm == "mrnn": + hidden_dim = int(config['mrnn']['hidden_dim']) + learning_rate = float(config['mrnn']['learning_rate']) + iterations = int(config['mrnn']['iterations']) + sequence_length = int(config['mrnn']['sequence_length']) + return (hidden_dim, learning_rate, iterations, sequence_length) + else : + print("Default/Optimal config not found for this algorithm") + return None \ No newline at end of file diff --git a/imputegap/optimization/__pycache__/algorithm_parameters.cpython-312.pyc b/imputegap/optimization/__pycache__/algorithm_parameters.cpython-312.pyc index 670951b..ab13558 100644 Binary files a/imputegap/optimization/__pycache__/algorithm_parameters.cpython-312.pyc and b/imputegap/optimization/__pycache__/algorithm_parameters.cpython-312.pyc differ diff --git a/imputegap/optimization/__pycache__/bayesian_optimization.cpython-312.pyc b/imputegap/optimization/__pycache__/bayesian_optimization.cpython-312.pyc index 7e4550e..110a70c 100644 Binary files a/imputegap/optimization/__pycache__/bayesian_optimization.cpython-312.pyc and b/imputegap/optimization/__pycache__/bayesian_optimization.cpython-312.pyc differ diff --git a/imputegap/optimization/algorithm_parameters.py b/imputegap/optimization/algorithm_parameters.py index 9b054f1..a0a058c 100644 --- a/imputegap/optimization/algorithm_parameters.py +++ b/imputegap/optimization/algorithm_parameters.py @@ -27,7 +27,7 @@ SEARCH_SPACES = { 'cdrec': [Integer(1, 9, name='rank'), Real(1e-6, 1, "log-uniform", name='epsilon'), Integer(100, 1000, name='iteration')], 'iim': [Integer(1, 100, name='learning_neighbors')], - 'mrnn': [Integer(0, 9, name='hidden_dim'), Real(1e-6, 1, "log-uniform", name='learning_rate'), Integer(0, 95, name='iterations'), Integer(0, 99, name='seq_len')], + 'mrnn': [Integer(10, 15, name='hidden_dim'), Real(1e-6, 1e-1, "log-uniform", name='learning_rate'), Integer(10, 95, name='iterations')], 'stmvl': [Integer(2, 99, name='window_size'), Real(1e-6, 0.999999, "log-uniform", name='gamma'), Integer(1, 9, name='alpha')], } diff --git a/imputegap/optimization/bayesian_optimization.py b/imputegap/optimization/bayesian_optimization.py index fe5d4ee..4e0923e 100644 --- a/imputegap/optimization/bayesian_optimization.py +++ b/imputegap/optimization/bayesian_optimization.py @@ -40,7 +40,7 @@ def save_optimization(optimal_params, algorithm="cdrec", file_name=None): class Bayesian: def bayesian_optimization(ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", - n_calls=100, n_random_starts=50, acq_func='gp_hedge'): + n_calls=10, n_random_starts=50, acq_func='gp_hedge'): """ Conduct the Bayesian optimization hyperparameter optimization. diff --git a/imputegap/runner_imputation.py b/imputegap/runner_imputation.py index 7255531..8ce370a 100644 --- a/imputegap/runner_imputation.py +++ b/imputegap/runner_imputation.py @@ -1,5 +1,6 @@ from imputegap.contamination.contamination import Contamination from imputegap.imputation.imputation import Imputation +from imputegap.manager import utils from imputegap.manager.manager import TimeSeries import os @@ -36,7 +37,7 @@ def check_block_size(filename): gap.print() gap.plot(ts_type="contamination", title="test", save_path="assets", limitation=plot_limit, display=False) - gap.optimal_params = Imputation.load_parameters(query="optimal", algorithm="cdrec") + gap.optimal_params = utils.load_parameters(query="optimal", algorithm="cdrec") gap.ts_imputation, gap.metrics = Imputation.MR.cdrec(ground_truth=gap.ts, contamination=gap.ts_contaminate, params=gap.optimal_params) gap.print() diff --git a/tests/__pycache__/test_imputation_cdrec.cpython-312.pyc b/tests/__pycache__/test_imputation_cdrec.cpython-312.pyc index e077d65..612d50a 100644 Binary files a/tests/__pycache__/test_imputation_cdrec.cpython-312.pyc and b/tests/__pycache__/test_imputation_cdrec.cpython-312.pyc differ diff --git a/tests/__pycache__/test_imputation_mrnn.cpython-312.pyc b/tests/__pycache__/test_imputation_mrnn.cpython-312.pyc index 98dbec5..899adce 100644 Binary files a/tests/__pycache__/test_imputation_mrnn.cpython-312.pyc and b/tests/__pycache__/test_imputation_mrnn.cpython-312.pyc differ diff --git a/tests/__pycache__/test_opti_bayesian_mrnn.cpython-312.pyc b/tests/__pycache__/test_opti_bayesian_mrnn.cpython-312.pyc index a231aff..28fdc56 100644 Binary files a/tests/__pycache__/test_opti_bayesian_mrnn.cpython-312.pyc and b/tests/__pycache__/test_opti_bayesian_mrnn.cpython-312.pyc differ diff --git a/tests/__pycache__/test_opti_bayesian_stmvl.cpython-312.pyc b/tests/__pycache__/test_opti_bayesian_stmvl.cpython-312.pyc index 9ac20a1..3ebc7b7 100644 Binary files a/tests/__pycache__/test_opti_bayesian_stmvl.cpython-312.pyc and b/tests/__pycache__/test_opti_bayesian_stmvl.cpython-312.pyc differ diff --git a/tests/test_imputation_iim.py b/tests/test_imputation_iim.py index abd1b65..bf3465e 100644 --- a/tests/test_imputation_iim.py +++ b/tests/test_imputation_iim.py @@ -8,8 +8,6 @@ from imputegap.manager.manager import TimeSeries - - class TestIIM(unittest.TestCase): def test_imputation_iim_chlorine(self): diff --git a/tests/test_opti_bayesian_cdrec.py b/tests/test_opti_bayesian_cdrec.py index eabdf58..c47e6eb 100644 --- a/tests/test_opti_bayesian_cdrec.py +++ b/tests/test_opti_bayesian_cdrec.py @@ -26,7 +26,7 @@ def test_optimization_bayesian_cdrec(self): print("\nOptimization done successfully... ") print("\n", optimal_params, "\n") - params = Imputation.load_parameters(query="default", algorithm=algorithm) + params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal = (optimal_params['rank'], optimal_params['epsilon'], optimal_params['iteration']) _, metrics_optimal = Imputation.MR.cdrec(ground_truth=gap.ts, contamination=ts_contaminated, params=params_optimal) diff --git a/tests/test_opti_bayesian_iim.py b/tests/test_opti_bayesian_iim.py index 8452d5a..5fe3333 100644 --- a/tests/test_opti_bayesian_iim.py +++ b/tests/test_opti_bayesian_iim.py @@ -30,7 +30,7 @@ def test_optimization_bayesian_iim(self): print("\nOptimization done successfully... ") print("\n", optimal_params, "\n") - params = Imputation.load_parameters(query="default", algorithm=algorithm) + params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal = (optimal_params['neighbor'], "iim 2") _, metrics_optimal = Imputation.Regression.iim_imputation(ground_truth=gap.ts, contamination=ts_contaminated, params=params_optimal) diff --git a/tests/test_opti_bayesian_mrnn.py b/tests/test_opti_bayesian_mrnn.py index ae028fd..4e297fe 100644 --- a/tests/test_opti_bayesian_mrnn.py +++ b/tests/test_opti_bayesian_mrnn.py @@ -20,8 +20,7 @@ def test_optimization_bayesian_mrnn(self): algorithm = "mrnn" - ts_contaminated = Contamination.scenario_mcar(ts=gap.ts, series_impacted=0.4, missing_rate=0.4, block_size=2, - protection=0.1, use_seed=True, seed=42) + ts_contaminated = Contamination.scenario_mcar(ts=gap.ts, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42) optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts, contamination=ts_contaminated, @@ -30,11 +29,10 @@ def test_optimization_bayesian_mrnn(self): print("\nOptimization done successfully... ") print("\n", optimal_params, "\n") - params = Imputation.load_parameters(query="default", algorithm=algorithm) + params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal = (optimal_params['hidden_dim'], optimal_params['learning_rate'], optimal_params['iterations'], optimal_params['sequence_length']) - _, metrics_optimal = Imputation.ML.mrnn_imputation(ground_truth=gap.ts, contamination=ts_contaminated, - params=params_optimal) + _, metrics_optimal = Imputation.ML.mrnn_imputation(ground_truth=gap.ts, contamination=ts_contaminated, params=params_optimal) _, metrics_default = Imputation.ML.mrnn_imputation(ground_truth=gap.ts, contamination=ts_contaminated, params=params) Optimization.save_optimization(optimal_params=optimal_params, algorithm=algorithm+"_test") diff --git a/tests/test_opti_bayesian_stmvl.py b/tests/test_opti_bayesian_stmvl.py index ba361e7..97a2f59 100644 --- a/tests/test_opti_bayesian_stmvl.py +++ b/tests/test_opti_bayesian_stmvl.py @@ -30,7 +30,7 @@ def test_optimization_bayesian_stmvl(self): print("\nOptimization done successfully... ") print("\n", optimal_params, "\n") - params = Imputation.load_parameters(query="default", algorithm=algorithm) + params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal = (optimal_params['window_size'], optimal_params['gamma'], optimal_params['alpha']) _, metrics_optimal = Imputation.Pattern.stmvl_imputation(ground_truth=gap.ts, contamination=ts_contaminated,