Skip to content

Commit

Permalink
2. correction of optimization and creation of batches
Browse files Browse the repository at this point in the history
  • Loading branch information
qnater committed Sep 30, 2024
1 parent 9888ea8 commit cc45353
Show file tree
Hide file tree
Showing 64 changed files with 261 additions and 205 deletions.
117 changes: 72 additions & 45 deletions .idea/workspace.xml

Large diffs are not rendered by default.

Binary file modified imputegap/assets/contamination/test_contamination.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed imputegap/assets/ground_truth/test_ground_truth.png
Binary file not shown.
Empty file.
File renamed without changes.
Binary file added imputegap/assets/gt/test_gt.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified imputegap/assets/imputation/test_imputation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 0 additions & 1 deletion imputegap/contamination/__init__.py

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions imputegap/explainer/explainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestRegressor

from imputegap.contamination.contamination import Contamination
from imputegap.imputation.imputation import Imputation
from imputegap.recovery.contamination import Contamination
from imputegap.recovery.imputation import Imputation


class Explainer:
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed imputegap/manager/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file removed imputegap/manager/__pycache__/__init__.cpython-38.pyc
Binary file not shown.
Binary file removed imputegap/manager/__pycache__/manager.cpython-38.pyc
Binary file not shown.
Empty file removed imputegap/optimization/__init__.py
Empty file.
Binary file not shown.
Binary file not shown.
36 changes: 32 additions & 4 deletions imputegap/contamination/README.md → imputegap/recovery/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
![My Logo](../../assets/logo_imputegab.png)

# Scenarios
# CONTAMINATION
## Scenarios
<table>
<tr>
<td>M</td><td>Number of time series</td>
Expand All @@ -23,7 +24,7 @@
<tr>
<td>B</td><td>Block size</td>
</tr>
</table>
</table><br />

### MCAR
MCAR selects random series and removes blocks at random positions until a total of W of all points of the time series are missing.
Expand All @@ -32,7 +33,7 @@ This scenario uses random number generator with fixed seed and will produce the
<table>
<tbody>Definition</tbody>
<tr>
<td>N</td><td>10 - 100%</td>
<td>N</td><td>MAX</td>
</tr>
<tr>
<td>M</td><td>MAX</td>
Expand All @@ -49,4 +50,31 @@ This scenario uses random number generator with fixed seed and will produce the
<tr>
<td>B</td><td>2 - 20</td>
</tr>
</table>
</table>

<br />

### MISSING PERCENTAGE
MISSING PERCENTAGE selects a percentage of the series to contaminate, from the first to the last, with a desired percentage of values to remove.

<table>
<tbody>Definition</tbody>
<tr>
<td>N</td><td>MAX</td>
</tr>
<tr>
<td>M</td><td>MAX</td>
</tr>
<tr>
<td>R</td><td>1 - 100%</td>
</tr>
<tr>
<td>S</td><td>1 - 100%</td>
</tr>
<tr>
<td>W</td><td>(N-P) * R</td>
</tr>
<tr>
<td>B</td><td>R</td>
</tr>
</table><br />
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,9 +1,27 @@
import math
import numpy as np


class Contamination:

def verification_limitation(percentage, low_limit=0.01, high_limit=1.0):
    """
    Normalize a user-supplied percentage to a decimal fraction.

    A value already inside [low_limit, high_limit] is returned unchanged.
    Otherwise, a value inside [1, 100] is read as a percent and divided by 100.
    Any other value is reported on stdout and returned unchanged (no exception).
    With the default limits, a value exactly equal to 1 is inside the accepted
    range and is therefore kept as the fraction 1.0, not converted to 0.01.

    :param percentage: The percentage to be checked.
    :param low_limit: The lower limit of the acceptable percentage range.
    :param high_limit: The upper limit of the acceptable percentage range.
    :return: Adjusted percentage.
    """
    if low_limit <= percentage <= high_limit:
        return percentage  # No modification needed

    if 1 <= percentage <= 100:
        print(f"The percentage {percentage} is between 1 and 100. Dividing by 100 to convert to a decimal.")
        converted = percentage / 100

        # With non-default limits the converted value can still fall outside
        # the accepted range; report it instead of returning it silently.
        if not low_limit <= converted <= high_limit:
            print("The percentage", converted, "is out of the acceptable range", low_limit, "-", high_limit, ".")

        return converted

    print("The percentage", percentage, "is out of the acceptable range", low_limit, "-", high_limit, ".")
    return percentage

def format_selection(ts, selection):
"""
Format the selection of series based on keywords
Expand Down Expand Up @@ -59,6 +77,10 @@ def scenario_mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, prot
if use_seed:
np.random.seed(seed)

missing_rate = Contamination.verification_limitation(missing_rate)
series_impacted = Contamination.verification_limitation(series_impacted)
protection = Contamination.verification_limitation(protection)

ts_contaminated = ts.copy()
M, _ = ts_contaminated.shape

Expand Down Expand Up @@ -128,6 +150,10 @@ def scenario_missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, prote
ts_contaminated = ts.copy()
M, _ = ts_contaminated.shape

missing_rate = Contamination.verification_limitation(missing_rate)
series_impacted = Contamination.verification_limitation(series_impacted)
protection = Contamination.verification_limitation(protection)

nbr_series_impacted = int(np.ceil(M * series_impacted))

print("\n\nMISSING PERCENTAGE contamination has been called with :"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import re
import os
import toml
from imputegap.algorithms.cdrec import cdrec
from imputegap.algorithms.iim import iim
from imputegap.algorithms.min_impute import min_impute
from imputegap.algorithms.mrnn import mrnn
from imputegap.algorithms.stmvl import stmvl
from imputegap.algorithms.zero_impute import zero_impute
from imputegap.evaluation.evaluation import Evaluation
from imputegap.manager import utils
from imputegap.tools.evaluation import Evaluation
from imputegap.tools import utils


class Imputation:
Expand Down
18 changes: 7 additions & 11 deletions imputegap/manager/manager.py → imputegap/recovery/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,12 @@ def normalization_z_score(self, ts):

return z_scores

def plot(self, ts_type="ground_truth", title='Time Series Data', save_path="", limitation=10, size=(16, 8), display=True, colors=['dimgrey', 'plum', 'lightblue', 'mediumseagreen', 'khaki']):
def plot(self, ts_type="gt", title='Time Series Data', save_path="", limitation=10, size=(16, 8), display=True, colors=['dimgrey', 'plum', 'lightblue', 'mediumseagreen', 'khaki']):
"""
Plot a chosen time series
@author Quentin Nater
:param ts_type : ("gt", "contamination", "imputation"), type of time series to print | default "gt"
:param title: title of the plot
:param save_path : path to save locally the plot
:param limitation: number of series displayed inside the plot
Expand All @@ -165,29 +166,24 @@ def plot(self, ts_type="ground_truth", title='Time Series Data', save_path="", l
if limitation == 0:
limitation = self.ts.shape[0]

if ts_type == "ground_truth":
if ts_type == "gt":
for i in range(self.ts.shape[0]):
plt.plot(np.arange(self.ts.shape[1]), self.ts[i, :], label=f'Series {i + 1}')
number_of_series += 1

if number_of_series == limitation:
break

elif ts_type == "ground_truth_normalized":
for i in range(self.normalized_ts.shape[0]):
plt.plot(np.arange(self.normalized_ts.shape[1]), self.normalized_ts[i, :], label=f'Series {i + 1}')
number_of_series += 1

if number_of_series == limitation:
break

elif ts_type == "contamination":
for i in range(self.ts.shape[0]):
color = colors[i % len(colors)]

plt.plot(np.arange(self.ts.shape[1]), self.ts[i, :], 'r--', label=f'Series {i + 1}-GT')
if np.isnan(self.ts_contaminate[i, :]).any():
plt.plot(np.arange(self.ts.shape[1]), self.ts[i, :], 'r--', label=f'Series {i + 1}')

plt.plot(np.arange(self.ts_contaminate.shape[1]), self.ts_contaminate[i, :], linewidth=2.5,
color=color, linestyle='-', label=f'Series {i + 1}-MV')
color=color, linestyle='-', label=f'Series {i + 1}-CNT')

number_of_series += 1
if number_of_series == limitation:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import os

import toml
import numpy as np


import skopt
import toml
from skopt.space import Integer
from skopt.utils import use_named_args

from imputegap.imputation.imputation import Imputation
from imputegap.optimization.algorithm_parameters import SEARCH_SPACES
from imputegap.recovery.imputation import Imputation
from imputegap.tools.algorithm_parameters import SEARCH_SPACES

# Define the search space for each algorithm separately
search_spaces = SEARCH_SPACES
Expand Down
32 changes: 32 additions & 0 deletions imputegap/runner_contamination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from imputegap.recovery.contamination import Contamination
from imputegap.recovery.imputation import Imputation
from imputegap.recovery.manager import TimeSeries
from imputegap.recovery.optimization import Optimization
from imputegap.tools.utils import display_title
from imputegap.tools import utils
import os


def check_block_size(filename):
    """
    Pick the (block size, plot limit) pair for a dataset name.

    :param filename: name of the dataset
    :return: (2, 2) when "test" occurs in the name, (10, 10) otherwise
    """
    return (2, 2) if "test" in filename else (10, 10)


if __name__ == '__main__':
    # Demo run: load a dataset, contaminate it with the "missing percentage"
    # scenario, then print and plot the contaminated series.
    display_title()

    filename = "eeg"
    file_path = os.path.join("./dataset/", filename + ".txt")

    gap = TimeSeries(data=file_path, normalization="z_score", limitation_values=100)

    # block_size is not used by this scenario; only plot_limit is consumed below.
    block_size, plot_limit = check_block_size(filename)

    gap.ts_contaminate = Contamination.scenario_missing_percentage(
        ts=gap.ts,
        series_impacted=0.1,
        missing_rate=0.4,
        protection=0.1,
        use_seed=True,
        seed=42,
    )
    gap.print(limitation=10)
    gap.plot(
        ts_type="contamination",
        title="test",
        save_path="assets",
        limitation=plot_limit,
        display=True,
    )

    print("\n", "_"*95, "end")
14 changes: 3 additions & 11 deletions imputegap/runner_explainer.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,8 @@
from imputegap.contamination.contamination import Contamination
from imputegap.imputation.imputation import Imputation
from imputegap.manager.manager import TimeSeries
from imputegap.recovery.manager import TimeSeries
from imputegap.explainer.explainer import Explainer
import os

from imputegap.tools.utils import display_title

def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"):
print("=" * 100)
print(f"{title} : {aut}")
print("=" * 100)
print(f" {lib} - {university}")
print("=" * 100)
import os


def check_block_size(filename):
Expand Down
22 changes: 7 additions & 15 deletions imputegap/runner_imputation.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
from imputegap.contamination.contamination import Contamination
from imputegap.imputation.imputation import Imputation
from imputegap.manager import utils
from imputegap.manager.manager import TimeSeries
from imputegap.recovery.contamination import Contamination
from imputegap.recovery.imputation import Imputation
from imputegap.recovery.manager import TimeSeries
from imputegap.recovery.optimization import Optimization
from imputegap.tools.utils import display_title
from imputegap.tools import utils
import os

from imputegap.optimization.bayesian_optimization import Optimization


def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"):
print("=" * 100)
print(f"{title} : {aut}")
print("=" * 100)
print(f" {lib} - {university}")
print("=" * 100)


def check_block_size(filename):
if "test" in filename:
Expand All @@ -36,7 +28,7 @@ def check_block_size(filename):
block_size, plot_limit = check_block_size(filename)

gap.print(limitation=5)
gap.plot(title="test", save_path="assets", limitation=0, display=False)
gap.plot(ts_type="gt", title="test", save_path="assets", limitation=0, display=False)

gap.ts_contaminate = Contamination.scenario_mcar(ts=gap.ts, series_impacted=0.4, missing_rate=0.4, block_size=block_size, protection=0.1, use_seed=True, seed=42)
gap.print(limitation=10)
Expand Down
17 changes: 5 additions & 12 deletions imputegap/runner_optimization.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,9 @@
from imputegap.contamination.contamination import Contamination
from imputegap.manager.manager import TimeSeries
import os

from imputegap.optimization.bayesian_optimization import Optimization
from imputegap.recovery.contamination import Contamination
from imputegap.recovery.manager import TimeSeries
from imputegap.recovery.optimization import Optimization
from imputegap.tools.utils import display_title


def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"):
print("=" * 100)
print(f"{title} : {aut}")
print("=" * 100)
print(f" {lib} - {university}")
print("=" * 100)
import os


def check_block_size(filename):
Expand Down
File renamed without changes.
Binary file added imputegap/tools/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@

# IIM parameters
IIM_LEARNING_NEIGHBOR_RANGE = list(range(1, 100))  # Candidate learning-neighbor counts 1..99 (range stop is exclusive)
# IIM_ADAPTIVE_RANGE = [True, False] # Test with and without adaptive learning
# IIM_METRIC_RANGE = ['euclidean', 'cosine'] # Test with euclidean and cosine distance, could be more

# MRNN parameters
MRNN_LEARNING_RATE_CHANGE = np.logspace(-6, 0, num=20) # log scale for learning rate
Expand Down Expand Up @@ -45,11 +43,4 @@
'iim': ['learning_neighbors'],
'mrnn': ['hidden_dim', 'learning_rate', 'iterations', 'seq_len' ],
'stmvl': ['window_size', 'gamma', 'alpha']
}

DEFAULT_PARAMS = {
'cdrec': [1, 1e-6, 100],
'iim': [10],
'mrnn': [10, 0.01, 1000, 7],
'stmvl': [2, 0.85, 7]
}
File renamed without changes.
9 changes: 8 additions & 1 deletion imputegap/manager/utils.py → imputegap/tools/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
import os

import toml


def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"):
    """
    Print a three-rule banner to stdout: title/author line, then library/university line.

    :param title: title shown on the first text line
    :param aut: author shown next to the title
    :param lib: library name shown on the second text line
    :param university: affiliation shown next to the library name
    """
    rule = "=" * 100
    banner_lines = (
        rule,
        f"{title} : {aut}",
        rule,
        f" {lib} - {university}",
        rule,
    )
    for line in banner_lines:
        print(line)


def get_file_path_dataset(set_name="test"):
"""
Find the accurate path for loading files of tests
Expand Down
Binary file modified tests/__pycache__/test_contamination_mp.cpython-312.pyc
Binary file not shown.
Binary file modified tests/__pycache__/test_imputation_cdrec.cpython-312.pyc
Binary file not shown.
6 changes: 3 additions & 3 deletions tests/test_contamination_mcar.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import unittest
import numpy as np

from imputegap.contamination.contamination import Contamination
from imputegap.manager import utils
from imputegap.manager.manager import TimeSeries
from imputegap.recovery.contamination import Contamination
from imputegap.tools import utils
from imputegap.recovery.manager import TimeSeries


class TestContamination(unittest.TestCase):
Expand Down
Loading

0 comments on commit cc45353

Please sign in to comment.