Skip to content

Commit

Permalink
Merge pull request #13 from eXascaleInfolab/naterq_upload_package
Browse files Browse the repository at this point in the history
naterq upload package
  • Loading branch information
qnater authored Oct 14, 2024
2 parents c99806a + 0e391fe commit b2d9ac6
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 43 deletions.
41 changes: 14 additions & 27 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 15 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,9 @@ ts_1.load_timeseries(utils.search_path("eeg"))
ts_1.normalize(normalizer="z_score")

# [OPTIONAL] you can plot your raw data / print the information
ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./assets")
ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
ts_1.print(limit=10)

```

<br /><hr /><br />
Expand All @@ -90,7 +91,7 @@ ts_1.print(limit=10)

## Contamination
ImputeGAP allows you to contaminate datasets with a specific scenario in order to reproduce a given situation. The scenarios available so far are: <b>MCAR, MISSING PERCENTAGE, ...</b><br />
Please find the documentation in this page : <a href="https://github.com/eXascaleInfolab/ImputeGAP/tree/main/imputegap/contamination#readme" >missing data scenarios</a><br><br>
Please find the documentation on this page: <a href="https://github.com/eXascaleInfolab/ImputeGAP/tree/main/imputegap/recovery#readme" >missing data scenarios</a><br><br>


### Example Contamination
Expand All @@ -107,6 +108,10 @@ ts_1.normalize(normalizer="min_max")

# 3. contamination of the data with MCAR scenario
infected_data = ts_1.Contaminate.mcar(ts_1.data, series_impacted=0.4, missing_rate=0.2, use_seed=True)

# [OPTIONAL] you can plot your raw data / print the contamination
ts_1.print(limit=10)
ts_1.plot(ts_1.data, infected_data, title="contamination", max_series=1, save_path="./imputegap/assets")
```

<br /><hr /><br />
Expand Down Expand Up @@ -140,13 +145,16 @@ cdrec = Imputation.MD.CDRec(infected_data)
# imputation with default values
cdrec.impute()
# OR imputation with user defined values
cdrec.impute(params={"rank": 5, "epsilon":0.01, "iterations": 100})
cdrec.impute(params={"rank": 5, "epsilon": 0.01, "iterations": 100})

# [OPTIONAL] save your results in a new Time Series object
ts_3 = TimeSeries().import_matrix(cdrec.imputed_matrix)

# 5. score the imputation with the raw_data
cdrec.score(ts_1.data, ts_3.data)

# [OPTIONAL] print the results
ts_3.print_results(cdrec.metrics)
```


Expand Down Expand Up @@ -174,14 +182,14 @@ ts_1.normalize(normalizer="min_max")
infected_data = ts_1.Contaminate.mcar(ts_1.data)

# 4. imputation of the contaminated data
# choice of the algorithm, and their parameters (default, automl, or defined by the user)
cdrec = Imputation.MD.CDRec(infected_data)

# imputation with AutoML which will discover the optimal hyperparameters for your dataset and your algorithm
cdrec.impute = Imputation.MD.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 5}})
cdrec = Imputation.MD.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 5}})

# 5. score the imputation with the raw_data
cdrec.score(ts_1.data, cdrec.imputed_matrix)

# [OPTIONAL] print the results
ts_1.print_results(cdrec.metrics)
```


Expand Down
Binary file not shown.
Binary file not shown.
24 changes: 16 additions & 8 deletions imputegap.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: imputegap
Version: 0.1.2
Version: 0.1.3
Summary: A Library of Imputation Techniques for Time Series Data
Home-page: https://github.com/eXascaleInfolab/ImputeGAP
Author: Quentin Nater
Expand Down Expand Up @@ -112,8 +112,9 @@ ts_1.load_timeseries(utils.search_path("eeg"))
ts_1.normalize(normalizer="z_score")

# [OPTIONAL] you can plot your raw data / print the information
ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./assets")
ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
ts_1.print(limit=10)

```

<br /><hr /><br />
Expand All @@ -122,7 +123,7 @@ ts_1.print(limit=10)

## Contamination
ImputeGAP allows you to contaminate datasets with a specific scenario in order to reproduce a given situation. The scenarios available so far are: <b>MCAR, MISSING PERCENTAGE, ...</b><br />
Please find the documentation in this page : <a href="https://github.com/eXascaleInfolab/ImputeGAP/tree/main/imputegap/contamination#readme" >missing data scenarios</a><br><br>
Please find the documentation on this page: <a href="https://github.com/eXascaleInfolab/ImputeGAP/tree/main/imputegap/recovery#readme" >missing data scenarios</a><br><br>


### Example Contamination
Expand All @@ -139,6 +140,10 @@ ts_1.normalize(normalizer="min_max")

# 3. contamination of the data with MCAR scenario
infected_data = ts_1.Contaminate.mcar(ts_1.data, series_impacted=0.4, missing_rate=0.2, use_seed=True)

# [OPTIONAL] you can plot your raw data / print the contamination
ts_1.print(limit=10)
ts_1.plot(ts_1.data, infected_data, title="contamination", max_series=1, save_path="./imputegap/assets")
```

<br /><hr /><br />
Expand Down Expand Up @@ -172,13 +177,16 @@ cdrec = Imputation.MD.CDRec(infected_data)
# imputation with default values
cdrec.impute()
# OR imputation with user defined values
cdrec.impute(params={"rank": 5, "epsilon":0.01, "iterations": 100})
cdrec.impute(params={"rank": 5, "epsilon": 0.01, "iterations": 100})

# [OPTIONAL] save your results in a new Time Series object
ts_3 = TimeSeries().import_matrix(cdrec.imputed_matrix)

# 5. score the imputation with the raw_data
cdrec.score(ts_1.data, ts_3.data)

# [OPTIONAL] print the results
ts_3.print_results(cdrec.metrics)
```


Expand Down Expand Up @@ -206,14 +214,14 @@ ts_1.normalize(normalizer="min_max")
infected_data = ts_1.Contaminate.mcar(ts_1.data)

# 4. imputation of the contaminated data
# choice of the algorithm, and their parameters (default, automl, or defined by the user)
cdrec = Imputation.MD.CDRec(infected_data)

# imputation with AutoML which will discover the optimal hyperparameters for your dataset and your algorithm
cdrec.impute = Imputation.MD.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 5}})
cdrec = Imputation.MD.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 5}})

# 5. score the imputation with the raw_data
cdrec.score(ts_1.data, cdrec.imputed_matrix)

# [OPTIONAL] print the results
ts_1.print_results(cdrec.metrics)
```


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setuptools.setup(
name="imputegap",
version="0.1.2",
version="0.1.3",
description="A Library of Imputation Techniques for Time Series Data",
long_description=open('README.md').read(),
long_description_content_type="text/markdown",
Expand Down

0 comments on commit b2d9ac6

Please sign in to comment.