diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b4f52e8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,254 @@ + +# Created by https://www.toptal.com/developers/gitignore/api/python,pycharm +# Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject 
date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
class MercadoBitcoinApi(ABC):
    """Abstract client for the Mercado Bitcoin HTTP API.

    Concrete subclasses only decide *which* URL to call by implementing
    ``_get_endpoint``; ``get_data`` performs the GET request and decodes the
    JSON body.
    """

    def __init__(self, coin: str) -> None:
        """Remember the coin code and the API root every endpoint is built on.

        Args:
            coin: Coin code interpolated into endpoint URLs by subclasses.
        """
        self.base_endpoint = "https://www.mercadobitcoin.net/api"
        self.coin = coin

    @abstractmethod
    def _get_endpoint(self, **kwargs) -> str:
        """Return the full URL to request for the given keyword arguments."""

    # Decorators apply bottom-up: retry on HTTPError with `expo` waits, then
    # the `ratelimit.limits(calls=30, period=30)` guard, then retry on
    # RateLimitException with `constant` waits. Each retry layer stops after
    # 10 tries.
    @on_exception(constant, ratelimit.exception.RateLimitException, max_tries=10)
    @ratelimit.limits(calls=30, period=30)
    @on_exception(expo, requests.exceptions.HTTPError, max_tries=10)
    def get_data(self, **kwargs) -> dict:
        """Fetch the endpoint built from ``kwargs`` and return its decoded JSON.

        Args:
            **kwargs: Forwarded unchanged to ``_get_endpoint``.

        Returns:
            Whatever ``response.json()`` yields for the reply body.

        Raises:
            requests.exceptions.HTTPError: Raised by ``raise_for_status`` for
                an error status (after the retry decorator gives up).
        """
        endpoint = self._get_endpoint(**kwargs)
        logger.info(f"Getting data from endpoint: {endpoint}")
        reply = requests.get(endpoint)
        # Fail loudly on 4xx/5xx before attempting to decode the body.
        reply.raise_for_status()
        payload = reply.json()
        return payload
class DataIngestor(ABC):
    """Base class for ingestors that resume from a date checkpoint on disk.

    The checkpoint is a single ``YYYY-MM-DD`` date stored in a file named
    ``<ConcreteClassName>.checkpoint``, resolved relative to the current
    working directory. Subclasses implement ``ingest``.
    """

    # One format shared by the reader and the writer so they cannot drift apart.
    _CHECKPOINT_DATE_FORMAT = "%Y-%m-%d"

    def __init__(self, writer, coins: List[str], default_start_date: datetime.date) -> None:
        """Store the collaborators and load the current checkpoint.

        Args:
            writer: Writer object/class kept on ``self.writer`` for subclasses.
            coins: Coin codes kept on ``self.coins`` for subclasses.
            default_start_date: Date used when no checkpoint file exists yet.
        """
        # Must be set before _load_checkpoint(), which falls back to it.
        self.default_start_date = default_start_date
        self.coins = coins
        self.writer = writer
        self._checkpoint = self._load_checkpoint()

    @property
    def _checkpoint_filename(self) -> str:
        """Name of the checkpoint file, derived from the concrete class name."""
        return f"{self.__class__.__name__}.checkpoint"

    def _write_checkpoint(self) -> None:
        """Overwrite the checkpoint file with the in-memory checkpoint date."""
        with open(self._checkpoint_filename, "w") as f:
            # Serialise with the exact format _load_checkpoint() parses.
            f.write(self._checkpoint.strftime(self._CHECKPOINT_DATE_FORMAT))

    def _load_checkpoint(self) -> datetime.date:
        """Read the checkpoint date from disk.

        Returns:
            The stored date, or ``default_start_date`` when the file is absent.

        Raises:
            ValueError: If the file exists but does not hold a ``YYYY-MM-DD``
                date (surrounding whitespace is ignored).
        """
        try:
            with open(self._checkpoint_filename, "r") as f:
                raw = f.read()
        except FileNotFoundError:
            return self.default_start_date
        # Strip first: a trailing newline (hand-edited file, `echo` output)
        # would otherwise make strptime fail with "unconverted data remains".
        parsed = datetime.datetime.strptime(raw.strip(), self._CHECKPOINT_DATE_FORMAT)
        return parsed.date()

    def _update_checkpoint(self, value: datetime.date) -> None:
        """Set the in-memory checkpoint to ``value`` and persist it."""
        self._checkpoint = value
        self._write_checkpoint()

    @abstractmethod
    def ingest(self) -> None:
        """Run one ingestion step; implemented by subclasses."""
class DataTypeNotSupportedForIngestionException(Exception):
    """Raised when a payload handed to ``DataWriter.write`` is neither dict nor list."""

    def __init__(self, data):
        # Keep the offending value so callers can inspect it.
        self.data = data
        self.message = f"Data type {type(data)} is not supported for ingestion"
        super().__init__(self.message)


class DataWriter:
    """Append API payloads to a JSON-lines file under ``<api>/<coin>/``.

    The target file name is fixed when the writer is constructed and embeds
    ``datetime.datetime.now()`` at that moment.
    """

    def __init__(self, coin: str, api: str) -> None:
        """Compute the output path for this writer.

        Args:
            coin: Coin code, used as the second path component.
            api: API type name, used as the first path component.
        """
        self.api = api
        self.coin = coin
        # NOTE(review): str(datetime.now()) contains a space and colons, which
        # are not valid in Windows file names - confirm target platforms.
        self.filename = f"{self.api}/{self.coin}/{datetime.datetime.now()}.json"

    def _write_row(self, row: str) -> None:
        """Append ``row`` verbatim to the output file, creating its directory if needed."""
        os.makedirs(os.path.dirname(self.filename), exist_ok=True)
        with open(self.filename, "a") as f:
            f.write(row)

    def _iter_rows(self, data):
        """Yield one newline-terminated JSON string per dict found in ``data``.

        Lists are walked recursively in order. Anything that is neither a dict
        nor a list raises ``DataTypeNotSupportedForIngestionException``.
        """
        if isinstance(data, dict):
            yield json.dumps(data) + "\n"
        elif isinstance(data, list):
            for element in data:
                yield from self._iter_rows(element)
        else:
            raise DataTypeNotSupportedForIngestionException(data)

    def write(self, data: object) -> None:
        """Serialise ``data`` and append it to the output file.

        Args:
            data: A dict (written as one JSON line) or a list, possibly nested,
                of dicts (written as one JSON line per dict, in order).

        Raises:
            DataTypeNotSupportedForIngestionException: If ``data`` or any list
                element is neither a dict nor a list. Nothing is written in
                that case.
        """
        # Serialise everything before touching the disk: an unsupported element
        # can no longer leave a half-written file, and a list costs a single
        # makedirs/open instead of one per element.
        rows = "".join(self._iter_rows(data))
        if rows:  # an empty list produces no output and creates no file
            self._write_row(rows)
class TestMercadoBitcoinApi:
    """Unit tests for ``MercadoBitcoinApi.get_data`` with ``requests.get`` patched.

    Each test patches ``_get_endpoint`` to return a fixed string and relies on
    the ``fixture_mercado_bitcoin_api`` fixture for the instance under test.
    """

    @patch("requests.get")
    @patch("mercado_bitcoin.apis.MercadoBitcoinApi._get_endpoint", return_value="valid_endpoint")
    def test_get_data_requests_is_called(self, mock_get_endpoint, mock_requests, fixture_mercado_bitcoin_api):
        """``get_data`` calls ``requests.get`` exactly once with the endpoint URL."""
        fixture_mercado_bitcoin_api.get_data()
        mock_requests.assert_called_once_with("valid_endpoint")

    @patch("requests.get", side_effect=mocked_requests_get)
    @patch("mercado_bitcoin.apis.MercadoBitcoinApi._get_endpoint", return_value="valid_endpoint")
    def test_get_data_with_valid_endpoint(self, mock_get_endpoint, mock_requests, fixture_mercado_bitcoin_api):
        """A 200 reply is returned to the caller as the decoded JSON payload."""
        actual = fixture_mercado_bitcoin_api.get_data()
        expected = {"foo": "bar"}
        assert actual == expected

    # This method must not reuse the name of the test above: a second ``def``
    # with the same name rebinds it in the class body, so only the last one is
    # collected and the valid-endpoint case silently stops running.
    @patch("requests.get", side_effect=mocked_requests_get)
    @patch("mercado_bitcoin.apis.MercadoBitcoinApi._get_endpoint", return_value="invalid_endpoint")
    def test_get_data_with_invalid_endpoint(self, mock_get_endpoint, mock_requests, fixture_mercado_bitcoin_api):
        """A non-200 reply makes ``get_data`` raise."""
        with pytest.raises(Exception):
            fixture_mercado_bitcoin_api.get_data()
class TestDaySummaryApi:
    """Integration checks for ``DaySummaryApi.get_data``.

    Nothing is patched in these tests, so ``get_data`` runs for real.
    """

    def test_get_data(self):
        """The whole day-summary payload for BTC on 2021-01-01 matches a fixed snapshot."""
        expected = {'date': '2021-01-01', 'opening': 152700.00002, 'closing': 153458.29999999, 'lowest': 151539, 'highest': 153975, 'volume': 12583384.54790148, 'quantity': 82.27265844, 'amount': 4824, 'avg_price': 152947.34346135}
        api = DaySummaryApi(coin="BTC")
        day = datetime.date(2021, 1, 1)
        assert api.get_data(date=day) == expected

    def test_get_data_better(self):
        """Only the ``date`` field of the BTC payload for 2021-01-01 is compared."""
        api = DaySummaryApi(coin="BTC")
        summary = api.get_data(date=datetime.date(2021, 1, 1))
        assert summary.get("date") == "2021-01-01"