diff --git a/.gitignore b/.gitignore index bc9cd25..93a32af 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ vendor/ __pycache__/ docker/keys/* !docker/keys/genkeys.sh +!docker/keys/keys-to-config-json.py diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/README.md b/README.md index d1c8c2d..9955548 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Example: ``` https://yourDomain.zendesk.com/api/v2/ -``` +``` -- OR -- @@ -148,7 +148,7 @@ Moved to [new docs](https://developers.keboola.com/extend/generic-extractor/conf - **authentication.type**: `query` - **authentication.query.apiKey**: `{"attr": "apiKey"}` - this will look for the *apiKey* query parameter value in the config attribute named *apiKey* -- **authentication.query.sig**: +- **authentication.query.sig**: ``` { "function": "md5", @@ -169,7 +169,7 @@ Moved to [new docs](https://developers.keboola.com/extend/generic-extractor/conf } ] } - ``` + ``` - this will generate a *sig* parameter value from MD5 of merged configuration table attributes *apiKey* and *secret*, followed by current *time()* at the time of the request (time() being the PHP function) - Allowed functions are listed below in the *User functions* section - If you're using any config parameter by using `"attr": "parameterName"`, it has to be identical string to the one in the actual config, including eventual `#` if KBC Docker's encryption is used. 
@@ -468,7 +468,7 @@ Moved to [new docs](https://developers.keboola.com/extend/generic-extractor/conf - sets which query parameter should contain the limit value (default to `limit`) - **pagination.offsetParam**(optional) - sets which query parameter should contain the offset value (default to `offset`) - + ``` "api": { "pagination": { @@ -478,7 +478,7 @@ Moved to [new docs](https://developers.keboola.com/extend/generic-extractor/conf "offsetParam": "offset" } } - ``` + ``` - **pagination.firstPageParams**(optional) - Whether or not include limit and offset params in the first request (default to `true`) @@ -769,7 +769,7 @@ Moved to [new docs](https://developers.keboola.com/extend/generic-extractor/conf } ] } - ``` + ``` - **dataType**: Type of data returned by the endpoint. It also describes a table name, where the results will be stored - **dataField**: Allows to override which field of the response will be exported. - If there's multiple arrays in the response "root" the extractor may not know which array to export and fail @@ -842,7 +842,7 @@ Moved to [new docs](https://developers.keboola.com/extend/generic-extractor/conf "data": {"object": "can't really parse this!"} } ] - ``` + ``` - To be able to work with such response, set `"responseFilter": "data"` - it should be a path within each object of the response array, **not** including the key of the response array - To filter values within nested arrays, use `"responseFilter": "data.array[].key"` @@ -1218,25 +1218,25 @@ Best way to create and test new configurations is run extractor in docker contai # Running tests: ``` docker compose run --rm tests -``` +``` or (with local source code and vendor copy) ``` docker compose run --rm tests-local -``` +``` # mTLS -1. `cd docker/keys` and then run `./genkeys.sh` -2. ``` +1. Run `cd docker/keys` and then `./genkeys.sh`. 
The script generates CA, server and client certificates + and also a config.json file with the following structure to be pasted into your own config.json: + ``` "api": { "baseUrl": "https://server.local/", - "caCertificate": "conent of file rootCA.crt with \n at the end", - "#clientCertificate": "conent of file client.crt with \n at the end", - "#clientKey": "conent of file client.key with \n at the end" + "caCertificate": "-- rootCA.crt --", + "#clientCertificate": "-- client.crt bundled with client.key --" } - ``` -3. restart nginx + ``` +1. Restart nginx ## License diff --git a/docker-compose.yml b/docker-compose.yml index a0c619f..fa9417b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: '3' services: app: &app build: . @@ -40,7 +39,7 @@ services: ports: - "443:443" volumes: - - "./docker/nginx/default.conf:/etc/nginx/conf.d/default.conf" - - "./docker/keys/server.crt:/etc/nginx/server.crt" - - "./docker/keys/server.key:/etc/nginx/server.key" - - "./docker/keys/rootCA.crt:/etc/nginx/ca.crt" + - ./docker/nginx/default.conf:/etc/nginx/conf.d/default.conf + - ./docker/keys/server.crt:/etc/nginx/server.crt + - ./docker/keys/server.key:/etc/nginx/server.key + - ./docker/keys/rootCA.crt:/etc/nginx/ca.crt diff --git a/docker/keys/genkeys.sh b/docker/keys/genkeys.sh index be420af..347d6e2 100755 --- a/docker/keys/genkeys.sh +++ b/docker/keys/genkeys.sh @@ -1,12 +1,17 @@ -cd keys echo "creating rootCA" openssl genrsa -out rootCA.key 4096 openssl req -x509 -new -nodes -key rootCA.key -subj "/C=CZ/ST=CZ/O=authority" -days 1024 -out rootCA.crt + echo "creating server keys" openssl genrsa -out server.key 2048 -openssl req -new -key server.key -subj "/C=CZ/ST=CZ/O=mytest/CN=server.local" -out server.csr # CN = server.local name of service -openssl x509 -req -in server.csr -CA rootCA.crt -CAkey rootCA.key -CAcreateserial -out server.crt -days 500 +# SAN is required as it is the main place where modern clients check host name (in fact CN can 
be ignored -- and is by e.g. chrome or requests) +openssl req -new -key server.key -subj "/C=CZ/ST=CZ/O=mytest/CN=server.local" -addext "subjectAltName=DNS:server.local" -out server.csr +# Extensions such as SAN are not copied by default from CSR when creating the certificate, -copy_extensions is required (semi-recent addition to OpenSSL) +openssl x509 -req -in server.csr -CA rootCA.crt -CAkey rootCA.key -CAcreateserial -out server.crt -days 500 -copy_extensions copy + echo "creating client keys" openssl genrsa -out client.key 2048 -openssl req -new -key client.key -subj "/C=CZ/ST=CZ/O=mytest/CN=dev" -out client.csr # CN = dev name of service -openssl x509 -req -in client.csr -CA rootCA.crt -CAkey rootCA.key -CAcreateserial -out client.crt -days 500 +openssl req -new -key client.key -subj "/C=CZ/ST=CZ/O=mytest/CN=client.local" -addext "subjectAltName=DNS:client.local" -out client.csr +openssl x509 -req -in client.csr -CA rootCA.crt -CAkey rootCA.key -CAcreateserial -out client.crt -days 500 -copy_extensions copy + +python3 keys-to-config-json.py diff --git a/docker/keys/keys-to-config-json.py b/docker/keys/keys-to-config-json.py new file mode 100755 index 0000000..fc06aad --- /dev/null +++ b/docker/keys/keys-to-config-json.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 + +import json + + +with open("rootCA.crt") as f: + ca_cert = f.read() + +with open("client.crt") as f: + client_cert = f.read() + +with open("client.key") as f: + client_key = f.read() + +with open("config.json", "w") as f: + json.dump( + { + "api": { + "baseUrl": "https://server.local/", + "caCertificate": ca_cert, + "#clientCertificate": client_cert + client_key, + } + }, + f, + indent=4, + ) diff --git a/python-sync-actions/Dockerfile b/python-sync-actions/Dockerfile index f1088b6..1389d19 100644 --- a/python-sync-actions/Dockerfile +++ b/python-sync-actions/Dockerfile @@ -1,25 +1,19 @@ FROM nikolaik/python-nodejs:python3.12-nodejs18 -ENV PYTHONIOENCODING utf-8 -COPY /src /code/src/ -COPY /tests 
/code/tests/ -COPY requirements.txt /code/requirements.txt -COPY flake8.cfg /code/flake8.cfg - -# install gcc to be able to build packages - e.g. required by regex, dateparser, also required for pandas -RUN apt-get update && apt-get install -y build-essential curl +RUN apt-get update && apt-get install -y curl -# Install curlconverter using npm RUN npm install --global curlconverter - RUN pip install flake8 +COPY requirements.txt /code/requirements.txt RUN pip install -r /code/requirements.txt +COPY flake8.cfg /code/flake8.cfg +COPY /src /code/src/ +COPY /tests /code/tests/ WORKDIR /code/ - CMD ["python", "-u", "/code/src/component.py"] diff --git a/python-sync-actions/docker-compose.yml b/python-sync-actions/docker-compose.yml index af85521..4a4958b 100644 --- a/python-sync-actions/docker-compose.yml +++ b/python-sync-actions/docker-compose.yml @@ -1,4 +1,3 @@ -version: "2" services: # for development purposes dev: @@ -6,6 +5,7 @@ services: volumes: - ./:/code - ./data:/data + - ../docker/keys:/code/keys environment: - KBC_DATADIR=./data test: @@ -35,8 +35,20 @@ services: tty: true stdin_open: true ports: - - "8888:80" + - 8888:80 volumes: - ./tests/calls:/examples/ environment: - - KBC_EXAMPLES_DIR=/examples/ \ No newline at end of file + - KBC_EXAMPLES_DIR=/examples/ + + # i was about to create a common network with the generic-extractor compose file + # to avoid duplication, but that would create an unnecessary dependency + server.local: + image: nginx:alpine + ports: + - "443:443" + volumes: + - ../docker/nginx/default.conf:/etc/nginx/conf.d/default.conf + - ../docker/keys/server.crt:/etc/nginx/server.crt + - ../docker/keys/server.key:/etc/nginx/server.key + - ../docker/keys/rootCA.crt:/etc/nginx/ca.crt diff --git a/python-sync-actions/requirements.in b/python-sync-actions/requirements.in new file mode 100644 index 0000000..be41878 --- /dev/null +++ b/python-sync-actions/requirements.in @@ -0,0 +1,9 @@ +keboola.component +dataconf +keboola.http-client 
+keboola.utils +keboola.json-to-csv==0.0.12 +mock==5.1.0 +freezegun==1.5.1 +nested-lookup==0.2.25 +python-dateutil==2.9.0.post0 diff --git a/python-sync-actions/requirements.txt b/python-sync-actions/requirements.txt index b727589..ff56290 100644 --- a/python-sync-actions/requirements.txt +++ b/python-sync-actions/requirements.txt @@ -1,9 +1,68 @@ -keboola.component==1.6.8 +# This file was autogenerated by uv via the following command: +# uv pip compile requirements.txt --universal --output-file requirements.all +certifi==2025.1.31 + # via requests +charset-normalizer==3.4.1 + # via requests dataconf==3.3.0 -keboola.http-client==1.0.1 -keboola.utils==1.1.0 -keboola.json-to-csv==0.0.12 -mock==5.1.0 + # via -r requirements.txt +dateparser==1.2.0 + # via keboola-utils +deprecated==1.2.18 + # via keboola-component freezegun==1.5.1 + # via -r requirements.txt +idna==3.10 + # via requests +isodate==0.6.1 + # via dataconf +keboola-component==1.6.8 + # via -r requirements.txt +keboola-http-client==1.0.1 + # via -r requirements.txt +keboola-json-to-csv==0.0.12 + # via -r requirements.txt +keboola-utils==1.1.0 + # via -r requirements.txt +mock==5.1.0 + # via -r requirements.txt nested-lookup==0.2.25 + # via -r requirements.txt +pygelf==0.4.2 + # via keboola-component +pyhocon==0.3.61 + # via dataconf +pyparsing==3.2.1 + # via + # dataconf + # pyhocon python-dateutil==2.9.0.post0 + # via + # -r requirements.txt + # dataconf + # dateparser + # freezegun +pytz==2025.1 + # via + # dateparser + # keboola-component + # keboola-utils +pyyaml==6.0.2 + # via dataconf +regex==2024.11.6 + # via dateparser +requests==2.32.3 + # via keboola-http-client +six==1.17.0 + # via + # isodate + # nested-lookup + # python-dateutil +tzdata==2025.1 ; sys_platform == 'win32' + # via tzlocal +tzlocal==5.2 + # via dateparser +urllib3==2.3.0 + # via requests +wrapt==1.17.2 + # via deprecated diff --git a/python-sync-actions/src/component.py b/python-sync-actions/src/component.py index 236f9ba..7fda870 
100644 --- a/python-sync-actions/src/component.py +++ b/python-sync-actions/src/component.py @@ -4,22 +4,22 @@ """ import copy import logging +import tempfile from io import StringIO -from typing import List - -import requests -from requests.exceptions import JSONDecodeError -from keboola.component.base import ComponentBase, sync_action -from keboola.component.exceptions import UserException +from typing import Any import configuration +import requests from actions.curl import build_job_from_curl from actions.mapping import infer_mapping -from configuration import Configuration, ConfigHelpers -from http_generic.auth import AuthMethodBuilder, AuthBuilderError +from configuration import ConfigHelpers, Configuration +from http_generic.auth import AuthBuilderError, AuthMethodBuilder from http_generic.client import GenericHttpClient, HttpClientError from http_generic.pagination import PaginationBuilder +from keboola.component.base import ComponentBase, sync_action +from keboola.component.exceptions import UserException from placeholders_utils import PlaceholdersUtils +from requests.exceptions import JSONDecodeError MAX_CHILD_CALLS = 20 @@ -59,7 +59,7 @@ def __init__(self): logging.info("Component initialized") - self._configurations: List[Configuration] = None + self._configurations: list[Configuration] = None self._configuration: Configuration = None self._client: GenericHttpClient = None self._parent_params = {} @@ -236,7 +236,7 @@ def _parse_data(self, data, path) -> list: """ - def find_array_property_path(response_data: dict, result_arrays: list = None) -> list[dict] | None: + def find_array_property_path(response_data: dict, result_arrays: list | None = None) -> list[dict] | None: """ Travers all object and find the first array property, return None if there are two array properties Args: @@ -314,7 +314,7 @@ def _add_page_params(self, job: Configuration, query_parameters: dict) -> dict: return query_parameters - def make_call(self) -> tuple[list, any, str, str]: + 
def make_call(self) -> tuple[list, Any, str, str]: """ Make call to the API Returns: @@ -355,16 +355,35 @@ def recursive_call(parent_result, config_index=0): # build additional parameters query_parameters = {**api_cfg.default_query_parameters.copy(), **request_cfg.query_parameters.copy()} query_parameters = self._conf_helpers.fill_in_user_parameters(query_parameters, user_params) - ssl_verify = api_cfg.ssl_verification timeout = api_cfg.timeout # additional_params = self._build_request_parameters(additional_params_cfg) query_parameters = self._add_page_params(job, query_parameters) - request_parameters = {'params': query_parameters, - 'headers': new_headers, - 'verify': ssl_verify, - 'timeout': timeout} + # if user provided CA certificate or client certificate & key, those will be written to a temp file and used + if not api_cfg.ca_cert: + ca_cert_file = "" + else: + with tempfile.NamedTemporaryFile("w", delete=False) as cafp: + ca_cert_file = cafp.name + cafp.write(api_cfg.ca_cert) + + if not api_cfg.client_cert_key: + client_cert_key_file = "" + else: + with tempfile.NamedTemporaryFile("w", delete=False) as ccfp: + client_cert_key_file = ccfp.name + ccfp.write(api_cfg.client_cert_key) + + verify = ca_cert_file if ca_cert_file else api_cfg.ssl_verify + + request_parameters = { + "params": query_parameters, + "headers": new_headers, + "timeout": timeout, + "verify": verify, + "cert": client_cert_key_file, + } if job.request_content.content_type == configuration.ContentType.json: request_parameters['json'] = job.request_content.body @@ -374,13 +393,15 @@ def recursive_call(parent_result, config_index=0): row_path = job.request_parameters.endpoint_path if job.request_parameters.placeholders: - placeholders = PlaceholdersUtils.get_params_for_child_jobs(job.request_parameters.placeholders, - self._parent_results, self._parent_params) + placeholders = PlaceholdersUtils.get_params_for_child_jobs( + job.request_parameters.placeholders, self._parent_results, 
self._parent_params + ) self._parent_params = placeholders[0] row_path = self._fill_placeholders(placeholders, job.request_parameters.endpoint_path) - self._final_response = self._client.send_request(method=job.request_parameters.method, - endpoint_path=row_path, **request_parameters) + self._final_response = self._client.send_request( + method=job.request_parameters.method, endpoint_path=row_path, **request_parameters + ) current_results = self._parse_data(self._final_response.json(), job.data_path) diff --git a/python-sync-actions/src/configuration.py b/python-sync-actions/src/configuration.py index d49bea3..852048b 100644 --- a/python-sync-actions/src/configuration.py +++ b/python-sync-actions/src/configuration.py @@ -77,8 +77,10 @@ class ApiConfig(ConfigurationBase): pagination: dict = field(default_factory=dict) authentication: Authentication = None retry_config: RetryConfig = field(default_factory=RetryConfig) - ssl_verification: bool = True - timeout: float = None + timeout: float | None = None + ssl_verify: bool = True # toggles requests.[method](verify=True/False) + ca_cert: str = "" # if provided, this value will be written to a temp file and used instead of ssl_verify + client_cert_key: str = "" # client certificate bundled with private key (will also be written to a temp file) @dataclass @@ -192,8 +194,9 @@ def convert_to_v2(configuration: dict) -> list[Configuration]: auth_method = configuration.get('config').get('__AUTH_METHOD') default_headers = _remove_auth_from_dict(default_headers_org, _return_ui_params(configuration), auth_method) - default_query_parameters = _remove_auth_from_dict(default_query_parameters_org, _return_ui_params(configuration), - auth_method) + default_query_parameters = _remove_auth_from_dict( + default_query_parameters_org, _return_ui_params(configuration), auth_method + ) pagination = {} if api_json.get('pagination', {}).get('scrollers'): @@ -201,8 +204,25 @@ def convert_to_v2(configuration: dict) -> list[Configuration]: elif 
api_json.get('pagination'): pagination['common'] = api_json.get('pagination') - api_config = ApiConfig(base_url=base_url, default_headers=default_headers, - default_query_parameters=default_query_parameters, pagination=pagination) + if ca_cert := api_json.get("caCertificate"): + ca_cert = ca_cert.strip() + else: + ca_cert = "" + + if client_cert_key := api_json.get("#clientCertificate"): + client_cert_ley = client_cert_key.strip() + else: + client_cert_ley = "" + + api_config = ApiConfig( + base_url=base_url, + default_headers=default_headers, + default_query_parameters=default_query_parameters, + pagination=pagination, + ssl_verify=api_json.get("ssl_verify", True), + ca_cert=ca_cert, + client_cert_key=client_cert_ley, + ) api_config.retry_config = build_retry_config(configuration) api_config.authentication = AuthMethodConverter.convert(configuration) diff --git a/python-sync-actions/src/http_generic/client.py b/python-sync-actions/src/http_generic/client.py index e8e0e80..f161dcb 100644 --- a/python-sync-actions/src/http_generic/client.py +++ b/python-sync-actions/src/http_generic/client.py @@ -1,5 +1,3 @@ -from typing import Tuple, Dict - import requests from keboola.http_client import HttpClient from requests.adapters import HTTPAdapter @@ -19,12 +17,12 @@ def __init__(self, message, response=None): class GenericHttpClient(HttpClient): def __init__(self, base_url: str, - default_http_header: Dict = None, - default_params: Dict = None, + default_http_header: dict = None, + default_params: dict = None, auth_method: AuthMethodBase = None, max_retries: int = 10, backoff_factor: float = 0.3, - status_forcelist: Tuple[int, ...] = (500, 502, 504) + status_forcelist: tuple[int, ...] 
= (500, 502, 504) ): super().__init__(base_url=base_url, max_retries=max_retries, backoff_factor=backoff_factor, status_forcelist=status_forcelist, @@ -42,6 +40,7 @@ def login(self): self._auth = self._auth_method.login() def send_request(self, method, endpoint_path, **kwargs): + resp = None try: resp = self._request_raw(method=method, endpoint_path=endpoint_path, is_absolute_path=False, **kwargs) resp.raise_for_status()