From c3c7893aba087fdd0be800f022ed88bb20d24a56 Mon Sep 17 00:00:00 2001 From: David Esner Date: Thu, 30 May 2024 08:08:26 +0200 Subject: [PATCH] Fix user data mapping, tests --- python-sync-actions/src/actions/mapping.py | 18 +++++++ python-sync-actions/src/component.py | 7 ++- .../__pycache__/test_curl.cpython-310.pyc | Bin 4198 -> 0 bytes .../config.json | 36 ++++++++++++++ .../config.json | 45 +++++++++++++++++ python-sync-actions/tests/test_mapping.py | 46 ++++++++++++++++++ 6 files changed, 150 insertions(+), 2 deletions(-) delete mode 100644 python-sync-actions/tests/__pycache__/test_curl.cpython-310.pyc create mode 100644 python-sync-actions/tests/data_tests/test_007_infer_mapping_userdata/config.json create mode 100644 python-sync-actions/tests/data_tests/test_008_infer_mapping_userdata_child/config.json diff --git a/python-sync-actions/src/actions/mapping.py b/python-sync-actions/src/actions/mapping.py index 29bc74c..9cd1610 100644 --- a/python-sync-actions/src/actions/mapping.py +++ b/python-sync-actions/src/actions/mapping.py @@ -33,6 +33,7 @@ def parse_row(self, row: dict[str, Any]): def infer_mapping(self, primary_keys: Optional[list[str]] = None, parent_pkeys: Optional[list[str]] = None, + user_data_columns: Optional[list[str]] = None, path_separator: str = '.', max_level: int = 2 ) -> dict: @@ -41,6 +42,7 @@ def infer_mapping(self, Args: primary_keys: optional list of columns to be used as primary keys parent_pkeys: optional list of columns to be used as parent primary keys + user_data_columns: optional list of columns to be used as user data columns path_separator: JSON path separator to use in the mapping max_level: maximum level to flatten results @@ -57,6 +59,10 @@ def infer_mapping(self, raise UserException(f"Parent {key} is already in the mapping, " f"please change the placeholder name") result_mapping[key] = MappingElements.parent_primary_key_column(key) + if user_data_columns: + for key in user_data_columns: + if key in result_mapping: + result_mapping[key] = MappingElements.user_data_column(key) return self.dedupe_values(result_mapping) @@ -158,10 +164,20 @@ def force_type_column(column_name: str) -> dict: "forceType": True } + @staticmethod + def user_data_column(column_name: str) -> dict: + return { + "type": "user", + "mapping": { + "destination": column_name + } + } + def infer_mapping(data: list[dict], primary_keys: Optional[list[str]] = None, parent_pkeys: Optional[list[str]] = None, + user_data_columns: Optional[list[str]] = None, path_separator: str = '.', max_level_nest_level: int = 2) -> dict: """ @@ -170,6 +186,7 @@ def infer_mapping(data: list[dict], data: sample data primary_keys: optional list of columns to be used as primary keys parent_pkeys: optional list of columns to be used as parent primary keys + user_data_columns: optional list of columns to be used as user data columns path_separator: JSON path separator to use in the mapping max_level_nest_level: maximum level to flatten results @@ -181,6 +198,7 @@ def infer_mapping(data: list[dict], analyzer.parse_row(row) result = analyzer.infer_mapping(primary_keys or [], parent_pkeys or [], + user_data_columns or [], path_separator=path_separator, max_level=max_level_nest_level) return result diff --git a/python-sync-actions/src/component.py b/python-sync-actions/src/component.py index ab3208c..07d72f3 100644 --- a/python-sync-actions/src/component.py +++ b/python-sync-actions/src/component.py @@ -272,7 +272,6 @@ def make_call(self) -> tuple[list, any, str, str]: self._parent_results = [{}] * len(self._configurations) - # TODO: omezit pocet callu z parent response na 10 kvuli timeoutu. E.g. zavolat child jen max 10x def recursive_call(parent_result, config_index=0): if parent_result: @@ -371,6 +370,7 @@ def infer_mapping(self) -> dict: nesting_level = self.configuration.parameters.get('__NESTING_LEVEL', 2) primary_keys = self.configuration.parameters.get('__PRIMARY_KEY', []) + is_child_job = len(self.configuration.parameters.get('__SELECTED_JOB', '').split('_')) > 1 parent_pkey = [] if len(self._configurations) > 1: parent_pkey = [f'parent_{p}' for p in self._configurations[-1].request_parameters.placeholders.keys()] @@ -378,15 +378,18 @@ def infer_mapping(self) -> dict: if not data: raise UserException("The request returned no data to infer mapping from.") - if self._configuration.user_data: + user_data_columns = [] + if self._configuration.user_data and not is_child_job: for record in data: for key, value in self._configuration.user_data.items(): + user_data_columns.append(key) if key in record: raise UserException(f"User data key [{key}] already exists in the response data, " f"please change the name.") record[key] = value mapping = infer_mapping(data, primary_keys, parent_pkey, + user_data_columns=user_data_columns, max_level_nest_level=nesting_level) return mapping diff --git a/python-sync-actions/tests/__pycache__/test_curl.cpython-310.pyc b/python-sync-actions/tests/__pycache__/test_curl.cpython-310.pyc deleted file mode 100644 index 1a7685c1007731da0b15b5bbbfd7eb132552465e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4198 zcmdT{%X8aA7}v|P>^N=G@))L2R09ba>bOaHhK81g($aR^Lc;)_VKj<%9jlQgXIE}w zCq6*AGH_-%w8#7{{0DpG#JRW90lsh7c49Xn;lMCRqu)NX`|Y>;JvSL2&uj41F5Ia7 zbW+oP#m3&pg2pBI_+R@IVqE6OzNs;z5RHIgCrcN(#& zb~k(9tU5%$qs?T013Fq|Mi=9sbIPu42DF)mNemrU&|(pkW7m_FW!h*)eo6xzz4tMo zaS1;D5dhVyI?<{I(TVY$RyB!9EGR9tay3J;#DTU=awHFBmW+`Clnxmu$DquS5K3Uum+tLbcb46Hnl=~8A(LkEF>bF{&=C!?ij*q`e|L{I7LR&gb7IfYRgT6f{nuk@&mKbLKn-U$P)6;%c|`3zlA^IJP-PARni9i3urY*V z2*(j75GE0Da}1o2#!exeM!=I-@G8^}JfslZ1s>9~ID87=@FVF@Iq*pMHK21eqX%=( zgW-dFf!g@KhJUQm`Boim@8tg8zVGrR26HCv^j2|=w&#iqMP+Tpvo3bb7Z)j7cO`~Q`!p21TbmX)y8Vp3)^;0%x{iStr;or%51jocq1$rDmj zao|A1U09Y6Yj~lr;irZ*d}!W~1kv_U;g;ZgQ~^K64zF)xybF4J8ZQQl*LwFfDaI@0+@xPbj#i7vAx$$X>vbHNbX$nrt z^%-^JBz1YK*-RK{i3@&3#Rf*#Tj+k=jWDgd*YDh_xPgi@ZVJ8?d~iWI$j|(cQ_e8t zN|(xB#M(;$!sICvIogljJ0^P>|GD@NJSjx2&2-duPwh$^7+`Hrvdg`iA4YG$#7F28 zEWKb9^cVjunBd_4?7X^<*q@qwp#0fcpg02(yFZZFhs#UX4hgOQmg#- z%_N4Qi(7*#_QthvwFMawtbn6OI=WM=CHrO3omyYqTJKIpVIu@Ovgl4FwHl|;RAqO% zbMBuqFHF!LVXK|&x^9v4dYkb?)|312#F#f=-xHJ27}@|1l&pHf^wzN@ne>F?DfYY_ zr<(0it!5-mksDB~9v`7s%}8oOUl$okN!W@`=fu-Otmb!xDB}ktbEs}3?gx4Y?uHfd z{~3f4-G5fU795418)bm8%^BM_hMOZI*9^OCHANCqDHI0Cgkex?(3*jmP zCZc;-mds;1%rXF&SaVFG9Sv>-WAe>8xx%>oJChSmD-KiKpqSj_0F^6~!Cho~*AjJc pD)d^jsC??iq;AMQTz_!I{a6vs1+}#g@D9yUN{$Ah9D@0C=3kaxyxjl* diff --git a/python-sync-actions/tests/data_tests/test_007_infer_mapping_userdata/config.json b/python-sync-actions/tests/data_tests/test_007_infer_mapping_userdata/config.json new file mode 100644 index 0000000..837c552 --- /dev/null +++ b/python-sync-actions/tests/data_tests/test_007_infer_mapping_userdata/config.json @@ -0,0 +1,36 @@ +{ + "parameters": { + "api": { + "baseUrl": "http://private-834388-extractormock.apiary-mock.com", + "pagination": { + "method": "response.url", + "urlKey": "next" + } + }, + "config": { + "outputBucket": "getPost", + "jobs": [ + { + "endpoint": "get", + "method": "GET", + "params": { + "parameter": "value" + } + } + ], + "userData": { + "start_date": { + "function": "date", + "args": [ + "Y-m-d H:i:s", + { + "time": "currentStart" + } + ] + } + } + }, + "__SELECTED_JOB": "0" + }, + "action": "infer_mapping" +} \ No newline at end of file diff --git a/python-sync-actions/tests/data_tests/test_008_infer_mapping_userdata_child/config.json b/python-sync-actions/tests/data_tests/test_008_infer_mapping_userdata_child/config.json new file mode 100644 index 0000000..3f77a6f --- /dev/null +++ b/python-sync-actions/tests/data_tests/test_008_infer_mapping_userdata_child/config.json @@ -0,0 +1,45 @@ +{ + "parameters": { + "api": { + "baseUrl": "http://private-834388-extractormock.apiary-mock.com", + "pagination": { + "method": "response.url", + "urlKey": "next" + } + }, + "config": { + "outputBucket": "getPost", + "jobs": [ + { + "endpoint": "get", + "method": "GET", + "params": { + "parameter": "value" + }, + "children": [ + { + "endpoint": "get", + "method": "GET", + "params": { + "parameter": "value" + } + } + ] + } + ], + "userData": { + "start_date": { + "function": "date", + "args": [ + "Y-m-d H:i:s", + { + "time": "currentStart" + } + ] + } + } + }, + "__SELECTED_JOB": "0_0" + }, + "action": "infer_mapping" +} \ No newline at end of file diff --git a/python-sync-actions/tests/test_mapping.py b/python-sync-actions/tests/test_mapping.py index e0f15af..907f358 100644 --- a/python-sync-actions/tests/test_mapping.py +++ b/python-sync-actions/tests/test_mapping.py @@ -1,6 +1,12 @@ +import os import unittest +from copy import deepcopy +from pathlib import Path + +from freezegun import freeze_time from actions.mapping import infer_mapping, StuctureAnalyzer +from component import Component class TestCurl(unittest.TestCase): @@ -24,6 +30,14 @@ class TestCurl(unittest.TestCase): } ] + def setUp(self): + self.tests_dir = Path(__file__).absolute().parent.joinpath('data_tests').as_posix() + + def _get_test_component(self, test_name): + test_dir = os.path.join(self.tests_dir, test_name) + os.environ['KBC_DATADIR'] = test_dir + return Component() + def test_nested_levels_pkeys(self): # nesting level 0 expected = {'array': {'forceType': True, 'mapping': {'destination': 'array'}, 'type': 'column'}, @@ -50,6 +64,21 @@ def test_no_pkey(self): self.assertEqual(res, expected) + def test_user_data(self): + # nesting level 1 + expected = {'array': {'forceType': True, 'mapping': {'destination': 'array'}, 'type': 'column'}, + 'contacts.email': 'contacts_email', 'contacts.skype': 'contacts_skype', + 'id': 'id', 'name': 'name', + "date_start": {'mapping': {'destination': 'date_start'}, 'type': 'user'}} + user_data_columns = ['date_start'] + sample_data = deepcopy(self.SAMPLE_DATA) + for row in sample_data: + row['date_start'] = '2021-01-01' + + res = infer_mapping(sample_data, max_level_nest_level=1, user_data_columns=user_data_columns) + + self.assertEqual(res, expected) + def test_invalid_characters(self): data = [{ "$id": 123, @@ -75,3 +104,20 @@ def test_dedupe_keys(self): 'name': 'name', 'test_array': 'array'} res = StuctureAnalyzer.dedupe_values(data) self.assertEqual(res, expected) + + @freeze_time("2021-01-01") + def test_infer_mapping_userdata(self): + component = self._get_test_component('test_007_infer_mapping_userdata') + output = component.infer_mapping() + expected_output = {'id': 'id', + 'start_date': {'mapping': {'destination': 'start_date'}, 'type': 'user'}, + 'status': 'status'} + self.assertEqual(output, expected_output) + + def test_infer_mapping_userdata_child(self): + component = self._get_test_component('test_008_infer_mapping_userdata_child') + output = component.infer_mapping() + # child job can't have user data + expected_output = {'id': 'id', + 'status': 'status'} + self.assertEqual(output, expected_output)