Skip to content

Commit

Permalink
Fix user data mapping, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
davidesner committed May 30, 2024
1 parent bdaac96 commit c3c7893
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 2 deletions.
18 changes: 18 additions & 0 deletions python-sync-actions/src/actions/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def parse_row(self, row: dict[str, Any]):
def infer_mapping(self,
primary_keys: Optional[list[str]] = None,
parent_pkeys: Optional[list[str]] = None,
user_data_columns: Optional[list[str]] = None,
path_separator: str = '.',
max_level: int = 2
) -> dict:
Expand All @@ -41,6 +42,7 @@ def infer_mapping(self,
Args:
primary_keys: optional list of columns to be used as primary keys
parent_pkeys: optional list of columns to be used as parent primary keys
user_data_columns: optional list of columns to be used as user data columns
path_separator: JSON path separator to use in the mapping
max_level: maximum level to flatten results
Expand All @@ -57,6 +59,10 @@ def infer_mapping(self,
raise UserException(f"Parent {key} is already in the mapping, "
f"please change the placeholder name")
result_mapping[key] = MappingElements.parent_primary_key_column(key)
if user_data_columns:
for key in user_data_columns:
if key in result_mapping:
result_mapping[key] = MappingElements.user_data_column(key)

return self.dedupe_values(result_mapping)

Expand Down Expand Up @@ -158,10 +164,20 @@ def force_type_column(column_name: str) -> dict:
"forceType": True
}

@staticmethod
def user_data_column(column_name: str) -> dict:
    """Build the mapping element describing a user-data column.

    Args:
        column_name: name written as the mapping destination.

    Returns:
        A dict with ``type`` set to ``"user"`` and a ``mapping`` whose
        ``destination`` is ``column_name``.
    """
    destination = {"destination": column_name}
    return {"type": "user", "mapping": destination}


def infer_mapping(data: list[dict],
primary_keys: Optional[list[str]] = None,
parent_pkeys: Optional[list[str]] = None,
user_data_columns: Optional[list[str]] = None,
path_separator: str = '.',
max_level_nest_level: int = 2) -> dict:
"""
Expand All @@ -170,6 +186,7 @@ def infer_mapping(data: list[dict],
data: sample data
primary_keys: optional list of columns to be used as primary keys
parent_pkeys: optional list of columns to be used as parent primary keys
user_data_columns: optional list of columns to be used as user data columns
path_separator: JSON path separator to use in the mapping
max_level_nest_level: maximum level to flatten results
Expand All @@ -181,6 +198,7 @@ def infer_mapping(data: list[dict],
analyzer.parse_row(row)

result = analyzer.infer_mapping(primary_keys or [], parent_pkeys or [],
user_data_columns or [],
path_separator=path_separator,
max_level=max_level_nest_level)
return result
Expand Down
7 changes: 5 additions & 2 deletions python-sync-actions/src/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,6 @@ def make_call(self) -> tuple[list, any, str, str]:

self._parent_results = [{}] * len(self._configurations)

# TODO: limit the number of calls made from the parent response to 10 because of the timeout, e.g. call the child at most 10 times
def recursive_call(parent_result, config_index=0):

if parent_result:
Expand Down Expand Up @@ -371,22 +370,26 @@ def infer_mapping(self) -> dict:

nesting_level = self.configuration.parameters.get('__NESTING_LEVEL', 2)
primary_keys = self.configuration.parameters.get('__PRIMARY_KEY', [])
is_child_job = len(self.configuration.parameters.get('__SELECTED_JOB', '').split('_')) > 1
parent_pkey = []
if len(self._configurations) > 1:
parent_pkey = [f'parent_{p}' for p in self._configurations[-1].request_parameters.placeholders.keys()]

if not data:
raise UserException("The request returned no data to infer mapping from.")

if self._configuration.user_data:
user_data_columns = []
if self._configuration.user_data and not is_child_job:
for record in data:
for key, value in self._configuration.user_data.items():
user_data_columns.append(key)
if key in record:
raise UserException(f"User data key [{key}] already exists in the response data, "
f"please change the name.")
record[key] = value

mapping = infer_mapping(data, primary_keys, parent_pkey,
user_data_columns=user_data_columns,
max_level_nest_level=nesting_level)
return mapping

Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"parameters": {
"api": {
"baseUrl": "http://private-834388-extractormock.apiary-mock.com",
"pagination": {
"method": "response.url",
"urlKey": "next"
}
},
"config": {
"outputBucket": "getPost",
"jobs": [
{
"endpoint": "get",
"method": "GET",
"params": {
"parameter": "value"
}
}
],
"userData": {
"start_date": {
"function": "date",
"args": [
"Y-m-d H:i:s",
{
"time": "currentStart"
}
]
}
}
},
"__SELECTED_JOB": "0"
},
"action": "infer_mapping"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"parameters": {
"api": {
"baseUrl": "http://private-834388-extractormock.apiary-mock.com",
"pagination": {
"method": "response.url",
"urlKey": "next"
}
},
"config": {
"outputBucket": "getPost",
"jobs": [
{
"endpoint": "get",
"method": "GET",
"params": {
"parameter": "value"
},
"children": [
{
"endpoint": "get",
"method": "GET",
"params": {
"parameter": "value"
}
}
]
}
],
"userData": {
"start_date": {
"function": "date",
"args": [
"Y-m-d H:i:s",
{
"time": "currentStart"
}
]
}
}
},
"__SELECTED_JOB": "0_0"
},
"action": "infer_mapping"
}
46 changes: 46 additions & 0 deletions python-sync-actions/tests/test_mapping.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import os
import unittest
from copy import deepcopy
from pathlib import Path

from freezegun import freeze_time

from actions.mapping import infer_mapping, StuctureAnalyzer
from component import Component


class TestCurl(unittest.TestCase):
Expand All @@ -24,6 +30,14 @@ class TestCurl(unittest.TestCase):
}
]

def setUp(self):
    """Store the POSIX path of the ``data_tests`` folder next to this module."""
    module_dir = Path(__file__).absolute().parent
    self.tests_dir = (module_dir / 'data_tests').as_posix()

def _get_test_component(self, test_name):
    """Create a ``Component`` with ``KBC_DATADIR`` pointing at a test folder.

    Args:
        test_name: name of the sub-folder of ``self.tests_dir`` to use.

    Returns:
        A freshly constructed ``Component`` instance.
    """
    test_dir = os.path.join(self.tests_dir, test_name)

    # Remember the current value so the override does not leak into
    # tests that run after this one.
    previous_datadir = os.environ.get('KBC_DATADIR')

    def restore_datadir():
        if previous_datadir is None:
            os.environ.pop('KBC_DATADIR', None)
        else:
            os.environ['KBC_DATADIR'] = previous_datadir

    # Register the cleanup before mutating the environment so it runs
    # even when constructing the component raises.
    self.addCleanup(restore_datadir)
    os.environ['KBC_DATADIR'] = test_dir
    return Component()

def test_nested_levels_pkeys(self):
# nesting level 0
expected = {'array': {'forceType': True, 'mapping': {'destination': 'array'}, 'type': 'column'},
Expand All @@ -50,6 +64,21 @@ def test_no_pkey(self):

self.assertEqual(res, expected)

def test_user_data(self):
    """Columns listed as user data are mapped with the ``user`` type (nesting level 1)."""
    rows = deepcopy(self.SAMPLE_DATA)
    for record in rows:
        record['date_start'] = '2021-01-01'

    expected = {
        'array': {'forceType': True, 'mapping': {'destination': 'array'}, 'type': 'column'},
        'contacts.email': 'contacts_email',
        'contacts.skype': 'contacts_skype',
        'id': 'id',
        'name': 'name',
        "date_start": {'mapping': {'destination': 'date_start'}, 'type': 'user'},
    }

    actual = infer_mapping(rows, max_level_nest_level=1, user_data_columns=['date_start'])

    self.assertEqual(actual, expected)

def test_invalid_characters(self):
data = [{
"$id": 123,
Expand All @@ -75,3 +104,20 @@ def test_dedupe_keys(self):
'name': 'name', 'test_array': 'array'}
res = StuctureAnalyzer.dedupe_values(data)
self.assertEqual(res, expected)

@freeze_time("2021-01-01")
def test_infer_mapping_userdata(self):
    """The ``start_date`` user-data key is mapped with the ``user`` type."""
    expected_output = {
        'id': 'id',
        'start_date': {'mapping': {'destination': 'start_date'}, 'type': 'user'},
        'status': 'status',
    }
    inferred = self._get_test_component('test_007_infer_mapping_userdata').infer_mapping()
    self.assertEqual(inferred, expected_output)

def test_infer_mapping_userdata_child(self):
    """A child job's inferred mapping must not contain user-data columns."""
    # child job can't have user data
    expected_output = {'id': 'id', 'status': 'status'}
    inferred = self._get_test_component('test_008_infer_mapping_userdata_child').infer_mapping()
    self.assertEqual(inferred, expected_output)

0 comments on commit c3c7893

Please sign in to comment.