Skip to content

Commit

Permalink
Merge pull request #79 from keboola/KAB-46-prepare-structure-for-meta…
Browse files Browse the repository at this point in the history
…store-metadata-to-be-stored-with-storage-objects

KAB-46 prepare structure for metastore metadata to be stored with storage objects
  • Loading branch information
tomasfejfar authored Apr 24, 2024
2 parents 72a13ff + 859aeb5 commit 6d5d3ca
Show file tree
Hide file tree
Showing 7 changed files with 337 additions and 3 deletions.
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ services:
<<: *ci
tty: true
stdin_open: true
command: bash
entrypoint: bash
volumes:
- .:/code
7 changes: 5 additions & 2 deletions kbcstorage/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
Full documentation https://keboola.docs.apiary.io/#reference/components-and-configurations
"""
import json
from kbcstorage.base import Endpoint
from kbcstorage.configurations_metadata import ConfigurationsMetadata


class Configurations(Endpoint):
Expand All @@ -21,6 +23,7 @@ def __init__(self, root_url, token, branch_id):
branch_id (str): The ID of branch to use, use 'default' to work without branch (in main).
"""
super().__init__(root_url, f"branch/{branch_id}/components", token)
self.metadata = ConfigurationsMetadata(root_url, token, branch_id)

def detail(self, component_id, configuration_id):
"""
Expand Down Expand Up @@ -111,6 +114,6 @@ def create(self, component_id, name, description='', configuration=None, state=N
'isDisabled': is_disabled
}
if configuration_id:
body['id'] = configuration_id
body['configurationId'] = configuration_id
url = '{}/{}/configs'.format(self.base_url, component_id)
return self._post(url, data=body)
return self._post(url, data=json.dumps(body), headers={'Content-Type': 'application/json'})
104 changes: 104 additions & 0 deletions kbcstorage/configurations_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Manages calls to the Storage API relating to configurations metadata
Full documentation https://keboola.docs.apiary.io/#reference/metadata/components-configurations-metadata/
"""
import json
from kbcstorage.base import Endpoint


class ConfigurationsMetadata(Endpoint):
    """
    Configurations metadata Endpoint.

    Provides list/create/delete calls for metadata attached to a component
    configuration. All URLs are built from ``self.base_url`` as
    ``{base_url}/{component_id}/configs/{configuration_id}/metadata``.
    """

    def __init__(self, root_url, token, branch_id):
        """
        Create a Configurations metadata endpoint.

        Args:
            root_url (:obj:`str`): The base url for the API.
            token (:obj:`str`): A storage API key.
            branch_id (str): The ID of branch to use, use 'default' to work without branch (in main).
        """
        super().__init__(root_url, f"branch/{branch_id}/components", token)

    @staticmethod
    def _require_non_empty_str(name, value):
        """
        Raise ``ValueError`` unless ``value`` is a non-empty string.

        Args:
            name (str): Argument name used in the error message.
            value: The value to check.

        Raises:
            ValueError: If ``value`` is not a string or is empty.
        """
        if not isinstance(value, str) or value == '':
            raise ValueError("Invalid {} '{}'.".format(name, value))

    def delete(self, component_id, configuration_id, metadata_id):
        """
        Deletes the configuration metadata identified by ``metadata_id``.

        Args:
            component_id (str): The id of the component.
            configuration_id (str): The id of the configuration.
            metadata_id (str): The id of the metadata (not key!).

        Raises:
            requests.HTTPError: If the API request fails.
            ValueError: If the component_id/configuration_id/metadata_id is not a string or is empty.
        """
        self._require_non_empty_str('component_id', component_id)
        self._require_non_empty_str('configuration_id', configuration_id)
        self._require_non_empty_str('metadata_id', metadata_id)
        url = '{}/{}/configs/{}/metadata/{}'.format(self.base_url, component_id, configuration_id, metadata_id)
        self._delete(url)

    def list(self, component_id, configuration_id):
        """
        Lists metadata for a given component configuration.

        Args:
            component_id (str): The id of the component.
            configuration_id (str): The id of the configuration.

        Returns:
            The result of the GET request as returned by ``self._get``
            (presumably the parsed json from the HTTP response).

        Raises:
            requests.HTTPError: If the API request fails.
            ValueError: If the component_id/configuration_id is not a string or is empty.
        """
        self._require_non_empty_str('component_id', component_id)
        self._require_non_empty_str('configuration_id', configuration_id)
        url = '{}/{}/configs/{}/metadata'.format(self.base_url, component_id, configuration_id)
        return self._get(url)

    def create(self, component_id, configuration_id, provider, metadata):
        """
        Writes metadata for a given component configuration.

        Args:
            component_id (str): The id of the component.
            configuration_id (str): The id of the configuration.
            provider (str): The provider of the metadata. Currently ignored: it is accepted for
                forward compatibility but is not included in the request body.
            metadata (list): A list of metadata items. Item is a dictionary with 'key' and 'value' keys.

        Returns:
            response_body: The parsed json from the HTTP response.

        Raises:
            requests.HTTPError: If the API request fails.
            ValueError: If the component_id/configuration_id is not a string or is empty.
            ValueError: If the metadata is not a list.
            ValueError: If the metadata item is not a dictionary.
        """
        self._require_non_empty_str('component_id', component_id)
        # The shared validator reports the correct argument name here; the inline check it
        # replaces reported an invalid configuration_id as "Invalid component_id".
        self._require_non_empty_str('configuration_id', configuration_id)
        if not isinstance(metadata, list):
            raise ValueError("Metadata must be a list '{}'.".format(metadata))
        for metadata_item in metadata:
            if not isinstance(metadata_item, dict):
                raise ValueError("Metadata item must be a dictionary '{}'.".format(metadata_item))

        url = '{}/{}/configs/{}/metadata'.format(self.base_url, component_id, configuration_id)
        headers = {
            'Content-Type': 'application/json',
        }
        # TODO: add 'provider' to the payload once it is implemented; until then only
        # the metadata list is sent.
        data = {
            'metadata': metadata,
        }
        return self._post(url, data=json.dumps(data), headers=headers)
2 changes: 2 additions & 0 deletions kbcstorage/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from kbcstorage.base import Endpoint
from kbcstorage.files import Files
from kbcstorage.jobs import Jobs
from kbcstorage.tables_metadata import TablesMetadata


class Tables(Endpoint):
Expand All @@ -26,6 +27,7 @@ def __init__(self, root_url, token):
token (:obj:`str`): A storage API key.
"""
super().__init__(root_url, 'tables', token)
self.metadata = TablesMetadata(root_url, token)

def list(self, include=None):
"""
Expand Down
107 changes: 107 additions & 0 deletions kbcstorage/tables_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Manages calls to the Storage API relating to table metadata
Full documentation `here`.
.. _here:
http://docs.keboola.apiary.io/#reference/metadata/table-metadata
"""
import json
from kbcstorage.base import Endpoint


class TablesMetadata(Endpoint):
    """
    Endpoint for metadata attached to Storage tables.

    Every request URL is derived from ``self.base_url`` as
    ``{base_url}/{table_id}/metadata``.
    """
    def __init__(self, root_url, token):
        """
        Set up the endpoint on the 'tables' path.

        Args:
            root_url (:obj:`str`): The base url for the API.
            token (:obj:`str`): A storage API key.
        """
        super().__init__(root_url, 'tables', token)

    def list(self, table_id):
        """
        Fetch all metadata of one table.

        Args:
            table_id (str): Table id

        Returns:
            response_body: The parsed json from the HTTP response.

        Raises:
            requests.HTTPError: If the API request fails.
            ValueError: If the table_id is not a string or is empty.
        """
        if not (isinstance(table_id, str) and table_id != ''):
            raise ValueError(f"Invalid table_id '{table_id}'.")
        return self._get(f'{self.base_url}/{table_id}/metadata')

    def delete(self, table_id, metadata_id):
        """
        Remove the table metadata entry referenced by ``metadata_id``.

        Args:
            table_id (str): The id of the table.
            metadata_id (str): The id of the table metadata entry to be deleted.

        Raises:
            requests.HTTPError: If the API request fails.
            ValueError: If the table_id/metadata_id is not a string or is empty.
        """
        if not (isinstance(table_id, str) and table_id != ''):
            raise ValueError(f"Invalid table_id '{table_id}'.")
        if not (isinstance(metadata_id, str) and metadata_id != ''):
            raise ValueError(f"Invalid metadata_id '{metadata_id}'.")
        self._delete(f'{self.base_url}/{table_id}/metadata/{metadata_id}')

    def create(self, table_id, provider, metadata, columns_metadata):
        """
        Post table-level and column-level metadata to a table.

        Args:
            table_id (str): Table id
            provider (str): Provider of the metadata
            metadata (list): List of metadata dictionaries with 'key' and 'value'
            columns_metadata (list): Column metadata; must be a list. Its items are sent
                as-is (presumably lists of metadata dictionaries with 'key', 'value',
                'columnName' - confirm against the API documentation).

        Returns:
            response_body: The parsed json from the HTTP response.

        Raises:
            requests.HTTPError: If the API request fails.
            ValueError: If the table_id is not a string or is empty.
            ValueError: If the provider is not a string or is empty.
            ValueError: If the metadata is not a list.
            ValueError: If the columns_metadata is not a list
        """
        # Checks run in the same order as the arguments are declared.
        if not (isinstance(table_id, str) and table_id != ''):
            raise ValueError(f"Invalid table_id '{table_id}'.")
        if not (isinstance(provider, str) and provider != ''):
            raise ValueError(f"Invalid provider '{provider}'.")
        if not isinstance(metadata, list):
            raise ValueError(f"Invalid metadata '{metadata}'.")
        if not isinstance(columns_metadata, list):
            raise ValueError(f"Invalid columns_metadata '{columns_metadata}'.")

        payload = json.dumps({
            "provider": provider,
            "metadata": metadata,
            "columnsMetadata": columns_metadata,
        })
        return self._post(
            f'{self.base_url}/{table_id}/metadata',
            data=payload,
            headers={'Content-Type': 'application/json'},
        )
53 changes: 53 additions & 0 deletions tests/functional/test_configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,56 @@ def testListConfigurations(self):
with self.subTest():
with self.assertRaises(exceptions.HTTPError):
configurations = self.configurations.list('non-existent-component')

def testConfigurationMetadata(self):
    """
    Round-trip test for configuration metadata: create, list, delete, list again.

    Creates a configuration, attaches one metadata item, checks the create and
    list responses contain 'id', 'key' and 'value', deletes the last listed
    item and finally expects the metadata list to be empty.
    """
    configuration_id = 'test_configuration_metadata'
    self.configurations.create(
        component_id=self.TEST_COMPONENT_NAME,
        configuration_id=configuration_id,
        name='test_configuration_metadata',
    )
    metadata_payload = [
        {
            'key': 'testConfigurationMetadata',
            'value': 'success',
        }
    ]
    created_metadata = self.configurations.metadata.create(
        component_id=self.TEST_COMPONENT_NAME,
        configuration_id=configuration_id,
        provider='test',
        metadata=metadata_payload,
    )

    with self.subTest('assert metadata create response'):
        self.assertEqual(1, len(created_metadata))
        created_item = created_metadata[0]
        self.assertIn('id', created_item)
        # NOTE: 'provider' is intentionally not asserted yet.
        self.assertIn('key', created_item)
        self.assertIn('value', created_item)

    listed_metadata = self.configurations.metadata.list(
        component_id=self.TEST_COMPONENT_NAME,
        configuration_id=configuration_id,
    )

    with self.subTest('assert metadata list response'):
        self.assertGreater(len(listed_metadata), 0)
        # A separate loop variable keeps ``listed_metadata`` bound to the list.
        for listed_item in listed_metadata:
            self.assertIn('id', listed_item)
            # NOTE: 'provider' is intentionally not asserted yet.
            self.assertIn('key', listed_item)
            self.assertIn('value', listed_item)

    # Delete the last listed entry, selected explicitly rather than through a
    # leaked loop variable.
    self.configurations.metadata.delete(
        component_id=self.TEST_COMPONENT_NAME,
        configuration_id=configuration_id,
        metadata_id=listed_metadata[-1]['id'],
    )
    remaining_metadata = self.configurations.metadata.list(
        component_id=self.TEST_COMPONENT_NAME,
        configuration_id=configuration_id,
    )

    with self.subTest('assert metadata delete means metadata no longer in list'):
        self.assertEqual(0, len(remaining_metadata))
65 changes: 65 additions & 0 deletions tests/functional/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,68 @@ def test_table_columns(self):
with open(local_path, mode='rt') as file:
lines = file.readlines()
self.assertEqual(['"col3","col2"\n', '"king","pong"\n'], sorted(lines))

def test_table_with_metadata(self):
    """
    Round-trip test for table metadata.

    Uploads a two-column CSV as a new table, posts table-level and
    column-level metadata, verifies both through the table detail and the
    metadata list call, then deletes the table metadata entry and checks
    that the list is empty afterwards.
    """
    handle, csv_path = tempfile.mkstemp(prefix='sapi-test')
    with open(csv_path, 'w') as out:
        csv_writer = csv.DictWriter(
            out,
            fieldnames=['col1', 'col2'],
            lineterminator='\n',
            delimiter=',',
            quotechar='"',
        )
        csv_writer.writeheader()
        csv_writer.writerow({'col1': 'ping', 'col2': 'pong'})
    # mkstemp returns an OS-level descriptor in addition to the path.
    os.close(handle)
    table_id = self.tables.create(
        name='some-table',
        file_path=csv_path,
        bucket_id='in.c-py-test-tables',
    )

    table_level = [
        {'key': 'test_table_with_metadata', 'value': 'success'},
    ]
    column_level = [
        [
            {
                'key': 'test_column_with_metadata',
                'value': 'success',
                'columnName': 'col1',
            },
        ],
    ]
    self.tables.metadata.create(
        table_id=table_id,
        provider='test',
        metadata=table_level,
        columns_metadata=column_level,
    )

    table_info = self.tables.detail(table_id)
    with self.subTest("Test metadata key in response"):
        self.assertIn('metadata', table_info)
    with self.subTest("Test metadata structure"):
        self.assertEqual(1, len(table_info['metadata']))
        table_entry = table_info['metadata'][0]
        self.assertIn('id', table_entry)
        self.assertEqual('test_table_with_metadata', table_entry['key'])
        self.assertEqual('test', table_entry['provider'])
        self.assertIn('timestamp', table_entry)
        self.assertEqual('success', table_entry['value'])
    with self.subTest('Test columns metadata key in response'):
        self.assertIn('columnMetadata', table_info)
    with self.subTest('Test columns metadata structure'):
        per_column = table_info['columnMetadata']
        self.assertIn('col1', per_column)
        col1_entries = per_column['col1']
        self.assertEqual(1, len(col1_entries))
        column_entry = col1_entries[0]
        self.assertIn('id', column_entry)
        self.assertEqual('test_column_with_metadata', column_entry['key'])
        self.assertEqual('test', column_entry['provider'])
        self.assertIn('timestamp', column_entry)
        self.assertEqual('success', column_entry['value'])

    listed = self.tables.metadata.list(table_id=table_id)

    with self.subTest("Test metadata key in list response"):
        self.assertEqual(1, len(listed))
        first_listed = listed[0]
        self.assertEqual('test_table_with_metadata', first_listed['key'])
        self.assertEqual('test', first_listed['provider'])
        self.assertEqual('success', first_listed['value'])

    self.tables.metadata.delete(table_id=table_id, metadata_id=listed[0]['id'])

    after_delete = self.tables.metadata.list(table_id=table_id)
    with self.subTest('Test metadata can was deleted'):
        self.assertEqual(0, len(after_delete))

0 comments on commit 6d5d3ca

Please sign in to comment.