From 0bb8dfb1b8eda445a6efc781c91de15c4e93976a Mon Sep 17 00:00:00 2001
From: pivnicek
Date: Wed, 10 Mar 2021 17:31:09 +0100
Subject: [PATCH 01/16] add azure-storage-blob dependency

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index c8ebcdf..67593d9 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,7 @@
     packages=find_packages(exclude=['tests']),
     install_requires=[
         'boto3',
+        'azure-storage-blob',
         'requests'
     ],
     test_suite='tests',

From 046be5247b7acdf3b1607f59e71ad3ba7b7a227d Mon Sep 17 00:00:00 2001
From: pivnicek
Date: Wed, 10 Mar 2021 20:05:21 +0100
Subject: [PATCH 02/16] support abs and s3

---
 kbcstorage/files.py | 69 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 49 insertions(+), 20 deletions(-)

diff --git a/kbcstorage/files.py b/kbcstorage/files.py
index 11b0f8e..5f660e8 100644
--- a/kbcstorage/files.py
+++ b/kbcstorage/files.py
@@ -10,6 +10,9 @@
 import boto3
 import requests
 
+from azure.storage.blob import BlobServiceClient, ContentSettings, ContainerClient, __version__
+
+
 from kbcstorage.base import Endpoint
 
 
@@ -80,27 +83,11 @@ def upload_file(self, file_path, tags=None, is_public=False,
         file_resource = self.prepare_upload(file_name, size, tags, is_public,
                                             is_permanent, is_encrypted,
                                             is_sliced, do_notify, True)
-        upload_params = file_resource['uploadParams']
-        key_id = upload_params['credentials']['AccessKeyId']
-        key = upload_params['credentials']['SecretAccessKey']
-        token = upload_params['credentials']['SessionToken']
-        s3 = boto3.resource('s3', aws_access_key_id=key_id,
-                            aws_secret_access_key=key,
-                            aws_session_token=token,
-                            region_name=file_resource['region'])
+        if file_resource['provider'] == 'azure':
+            self.__upload_to_azure(file_resource, file_path)
+        elif file_resource['provider'] == 'aws':
+            self.__upload_to_aws(file_resource, file_path, is_encrypted)
 
-        s3_object = s3.Object(bucket_name=upload_params['bucket'],
-                              key=upload_params['key'])
-        disposition = 'attachment; filename={};'.format(file_resource['name'])
-        with open(file_path, mode='rb') as file:
-            if is_encrypted:
-                encryption = upload_params['x-amz-server-side-encryption']
-                s3_object.put(ACL=upload_params['acl'], Body=file,
-                              ContentDisposition=disposition,
-                              ServerSideEncryption=encryption)
-            else:
-                s3_object.put(ACL=upload_params['acl'], Body=file,
-                              ContentDisposition=disposition)
         return file_resource['id']
 
     def prepare_upload(self, name, size_bytes=None, tags=None, is_public=False,
@@ -222,3 +209,45 @@ def download(self, file_id, local_path):
         bucket = s3.Bucket(file_info["s3Path"]["bucket"])
         bucket.download_file(file_info["s3Path"]["key"], local_file)
         return local_file
+
+    def __upload_to_azure(self, preparation_result, file_path):
+
+        blob_service_client = BlobServiceClient.from_connection_string(
+            preparation_result['absUploadParams']['absCredentials']['SASConnectionString']
+        )
+        blob_client = blob_service_client.get_blob_client(
+            container=preparation_result['absUploadParams']['container'],
+            blob=file_path
+        )
+
+        container_client = blob_service_client.get_container_client(preparation_result['absUploadParams']['container'])
+
+        container_client.upload_blob(
+            preparation_result['absUploadParams']['blobName'],
+            open(file_path, 'rb'),
+            ContentSettings(content_disposition='attachment;filename="%s"' % (preparation_result['name']))
+        )
+
+    def __upload_to_aws(self, prepare_result, file_path, is_encrypted):
+        upload_params = prepare_result['uploadParams']
+        key_id = upload_params['credentials']['AccessKeyId']
+        key = 
upload_params['credentials']['SecretAccessKey'] + token = upload_params['credentials']['SessionToken'] + s3 = boto3.resource('s3', aws_access_key_id=key_id, + aws_secret_access_key=key, + aws_session_token=token, + region_name=prepare_result['region']) + + s3_object = s3.Object(bucket_name=upload_params['bucket'], + key=upload_params['key']) + disposition = 'attachment; filename={};'.format(prepare_result['name']) + with open(file_path, mode='rb') as file: + if is_encrypted: + encryption = upload_params['x-amz-server-side-encryption'] + s3_object.put(ACL=upload_params['acl'], Body=file, + ContentDisposition=disposition, + ServerSideEncryption=encryption) + else: + s3_object.put(ACL=upload_params['acl'], Body=file, + ContentDisposition=disposition) + return \ No newline at end of file From a029ef382698fa927eb576b7c1dcf8dd10d03e3f Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 09:34:40 +0100 Subject: [PATCH 03/16] add docker login and test on azure --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 920152d..880e974 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,10 +3,12 @@ language: bash services: - docker before_script: +- docker login --username "$DOCKERHUB_USER" --password "$DOCKERHUB_TOKEN" - docker-compose build sapi-python-client script: - docker-compose run --rm --entrypoint=flake8 sapi-python-client - docker-compose run --rm -e KBC_TEST_TOKEN -e KBC_TEST_API_URL sapi-python-client -m unittest discover +- docker-compose run --rm -e KBC_AZ_TEST_TOKEN -e KBC_AZ_TEST_API_URL sapi-python-client -m unittest discover after_success: - docker images deploy: From 85696abf2820b7ff7097829bd75044038ea48b74 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 16:03:16 +0100 Subject: [PATCH 04/16] use blob client instead of container client --- kbcstorage/files.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/kbcstorage/files.py b/kbcstorage/files.py index 5f660e8..cb383a5 100644 --- a/kbcstorage/files.py +++ b/kbcstorage/files.py @@ -210,23 +210,22 @@ def download(self, file_id, local_path): bucket.download_file(file_info["s3Path"]["key"], local_file) return local_file - def __upload_to_aazure(self, preparation_result, file_path): + def __upload_to_azure(self, preparation_result, file_path): blob_service_client = BlobServiceClient.from_connection_string( preparation_result['absUploadParams']['absCredentials']['SASConnectionString'] ) blob_client = blob_service_client.get_blob_client( container=preparation_result['absUploadParams']['container'], - blob=file_path + blob=preparation_result['absUploadParams']['blobName'] ) - container_client = blob_service_client.get_container_client(preparation_result['absUploadParams']['container']) - - container_client.upload_blob( - preparation_result['absUploadParams']['blobName'], - open(file_path, 'rb'), - ContentSettings(content_disposition='attachment;filename="%s"' % (preparation_result['name'])) - ) + with open(file_path, "rb") as blob_data: + blob_client.upload_blob( + blob_data, + blob_type='BlockBlob', + content_settings=ContentSettings(content_disposition='attachment;filename="%s"' % (preparation_result['name'])) + ) def __upload_to_aws(self, prepare_result, file_path, is_encrypted): upload_params = prepare_result['uploadParams'] From fcdd1df11f00e78f8519d8cbfc60db9c54daa1c5 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 16:03:32 +0100 Subject: [PATCH 05/16] update test for different providers --- 
tests/functional/test_files.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tests/functional/test_files.py b/tests/functional/test_files.py index dab82b2..589e48f 100644 --- a/tests/functional/test_files.py +++ b/tests/functional/test_files.py @@ -101,14 +101,26 @@ def test_download_file_credentials(self): file_info = self.files.detail(file_id, federation_token=True) with self.subTest(): self.assertEqual(file_id, file_info['id']) - with self.subTest(): - self.assertTrue('credentials' in file_info) - with self.subTest(): - self.assertTrue('AccessKeyId' in file_info['credentials']) - with self.subTest(): - self.assertTrue('SecretAccessKey' in file_info['credentials']) - with self.subTest(): - self.assertTrue('SessionToken' in file_info['credentials']) + if file_info['provider'] == 'aws': + with self.subTest(): + self.assertTrue('credentials' in file_info) + with self.subTest(): + self.assertTrue('AccessKeyId' in file_info['credentials']) + with self.subTest(): + self.assertTrue('SecretAccessKey' in file_info['credentials']) + with self.subTest(): + self.assertTrue('SessionToken' in file_info['credentials']) + elif file_info['provider'] == 'azure': + with self.subTest(): + self.assertTrue('SASConnectionString' in file_info['absCredentials']) + with self.subTest(): + self.assertTrue('expiration' in file_info['absCredentials']) + with self.subTest(): + self.assertTrue('absPath' in file_info['absCredentials']) + with self.subTest(): + self.assertTrue('container' in file_info['absCredentials']['absPath']) + with self.subTest(): + self.assertTrue('name' in file_info['absCredentials']['absPath']) def test_download_file(self): file, path = tempfile.mkstemp(prefix='sapi-test') From 41fd4920fd71af575fad4b81a4d15afc712087d6 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 17:50:29 +0100 Subject: [PATCH 06/16] download files depending on provider --- kbcstorage/files.py | 122 +++++++++++++++++++++++++++++++++----------- 1 file changed, 93 insertions(+), 29 deletions(-) diff --git a/kbcstorage/files.py b/kbcstorage/files.py index cb383a5..f9edd83 100644 --- a/kbcstorage/files.py +++ b/kbcstorage/files.py @@ -74,8 +74,7 @@ def upload_file(self, file_path, tags=None, is_public=False, if compress: import gzip import shutil - with open(file_path, 'rb') as f_in, gzip.open(file_path + '.gz', - 'wb') as f_out: + with open(file_path, 'rb') as f_in, gzip.open(file_path + '.gz', 'wb') as f_out: shutil.copyfileobj(f_in, f_out) file_path = file_path + '.gz' file_name = os.path.basename(file_path) @@ -180,34 +179,21 @@ def download(self, file_id, local_path): os.mkdir(local_path) file_info = self.detail(file_id=file_id, federation_token=True) local_file = os.path.join(local_path, file_info['name']) - s3 = boto3.resource( - 's3', - aws_access_key_id=file_info['credentials']['AccessKeyId'], - aws_secret_access_key=file_info['credentials']['SecretAccessKey'], - aws_session_token=file_info['credentials']['SessionToken'], - region_name=file_info['region'] - ) + if file_info['provider'] == 'azure': + if file_info['isSliced']: + self.__download_sliced_file_from_azure() + else: + self.__download_file_from_azure() + elif file_info['provider'] == 'aws': + if file_info['isSliced']: + self.__download_sliced_file_from_aws() + else: + self.__download_file_from_aws(file_info, local_file) + if file_info['isSliced']: - manifest = requests.get(url=file_info['url']).json() - file_names = [] - for entry in manifest["entries"]: - full_path = entry["url"] - file_name = 
full_path.rsplit("/", 1)[1] - file_names.append(file_name) - splitted_path = full_path.split("/") - file_key = "/".join(splitted_path[3:]) - bucket = s3.Bucket(file_info['s3Path']['bucket']) - bucket.download_file(file_key, file_name) - # merge the downloaded files - with open(local_file, mode='wb') as out_file: - for file_name in file_names: - with open(file_name, mode='rb') as in_file: - for line in in_file: - out_file.write(line) - os.remove(file_name) + else: - bucket = s3.Bucket(file_info["s3Path"]["bucket"]) - bucket.download_file(file_info["s3Path"]["key"], local_file) + return local_file def __upload_to_azure(self, preparation_result, file_path): @@ -249,4 +235,82 @@ def __upload_to_aws(self, prepare_result, file_path, is_encrypted): else: s3_object.put(ACL=upload_params['acl'], Body=file, ContentDisposition=disposition) - return \ No newline at end of file + return + + def __download_file_from_aws(self, file_info, destination): + s3 = boto3.resource( + 's3', + aws_access_key_id=file_info['credentials']['AccessKeyId'], + aws_secret_access_key=file_info['credentials']['SecretAccessKey'], + aws_session_token=file_info['credentials']['SessionToken'], + region_name=file_info['region'] + ) + bucket = s3.Bucket(file_info["s3Path"]["bucket"]) + bucket.download_file(file_info["s3Path"]["key"], destination) + + def __download_sliced_file_from_aws(self, file_info, destination): + s3 = boto3.resource( + 's3', + aws_access_key_id=file_info['credentials']['AccessKeyId'], + aws_secret_access_key=file_info['credentials']['SecretAccessKey'], + aws_session_token=file_info['credentials']['SessionToken'], + region_name=file_info['region'] + ) + manifest = requests.get(url=file_info['url']).json() + file_names = [] + for entry in manifest["entries"]: + full_path = entry["url"] + file_name = full_path.rsplit("/", 1)[1] + file_names.append(file_name) + splitted_path = full_path.split("/") + file_key = "/".join(splitted_path[3:]) + bucket = s3.Bucket(file_info['s3Path']['bucket']) + bucket.download_file(file_key, file_name) + # merge the downloaded files + with open(destination, mode='wb') as out_file: + for file_name in file_names: + with open(file_name, mode='rb') as in_file: + for line in in_file: + out_file.write(line) + os.remove(file_name) + + def __download_file_from_azure(self, file_info, destination): + blob_service_client = BlobServiceClient.from_connection_string( + file_info['absCredentials']['SASConnectionString'] + ) + blob_client = blob_service_client.get_blob_client( + container=file_info['absPath']['container'], + blob=file_info['absPath']['name'] + ) + with open(destination, "wb") as downloaded_blob: + download_stream = blob_client.download_blob() + downloaded_blob.write(download_stream.readall()) + + def __download_sliced_file_from_azure(self, file_info, destination): + blob_service_client = BlobServiceClient.from_connection_string( + file_info['absCredentials']['SASConnectionString'] + ) + container_client = blob_service_client.get_container_client( + container=file_info['absPath']['container'] + ) + manifest_stream = container_client.download_blob( + file_info['absPath']['name'] + 'manifest' + ) + manifest = manifest_stream.readall().json() + file_names = []; + + for entry in manifest['entries']: + blob_path = entry['url'].split('blob.core.windows.net/%s/' % (file_info['absPath']['container']))[1] + full_path = entry["url"] + file_name = full_path.rsplit("/", 1)[1] + file_names.append(file_name) + with open(file_name, "wb") as file_slice: + 
file_slice.write(container_client.download_blob(blob_path).readall()) + + # merge the downloaded files into one + with open(destination, mode='wb') as out_file: + for file_name in file_names: + with open(file_name, mode='rb') as in_file: + for line in in_file: + out_file.write(line) + os.remove(file_name) From ae239f1fb86e7678e200808383c3c40272073549 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 17:50:54 +0100 Subject: [PATCH 07/16] add env for az tests --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 880e974..2271f05 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ before_script: script: - docker-compose run --rm --entrypoint=flake8 sapi-python-client - docker-compose run --rm -e KBC_TEST_TOKEN -e KBC_TEST_API_URL sapi-python-client -m unittest discover -- docker-compose run --rm -e KBC_AZ_TEST_TOKEN -e KBC_AZ_TEST_API_URL sapi-python-client -m unittest discover +- docker-compose run --rm -e KBC_TEST_TOKEN=$KBC_AZ_TEST_TOKEN -e KBC_TEST_API_URL=$KBC_AZ_TEST_API_URL sapi-python-client -m unittest discover after_success: - docker images deploy: From f805b2aeb43343f584504c46e407bcb8fa265eef Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 19:54:49 +0100 Subject: [PATCH 08/16] fix file downloads --- kbcstorage/files.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kbcstorage/files.py b/kbcstorage/files.py index f9edd83..be9c7bb 100644 --- a/kbcstorage/files.py +++ b/kbcstorage/files.py @@ -6,6 +6,7 @@ .. _here: http://docs.keboola.apiary.io/#reference/files/ """ +import json import os import boto3 import requests @@ -181,19 +182,15 @@ def download(self, file_id, local_path): local_file = os.path.join(local_path, file_info['name']) if file_info['provider'] == 'azure': if file_info['isSliced']: - self.__download_sliced_file_from_azure() + self.__download_sliced_file_from_azure(file_info, local_file) else: - self.__download_file_from_azure() + self.__download_file_from_azure(file_info, local_file) elif file_info['provider'] == 'aws': if file_info['isSliced']: - self.__download_sliced_file_from_aws() + self.__download_sliced_file_from_aws(file_info, local_file) else: self.__download_file_from_aws(file_info, local_file) - if file_info['isSliced']: - - else: - return local_file def __upload_to_azure(self, preparation_result, file_path): @@ -296,7 +293,7 @@ def __download_sliced_file_from_azure(self, file_info, destination): manifest_stream = container_client.download_blob( file_info['absPath']['name'] + 'manifest' ) - manifest = manifest_stream.readall().json() + manifest = json.loads(manifest_stream.readall()) file_names = []; for entry in manifest['entries']: From f07438af2b94ac6a4d463b55ab71c10e6bc41c37 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 19:55:09 +0100 Subject: [PATCH 09/16] fixup file tests --- tests/functional/test_files.py | 8 +++++--- tests/functional/test_tables.py | 4 +--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/functional/test_files.py b/tests/functional/test_files.py index 589e48f..58175a4 100644 --- a/tests/functional/test_files.py +++ b/tests/functional/test_files.py @@ -111,16 +111,18 @@ def test_download_file_credentials(self): with self.subTest(): self.assertTrue('SessionToken' in file_info['credentials']) elif file_info['provider'] == 'azure': + with self.subTest(): + self.assertTrue('absCredentials' in file_info) with self.subTest(): self.assertTrue('SASConnectionString' in 
file_info['absCredentials']) with self.subTest(): self.assertTrue('expiration' in file_info['absCredentials']) with self.subTest(): - self.assertTrue('absPath' in file_info['absCredentials']) + self.assertTrue('absPath' in file_info) with self.subTest(): - self.assertTrue('container' in file_info['absCredentials']['absPath']) + self.assertTrue('container' in file_info['absPath']) with self.subTest(): - self.assertTrue('name' in file_info['absCredentials']['absPath']) + self.assertTrue('name' in file_info['absPath']) def test_download_file(self): file, path = tempfile.mkstemp(prefix='sapi-test') diff --git a/tests/functional/test_tables.py b/tests/functional/test_tables.py index efbacf5..cd0ffc5 100644 --- a/tests/functional/test_tables.py +++ b/tests/functional/test_tables.py @@ -84,9 +84,7 @@ def test_table_detail(self): with self.subTest(): self.assertEqual('some-table', table_info['name']) with self.subTest(): - self.assertEqual('https://connection.keboola.com/v2/storage/' - 'tables/in.c-py-test-tables.some-table', - table_info['uri']) + self.assertTrue('in.c-py-test-tables.some-table' in table_info['uri']) with self.subTest(): self.assertEqual([], table_info['primaryKey']) with self.subTest(): From 4f762dbef91c2792500fec09e73160c67d923e35 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 20:47:16 +0100 Subject: [PATCH 10/16] cs fixes --- kbcstorage/files.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kbcstorage/files.py b/kbcstorage/files.py index be9c7bb..cd45e6e 100644 --- a/kbcstorage/files.py +++ b/kbcstorage/files.py @@ -11,7 +11,7 @@ import boto3 import requests -from azure.storage.blob import BlobServiceClient, ContentSettings, ContainerClient, __version__ +from azure.storage.blob import BlobServiceClient, ContentSettings from kbcstorage.base import Endpoint @@ -75,7 +75,8 @@ def upload_file(self, file_path, tags=None, is_public=False, if compress: import gzip import shutil - with open(file_path, 'rb') as f_in, gzip.open(file_path + '.gz', 'wb') as f_out: + with open(file_path, 'rb') as f_in, \ + gzip.open(file_path + '.gz', 'wb') as f_out: shutil.copyfileobj(f_in, f_out) file_path = file_path + '.gz' file_name = os.path.basename(file_path) From e73dba50bb2cd4b91f761fd342db096685d8e8ab Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 20:47:31 +0100 Subject: [PATCH 11/16] add flake8 cs config --- .flake8 | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..6deafc2 --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 120 From 9e4db61c3cc9e5c98a86f51a55d877312b787259 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 21:01:46 +0100 Subject: [PATCH 12/16] last cs fixes --- kbcstorage/files.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kbcstorage/files.py b/kbcstorage/files.py index cd45e6e..5d881b5 100644 --- a/kbcstorage/files.py +++ b/kbcstorage/files.py @@ -208,7 +208,9 @@ def __upload_to_azure(self, preparation_result, file_path): blob_client.upload_blob( blob_data, blob_type='BlockBlob', - content_settings=ContentSettings(content_disposition='attachment;filename="%s"' % (preparation_result['name'])) + content_settings=ContentSettings( + content_disposition='attachment;filename="%s"' % (preparation_result['name']) + ) ) def __upload_to_aws(self, prepare_result, file_path, is_encrypted): @@ -295,7 +297,7 @@ def __download_sliced_file_from_azure(self, file_info, destination): 
file_info['absPath']['name'] + 'manifest' ) manifest = json.loads(manifest_stream.readall()) - file_names = []; + file_names = [] for entry in manifest['entries']: blob_path = entry['url'].split('blob.core.windows.net/%s/' % (file_info['absPath']['container']))[1] From 5de5bd279a1eae7634f949d2ee47258fee05bf6a Mon Sep 17 00:00:00 2001 From: pivnicek Date: Thu, 11 Mar 2021 21:37:13 +0100 Subject: [PATCH 13/16] cc fixes --- kbcstorage/files.py | 50 +++++++++++++++------------------------------ 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/kbcstorage/files.py b/kbcstorage/files.py index 5d881b5..9ee1aef 100644 --- a/kbcstorage/files.py +++ b/kbcstorage/files.py @@ -12,11 +12,8 @@ import requests from azure.storage.blob import BlobServiceClient, ContentSettings - - from kbcstorage.base import Endpoint - class Files(Endpoint): """ Buckets Endpoint @@ -187,11 +184,17 @@ def download(self, file_id, local_path): else: self.__download_file_from_azure(file_info, local_file) elif file_info['provider'] == 'aws': + s3 = boto3.resource( + 's3', + aws_access_key_id=file_info['credentials']['AccessKeyId'], + aws_secret_access_key=file_info['credentials']['SecretAccessKey'], + aws_session_token=file_info['credentials']['SessionToken'], + region_name=file_info['region'] + ) if file_info['isSliced']: - self.__download_sliced_file_from_aws(file_info, local_file) + self.__download_sliced_file_from_aws(file_info, local_file, s3) else: - self.__download_file_from_aws(file_info, local_file) - + self.__download_file_from_aws(file_info, local_file, s3) return local_file def __upload_to_azure(self, preparation_result, file_path): @@ -203,7 +206,6 @@ def __upload_to_azure(self, preparation_result, file_path): container=preparation_result['absUploadParams']['container'], blob=preparation_result['absUploadParams']['blobName'] ) - with open(file_path, "rb") as blob_data: blob_client.upload_blob( blob_data, @@ -235,27 +237,12 @@ def __upload_to_aws(self, prepare_result, file_path, is_encrypted): else: s3_object.put(ACL=upload_params['acl'], Body=file, ContentDisposition=disposition) - return - - def __download_file_from_aws(self, file_info, destination): - s3 = boto3.resource( - 's3', - aws_access_key_id=file_info['credentials']['AccessKeyId'], - aws_secret_access_key=file_info['credentials']['SecretAccessKey'], - aws_session_token=file_info['credentials']['SessionToken'], - region_name=file_info['region'] - ) + + def __download_file_from_aws(self, file_info, destination, s3): bucket = s3.Bucket(file_info["s3Path"]["bucket"]) bucket.download_file(file_info["s3Path"]["key"], destination) - def __download_sliced_file_from_aws(self, file_info, destination): - s3 = boto3.resource( - 's3', - aws_access_key_id=file_info['credentials']['AccessKeyId'], - aws_secret_access_key=file_info['credentials']['SecretAccessKey'], - aws_session_token=file_info['credentials']['SessionToken'], - region_name=file_info['region'] - ) + def __download_sliced_file_from_aws(self, file_info, destination, s3): manifest = requests.get(url=file_info['url']).json() file_names = [] for entry in manifest["entries"]: @@ -266,13 +253,7 @@ def __download_sliced_file_from_aws(self, file_info, destination): file_key = "/".join(splitted_path[3:]) bucket = s3.Bucket(file_info['s3Path']['bucket']) bucket.download_file(file_key, file_name) - # merge the downloaded files - with open(destination, mode='wb') as out_file: - for file_name in file_names: - with open(file_name, mode='rb') as in_file: - for line in in_file: - out_file.write(line) - 
os.remove(file_name) + self.__merge_split_files(file_names, destination) def __download_file_from_azure(self, file_info, destination): blob_service_client = BlobServiceClient.from_connection_string( @@ -306,11 +287,12 @@ def __download_sliced_file_from_azure(self, file_info, destination): file_names.append(file_name) with open(file_name, "wb") as file_slice: file_slice.write(container_client.download_blob(blob_path).readall()) + self.__merge_split_files(file_names, destination) - # merge the downloaded files into one + def __merge_split_files(self, file_names, destination): with open(destination, mode='wb') as out_file: for file_name in file_names: with open(file_name, mode='rb') as in_file: for line in in_file: out_file.write(line) - os.remove(file_name) + os.remove(file_name) \ No newline at end of file From d524eb786284c71799e3cb8f5bd8eb122fba7d60 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Fri, 12 Mar 2021 10:31:45 +0100 Subject: [PATCH 14/16] cc fix --- kbcstorage/files.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/kbcstorage/files.py b/kbcstorage/files.py index 9ee1aef..8929c31 100644 --- a/kbcstorage/files.py +++ b/kbcstorage/files.py @@ -198,13 +198,10 @@ def download(self, file_id, local_path): return local_file def __upload_to_azure(self, preparation_result, file_path): - - blob_service_client = BlobServiceClient.from_connection_string( - preparation_result['absUploadParams']['absCredentials']['SASConnectionString'] - ) - blob_client = blob_service_client.get_blob_client( - container=preparation_result['absUploadParams']['container'], - blob=preparation_result['absUploadParams']['blobName'] + blob_client = self.__get_blob_client( + preparation_result['absUploadParams']['absCredentials']['SASConnectionString'], + preparation_result['absUploadParams']['container'], + preparation_result['absUploadParams']['blobName'] ) with open(file_path, "rb") as blob_data: blob_client.upload_blob( @@ -224,16 +221,13 @@ def __upload_to_aws(self, prepare_result, file_path, is_encrypted): aws_secret_access_key=key, aws_session_token=token, region_name=prepare_result['region']) - - s3_object = s3.Object(bucket_name=upload_params['bucket'], - key=upload_params['key']) + s3_object = s3.Object(bucket_name=upload_params['bucket'], key=upload_params['key']) disposition = 'attachment; filename={};'.format(prepare_result['name']) with open(file_path, mode='rb') as file: if is_encrypted: encryption = upload_params['x-amz-server-side-encryption'] s3_object.put(ACL=upload_params['acl'], Body=file, - ContentDisposition=disposition, - ServerSideEncryption=encryption) + ContentDisposition=disposition, ServerSideEncryption=encryption) else: s3_object.put(ACL=upload_params['acl'], Body=file, ContentDisposition=disposition) @@ -256,12 +250,10 @@ def __download_sliced_file_from_aws(self, file_info, destination, s3): self.__merge_split_files(file_names, destination) def __download_file_from_azure(self, file_info, destination): - blob_service_client = BlobServiceClient.from_connection_string( - file_info['absCredentials']['SASConnectionString'] - ) - blob_client = blob_service_client.get_blob_client( - container=file_info['absPath']['container'], - blob=file_info['absPath']['name'] + blob_client = self.__get_blob_client( + file_info['absCredentials']['SASConnectionString'], + file_info['absPath']['container'], + file_info['absPath']['name'] ) with open(destination, "wb") as downloaded_blob: download_stream = blob_client.download_blob() @@ -279,7 +271,6 @@ 
def __download_sliced_file_from_azure(self, file_info, destination): ) manifest = json.loads(manifest_stream.readall()) file_names = [] - for entry in manifest['entries']: blob_path = entry['url'].split('blob.core.windows.net/%s/' % (file_info['absPath']['container']))[1] full_path = entry["url"] @@ -295,4 +286,8 @@ def __merge_split_files(self, file_names, destination): with open(file_name, mode='rb') as in_file: for line in in_file: out_file.write(line) - os.remove(file_name) \ No newline at end of file + os.remove(file_name) + + def __get_blob_client(self, connection_string, container, blob_name): + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + return blob_service_client.get_blob_client(container=container, blob=blob_name) From 319745a55f412458ed39b6e813c6cd1cd435c68f Mon Sep 17 00:00:00 2001 From: pivnicek Date: Fri, 12 Mar 2021 10:33:06 +0100 Subject: [PATCH 15/16] assert provider --- tests/functional/test_files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/functional/test_files.py b/tests/functional/test_files.py index 58175a4..4ef8b37 100644 --- a/tests/functional/test_files.py +++ b/tests/functional/test_files.py @@ -101,6 +101,8 @@ def test_download_file_credentials(self): file_info = self.files.detail(file_id, federation_token=True) with self.subTest(): self.assertEqual(file_id, file_info['id']) + with self.subTest(): + self.assertTrue(file_info['provider'] in ['aws', 'azure']) if file_info['provider'] == 'aws': with self.subTest(): self.assertTrue('credentials' in file_info) From c91a508b7c7573120d98e6b6ce50f074b2e5a4b5 Mon Sep 17 00:00:00 2001 From: pivnicek Date: Fri, 12 Mar 2021 10:52:34 +0100 Subject: [PATCH 16/16] cs --- kbcstorage/files.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kbcstorage/files.py b/kbcstorage/files.py index 8929c31..5ba5902 100644 --- a/kbcstorage/files.py +++ b/kbcstorage/files.py @@ -14,6 +14,7 @@ from azure.storage.blob import BlobServiceClient, ContentSettings from kbcstorage.base import Endpoint + class Files(Endpoint): """ Buckets Endpoint
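
Usage note: with the series applied, upload_file() and download() select the
storage backend from the 'provider' field returned by prepare_upload() and
detail(), so calling code is identical on AWS and Azure stacks. A minimal
sketch follows; it assumes the Files endpoint is constructed from a Storage
API URL and token as elsewhere in this client, and the URL and token shown
are placeholders:

    # Sketch only: the connection URL and token below are placeholders.
    from kbcstorage.files import Files

    files = Files('https://connection.keboola.com', 'my-storage-api-token')

    # prepare_upload() returns federation credentials for the project's
    # backend; upload_file() then dispatches to the S3 or Azure Blob path.
    file_id = files.upload_file('/tmp/data.csv', tags=['example'])

    # detail(..., federation_token=True) likewise carries either AWS
    # 'credentials' or an ABS 'SASConnectionString'; sliced files are
    # fetched per slice and merged locally before the path is returned.
    local_file = files.download(file_id, '/tmp/downloads')
    print(local_file)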