added support for files on gcp #80

Merged 5 commits on May 3, 2024
Changes from 1 commit
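For orientation, here is a minimal usage sketch of the two public methods this diff touches (`upload_file` and `download`) against a GCP-backed project. The connection URL, token, and local paths are placeholders, and the `Files(root_url, token)` constructor signature is assumed from the existing client rather than shown in this diff.

```python
from kbcstorage.files import Files

# Placeholders: real values depend on your Keboola project and Storage API token.
files = Files('https://connection.keboola.com', 'my-storage-api-token')

# upload_file() asks Storage API to prepare a file resource; its 'provider'
# field selects the branch in the diff below, and 'gcp' routes the upload
# through the new __upload_to_gcp() helper.
file_id = files.upload_file('./data.csv', tags=['example-tag'])

# For provider == 'gcp', download() builds a GCS client from the short-lived
# gcsCredentials token and fetches either a single blob or all slices.
files.download(file_id, './downloads')
```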
56 changes: 56 additions & 0 deletions kbcstorage/files.py
@@ -13,6 +13,8 @@

from azure.storage.blob import BlobServiceClient, ContentSettings
from kbcstorage.base import Endpoint
from google.oauth2 import credentials
from google.cloud import storage as GCPStorage


class Files(Endpoint):
@@ -86,6 +88,8 @@ def upload_file(self, file_path, tags=None, is_public=False,
self.__upload_to_azure(file_resource, file_path)
elif file_resource['provider'] == 'aws':
self.__upload_to_aws(file_resource, file_path, is_encrypted)
elif file_resource['provider'] == 'gcp':
self.__upload_to_gcp(file_resource, file_path)

return file_resource['id']

@@ -196,6 +200,16 @@ def download(self, file_id, local_path):
self.__download_sliced_file_from_aws(file_info, local_file, s3)
else:
self.__download_file_from_aws(file_info, local_file, s3)
elif file_info['provider'] == 'gcp':
storage_client = self.__get_gcp_client(
file_info['gcsCredentials']['access_token'],
file_info['gcsCredentials']['projectId'],
)

if file_info['isSliced']:
self.__download_sliced_file_from_gcp(file_info, local_file, storage_client)
else:
self.__download_file_from_gcp(file_info, local_file, storage_client)
return local_file

def __upload_to_azure(self, preparation_result, file_path):
@@ -233,6 +247,18 @@ def __upload_to_aws(self, prepare_result, file_path, is_encrypted):
s3_object.put(ACL=upload_params['acl'], Body=file,
ContentDisposition=disposition)

def __upload_to_gcp(self, preparation_result, file_path):
storage_client = self.__get_gcp_client(
preparation_result['gcsUploadParams']['access_token'],
preparation_result['gcsUploadParams']['projectId']
)

bucket = storage_client.bucket(preparation_result['gcsUploadParams']['bucket'])
blob = bucket.blob(preparation_result['gcsUploadParams']['key'])

with open(file_path, "rb") as blob_data:
blob.upload_from_file(blob_data)

def __download_file_from_aws(self, file_info, destination, s3):
bucket = s3.Bucket(file_info["s3Path"]["bucket"])
bucket.download_file(file_info["s3Path"]["key"], destination)
@@ -281,6 +307,31 @@ def __download_sliced_file_from_azure(self, file_info, destination):
file_slice.write(container_client.download_blob(blob_path).readall())
self.__merge_split_files(file_names, destination)

def __download_file_from_gcp(self, file_info, destination, storage_client):
# Downloads a single (non-sliced) file directly to the destination path.
bucket = storage_client.bucket(file_info['gcsPath']['bucket'])
blob = bucket.blob(file_info['gcsPath']['key'])
blob.download_to_filename(destination)

def __download_sliced_file_from_gcp(self, file_info, destination, storage_client):
# The manifest lists one URL per slice; each slice is downloaded to its own
# local file and the slices are then merged into the destination file.
manifest = requests.get(url=file_info['url']).json()
file_names = []
bucket = storage_client.bucket(file_info['gcsPath']['bucket'])
for entry in manifest["entries"]:
full_path = entry["url"]
file_name = full_path.rsplit("/", 1)[1]
file_names.append(file_name)
# Everything after the third slash is treated as the object key.
file_key = "/".join(full_path.split("/")[3:])
blob = bucket.blob(file_key)
blob.download_to_filename(file_name)
self.__merge_split_files(file_names, destination)

def __merge_split_files(self, file_names, destination):
with open(destination, mode='wb') as out_file:
for file_name in file_names:
@@ -292,3 +343,8 @@ def __merge_split_files(self, file_names, destination):
def __get_blob_client(self, connection_string, container, blob_name):
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
return blob_service_client.get_blob_client(container=container, blob=blob_name)

def __get_gcp_client(self, token, project):
creds = credentials.Credentials(token=token)
gcp_storage_client = GCPStorage.Client(credentials=creds, project=project)
return gcp_storage_client
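For context on the new `__get_gcp_client` helper above: `google.oauth2.credentials.Credentials(token=...)` wraps the short-lived access token returned by Storage API as non-refreshable credentials for the GCS client. A standalone sketch of that pattern (the token, project, bucket, and key values are placeholders, not part of this PR):

```python
from google.cloud import storage
from google.oauth2.credentials import Credentials


def make_gcs_client(access_token, project_id):
    # A bare OAuth2 access token: there is no refresh token, so the client
    # only works until the token expires.
    creds = Credentials(token=access_token)
    return storage.Client(credentials=creds, project=project_id)


# Placeholder values for illustration only.
client = make_gcs_client("ya29.placeholder-token", "example-project")
blob = client.bucket("example-bucket").blob("path/to/object.csv")
blob.download_to_filename("object.csv")
```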
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -26,6 +26,8 @@ dependencies = [
"requests",
"responses",
"python-dotenv",
"google-cloud-storage==2.16.0",
"google-auth==2.29.0"
]
dynamic = ["version"]
