From 7ffc393b8b1a8691d0699c577d9057c053a88223 Mon Sep 17 00:00:00 2001 From: tarilabs Date: Tue, 17 Dec 2024 17:39:51 +0100 Subject: [PATCH] core: add targz_into_ocilayout for modelcard later use Signed-off-by: tarilabs --- olot/basics.py | 17 +++++++++++++++++ tests/basic_test.py | 46 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/olot/basics.py b/olot/basics.py index 00bd139..37bc459 100644 --- a/olot/basics.py +++ b/olot/basics.py @@ -6,6 +6,7 @@ import tarfile from typing import Dict import click +import gzip from .oci.oci_config import OCIManifestConfig @@ -179,6 +180,22 @@ def tar_into_ocilayout(ocilayout: Path, model: Path): return checksum +def targz_into_ocilayout(ocilayout: Path, model: Path): + sha256_path = ocilayout / "blobs" / "sha256" + temp_tar_filename = sha256_path / "temp_layer" + with open(temp_tar_filename, "wb") as temp_file: + writer = HashingWriter(temp_file) + with gzip.GzipFile(fileobj=writer, mode="wb", mtime=0, compresslevel=6) as gz: + inner_writer = HashingWriter(gz) + with tarfile.open(fileobj=inner_writer, mode="w") as tar: + tar.add(model, arcname="/models/"+model.name, filter=tar_filter_fn) + checksum = writer.hash_func.hexdigest() + tar_checksum = inner_writer.hash_func.hexdigest() + final_tar_filename = checksum + os.rename(temp_tar_filename, sha256_path / final_tar_filename) + return (checksum, tar_checksum) + + def tar_filter_fn(input: tarfile.TarInfo) -> tarfile.TarInfo : input.uid = 0 input.gid = 0 diff --git a/tests/basic_test.py b/tests/basic_test.py index 0a7fc27..5b2f33e 100644 --- a/tests/basic_test.py +++ b/tests/basic_test.py @@ -3,10 +3,11 @@ from pathlib import Path import tarfile from typing import Dict - import pytest +import gzip +import shutil -from olot.basics import HashingWriter, get_file_hash, check_ocilayout, read_ocilayout_root_index, crawl_ocilayout_indexes, crawl_ocilayout_manifests, compute_hash_of_str, tar_into_ocilayout +from olot.basics import HashingWriter, get_file_hash, check_ocilayout, read_ocilayout_root_index, crawl_ocilayout_indexes, crawl_ocilayout_manifests, compute_hash_of_str, targz_into_ocilayout, tar_into_ocilayout from olot.oci.oci_image_index import OCIImageIndex from olot.oci.oci_image_manifest import OCIImageManifest @@ -63,7 +64,6 @@ def test_tar_into_ocilayout(tmp_path): sha256_path = tmp_path / "blobs" / "sha256" sha256_path.mkdir(parents=True, exist_ok=True) digest = tar_into_ocilayout(tmp_path, model_path) # forcing it into a partial temp directory with blobs subdir for tests - for file in tmp_path.rglob('*'): if file.is_file(): print(file) @@ -71,6 +71,13 @@ def test_tar_into_ocilayout(tmp_path): checksum_from_disk = get_file_hash(sha256_path / digest) # read the file assert digest == checksum_from_disk # filename should match its digest + found = None + with tarfile.open(sha256_path / digest, "r") as tar: + for tarinfo in tar: + if tarinfo.name == "models/model.joblib" and tarinfo.mode == 0o664: + found = tarinfo # model.joblib is added in models/ inside modelcar with expected permissions + assert found + def test_bespoke_single_file_gz(tmp_path): """Example bespoke use of HashingWriter for .tar.gz @@ -112,6 +119,39 @@ def test_bespoke_single_file_gz(tmp_path): print(file) +def test_targz_into_ocilayout(tmp_path): + """Test targz_into_ocilayout() function is able to produce the expected tar.gz layer blob in the oci-layout + """ + model_path = Path(__file__).parent / "data" / "model.joblib" + sha256_path = tmp_path / "blobs" / "sha256" + sha256_path.mkdir(parents=True, exist_ok=True) + digest_tuple = targz_into_ocilayout(tmp_path, model_path) # forcing it into a partial temp directory with blobs subdir for tests + digest = digest_tuple[0] # digest of the tar.gz + for file in tmp_path.rglob('*'): + if file.is_file(): + print(file) + + checksum_from_disk = get_file_hash(sha256_path / digest) # read the file + assert digest == checksum_from_disk # filename should match its digest + + found = None + with tarfile.open(sha256_path / digest, "r:gz") as tar: + for tarinfo in tar: + if tarinfo.name == "models/model.joblib" and tarinfo.mode == 0o664: + found = tarinfo # model.joblib is added in models/ inside modelcar with expected permissions + assert found + + throwaway_tar = sha256_path / "throwaway.tar" + with gzip.open(sha256_path / digest, "rb") as g_in: + with open(throwaway_tar, "wb") as f_out: + shutil.copyfileobj(g_in, f_out) + for file in tmp_path.rglob('*'): + if file.is_file(): + print(file) + tar_checksum_from_disk = get_file_hash(throwaway_tar) # compute the digest for the .tar from the .tar.gz + assert digest_tuple[1] == tar_checksum_from_disk # digests should match + + def test_check_ocilayout(): """Verify check_ocilayout() fn on known oci-layout and not """