Skip to content

Commit

Permalink
core: add targz_into_ocilayout for modelcard later use
Browse files Browse the repository at this point in the history
Signed-off-by: tarilabs <[email protected]>
  • Loading branch information
tarilabs committed Dec 17, 2024
1 parent ba98953 commit 7ffc393
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 3 deletions.
17 changes: 17 additions & 0 deletions olot/basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import tarfile
from typing import Dict
import click
import gzip

from .oci.oci_config import OCIManifestConfig

Expand Down Expand Up @@ -179,6 +180,22 @@ def tar_into_ocilayout(ocilayout: Path, model: Path):
return checksum


def targz_into_ocilayout(ocilayout: Path, model: Path):
    """Add `model` to the oci-layout as a gzip-compressed tar layer blob.

    The archive is streamed into a temporary file inside ``blobs/sha256``
    while two digests are computed on the fly: one over the compressed
    (.tar.gz) bytes and one over the uncompressed (.tar) bytes. On success
    the temporary file is renamed to the digest of the compressed bytes.

    Args:
        ocilayout: root directory of the oci-layout; its ``blobs/sha256``
            subdirectory must already exist (it is not created here).
        model: path to archive; it is stored in the tar under
            ``models/<model.name>``.

    Returns:
        A ``(checksum, tar_checksum)`` tuple of hex digests: the first is the
        digest of the .tar.gz blob (and also its final filename), the second
        is the digest of the inner, uncompressed .tar stream.

    Raises:
        Whatever the underlying file/gzip/tarfile operations raise; in that
        case the temporary file is removed before re-raising, so no partial
        blob is left behind in the layout.
    """
    sha256_path = ocilayout / "blobs" / "sha256"
    temp_tar_filename = sha256_path / "temp_layer"
    try:
        with open(temp_tar_filename, "wb") as temp_file:
            # outer writer hashes the compressed bytes as they reach the file
            writer = HashingWriter(temp_file)
            # mtime=0 keeps the gzip header free of the current timestamp, so
            # the same input produces the same compressed digest
            with gzip.GzipFile(fileobj=writer, mode="wb", mtime=0, compresslevel=6) as gz:
                # inner writer hashes the raw tar bytes before compression
                inner_writer = HashingWriter(gz)
                with tarfile.open(fileobj=inner_writer, mode="w") as tar:
                    tar.add(model, arcname="/models/" + model.name, filter=tar_filter_fn)
        # digests are read only after every stream is closed: closing the tar
        # and gzip streams writes their trailing bytes, which must be hashed too
        checksum = writer.hash_func.hexdigest()
        tar_checksum = inner_writer.hash_func.hexdigest()
        os.rename(temp_tar_filename, sha256_path / checksum)
    except BaseException:
        # best-effort cleanup: never leave a half-written "temp_layer" in the
        # blobs directory; the original error is what the caller must see
        try:
            os.remove(temp_tar_filename)
        except OSError:
            pass
        raise
    return (checksum, tar_checksum)


def tar_filter_fn(input: tarfile.TarInfo) -> tarfile.TarInfo :
input.uid = 0
input.gid = 0
Expand Down
46 changes: 43 additions & 3 deletions tests/basic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
from pathlib import Path
import tarfile
from typing import Dict

import pytest
import gzip

Check failure on line 7 in tests/basic_test.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F811)

tests/basic_test.py:7:8: F811 Redefinition of unused `gzip` from line 1
import shutil

from olot.basics import HashingWriter, get_file_hash, check_ocilayout, read_ocilayout_root_index, crawl_ocilayout_indexes, crawl_ocilayout_manifests, compute_hash_of_str, tar_into_ocilayout
from olot.basics import HashingWriter, get_file_hash, check_ocilayout, read_ocilayout_root_index, crawl_ocilayout_indexes, crawl_ocilayout_manifests, compute_hash_of_str, targz_into_ocilayout, tar_into_ocilayout
from olot.oci.oci_image_index import OCIImageIndex
from olot.oci.oci_image_manifest import OCIImageManifest

Expand Down Expand Up @@ -63,14 +64,20 @@ def test_tar_into_ocilayout(tmp_path):
sha256_path = tmp_path / "blobs" / "sha256"
sha256_path.mkdir(parents=True, exist_ok=True)
digest = tar_into_ocilayout(tmp_path, model_path) # forcing it into a partial temp directory with blobs subdir for tests

for file in tmp_path.rglob('*'):
if file.is_file():
print(file)

checksum_from_disk = get_file_hash(sha256_path / digest) # read the file
assert digest == checksum_from_disk # filename should match its digest

found = None
with tarfile.open(sha256_path / digest, "r") as tar:
for tarinfo in tar:
if tarinfo.name == "models/model.joblib" and tarinfo.mode == 0o664:
found = tarinfo # model.joblib is added in models/ inside modelcar with expected permissions
assert found


def test_bespoke_single_file_gz(tmp_path):
"""Example bespoke use of HashingWriter for .tar.gz
Expand Down Expand Up @@ -112,6 +119,39 @@ def test_bespoke_single_file_gz(tmp_path):
print(file)


def test_targz_into_ocilayout(tmp_path):
    """Check that targz_into_ocilayout() writes the expected tar.gz layer blob into the oci-layout
    """
    def show_files():
        # debugging aid: print every regular file currently under tmp_path
        for entry in tmp_path.rglob('*'):
            if entry.is_file():
                print(entry)

    blobs_dir = tmp_path / "blobs" / "sha256"
    blobs_dir.mkdir(parents=True, exist_ok=True)
    source_model = Path(__file__).parent / "data" / "model.joblib"
    # a partial layout (temp directory with only the blobs subdir) is enough for this test
    result = targz_into_ocilayout(tmp_path, source_model)
    gz_digest = result[0]  # first element: digest of the tar.gz
    show_files()

    blob = blobs_dir / gz_digest
    assert gz_digest == get_file_hash(blob)  # the blob's filename must equal the hash of its content

    # model.joblib must sit in models/ inside the modelcar with the expected permissions
    with tarfile.open(blob, "r:gz") as archive:
        hits = [
            member
            for member in archive
            if member.name == "models/model.joblib" and member.mode == 0o664
        ]
    assert hits

    # gunzip the blob to a plain .tar so its digest can be compared with the second element
    plain_tar = blobs_dir / "throwaway.tar"
    with gzip.open(blob, "rb") as compressed, open(plain_tar, "wb") as expanded:
        shutil.copyfileobj(compressed, expanded)
    show_files()
    assert result[1] == get_file_hash(plain_tar)  # digest of the .tar extracted from the .tar.gz


def test_check_ocilayout():
"""Verify check_ocilayout() fn on known oci-layout and not
"""
Expand Down

0 comments on commit 7ffc393

Please sign in to comment.