diff --git a/olot/oci/oci_common.py b/olot/oci/oci_common.py index 6a624a9..33f4d03 100644 --- a/olot/oci/oci_common.py +++ b/olot/oci/oci_common.py @@ -27,6 +27,7 @@ class MediaTypes: layer: MediaType = "application/vnd.oci.image.layer.v1.tar" layer_gzip: MediaType = "application/vnd.oci.image.layer.v1.tar+gzip" empty: MediaType = "application/vnd.oci.empty.v1+json" + config: MediaType = "application/vnd.oci.image.config.v1+json" Digest = Annotated[str, Field( ..., diff --git a/olot/oci/oci_image_manifest.py b/olot/oci/oci_image_manifest.py index 525009d..7b36bae 100644 --- a/olot/oci/oci_image_manifest.py +++ b/olot/oci/oci_image_manifest.py @@ -4,14 +4,14 @@ from __future__ import annotations -from typing import Annotated, List, Optional +from typing import Annotated, List, Optional, Dict import os import subprocess from pathlib import Path from pydantic import BaseModel, Field -from olot.oci.oci_common import Digest, Urls, Keys, Values, MediaTypes, MediaType +from olot.oci.oci_common import Urls, Keys, Values, MediaTypes, MediaType from olot.utils.types import Int64, Base64, Annotations from olot.utils.files import MIMETypes @@ -96,24 +96,27 @@ class ContentDescriptor(BaseModel): mediaType: MediaType = Field( - ..., description='the mediatype of the referenced object' + ..., description="The media type of the referenced object" ) - size: Int64 = Field(..., description='the size in bytes of the referenced object') - digest: Digest = Field( - ..., - description="the cryptographic checksum digest of the object, in the pattern ':'", + size: Int64 = Field( + ..., description="The size in bytes of the referenced object" + ) + digest: str = Field( + ..., description="The cryptographic checksum digest of the object, in the pattern ':'" ) urls: Optional[Urls] = Field( - None, description='a list of urls from which this object may be downloaded' + None, description="A list of URLs from which this object may be downloaded" ) data: Optional[Base64] = Field( - None, description='an embedding of the targeted content (base64 encoded)' + None, description="An embedding of the targeted content (base64 encoded)" ) artifactType: Optional[MediaType] = Field( - None, description='the IANA media type of this artifact' + None, description="The IANA media type of this artifact" ) - annotations: Optional[Annotations] = None + annotations: Optional[Dict[str, str]] = None + class Config: + exclude_none = True class OCIImageManifest(BaseModel): schemaVersion: Annotated[int, Field(ge=2, le=2)] = Field( @@ -193,12 +196,12 @@ def create_manifest_layers(files: List[Path], blob_layers: dict) -> List[Content mediaType=get_file_media_type(file), size=os.stat(file).st_size, digest=f"sha256:{file_digest}", - urls=None, - data=None, - artifactType=None, annotations= { Keys.image_title_annotation: os.path.basename(file) - } + }, + urls = None, + data = None, + artifactType = None, ) layers.append(layer) return layers \ No newline at end of file diff --git a/olot/oci_artifact.py b/olot/oci_artifact.py index 709dded..5d810c2 100644 --- a/olot/oci_artifact.py +++ b/olot/oci_artifact.py @@ -1,13 +1,12 @@ from pathlib import Path import os -import datetime import json import argparse from typing import List from olot.oci.oci_image_manifest import create_oci_image_manifest, create_manifest_layers from olot.oci.oci_image_layout import create_ocilayout -from olot.oci.oci_common import Keys, MediaTypes +from olot.oci.oci_common import MediaTypes, Values from olot.oci.oci_image_index import Manifest, create_oci_image_index from olot.utils.files import MIMETypes, tarball_from_file, targz_from_file from olot.utils.types import compute_hash_of_str @@ -39,16 +38,12 @@ def create_oci_artifact_from_model(source_dir: Path, dest_dir: Path): # Create the OCI image manifest manifest_layers = create_manifest_layers(model_files, layers) - annotations = { - Keys.image_created_annotation: datetime.datetime.now().isoformat() - } artifactType = MIMETypes.mlmodel manifest = create_oci_image_manifest( artifactType=artifactType, layers=manifest_layers, - annotations=annotations ) - manifest_json = json.dumps(manifest.model_dump(), indent=4, sort_keys=True) + manifest_json = json.dumps(manifest.dict(exclude_none=True), indent=4, sort_keys=True) manifest_SHA = compute_hash_of_str(manifest_json) with open(sha256_path / manifest_SHA, "w") as f: f.write(manifest_json) @@ -60,22 +55,27 @@ def create_oci_artifact_from_model(source_dir: Path, dest_dir: Path): mediaType=MediaTypes.manifest, size=os.stat(sha256_path / manifest_SHA).st_size, digest=f"sha256:{manifest_SHA}", - urls=None, - platform=None, - annotations=None + urls = None, ) ] ) - index_json = json.dumps(index.model_dump(), indent=4, sort_keys=True) + index_json = json.dumps(index.dict(exclude_none=True), indent=4, sort_keys=True) with open(dest_dir / "index.json", "w") as f: f.write(index_json) - # Create the OCI-layout file oci_layout = create_ocilayout() with open(dest_dir / "oci-layout", "w") as f: f.write(json.dumps(oci_layout.model_dump(), indent=4, sort_keys=True)) + # Create empty config file with digest as name + empty_config: dict[str, str] = {} + empty_digest_split = Values.empty_digest.split(":") + if len(empty_digest_split) == 2: + with open(dest_dir / "blobs" / "sha256" / empty_digest_split[1], "w") as f: + f.write(json.dumps(empty_config)) + else: + raise ValueError(f"Invalid empty_digest format: {Values.empty_digest}") def create_blobs(model_files: List[Path], dest_dir: Path): """ diff --git a/tests/data/sample-model/oci/blobs/sha256/06cd6f2c31be8e4d5a1cea6d6c1e7401cc0128c21439456eaf964180b00dfda1 b/tests/data/sample-model/oci/blobs/sha256/06cd6f2c31be8e4d5a1cea6d6c1e7401cc0128c21439456eaf964180b00dfda1 new file mode 100644 index 0000000..48da1bb Binary files /dev/null and b/tests/data/sample-model/oci/blobs/sha256/06cd6f2c31be8e4d5a1cea6d6c1e7401cc0128c21439456eaf964180b00dfda1 differ diff --git a/tests/data/sample-model/oci/blobs/sha256/44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a b/tests/data/sample-model/oci/blobs/sha256/44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/tests/data/sample-model/oci/blobs/sha256/44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/data/sample-model/oci/blobs/sha256/69c637f302502a950e7a5072f78032ce593d73797845751debed90558638d5a1 b/tests/data/sample-model/oci/blobs/sha256/69c637f302502a950e7a5072f78032ce593d73797845751debed90558638d5a1 new file mode 100644 index 0000000..aa7aeb9 Binary files /dev/null and b/tests/data/sample-model/oci/blobs/sha256/69c637f302502a950e7a5072f78032ce593d73797845751debed90558638d5a1 differ diff --git a/tests/data/sample-model/oci/blobs/sha256/70202a75d040e8a232a904310700749d8e630a68e4c91f42173ec597e137c470 b/tests/data/sample-model/oci/blobs/sha256/70202a75d040e8a232a904310700749d8e630a68e4c91f42173ec597e137c470 new file mode 100644 index 0000000..13376f8 --- /dev/null +++ b/tests/data/sample-model/oci/blobs/sha256/70202a75d040e8a232a904310700749d8e630a68e4c91f42173ec597e137c470 @@ -0,0 +1,37 @@ +{ + "artifactType": "application/x-mlmodel", + "config": { + "data": "e30=", + "digest": "sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a", + "mediaType": "application/vnd.oci.empty.v1+json", + "size": 2 + }, + "layers": [ + { + "annotations": { + "org.opencontainers.image.title": "model.joblib" + }, + "digest": "sha256:80da85cc1eb84676a6adbc8b495381463f1f2302425a96033dc51a81e17cb0bb", + "mediaType": "application/octet-stream", + "size": 3299 + }, + { + "annotations": { + "org.opencontainers.image.title": "README.md" + }, + "digest": "sha256:69c637f302502a950e7a5072f78032ce593d73797845751debed90558638d5a1", + "mediaType": "text/plain", + "size": 6625 + }, + { + "annotations": { + "org.opencontainers.image.title": "hello.md" + }, + "digest": "sha256:06cd6f2c31be8e4d5a1cea6d6c1e7401cc0128c21439456eaf964180b00dfda1", + "mediaType": "text/plain", + "size": 72 + } + ], + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "schemaVersion": 2 +} \ No newline at end of file diff --git a/tests/data/sample-model/oci/blobs/sha256/80da85cc1eb84676a6adbc8b495381463f1f2302425a96033dc51a81e17cb0bb b/tests/data/sample-model/oci/blobs/sha256/80da85cc1eb84676a6adbc8b495381463f1f2302425a96033dc51a81e17cb0bb new file mode 100644 index 0000000..4ee65f9 Binary files /dev/null and b/tests/data/sample-model/oci/blobs/sha256/80da85cc1eb84676a6adbc8b495381463f1f2302425a96033dc51a81e17cb0bb differ diff --git a/tests/data/sample-model/oci/index.json b/tests/data/sample-model/oci/index.json new file mode 100644 index 0000000..ef24c0a --- /dev/null +++ b/tests/data/sample-model/oci/index.json @@ -0,0 +1,11 @@ +{ + "manifests": [ + { + "digest": "sha256:70202a75d040e8a232a904310700749d8e630a68e4c91f42173ec597e137c470", + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "size": 1250 + } + ], + "mediaType": "application/vnd.oci.image.index.v1+json", + "schemaVersion": 2 +} \ No newline at end of file diff --git a/tests/data/sample-model/oci/oci-layout b/tests/data/sample-model/oci/oci-layout new file mode 100644 index 0000000..224a869 --- /dev/null +++ b/tests/data/sample-model/oci/oci-layout @@ -0,0 +1,3 @@ +{ + "imageLayoutVersion": "1.0.0" +} \ No newline at end of file