Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add model source properties to store metadata about origin of a model artifact, fixes RHOAIENG-19885 #838

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions api/openapi/model-registry.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1550,6 +1550,35 @@ components:
serviceAccountName:
description: Name of the service account with storage secret.
type: string
modelSourceKind:
type: string
description: |-
A string identifier describing the source kind. It differentiates various sources of model artifacts.
This identifier should be agreed upon by producers and consumers of source model metadata.
It is deliberately not an enumeration, so that the set of model metadata sources stays open-ended.
E.g. Kubeflow pipelines could use `pipelines` to identify models it produces.
modelSourceClass:
type: string
description: |-
A subgroup within the source kind. It is a specific sub-component or instance within the source kind.
E.g. `pipelinerun` for a Kubeflow pipeline run.
modelSourceGroup:
type: string
description: |-
Unique identifier for a source group of models from the source class.
It maps to a physical group of source models.
E.g. a Kubernetes namespace where the pipeline run was executed.
modelSourceId:
type: string
description: |-
A unique identifier for a source model within kind, class, and group.
It should be a URL-friendly string if the source supports using URLs to locate source models.
E.g. a pipeline run ID.
modelSourceName:
type: string
description: |-
A human-readable name for the source model.
E.g. `my-project/1`, `ibm-granite/granite-3.1-8b-base:2.1.2`.
ModelArtifactCreate:
description: An ML model artifact.
properties:
Expand Down
99 changes: 57 additions & 42 deletions clients/python/src/model_registry/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,17 @@ class ModelRegistry:
"""Model registry client."""

def __init__(
self,
server_address: str,
port: int = 443,
*,
author: str,
is_secure: bool = True,
user_token: str | None = None,
user_token_envvar: str = DEFAULT_USER_TOKEN_ENVVAR,
custom_ca: str | None = None,
custom_ca_envvar: str | None = None,
log_level: int = logging.WARNING,
self,
server_address: str,
port: int = 443,
*,
author: str,
is_secure: bool = True,
user_token: str | None = None,
user_token_envvar: str = DEFAULT_USER_TOKEN_ENVVAR,
custom_ca: str | None = None,
custom_ca_envvar: str | None = None,
log_level: int = logging.WARNING,
):
"""Constructor.

Expand Down Expand Up @@ -103,9 +103,9 @@ def __init__(

if is_secure:
if (
not custom_ca
and custom_ca_envvar
and (cert := os.getenv(custom_ca_envvar))
not custom_ca
and custom_ca_envvar
and (cert := os.getenv(custom_ca_envvar))
):
logger.info(
"Using custom CA envvar %s",
Expand Down Expand Up @@ -146,7 +146,7 @@ async def _register_model(self, name: str, **kwargs) -> RegisteredModel:
)

async def _register_new_version(
self, rm: RegisteredModel, version: str, author: str, /, **kwargs
self, rm: RegisteredModel, version: str, author: str, /, **kwargs
) -> ModelVersion:
assert rm.id is not None, "Registered model must have an ID"
if await self._api.get_model_version_by_params(rm.id, version):
Expand All @@ -158,28 +158,33 @@ async def _register_new_version(
)

async def _register_model_artifact(
self, mv: ModelVersion, name: str, uri: str, /, **kwargs
self, mv: ModelVersion, name: str, uri: str, /, **kwargs
) -> ModelArtifact:
assert mv.id is not None, "Model version must have an ID"
return await self._api.upsert_model_version_artifact(
ModelArtifact(name=name, uri=uri, **kwargs), mv.id
)

def register_model(
self,
name: str,
uri: str,
*,
model_format_name: str,
model_format_version: str,
version: str,
storage_key: str | None = None,
storage_path: str | None = None,
service_account_name: str | None = None,
author: str | None = None,
owner: str | None = None,
description: str | None = None,
metadata: Mapping[str, SupportedTypes] | None = None,
self,
name: str,
uri: str,
*,
model_format_name: str,
model_format_version: str,
version: str,
storage_key: str | None = None,
storage_path: str | None = None,
service_account_name: str | None = None,
model_source_kind: str | None = None,
model_source_class: str | None = None,
model_source_group: str | None = None,
model_source_id: str | None = None,
model_source_name: str | None = None,
author: str | None = None,
owner: str | None = None,
description: str | None = None,
metadata: Mapping[str, SupportedTypes] | None = None,
) -> RegisteredModel:
"""Register a model.

Expand All @@ -205,6 +210,11 @@ def register_model(
storage_key: Storage key.
storage_path: Storage path.
service_account_name: Service account name.
model_source_kind: A string identifier describing the source kind.
model_source_class: A subgroup within the source kind.
model_source_group: This identifies a source group for models from source class.
model_source_id: A unique identifier for a source model within kind, class, and group.
model_source_name: A human-readable name for the source model.
metadata: Additional version metadata. Defaults to values returned by `default_metadata()`.

Returns:
Expand All @@ -230,6 +240,11 @@ def register_model(
storage_key=storage_key,
storage_path=storage_path,
service_account_name=service_account_name,
model_source_kind=model_source_kind,
model_source_class=model_source_class,
model_source_group=model_source_group,
model_source_id=model_source_id,
model_source_name=model_source_name,
)
)

Expand All @@ -250,18 +265,18 @@ def update(self, model: TModel) -> TModel:
return self.async_runner(self._api.upsert_model_artifact(model))

def register_hf_model(
self,
repo: str,
path: str,
*,
version: str,
model_format_name: str,
model_format_version: str,
author: str | None = None,
owner: str | None = None,
model_name: str | None = None,
description: str | None = None,
git_ref: str = "main",
self,
repo: str,
path: str,
*,
version: str,
model_format_name: str,
model_format_version: str,
author: str | None = None,
owner: str | None = None,
model_name: str | None = None,
description: str | None = None,
git_ref: str = "main",
) -> RegisteredModel:
"""Register a Hugging Face model.

Expand Down
15 changes: 15 additions & 0 deletions clients/python/src/model_registry/types/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ class ModelArtifact(Artifact):
storage_key: Storage secret name.
storage_path: Storage path of the model.
service_account_name: Name of the service account with storage secret.
model_source_kind: A string identifier describing the source kind.
model_source_class: A subgroup within the source kind.
model_source_group: This identifies a source group for models from source class.
model_source_id: A unique identifier for a source model within kind, class, and group.
model_source_name: A human-readable name for the source model.
"""

# TODO: this could be an enum of valid formats
Expand All @@ -143,6 +148,11 @@ class ModelArtifact(Artifact):
storage_key: str | None = None
storage_path: str | None = None
service_account_name: str | None = None
model_source_kind: str | None = None
model_source_class: str | None = None
model_source_group: str | None = None
model_source_id: str | None = None
model_source_name: str | None = None

_model_version_id: str | None = None

Expand Down Expand Up @@ -194,6 +204,11 @@ def from_basemodel(cls, source: ModelArtifactBaseModel) -> ModelArtifact:
storage_key=source.storage_key,
storage_path=source.storage_path,
service_account_name=source.service_account_name,
model_source_kind=source.model_source_kind,
model_source_class=source.model_source_class,
model_source_group=source.model_source_group,
model_source_id=source.model_source_id,
model_source_name=source.model_source_name,
state=source.state,
custom_properties=cls._unmap_custom_properties(source.custom_properties)
if source.custom_properties
Expand Down
35 changes: 35 additions & 0 deletions clients/python/src/mr_openapi/models/model_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,31 @@ class ModelArtifact(BaseModel):
service_account_name: StrictStr | None = Field(
default=None, description="Name of the service account with storage secret.", alias="serviceAccountName"
)
model_source_kind: StrictStr | None = Field(
default=None,
description="A string identifier describing the source kind. It differentiates various sources of model artifacts. This identifier should be agreed upon by producers and consumers of source model metadata. It is not an enumeration to keep the source of model metadata open ended. E.g. Kubeflow pipelines could use `pipelines` to identify models it produces.",
alias="modelSourceKind",
)
model_source_class: StrictStr | None = Field(
default=None,
description="A subgroup within the source kind. It is a specific sub-component or instance within the source kind. E.g. `pipelinerun` for a Kubeflow pipeline run.",
alias="modelSourceClass",
)
model_source_group: StrictStr | None = Field(
default=None,
description="Unique identifier for a source group for models from source class. It maps to a physical group of source models. E.g. a Kubernetes namespace where the pipeline run was executed.",
alias="modelSourceGroup",
)
model_source_id: StrictStr | None = Field(
default=None,
description="A unique identifier for a source model within kind, class, and group. It should be a url friendly string if source supports using URLs to locate source models. E.g. a pipeline run ID.",
alias="modelSourceId",
)
model_source_name: StrictStr | None = Field(
default=None,
description="A human-readable name for the source model. E.g. `my-project/1`, `ibm-granite/granite-3.1-8b-base:2.1.2`.",
alias="modelSourceName",
)
__properties: ClassVar[list[str]] = [
"customProperties",
"description",
Expand All @@ -86,6 +111,11 @@ class ModelArtifact(BaseModel):
"storagePath",
"modelFormatVersion",
"serviceAccountName",
"modelSourceKind",
"modelSourceClass",
"modelSourceGroup",
"modelSourceId",
"modelSourceName",
]

model_config = ConfigDict(
Expand Down Expand Up @@ -170,5 +200,10 @@ def from_dict(cls, obj: dict[str, Any] | None) -> Self | None:
"storagePath": obj.get("storagePath"),
"modelFormatVersion": obj.get("modelFormatVersion"),
"serviceAccountName": obj.get("serviceAccountName"),
"modelSourceKind": obj.get("modelSourceKind"),
"modelSourceClass": obj.get("modelSourceClass"),
"modelSourceGroup": obj.get("modelSourceGroup"),
"modelSourceId": obj.get("modelSourceId"),
"modelSourceName": obj.get("modelSourceName"),
}
)
35 changes: 35 additions & 0 deletions clients/python/src/mr_openapi/models/model_artifact_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,31 @@ class ModelArtifactCreate(BaseModel):
service_account_name: StrictStr | None = Field(
default=None, description="Name of the service account with storage secret.", alias="serviceAccountName"
)
model_source_kind: StrictStr | None = Field(
default=None,
description="A string identifier describing the source kind. It differentiates various sources of model artifacts. This identifier should be agreed upon by producers and consumers of source model metadata. It is not an enumeration to keep the source of model metadata open ended. E.g. Kubeflow pipelines could use `pipelines` to identify models it produces.",
alias="modelSourceKind",
)
model_source_class: StrictStr | None = Field(
default=None,
description="A subgroup within the source kind. It is a specific sub-component or instance within the source kind. E.g. `pipelinerun` for a Kubeflow pipeline run.",
alias="modelSourceClass",
)
model_source_group: StrictStr | None = Field(
default=None,
description="Unique identifier for a source group for models from source class. It maps to a physical group of source models. E.g. a Kubernetes namespace where the pipeline run was executed.",
alias="modelSourceGroup",
)
model_source_id: StrictStr | None = Field(
default=None,
description="A unique identifier for a source model within kind, class, and group. It should be a url friendly string if source supports using URLs to locate source models. E.g. a pipeline run ID.",
alias="modelSourceId",
)
model_source_name: StrictStr | None = Field(
default=None,
description="A human-readable name for the source model. E.g. `my-project/1`, `ibm-granite/granite-3.1-8b-base:2.1.2`.",
alias="modelSourceName",
)
__properties: ClassVar[list[str]] = [
"customProperties",
"description",
Expand All @@ -72,6 +97,11 @@ class ModelArtifactCreate(BaseModel):
"storagePath",
"modelFormatVersion",
"serviceAccountName",
"modelSourceKind",
"modelSourceClass",
"modelSourceGroup",
"modelSourceId",
"modelSourceName",
]

model_config = ConfigDict(
Expand Down Expand Up @@ -147,5 +177,10 @@ def from_dict(cls, obj: dict[str, Any] | None) -> Self | None:
"storagePath": obj.get("storagePath"),
"modelFormatVersion": obj.get("modelFormatVersion"),
"serviceAccountName": obj.get("serviceAccountName"),
"modelSourceKind": obj.get("modelSourceKind"),
"modelSourceClass": obj.get("modelSourceClass"),
"modelSourceGroup": obj.get("modelSourceGroup"),
"modelSourceId": obj.get("modelSourceId"),
"modelSourceName": obj.get("modelSourceName"),
}
)
35 changes: 35 additions & 0 deletions clients/python/src/mr_openapi/models/model_artifact_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,31 @@ class ModelArtifactUpdate(BaseModel):
service_account_name: StrictStr | None = Field(
default=None, description="Name of the service account with storage secret.", alias="serviceAccountName"
)
model_source_kind: StrictStr | None = Field(
default=None,
description="A string identifier describing the source kind. It differentiates various sources of model artifacts. This identifier should be agreed upon by producers and consumers of source model metadata. It is not an enumeration to keep the source of model metadata open ended. E.g. Kubeflow pipelines could use `pipelines` to identify models it produces.",
alias="modelSourceKind",
)
model_source_class: StrictStr | None = Field(
default=None,
description="A subgroup within the source kind. It is a specific sub-component or instance within the source kind. E.g. `pipelinerun` for a Kubeflow pipeline run.",
alias="modelSourceClass",
)
model_source_group: StrictStr | None = Field(
default=None,
description="Unique identifier for a source group for models from source class. It maps to a physical group of source models. E.g. a Kubernetes namespace where the pipeline run was executed.",
alias="modelSourceGroup",
)
model_source_id: StrictStr | None = Field(
default=None,
description="A unique identifier for a source model within kind, class, and group. It should be a url friendly string if source supports using URLs to locate source models. E.g. a pipeline run ID.",
alias="modelSourceId",
)
model_source_name: StrictStr | None = Field(
default=None,
description="A human-readable name for the source model. E.g. `my-project/1`, `ibm-granite/granite-3.1-8b-base:2.1.2`.",
alias="modelSourceName",
)
__properties: ClassVar[list[str]] = [
"customProperties",
"description",
Expand All @@ -67,6 +92,11 @@ class ModelArtifactUpdate(BaseModel):
"storagePath",
"modelFormatVersion",
"serviceAccountName",
"modelSourceKind",
"modelSourceClass",
"modelSourceGroup",
"modelSourceId",
"modelSourceName",
]

model_config = ConfigDict(
Expand Down Expand Up @@ -141,5 +171,10 @@ def from_dict(cls, obj: dict[str, Any] | None) -> Self | None:
"storagePath": obj.get("storagePath"),
"modelFormatVersion": obj.get("modelFormatVersion"),
"serviceAccountName": obj.get("serviceAccountName"),
"modelSourceKind": obj.get("modelSourceKind"),
"modelSourceClass": obj.get("modelSourceClass"),
"modelSourceGroup": obj.get("modelSourceGroup"),
"modelSourceId": obj.get("modelSourceId"),
"modelSourceName": obj.get("modelSourceName"),
}
)
Loading
Loading