Skip to content

Commit

Permalink
Include new fields
Browse files Browse the repository at this point in the history
  • Loading branch information
ccl-core committed Feb 12, 2025
1 parent 428dbfc commit 9d53059
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 2 deletions.
2 changes: 2 additions & 0 deletions python/mlcroissant/mlcroissant/_src/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def ML_COMMONS(ctx) -> rdflib.Namespace:
ML_COMMONS_REFERENCES = lambda ctx: ML_COMMONS(ctx).references
ML_COMMONS_REGEX = lambda ctx: ML_COMMONS(ctx).regex
ML_COMMONS_REPEATED = lambda ctx: ML_COMMONS(ctx).repeated
ML_COMMONS_IS_ARRAY = lambda ctx: ML_COMMONS(ctx).isArray
ML_COMMONS_ARRAY_SHAPE = lambda ctx: ML_COMMONS(ctx).arrayShape
# ML_COMMONS.replace is understood as the `replace` method on the class Namespace.
ML_COMMONS_REPLACE = lambda ctx: ML_COMMONS(ctx)["replace"]
ML_COMMONS_SEPARATOR = lambda ctx: ML_COMMONS(ctx).separator
Expand Down
2 changes: 2 additions & 0 deletions python/mlcroissant/mlcroissant/_src/core/json_ld_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def test_make_context():
assert make_context(foo="bar") == {
"@language": "en",
"@vocab": "https://schema.org/",
"arrayShape": "cr:arrayShape",
"citeAs": "cr:citeAs",
"column": "cr:column",
"conformsTo": "dct:conformsTo",
Expand All @@ -50,6 +51,7 @@ def test_make_context():
"fileSet": "cr:fileSet",
"format": "cr:format",
"includes": "cr:includes",
"isArray": "cr:isArray",
"isLiveDataset": "cr:isLiveDataset",
"jsonPath": "cr:jsonPath",
"key": "cr:key",
Expand Down
2 changes: 2 additions & 0 deletions python/mlcroissant/mlcroissant/_src/core/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def make_context(ctx=None, **kwargs):
context = {
"@language": "en",
"@vocab": "https://schema.org/",
"arrayShape": "cr:arrayShape",
"citeAs": "cr:citeAs",
"column": "cr:column",
"conformsTo": "dct:conformsTo",
Expand All @@ -41,6 +42,7 @@ def make_context(ctx=None, **kwargs):
"fileSet": "cr:fileSet",
"format": "cr:format",
"includes": "cr:includes",
"isArray": "cr:isArray",
"isLiveDataset": "cr:isLiveDataset",
"jsonPath": "cr:jsonPath",
"key": "sc:key" if ctx is not None and ctx.is_v0() else "cr:key",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _parse_jsonpath(json_path: str):


def _is_repeated_field(field: Field | None) -> bool | None:
    """Tell whether `field` holds several values rather than a single one.

    A field counts as repeated when it is a `Field` instance and either its
    `repeated` flag or its `is_array` flag is truthy.

    Args:
        field: The field to inspect; may be None.

    Returns:
        False when `field` is not a `Field` instance. Otherwise the value of
        `field.repeated or field.is_array`, which is falsy (and may be None)
        when neither flag is set.
    """
    # Only one return statement: a leftover `return ... and field.repeated`
    # placed ahead of this line would make the `is_array` check unreachable.
    return isinstance(field, Field) and (field.repeated or field.is_array)


def _apply_transform_fn(value: Any, transform: Transform, field: Field) -> Any:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ class Field(Node):

JSONLD_TYPE = constants.ML_COMMONS_FIELD_TYPE

# array_shape: list[int] = mlc_dataclasses.jsonld_field(
array_shape: list[int] | None = mlc_dataclasses.jsonld_field(
cardinality="MANY",
# default_factory=lambda: [-1], # Defaults to one undefined dimension `(-1,)`?
default=None,
description=(
"The shape of the array, where -1 indicates dimensions of"
" unknown/unspecified size."
),
input_types=[SDO.Integer],
url=constants.ML_COMMONS_ARRAY_SHAPE,
)
description: str | None = mlc_dataclasses.jsonld_field(
default=None,
input_types=[SDO.Text],
Expand Down Expand Up @@ -69,6 +81,12 @@ class Field(Node):
input_types=[SDO.URL],
url=constants.ML_COMMONS_EQUIVALENT_PROPERTY,
)
is_array: bool | None = mlc_dataclasses.jsonld_field(
default=None,
description="If true, then the Field is an array of values of type dataType.",
input_types=[SDO.Boolean],
url=constants.ML_COMMONS_IS_ARRAY,
)
is_enumeration: bool | None = mlc_dataclasses.jsonld_field(
default=None,
input_types=[SDO.Boolean],
Expand Down Expand Up @@ -101,7 +119,10 @@ class Field(Node):
)
repeated: bool | None = mlc_dataclasses.jsonld_field(
default=None,
description="If true, then the Field is a list of values of type dataType.",
description=(
"[DEPRECATED]. Please use `is_array` and `array_shape`. If true, then the"
" Field is a list of values of type dataType."
),
input_types=[SDO.Boolean],
url=constants.ML_COMMONS_REPEATED,
)
Expand Down

0 comments on commit 9d53059

Please sign in to comment.