Skip to content

Commit

Permalink
Use @list for arrayShape to maintain the order info
Browse files Browse the repository at this point in the history
  • Loading branch information
ccl-core committed Feb 13, 2025
1 parent 749e5f0 commit 95d18e3
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 5 deletions.
12 changes: 7 additions & 5 deletions datasets/1.0/huggingface-baratilab-flow3d/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,13 @@
"description": "Column 'pressure' from the Hugging Face parquet file.",
"dataType": "sc:Float",
"isArray": true,
"arrayShape": [
-1,
-1,
-1
],
"arrayShape": {
"@list": [
-1,
-1,
-1
]
},
"source": {
"fileSet": {
"@id": "parquet-files-for-config-0_0100_01.4_1.0E-4_1.0E-2"
Expand Down
2 changes: 2 additions & 0 deletions python/mlcroissant/mlcroissant/_src/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def ML_COMMONS(ctx) -> rdflib.Namespace:
SCHEMA_ORG_MD5 = lambda ctx: SCHEMA_ORG.md5 if ctx.is_v0() else ML_COMMONS(ctx)["md5"]

TO_CROISSANT = lambda ctx: {
ML_COMMONS_ARRAY_SHAPE(ctx): "array_shape",
ML_COMMONS_CITE_AS(ctx): "cite_as",
ML_COMMONS_COLUMN(ctx): "csv_column",
ML_COMMONS_DATA_TYPE(ctx): "data_type",
Expand All @@ -159,6 +160,7 @@ def ML_COMMONS(ctx) -> rdflib.Namespace:
ML_COMMONS_FILE_PROPERTY(ctx): "file_property",
ML_COMMONS_FORMAT(ctx): "format",
ML_COMMONS_INCLUDES(ctx): "includes",
ML_COMMONS_IS_ARRAY(ctx): "is_array",
ML_COMMONS_IS_LIVE_DATASET(ctx): "is_live_dataset",
ML_COMMONS_JSON_PATH(ctx): "json_path",
ML_COMMONS_REFERENCES(ctx): "references",
Expand Down
3 changes: 3 additions & 0 deletions python/mlcroissant/mlcroissant/_src/core/json_ld.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ def recursively_populate_jsonld(entry_node: Json, id_to_node: dict[str, Json]) -
for key, value in entry_node.copy().items():
if key == "@type" and isinstance(value, list):
entry_node[key] = term.URIRef(value[0])
elif key == "@list" and isinstance(value, list):
del entry_node[key]
return [recursively_populate_jsonld(child, id_to_node) for child in value]
elif isinstance(value, list):
del entry_node[key]
value = [recursively_populate_jsonld(child, id_to_node) for child in value]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Field(Node):
" unknown/unspecified size. [-1] represents a simple list."
),
input_types=[SDO.Integer],
to_jsonld=lambda _, array_shape: {"@list": array_shape},
url=constants.ML_COMMONS_ARRAY_SHAPE,
)
description: str | None = mlc_dataclasses.jsonld_field(
Expand Down

0 comments on commit 95d18e3

Please sign in to comment.