-
Notifications
You must be signed in to change notification settings - Fork 89
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into virtual-arrays
- Loading branch information
Showing
7 changed files
with
288 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,8 +35,8 @@ jobs: | |
run: ls -l dist/ | ||
|
||
- name: Generate artifact attestation for sdist and wheel | ||
uses: actions/attest-build-provenance@7668571508540a607bdfd90a87a560489fe372eb # v2.1.0 | ||
uses: actions/attest-build-provenance@520d128f165991a6c774bcb264f323e3d70747f4 # v2.2.0 | ||
with: | ||
subject-path: "dist/awkward*cpp-*" | ||
|
||
- uses: pypa/[email protected].3 | ||
- uses: pypa/[email protected].4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,7 +88,7 @@ jobs: | |
run: pipx run twine check dist/* | ||
|
||
- name: Generate artifact attestation for sdist and wheel | ||
uses: actions/attest-build-provenance@7668571508540a607bdfd90a87a560489fe372eb # v2.1.0 | ||
uses: actions/attest-build-provenance@520d128f165991a6c774bcb264f323e3d70747f4 # v2.2.0 | ||
with: | ||
subject-path: "dist/awkward-*" | ||
|
||
|
@@ -135,7 +135,7 @@ jobs: | |
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
run: gh attestation verify dist/awkward-*.whl --repo ${{ github.repository }} | ||
|
||
- uses: pypa/[email protected].3 | ||
- uses: pypa/[email protected].4 | ||
|
||
publish-headers: | ||
name: "Publish header-only libraries alongside release" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE | ||
|
||
from __future__ import annotations | ||
|
||
from collections.abc import Mapping | ||
from functools import reduce | ||
|
||
import awkward as ak | ||
from awkward._dispatch import high_level_function | ||
from awkward._layout import HighLevelContext, ensure_same_backend | ||
from awkward._namedaxis import _get_named_axis, _unify_named_axis | ||
from awkward._nplikes.numpy_like import NumpyMetadata | ||
|
||
__all__ = ("zip_no_broadcast",) | ||
|
||
np = NumpyMetadata.instance() | ||
|
||
|
||
@high_level_function() | ||
def zip_no_broadcast( | ||
arrays, | ||
*, | ||
parameters=None, | ||
with_name=None, | ||
highlevel=True, | ||
behavior=None, | ||
attrs=None, | ||
): | ||
""" | ||
Args: | ||
arrays (mapping or sequence of arrays): Each value in this mapping or | ||
sequence can be any array-like data that #ak.to_layout recognizes. | ||
parameters (None or dict): Parameters for the new | ||
#ak.contents.RecordArray node that is created by this operation. | ||
with_name (None or str): Assigns a `"__record__"` name to the new | ||
#ak.contents.RecordArray node that is created by this operation | ||
(overriding `parameters`, if necessary). | ||
highlevel (bool): If True, return an #ak.Array; otherwise, return | ||
a low-level #ak.contents.Content subclass. | ||
behavior (None or dict): Custom #ak.behavior for the output array, if | ||
high-level. | ||
attrs (None or dict): Custom attributes for the output array, if | ||
high-level. | ||
Combines `arrays` into a single structure as the fields of a collection | ||
of records or the slots of a collection of tuples. | ||
Caution: unlike #ak.zip this function will _not_ broadcast the arrays together. | ||
During typetracing, it assumes that the given arrays have already the same layouts and lengths. | ||
This operation may be thought of as the opposite of projection in | ||
#ak.Array.__getitem__, which extracts fields one at a time, or | ||
#ak.unzip, which extracts them all in one call. | ||
Consider the following arrays, `one` and `two`. | ||
>>> one = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6]]) | ||
>>> two = ak.Array([["a", "b", "c"], [], ["d", "e"], ["f"]]) | ||
Zipping them together using a dict creates a collection of records with | ||
the same nesting structure as `one` and `two`. | ||
>>> ak.zip_no_broadcast({"x": one, "y": two}).show() | ||
[[{x: 1.1, y: 'a'}, {x: 2.2, y: 'b'}, {x: 3.3, y: 'c'}], | ||
[], | ||
[{x: 4.4, y: 'd'}], | ||
[]] | ||
Doing so with a list creates tuples, whose fields are not named. | ||
>>> ak.zip_no_broadcast([one, two]).show() | ||
[[(1.1, 'a'), (2.2, 'b'), (3.3, 'c')], | ||
[], | ||
[(4.4, 'd')], | ||
[]] | ||
See also #ak.zip and #ak.unzip. | ||
""" | ||
# Dispatch | ||
if isinstance(arrays, Mapping): | ||
yield arrays.values() | ||
else: | ||
yield arrays | ||
|
||
# Implementation | ||
return _impl( | ||
arrays, | ||
parameters, | ||
with_name, | ||
highlevel, | ||
behavior, | ||
attrs, | ||
) | ||
|
||
|
||
def _impl( | ||
arrays, | ||
parameters, | ||
with_name, | ||
highlevel, | ||
behavior, | ||
attrs, | ||
): | ||
with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: | ||
if isinstance(arrays, Mapping): | ||
layouts = ensure_same_backend( | ||
*( | ||
ctx.unwrap( | ||
x, | ||
allow_record=False, | ||
allow_unknown=False, | ||
none_policy="pass-through", | ||
primitive_policy="pass-through", | ||
) | ||
for x in arrays.values() | ||
) | ||
) | ||
fields = list(arrays.keys()) | ||
|
||
# propagate named axis from input to output, | ||
# use strategy "unify" (see: awkward._namedaxis) | ||
out_named_axis = reduce( | ||
_unify_named_axis, map(_get_named_axis, arrays.values()) | ||
) | ||
|
||
else: | ||
layouts = ensure_same_backend( | ||
*( | ||
ctx.unwrap( | ||
x, | ||
allow_record=False, | ||
allow_unknown=False, | ||
none_policy="pass-through", | ||
primitive_policy="pass-through", | ||
) | ||
for x in arrays | ||
) | ||
) | ||
fields = None | ||
|
||
# propagate named axis from input to output, | ||
# use strategy "unify" (see: awkward._namedaxis) | ||
out_named_axis = reduce(_unify_named_axis, map(_get_named_axis, arrays)) | ||
|
||
# determine backend | ||
backend = next((b.backend for b in layouts if hasattr(b, "backend")), "cpu") | ||
|
||
if with_name is not None: | ||
if parameters is None: | ||
parameters = {} | ||
else: | ||
parameters = dict(parameters) | ||
parameters["__record__"] = with_name | ||
|
||
# only allow all NumpyArrays and ListOffsetArrays | ||
if all(isinstance(layout, ak.contents.NumpyArray) for layout in layouts): | ||
length = _check_equal_lengths(layouts) | ||
out = ak.contents.RecordArray( | ||
layouts, fields, length=length, parameters=parameters, backend=backend | ||
) | ||
elif all(isinstance(layout, ak.contents.ListOffsetArray) for layout in layouts): | ||
contents = [] | ||
for layout in layouts: | ||
# get the content of the ListOffsetArray | ||
if not isinstance(layout.content, ak.contents.NumpyArray): | ||
raise ValueError( | ||
"can not (unsafe) zip ListOffsetArrays with non-NumpyArray contents" | ||
) | ||
contents.append(layout.content) | ||
|
||
if backend.name == "typetracer": | ||
# just get from the first one | ||
# we're in typetracer mode, so we can't check the offsets (see else branch) | ||
offsets = layouts[0].offsets | ||
else: | ||
# this is at 'runtime' with actual data, that means we can check the offsets, | ||
# but only those that have actual data, i.e. no PlaceholderArrays | ||
# so first, let's filter out any PlaceholderArrays | ||
comparable_offsets = filter( | ||
lambda o: not isinstance(o, ak._nplikes.placeholder.PlaceholderArray), | ||
(layout.offsets for layout in layouts), | ||
) | ||
# check that offsets are the same | ||
first = next(comparable_offsets) | ||
if not all( | ||
first.nplike.all(offsets.data == first.data) | ||
for offsets in comparable_offsets | ||
): | ||
raise ValueError("all ListOffsetArrays must have the same offsets") | ||
offsets = first | ||
|
||
length = _check_equal_lengths(contents) | ||
out = ak.contents.ListOffsetArray( | ||
offsets=offsets, | ||
content=ak.contents.RecordArray( | ||
contents, fields, length=length, parameters=parameters, backend=backend | ||
), | ||
) | ||
else: | ||
raise ValueError( | ||
"all array layouts must be either NumpyArrays or ListOffsetArrays" | ||
) | ||
|
||
# Unify named axes propagated through the broadcast | ||
wrapped_out = ctx.wrap(out, highlevel=highlevel) | ||
return ak.operations.ak_with_named_axis._impl( | ||
wrapped_out, | ||
named_axis=out_named_axis, | ||
highlevel=highlevel, | ||
behavior=ctx.behavior, | ||
attrs=ctx.attrs, | ||
) | ||
|
||
|
||
def _check_equal_lengths( | ||
contents: ak.contents.Content, | ||
) -> int | ak._nplikes.shape.UnknownLength: | ||
length = contents[0].length | ||
for layout in contents: | ||
if layout.length != length: | ||
raise ValueError("all arrays must have the same length") | ||
return length |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE | ||
# ruff: noqa: E402 | ||
|
||
from __future__ import annotations | ||
|
||
import awkward as ak | ||
|
||
|
||
def test_ak_zip_no_broadcast_NumpyArray_dict(): | ||
a = ak.Array([1]) | ||
b = ak.Array([2]) | ||
c = ak.zip_no_broadcast({"a": a, "b": b}) | ||
assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b})) | ||
|
||
|
||
def test_ak_zip_no_broadcast_ListOffsetArray_dict(): | ||
a = ak.Array([[1], []]) | ||
b = ak.Array([[2], []]) | ||
c = ak.zip_no_broadcast({"a": a, "b": b}) | ||
assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b})) | ||
|
||
|
||
def test_ak_zip_no_broadcast_NumpyArray_list(): | ||
a = ak.Array([1]) | ||
b = ak.Array([2]) | ||
c = ak.zip_no_broadcast([a, b]) | ||
assert ak.to_list(c) == ak.to_list(ak.zip([a, b])) | ||
|
||
|
||
def test_ak_zip_no_broadcast_ListOffsetArray_list(): | ||
a = ak.Array([[1], []]) | ||
b = ak.Array([[2], []]) | ||
c = ak.zip_no_broadcast([a, b]) | ||
assert ak.to_list(c) == ak.to_list(ak.zip([a, b])) | ||
|
||
|
||
def test_typetracer_NumpyArray_non_touching(): | ||
tracer = ak.Array([1], backend="typetracer") | ||
|
||
tracer, report = ak.typetracer.typetracer_with_report( | ||
tracer.layout.form_with_key(), highlevel=True | ||
) | ||
|
||
_ = ak.zip_no_broadcast({"foo": tracer, "bar": tracer}) | ||
assert len(report.shape_touched) == 1 | ||
assert len(report.data_touched) == 0 | ||
|
||
|
||
def test_typetracer_ListOffsetArray_non_touching(): | ||
tracer = ak.Array([[1], [], [2, 3]], backend="typetracer") | ||
|
||
tracer, report = ak.typetracer.typetracer_with_report( | ||
tracer.layout.form_with_key(), highlevel=True | ||
) | ||
|
||
_ = ak.zip_no_broadcast({"foo": tracer, "bar": tracer}) | ||
assert len(report.shape_touched) == 1 | ||
assert len(report.data_touched) == 0 |