Skip to content

Commit

Permalink
feat: add to_dicts
Browse files Browse the repository at this point in the history
fixes #9185
  • Loading branch information
NickCrews committed Jan 21, 2025
1 parent dc23b9f commit b402555
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 1 deletion.
38 changes: 38 additions & 0 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,44 @@ def to_delta(
with expr.to_pyarrow_batches(params=params) as batch_reader:
write_deltalake(path, batch_reader, **kwargs)

@util.experimental
def to_dicts(
    self, expr: ir.Table, *, chunk_size: int = 1_000_000
) -> Iterable[dict[str, Any]]:
    """Iterate through each row as a `dict` of column_name -> value.

    Parameters
    ----------
    expr
        The ibis expression to materialize as an iterable of row dictionaries.
    chunk_size
        We materialize the results in chunks of this size, to keep memory
        usage under control. Larger values probably will be faster but
        consume more memory.

    Returns
    -------
    Iterable[dict[str, Any]]
        An iterator of dictionaries, each representing a row in the table.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    >>> list(t.to_dicts())
    [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]

    Single Columns are returned as dictionaries with a single key:

    >>> column = t.i
    >>> list(column.to_dicts())
    [{'i': 1}, {'i': 2}, {'i': 3}]

    See Also
    --------
    [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
    """
    # Use the batch reader as a context manager (as `to_delta` does) so it is
    # closed when iteration finishes *or* when the caller abandons the
    # generator early (e.g. `next(iter(...))` followed by garbage collection).
    with self.to_pyarrow_batches(expr, chunk_size=chunk_size) as reader:
        for batch in reader:
            # Only one batch of Python objects is materialized at a time.
            yield from batch.to_pylist()

@util.experimental
def to_json(
self,
Expand Down
18 changes: 18 additions & 0 deletions ibis/backends/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,24 @@ def test_table_to_csv(tmp_path, backend, awards_players):
backend.assert_frame_equal(awards_players.to_pandas(), df)


@pytest.mark.parametrize("chunk_size", [1, 1000])
def test_to_dicts(con, chunk_size):
    """Check `to_dicts` on a full table, an empty table, and a single column.

    `chunk_size=1` forces one row per batch, while `chunk_size=1000` lets the
    three rows arrive in as few batches as the backend chooses.
    """
    name = "t"
    t = con.create_table(name, ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]}))
    try:
        # Row order of an unordered table scan is backend-dependent, so sort
        # explicitly before comparing against an ordered expectation.
        ordered = t.order_by("i")

        result = list(ordered.to_dicts(chunk_size=chunk_size))
        expected = [{"i": 1, "s": "a"}, {"i": 2, "s": "b"}, {"i": 3, "s": "c"}]
        assert result == expected

        # An empty result set must yield nothing rather than raise.
        result = list(t.limit(0).to_dicts(chunk_size=chunk_size))
        expected = []
        assert result == expected

        # A single column still produces dicts, each with exactly one key.
        result = list(ordered.i.to_dicts(chunk_size=chunk_size))
        expected = [{"i": 1}, {"i": 2}, {"i": 3}]
        assert result == expected
    finally:
        # Do not leak the table into other tests sharing this connection.
        con.drop_table(name, force=True)


@pytest.mark.notimpl(
[
"athena",
Expand Down
37 changes: 36 additions & 1 deletion ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ibis.util import experimental

if TYPE_CHECKING:
from collections.abc import Iterator, Mapping
from collections.abc import Iterable, Iterator, Mapping
from pathlib import Path

import pandas as pd
Expand Down Expand Up @@ -771,6 +771,41 @@ def to_delta(
"""
self._find_backend(use_default=True).to_delta(self, path, **kwargs)

@experimental
def to_dicts(self, *, chunk_size: int = 1_000_000) -> Iterable[dict[str, Any]]:
    """Yield the rows of this expression as `dict`s mapping column name to value.

    The work is delegated to the `to_dicts` method of the backend found for
    this expression (falling back to the default backend).

    Parameters
    ----------
    chunk_size
        Number of rows materialized per chunk, which bounds memory usage.
        Larger values will probably be faster but consume more memory.

    Returns
    -------
    Iterable[dict[str, Any]]
        An iterable of dictionaries, one per row.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    >>> list(t.to_dicts())
    [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]

    A single column produces dictionaries with a single key:

    >>> column = t.i
    >>> list(column.to_dicts())
    [{'i': 1}, {'i': 2}, {'i': 3}]

    See Also
    --------
    [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
    """
    backend = self._find_backend(use_default=True)
    return backend.to_dicts(self, chunk_size=chunk_size)

@experimental
def to_json(
self,
Expand Down

0 comments on commit b402555

Please sign in to comment.