From 777153a727006d7363201d356569392e10af2f8a Mon Sep 17 00:00:00 2001
From: Nick Crews <nicholas.b.crews@gmail.com>
Date: Tue, 21 Jan 2025 12:16:49 -0900
Subject: [PATCH 1/2] feat: add to_dicts

fixes https://github.com/ibis-project/ibis/issues/9185
---
 ibis/backends/__init__.py          | 38 ++++++++++++++++++++++++++++++
 ibis/backends/tests/test_export.py | 25 ++++++++++++++++++++
 ibis/expr/types/core.py            | 37 ++++++++++++++++++++++++++++-
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py
index c77be762da1a..db07f207f804 100644
--- a/ibis/backends/__init__.py
+++ b/ibis/backends/__init__.py
@@ -586,6 +586,44 @@ def to_delta(
         with expr.to_pyarrow_batches(params=params) as batch_reader:
             write_deltalake(path, batch_reader, **kwargs)
 
+    @util.experimental
+    def to_dicts(
+        self, expr: ir.Expr, *, chunk_size: int = 1_000_000
+    ) -> Iterable[dict[str, Any]]:
+        """Lazily yield each row of `expr` as a `dict` of column name -> value.
+
+        Parameters
+        ----------
+        expr
+            The table or column expression whose rows are yielded as dictionaries.
+        chunk_size
+            Number of rows requested per pyarrow batch; only one batch is converted
+            at a time. Larger values are likely faster but consume more memory.
+
+        Returns
+        -------
+        Iterable[dict[str, Any]]
+            A generator of row dictionaries; the batch reader is closed when it ends.
+
+        Examples
+        --------
+        >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
+        >>> list(t.to_dicts())
+        [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]
+
+        Single columns are returned as dictionaries with a single key:
+
+        >>> column = t.i
+        >>> list(column.to_dicts())
+        [{'i': 1}, {'i': 2}, {'i': 3}]
+
+        See Also
+        --------
+        [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
+        """
+        with self.to_pyarrow_batches(expr, chunk_size=chunk_size) as batches:
+            yield from (row for batch in batches for row in batch.to_pylist())
+
     @util.experimental
     def to_json(
         self,
diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py
index f2de3959b9fc..d93ee721109d 100644
--- a/ibis/backends/tests/test_export.py
+++ b/ibis/backends/tests/test_export.py
@@ -350,6 +350,31 @@ def test_table_to_csv(tmp_path, backend, awards_players):
     backend.assert_frame_equal(awards_players.to_pandas(), df)
 
 
+@pytest.mark.parametrize("chunk_size", [1, 1000])
+def test_to_dicts(chunk_size, awards_players):
+    t = (
+        awards_players.select("playerID", "yearID")
+        .order_by("playerID", "yearID")
+        .limit(3)
+    )
+
+    result = list(t.to_dicts(chunk_size=chunk_size))  # one dict per row
+    expected = [
+        {"playerID": "aaronha01", "yearID": 1956},
+        {"playerID": "aaronha01", "yearID": 1956},
+        {"playerID": "aaronha01", "yearID": 1957},
+    ]
+    assert result == expected
+
+    result = list(t.limit(0).to_dicts(chunk_size=chunk_size))  # no rows -> []
+    expected = []
+    assert result == expected
+
+    result = list(t.yearID.to_dicts(chunk_size=chunk_size))  # column -> 1-key dicts
+    expected = [{"yearID": 1956}, {"yearID": 1956}, {"yearID": 1957}]
+    assert result == expected
+
+
 @pytest.mark.notimpl(
     [
         "athena",
diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py
index 359cbcd83f70..94f1b069d026 100644
--- a/ibis/expr/types/core.py
+++ b/ibis/expr/types/core.py
@@ -20,7 +20,7 @@
 from ibis.util import experimental
 
 if TYPE_CHECKING:
-    from collections.abc import Iterator, Mapping
+    from collections.abc import Iterable, Iterator, Mapping
     from pathlib import Path
 
     import pandas as pd
@@ -771,6 +771,41 @@ def to_delta(
         """
         self._find_backend(use_default=True).to_delta(self, path, **kwargs)
 
+    @experimental
+    def to_dicts(self, *, chunk_size: int = 1_000_000) -> Iterable[dict[str, Any]]:
+        """Yield the rows of this expression as `dict`s of column name -> value.
+
+        Parameters
+        ----------
+        chunk_size
+            Forwarded to the backend, which materializes the results in chunks of
+            this size. Larger values are likely faster but consume more memory.
+
+        Returns
+        -------
+        Iterable[dict[str, Any]]
+            An iterable of dictionaries, each representing one row of the result.
+
+        Examples
+        --------
+        >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
+        >>> list(t.to_dicts())
+        [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]
+
+        Single columns are returned as dictionaries with a single key:
+
+        >>> column = t.i
+        >>> list(column.to_dicts())
+        [{'i': 1}, {'i': 2}, {'i': 3}]
+
+        See Also
+        --------
+        [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
+        """
+        backend = self._find_backend(use_default=True)
+        rows = backend.to_dicts(self, chunk_size=chunk_size)
+        return rows
+
     @experimental
     def to_json(
         self,

From 5c951704cb5b5ba865246de0ca2520a9a322aa70 Mon Sep 17 00:00:00 2001
From: Nick Crews <nicholas.b.crews@gmail.com>
Date: Tue, 21 Jan 2025 15:31:53 -0900
Subject: [PATCH 2/2] test: mark druid as notimpl

---
 ibis/backends/tests/test_export.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py
index d93ee721109d..1582c2b4486c 100644
--- a/ibis/backends/tests/test_export.py
+++ b/ibis/backends/tests/test_export.py
@@ -350,6 +350,11 @@ def test_table_to_csv(tmp_path, backend, awards_players):
     backend.assert_frame_equal(awards_players.to_pandas(), df)
 
 
+@pytest.mark.notimpl(
+    ["druid"],
+    raises=PyDruidProgrammingError,
+    reason="druid can only order by time columns",
+)
 @pytest.mark.parametrize("chunk_size", [1, 1000])
 def test_to_dicts(chunk_size, awards_players):
     t = (