chore(duckdb): try removing hard pyarrow dependency

ibis-project · Jan 14, 2025 · 35da604
1 parent 6408964
commit 35da604
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 32 deletions.
diff --git a/docs/tutorials/browser/repl.qmd b/docs/tutorials/browser/repl.qmd
@@ -16,9 +16,7 @@ penguins[^1] dataset loaded into the DuckDB backend!
 from urllib.parse import urlencode
 
 lines = """
-%pip install numpy pandas tzdata
-%pip install pyarrow
-%pip install duckdb
+%pip install numpy pandas tzdata duckdb
 import pathlib, js
 penguins_csv_url = "https://storage.googleapis.com/ibis-tutorial-data/penguins.csv"
 pathlib.Path("penguins.csv").write_text(await (await js.fetch(penguins_csv_url)).text())

diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py
@@ -11,8 +11,6 @@
 from typing import TYPE_CHECKING, Any
 
 import duckdb
-import pyarrow as pa
-import pyarrow_hotfix  # noqa: F401
 import sqlglot as sg
 import sqlglot.expressions as sge
 from packaging.version import parse as vparse
@@ -37,6 +35,7 @@
 
     import pandas as pd
     import polars as pl
+    import pyarrow as pa
     import torch
     from fsspec import AbstractFileSystem
 
@@ -1281,6 +1280,9 @@ def to_pyarrow_batches(
         chunk_size
             The number of rows to fetch per batch
         """
+        import pyarrow as pa
+        import pyarrow_hotfix  # noqa: F401
+
         self._run_pre_execute_hooks(expr)
         table = expr.as_table()
         sql = self.compile(table, limit=limit, params=params)
@@ -1312,27 +1314,8 @@ def execute(
         **_: Any,
     ) -> Any:
         """Execute an expression."""
-        import pandas as pd
-        import pyarrow.types as pat
-
-        table = self._to_duckdb_relation(expr, params=params, limit=limit).arrow()
-
-        df = pd.DataFrame(
-            {
-                name: (
-                    col.to_pylist()
-                    if (
-                        pat.is_nested(col.type)
-                        or
-                        # pyarrow / duckdb type null literals columns as int32?
-                        # but calling `to_pylist()` will render it as None
-                        col.null_count
-                    )
-                    else col.to_pandas()
-                )
-                for name, col in zip(table.column_names, table.columns)
-            }
-        )
+        rel = self._to_duckdb_relation(expr, params=params, limit=limit)
+        df = rel.df()
         df = DuckDBPandasData.convert_table(df, expr.as_table().schema())
         return expr.__pandas_result__(df)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -98,8 +98,6 @@ druid = [
 ]
 duckdb = [
   "duckdb>=0.10,<1.2",
-  "pyarrow>=10.0.1",
-  "pyarrow-hotfix>=0.4,<1",
   "numpy>=1.23.2,<3",
   "pandas>=1.5.3,<3",
   "rich>=12.4.4,<14",

diff --git a/uv.lock b/uv.lock