Skip to content

Commit

Permalink
chore(duckdb): try removing hard pyarrow dependency
Browse files: browse the repository at this point in the history
  • Loading branch information
cpcloud committed Jan 14, 2025
1 parent 6408964 commit 35da604
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 32 deletions.
4 changes: 1 addition & 3 deletions docs/tutorials/browser/repl.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ penguins[^1] dataset loaded into the DuckDB backend!
from urllib.parse import urlencode
lines = """
%pip install numpy pandas tzdata
%pip install pyarrow
%pip install duckdb
%pip install numpy pandas tzdata duckdb
import pathlib, js
penguins_csv_url = "https://storage.googleapis.com/ibis-tutorial-data/penguins.csv"
pathlib.Path("penguins.csv").write_text(await (await js.fetch(penguins_csv_url)).text())
Expand Down
29 changes: 6 additions & 23 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
from typing import TYPE_CHECKING, Any

import duckdb
import pyarrow as pa
import pyarrow_hotfix # noqa: F401
import sqlglot as sg
import sqlglot.expressions as sge
from packaging.version import parse as vparse
Expand All @@ -37,6 +35,7 @@

import pandas as pd
import polars as pl
import pyarrow as pa
import torch
from fsspec import AbstractFileSystem

Expand Down Expand Up @@ -1281,6 +1280,9 @@ def to_pyarrow_batches(
chunk_size
The number of rows to fetch per batch
"""
import pyarrow as pa
import pyarrow_hotfix # noqa: F401

Check warning on line 1284 in ibis/backends/duckdb/__init__.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/duckdb/__init__.py#L1283-L1284

Added lines #L1283 - L1284 were not covered by tests

self._run_pre_execute_hooks(expr)
table = expr.as_table()
sql = self.compile(table, limit=limit, params=params)
Expand Down Expand Up @@ -1312,27 +1314,8 @@ def execute(
**_: Any,
) -> Any:
"""Execute an expression."""
import pandas as pd
import pyarrow.types as pat

table = self._to_duckdb_relation(expr, params=params, limit=limit).arrow()

df = pd.DataFrame(
{
name: (
col.to_pylist()
if (
pat.is_nested(col.type)
or
# pyarrow / duckdb may type null-literal columns as int32?
# but calling `to_pylist()` will render their values as None
col.null_count
)
else col.to_pandas()
)
for name, col in zip(table.column_names, table.columns)
}
)
rel = self._to_duckdb_relation(expr, params=params, limit=limit)
df = rel.df()

Check warning on line 1318 in ibis/backends/duckdb/__init__.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/duckdb/__init__.py#L1317-L1318

Added lines #L1317 - L1318 were not covered by tests
df = DuckDBPandasData.convert_table(df, expr.as_table().schema())
return expr.__pandas_result__(df)

Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ druid = [
]
duckdb = [
"duckdb>=0.10,<1.2",
"pyarrow>=10.0.1",
"pyarrow-hotfix>=0.4,<1",
"numpy>=1.23.2,<3",
"pandas>=1.5.3,<3",
"rich>=12.4.4,<14",
Expand Down
4 changes: 0 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 35da604

Please sign in to comment.