Skip to content

Commit

Permalink
refactor: verify a user cannot create a dataset that already exists
Browse files Browse the repository at this point in the history
  • Loading branch information
gventuri committed Jan 15, 2025
1 parent 1f0c3b4 commit 9ea50c7
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 22 deletions.
22 changes: 0 additions & 22 deletions examples/test.py

This file was deleted.

9 changes: 9 additions & 0 deletions pandasai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def create(path: str, df: pd.DataFrame, schema: SemanticLayerSchema):
Returns:
DataFrame: A new PandaAI DataFrame instance with loaded data
Raises:
ValueError: If path format is invalid or dataset already exists
"""

# Validate path format
Expand Down Expand Up @@ -65,6 +68,12 @@ def create(path: str, df: pd.DataFrame, schema: SemanticLayerSchema):
find_project_root(), "datasets", org_name, dataset_name
)

# Check if dataset already exists
if os.path.exists(dataset_directory):
schema_path = os.path.join(dataset_directory, "schema.yaml")
if os.path.exists(schema_path):
raise ValueError(f"Dataset already exists at path: {path}")

os.makedirs(dataset_directory, exist_ok=True)

# Save DataFrame to parquet
Expand Down
40 changes: 40 additions & 0 deletions tests/unit_tests/test_pandasai_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,3 +336,43 @@ def test_create_empty_dataset_name(self, sample_df, sample_schema):
ValueError, match="Both organization and dataset names are required"
):
pandasai.create("test-org/", sample_df, sample_schema)

@patch("pandasai.helpers.path.find_project_root")
@patch("os.makedirs")
def test_create_existing_dataset(
    self, mock_makedirs, mock_find_project_root, sample_df, sample_schema
):
    """Test that creating a dataset that already exists raises ValueError.

    Every ``os.path.exists`` lookup is mocked to report True, so both the
    dataset directory and its ``schema.yaml`` appear to be present.
    """
    mock_find_project_root.return_value = "/mock/root"

    # Patch the name bound in the ``pandasai`` namespace as well: patching
    # only the defining module does not affect a reference that was already
    # imported into the module under test.
    with patch("os.path.exists", return_value=True), patch(
        "pandasai.find_project_root", return_value="/mock/root"
    ):
        with pytest.raises(
            ValueError,
            match="Dataset already exists at path: test-org/test-dataset",
        ):
            pandasai.create("test-org/test-dataset", sample_df, sample_schema)

    # The duplicate must be rejected before any directory is created.
    mock_makedirs.assert_not_called()

@patch("pandasai.helpers.path.find_project_root")
@patch("os.makedirs")
def test_create_existing_directory_no_dataset(
    self, mock_makedirs, mock_find_project_root, sample_df, sample_schema
):
    """Test that create() succeeds when the directory exists but holds no dataset.

    ``os.path.exists`` is mocked to report True for everything except paths
    ending in ``schema.yaml`` or ``data.parquet``.
    """
    mock_find_project_root.return_value = "/mock/root"

    dataset_files = ("schema.yaml", "data.parquet")

    def fake_exists(candidate):
        # Only the dataset files are reported missing; any other path exists.
        if candidate.endswith(dataset_files):
            return False
        return True

    exists_patch = patch("os.path.exists", side_effect=fake_exists)
    open_patch = patch("builtins.open", mock_open())
    parquet_patch = patch.object(sample_df, "to_parquet")
    root_patch = patch("pandasai.find_project_root", return_value="/mock/root")

    with exists_patch, \
            open_patch as mock_file, \
            parquet_patch as mock_to_parquet, \
            root_patch:

        result = pandasai.create("test-org/test-dataset", sample_df, sample_schema)

        # The dataset should have been created and written out exactly once.
        assert isinstance(result, DataFrame)
        assert result.name == sample_schema.name
        mock_to_parquet.assert_called_once()
        mock_file.assert_called_once()

0 comments on commit 9ea50c7

Please sign in to comment.