From f3f698ef9c03dcb1f5b51eed4da830c58336747a Mon Sep 17 00:00:00 2001 From: Arslan Saleem Date: Wed, 5 Feb 2025 11:22:54 +0100 Subject: [PATCH] fix(load): update error message in case of dataset not found locally and missing api keys (#1589) * tests: add tests for config, smart dataframe and smart datalake * fix(dataset): update exception message --------- Co-authored-by: Gabriele Venturi --- pandasai/__init__.py | 21 ++++++++++++++++++--- tests/unit_tests/test_pandasai_init.py | 25 ++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/pandasai/__init__.py b/pandasai/__init__.py index a65ce95e0..ca978a737 100644 --- a/pandasai/__init__.py +++ b/pandasai/__init__.py @@ -212,11 +212,17 @@ def load(dataset_path: str) -> DataFrame: raise ValueError("The path must be in the format 'organization/dataset'.") dataset_full_path = os.path.join(find_project_root(), "datasets", dataset_path) - if not os.path.exists(dataset_full_path): + + local_dataset_exists = os.path.exists(dataset_full_path) + + if not local_dataset_exists: api_key = os.environ.get("PANDABI_API_KEY", None) api_url = os.environ.get("PANDABI_API_URL", DEFAULT_API_URL) + if not api_url or not api_key: - raise PandaAIApiKeyError() + raise PandaAIApiKeyError( + f'The dataset "{dataset_path}" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.' + ) request_session = get_pandaai_session() @@ -232,7 +238,16 @@ def load(dataset_path: str) -> DataFrame: zip_file.extractall(dataset_full_path) loader = DatasetLoader.create_loader_from_path(dataset_path) - return loader.load() + df = loader.load() + + message = ( + "Dataset loaded successfully." + if local_dataset_exists + else "Dataset fetched successfully from the remote server." + ) + print(message) + + return df def read_csv(filepath: str) -> DataFrame: diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py index 3f1cce487..b8e783cbc 100644 --- a/tests/unit_tests/test_pandasai_init.py +++ b/tests/unit_tests/test_pandasai_init.py @@ -137,6 +137,25 @@ def test_load_dataset_not_found(self, mockenviron, mock_bytes_io, mock_zip_file) with pytest.raises(DatasetNotFound): pandasai.load(dataset_path) + @patch("pandasai.os.path.exists") + @patch("pandasai.os.environ", {}) + @patch("pandasai.get_pandaai_session") + def test_load_missing_not_found_locally_and_no_remote_key( + self, mock_session, mock_exists + ): + """Test loading when API URL is missing.""" + mock_exists.return_value = False + mock_response = MagicMock() + mock_response.status_code = 404 + mock_session.return_value.get.return_value = mock_response + dataset_path = "org/dataset_name" + + with pytest.raises( + PandaAIApiKeyError, + match='The dataset "org/dataset_name" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.', + ): + pandasai.load(dataset_path) + @patch("pandasai.os.path.exists") @patch("pandasai.os.environ", {"PANDABI_API_KEY": "key"}) def test_load_missing_api_url(self, mock_exists): @@ -144,13 +163,13 @@ def test_load_missing_api_url(self, mock_exists): mock_exists.return_value = False dataset_path = "org/dataset_name" - with pytest.raises(PandaAIApiKeyError): + with pytest.raises(DatasetNotFound): pandasai.load(dataset_path) @patch("pandasai.os.path.exists") @patch("pandasai.os.environ", {"PANDABI_API_KEY": "key"}) @patch("pandasai.get_pandaai_session") - def test_load_missing_api_url(self, mock_session, mock_exists): + def test_load_missing_not_found(self, mock_session, mock_exists): """Test loading when API URL is missing.""" mock_exists.return_value = False mock_response = MagicMock() @@ -202,7 +221,7 @@ def test_load_without_api_credentials( pandasai.load("test/dataset") assert ( str(exc_info.value) - == "PandaAI API key not found. Please set your API key using PandaAI.set_api_key() or by setting the PANDASAI_API_KEY environment variable." + == 'The dataset "test/dataset" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.' ) def test_clear_cache(self):