feat(sandbox): add sandbox support in pai.chat and df.chat (#1595)

* feat(sandbox): add sandbox support in pai.chat and df.chat
* fix: typos in function name and comments
sinaptik-ai · Feb 6, 2025 · be3e158 · be3e158
1 parent 8a0123c
commit be3e158
Show file tree

Hide file tree

Showing 5 changed files with 80 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ You can either decide to use PandaAI in your Jupyter notebooks, Streamlit apps,
 
 ## ☁️ Using the platform
 
-The library can be used alongside our powerful data platform, making end-to-end conversational data analytics possible with as little as a few lines of code. 
+The library can be used alongside our powerful data platform, making end-to-end conversational data analytics possible with as little as a few lines of code.
 
 Load your data, save them as a dataframe, and push them to the platform
 
@@ -36,9 +36,10 @@ dataset = pai.create(path="your-organization/dataset-name",
 
 dataset.push()
 ```
+
 Your team can now access and query this data using natural language through the platform.
 
-![PandaAI](assets/demo.gif) 
+![PandaAI](assets/demo.gif)
 
 ## 📚 Using the library
 
@@ -144,6 +145,54 @@ pai.chat("Who gets paid the most?", employees_df, salaries_df)
 Olivia gets paid the most.
 ```
 
+#### Docker Sandbox
+
+You can run PandaAI in a Docker sandbox, providing a secure, isolated environment to execute code safely and mitigate the risk of malicious attacks.
+
+##### Python Requirements
+
+```bash
+pip install "pandasai-docker"
+```
+
+##### Usage
+
+```python
+import pandasai as pai
+from pandasai_docker import DockerSandbox
+
+# Initialize the sandbox
+sandbox = DockerSandbox()
+sandbox.start()
+
+employees_data = {
+    'EmployeeID': [1, 2, 3, 4, 5],
+    'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],
+    'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']
+}
+
+salaries_data = {
+    'EmployeeID': [1, 2, 3, 4, 5],
+    'Salary': [5000, 6000, 4500, 7000, 5500]
+}
+
+employees_df = pai.DataFrame(employees_data)
+salaries_df = pai.DataFrame(salaries_data)
+
+# By default, unless you choose a different LLM, it will use BambooLLM.
+# You can get your free API key by signing up at https://app.pandabi.ai (you can also configure it in your .env file)
+pai.api_key.set("your-pai-api-key")
+
+pai.chat("Who gets paid the most?", employees_df, salaries_df, sandbox=sandbox)
+
+# Don't forget to stop the sandbox when done
+sandbox.stop()
+```
+
+```
+Olivia gets paid the most.
+```
+
 You can find more examples in the [examples](examples) directory.
 
 ## 📜 License
@@ -161,7 +210,6 @@ If you are interested in managed PandaAI Cloud or self-hosted Enterprise Offerin
 - [Examples](examples) for example notebooks
 - [Discord](https://discord.gg/KYKj9F2FRH) for discussion with the community and PandaAI team
 
-
 ## 🤝 Contributing
 
 Contributions are welcome! Please check the outstanding issues and feel free to open a pull request.

diff --git a/pandasai/__init__.py b/pandasai/__init__.py
@@ -22,6 +22,7 @@
 from pandasai.helpers.path import find_project_root, get_validated_dataset_path
 from pandasai.helpers.session import get_pandaai_session
 from pandasai.query_builders import SqlQueryBuilder
+from pandasai.sandbox.sandbox import Sandbox
 
 from .agent import Agent
 from .constants import LOCAL_SOURCE_TYPES, SQL_SOURCE_TYPES
@@ -158,13 +159,14 @@ def clear_cache(filename: str = None):
     cache.clear()
 
 
-def chat(query: str, *dataframes: DataFrame):
+def chat(query: str, *dataframes: DataFrame, sandbox: Optional[Sandbox] = None):
     """
     Start a new chat interaction with the assistant on Dataframe(s).
 
     Args:
         query (str): The query to run against the dataframes.
         *dataframes: Variable number of dataframes to query.
+        sandbox (Sandbox, optional): The sandbox to execute code securely.
 
     Returns:
         The result of the query.
@@ -173,7 +175,7 @@ def chat(query: str, *dataframes: DataFrame):
     if not dataframes:
         raise ValueError("At least one dataframe must be provided.")
 
-    _current_agent = Agent(list(dataframes))
+    _current_agent = Agent(list(dataframes), sandbox=sandbox)
     return _current_agent.chat(query)
 
 

diff --git a/pandasai/dataframe/base.py b/pandasai/dataframe/base.py
@@ -20,6 +20,7 @@
 from pandasai.exceptions import DatasetNotFound, PandaAIApiKeyError
 from pandasai.helpers.dataframe_serializer import DataframeSerializer
 from pandasai.helpers.session import get_pandaai_session
+from pandasai.sandbox.sandbox import Sandbox
 
 if TYPE_CHECKING:
     from pandasai.agent.base import Agent
@@ -94,12 +95,13 @@ def column_hash(self):
     def type(self) -> str:
         return "pd.DataFrame"
 
-    def chat(self, prompt: str) -> BaseResponse:
+    def chat(self, prompt: str, sandbox: Optional[Sandbox] = None) -> BaseResponse:
         """
         Interact with the DataFrame using natural language.
 
         Args:
             prompt (str): The natural language query or instruction.
+            sandbox (Sandbox, optional): The sandbox to execute code securely.
 
         Returns:
             BaseResponse: The response to the prompt.
@@ -109,7 +111,7 @@ def chat(self, prompt: str) -> BaseResponse:
                 Agent,
             )
 
-            self._agent = Agent([self])
+            self._agent = Agent([self], sandbox=sandbox)
 
         return self._agent.chat(prompt)
 

diff --git a/tests/unit_tests/dataframe/test_dataframe.py b/tests/unit_tests/dataframe/test_dataframe.py
@@ -32,7 +32,18 @@ def test_chat_creates_agent(self, mock_env, mock_agent, sample_dict_data):
         sample_df = DataFrame(sample_dict_data)
         mock_env.return_value = {"PANDABI_API_URL": "localhost:8000"}
         sample_df.chat("Test query")
-        mock_agent.assert_called_once_with([sample_df])
+        mock_agent.assert_called_once_with([sample_df], sandbox=None)
+
+    @patch("pandasai.agent.Agent")
+    @patch("os.environ")
+    def test_chat_creates_agent_with_sandbox(
+        self, mock_env, mock_agent, sample_dict_data
+    ):
+        sandbox = MagicMock()
+        sample_df = DataFrame(sample_dict_data)
+        mock_env.return_value = {"PANDABI_API_URL": "localhost:8000"}
+        sample_df.chat("Test query", sandbox=sandbox)
+        mock_agent.assert_called_once_with([sample_df], sandbox=sandbox)
 
     @patch("pandasai.Agent")
     def test_chat_reuses_existing_agent(self, sample_df):

diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py
@@ -56,7 +56,13 @@ def sqlite_connection_json(self):
     def test_chat_creates_agent(self, sample_df):
         with patch("pandasai.Agent") as MockAgent:
             pandasai.chat("Test query", sample_df)
-            MockAgent.assert_called_once_with([sample_df])
+            MockAgent.assert_called_once_with([sample_df], sandbox=None)
+
+    def test_chat_sandbox_passed_to_agent(self, sample_df):
+        with patch("pandasai.Agent") as MockAgent:
+            sandbox = MagicMock()
+            pandasai.chat("Test query", sample_df, sandbox=sandbox)
+            MockAgent.assert_called_once_with([sample_df], sandbox=sandbox)
 
     def test_chat_without_dataframes_raises_error(self):
         with pytest.raises(ValueError, match="At least one dataframe must be provided"):
@@ -82,7 +88,7 @@ def test_chat_with_multiple_dataframes(self, sample_dataframes):
 
             result = pandasai.chat("What is the sum of column A?", *sample_dataframes)
 
-            MockAgent.assert_called_once_with(sample_dataframes)
+            MockAgent.assert_called_once_with(sample_dataframes, sandbox=None)
             mock_agent_instance.chat.assert_called_once_with(
                 "What is the sum of column A?"
             )
@@ -98,7 +104,7 @@ def test_chat_with_single_dataframe(self, sample_dataframes):
                 "What is the average of column X?", sample_dataframes[1]
             )
 
-            MockAgent.assert_called_once_with([sample_dataframes[1]])
+            MockAgent.assert_called_once_with([sample_dataframes[1]], sandbox=None)
             mock_agent_instance.chat.assert_called_once_with(
                 "What is the average of column X?"
             )