From 3420f07ddcc2a6ada2124f6e3e8705110664d578 Mon Sep 17 00:00:00 2001
From: dayland <dayland@microsoft.com>
Date: Tue, 2 Jul 2024 02:05:24 +0100
Subject: [PATCH] Add extra warning documentation for allow_dangerous_code flag

---
 app/backend/approaches/tabulardataassistant.py | 6 ++++++
 docs/features/features.md                      | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/app/backend/approaches/tabulardataassistant.py b/app/backend/approaches/tabulardataassistant.py
index fedf946f..d55a5c86 100644
--- a/app/backend/approaches/tabulardataassistant.py
+++ b/app/backend/approaches/tabulardataassistant.py
@@ -106,6 +106,12 @@ def process_agent_scratch_pad(question, df):
     deployment_name=OPENAI_DEPLOYMENT_NAME)  
          
     question = save_chart(question)
+    # This agent relies on access to a Python REPL tool, which can execute arbitrary code.
+    # This can be dangerous and requires a specially sandboxed environment to be used safely.
+    # Failure to properly sandbox this agent can lead to arbitrary code execution vulnerabilities,
+    # which can lead to data breaches, data loss, or other security incidents. Using this
+    # functionality requires an explicit opt-in, done below by setting allow_dangerous_code=True.
+    # https://api.python.langchain.com/en/latest/agents/langchain_experimental.agents.agent_toolkits.pandas.base.create_pandas_dataframe_agent.html
     pdagent = create_pandas_dataframe_agent(chat, df, verbose=True,agent_type=AgentType.OPENAI_FUNCTIONS,allow_dangerous_code=True , handle_parsing_errors=True )
     for chunk in pdagent.stream({"input": question}):
         if "actions" in chunk:
diff --git a/docs/features/features.md b/docs/features/features.md
index 138fcf61..b50d2fac 100644
--- a/docs/features/features.md
+++ b/docs/features/features.md
@@ -97,6 +97,12 @@ To learn more, please visit the [Cognitive Search](/docs/features/cognitive_sear
 
 We are rolling out the Math Assistant and Tabular Data Assistant in a preview mode. The Math Assistant combines natural language understanding with robust mathematical reasoning, enabling users to express mathematical queries in plain language and receive step-by-step solutions and insights.The Tabular Data Assistants allows users to ask natural language questions about tabular data stored in CSV files and extract insights from structured datasets with the ability to filter, aggregate, and perform computations on CSV data. The key strength of Agents lies in their ability to autonomously reason about tasks, decompose them into steps, and determine the appropriate tools and data sources to leverage, all without the need for predefined task definitions or rigid workflows.The Math Assistant and Tabular Data assistant are being released in preview mode as we continue to evaluate and mitigate the potential risks associated with autonomous reasoning Agents, such as misuse of external tools, lack of transparency, biased outputs, privacy concerns, and remote code execution vulnerabilities. With future release we plan work to enhance the safety and robustness of these autonomous reasoning capabilities.
 
+### :warning: Security Notice
+
+The Tabular Data Assistant relies on access to a Python REPL tool, which can execute arbitrary code. This can be dangerous and requires a specially sandboxed environment to be used safely. Failure to run this code in a properly sandboxed environment can lead to arbitrary code execution vulnerabilities, which can lead to data breaches, data loss, or other security incidents.
+
+Do not use this code with untrusted inputs, with elevated permissions, or without consulting your security team about proper sandboxing!
+
 ## Customization and Personalization
 
 **User-Selectable Options:** Users can fine-tune their interactions by adjusting settings such as temperature and persona, tailoring the AI experience to their specific needs.