Skip to content

Commit

Permalink
Merge pull request #35 from Supahands/feat/redacted_words
Browse files Browse the repository at this point in the history
Feat/redacted words
  • Loading branch information
EvanZJ authored Nov 20, 2024
2 parents e595354 + d8b87fd commit 17c0446
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{
}
26 changes: 20 additions & 6 deletions ai_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from contextlib import contextmanager
from modal import Image, App, asgi_app, Secret
from modal import Image, App, asgi_app, Secret, gpu
from const import LIST_OF_REDACTED_WORDS

# Configure logging
logging.basicConfig(level=logging.INFO)
Expand All @@ -32,7 +33,13 @@
)

image = Image.debian_slim().pip_install(
["litellm", "supabase", "pydantic==2.5.3", "fastapi==0.109.0", "openai"]
[
"litellm",
"supabase",
"pydantic==2.5.3",
"fastapi==0.109.0",
"openai",
]
)
llm_compare_app = App(
name="llm-compare-api",
Expand All @@ -49,13 +56,13 @@
from litellm import completion
from supabase import create_client, Client
from openai import OpenAIError
import re

# Initialize Supabase client
supabase_url = os.environ["SUPABASE_URL"]
supabase_key = os.environ["SUPABASE_KEY"]
supabase: Client = create_client(supabase_url, supabase_key)


# Pydantic models
class FunctionCall(BaseModel):
name: Optional[str] = None
Expand Down Expand Up @@ -171,7 +178,12 @@ def temporary_env_var(key: str, value: str):
del os.environ[key]
else:
os.environ[key] = original_value


def redact_words(model_name, text, words=None):
    """Wrap every occurrence of a redacted brand name in <redacted> tags.

    Matching is case-insensitive and anchored on word boundaries, so
    "Geminis" is left alone while "gemini" is wrapped. All words are
    combined into a single alternation tried longest-first, which fixes
    two problems with the original per-word loop: multi-word entries
    such as "Mistral AI" could never match once "Mistral" had already
    been wrapped, and the text was re-scanned once per word.

    Args:
        model_name: Unused; kept for backward compatibility with callers.
        text: The model response text to redact.
        words: Optional iterable of words/phrases to redact. Defaults to
            LIST_OF_REDACTED_WORDS from const.py.

    Returns:
        The text with each match wrapped as <redacted>match</redacted>.
    """
    if words is None:
        words = LIST_OF_REDACTED_WORDS
    # Longest-first so multi-word phrases beat their single-word prefixes.
    ordered = sorted(words, key=len, reverse=True)
    if not ordered:
        # An empty alternation would match the empty string at every
        # word boundary; short-circuit instead.
        return text
    alternation = "|".join(re.escape(word) for word in ordered)
    # \g<0> re-inserts the match with its original casing.
    return re.sub(rf"(?i)\b(?:{alternation})\b", r"<redacted>\g<0></redacted>", text)

async def handle_completion(
model_name: str, message: str, api_base: Optional[str] = None
Expand All @@ -193,8 +205,10 @@ async def handle_completion(
)

end_time = time.time()

response_obj.usage.response_time = (end_time - start_time) * 1000


response_obj.choices[0].message.content = redact_words(model_name, response_obj.choices[0].message.content)
# Convert the usage object
response_obj.usage = Usage.from_response(response_obj)

Expand Down Expand Up @@ -229,7 +243,7 @@ async def handle_completion(
responses={
200: {"description": "Successful response with the model's reply."},
400: {"description": "Bad Request. Model or message not provided."},
404: {"description": "Model not supported."},
404: {"description": "Model is not supported."},
500: {"description": "Internal Server Error."},
},
)
Expand Down
16 changes: 16 additions & 0 deletions const.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Provider and model brand names that ai_router's redact_words() wraps in
# <redacted> tags before a completion is returned, so blind comparisons
# can't be skewed by the model naming itself.
# NOTE(review): matching appears to be case-insensitive and word-boundary
# based in the consumer; "Mistral AI" overlaps with "Mistral" — confirm the
# consumer handles the multi-word phrase before the single word.
LIST_OF_REDACTED_WORDS = [
    "Gemini",
    "Google",
    "DeepMind",
    "ChatGPT",
    "OpenAI",
    "LLaMA",
    "Meta",
    "Claude",
    "Anthropic",
    "Qwen",
    "Alibaba",
    "Gemma",
    "Mistral",
    "Mistral AI"
]
1 change: 1 addition & 0 deletions ollama_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
MODEL_IDS: list[str] = [
"llama3",
"llama3.2",
"llama3.2:1b",
"mistral",
"gemma2",
"qwen2.5",
Expand Down

0 comments on commit 17c0446

Please sign in to comment.