From a297a95eed72b09d0fe875e2ac67483e644d5146 Mon Sep 17 00:00:00 2001
From: V4LER11 <work@valerii.cc>
Date: Tue, 17 Dec 2024 13:07:39 +0000
Subject: [PATCH] added grok models

Added the "xai/" provider prefix to the Grok models' resolve_as names and fixed an API key issue.

# Conflicts:
#	refact_known_models/passthrough.py
#	refact_webgui/webgui/selfhost_fastapi_completions.py
#	refact_webgui/webgui/selfhost_model_assigner.py
#	refact_webgui/webgui/selfhost_queue.py
#	refact_webgui/webgui/static/tab-model-hosting.html
#	refact_webgui/webgui/static/tab-model-hosting.js
#	refact_webgui/webgui/static/tab-settings.html
#	refact_webgui/webgui/static/tab-settings.js
#	refact_webgui/webgui/tab_models_host.py
#	refact_webgui/webgui/tab_settings.py
---
 refact_known_models/passthrough.py            | 50 +++++++++++++++++++
 refact_utils/finetune/utils.py                |  3 ++
 .../webgui/selfhost_fastapi_completions.py    |  1 +
 .../webgui/selfhost_model_assigner.py         |  2 +
 refact_webgui/webgui/selfhost_queue.py        |  2 +
 .../webgui/static/tab-model-hosting.html      |  4 ++
 .../webgui/static/tab-model-hosting.js        |  4 +-
 refact_webgui/webgui/static/tab-settings.html |  2 +
 refact_webgui/webgui/static/tab-settings.js   |  4 ++
 refact_webgui/webgui/tab_models_host.py       |  1 +
 refact_webgui/webgui/tab_settings.py          |  1 +
 11 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py
index 4ed43cb7..d94f8af2 100644
--- a/refact_known_models/passthrough.py
+++ b/refact_known_models/passthrough.py
@@ -260,5 +260,55 @@
         "pp1000t_prompt": 1250,  # $1.25 / 1M tokens
         "pp1000t_generated": 5000,  # $5.00 / 1M tokens
         "filter_caps": ["chat", "tools", "completion", "vision"],
+    },
+    # XAI Models
+    # WARNING: token counts are approximate, as there is no publicly available tokenizer for these models.
+    # XAI says that different tokenizers may be used even for the exact same model;
+    # therefore we use the grok-1 tokenizer, which may or may not give close enough results.
+    # T is decreased to avoid token overflow.
+
+    "grok-beta": {
+        "backend": "litellm",
+        "provider": "xai",
+        "tokenizer_path": "Xenova/grok-1-tokenizer",
+        "resolve_as": "xai/grok-beta",
+        "T": 128_000,
+        "T_out": 4096,
+        "pp1000t_prompt": 5_000,
+        "pp1000t_generated": 15_000,  # $15.00 / 1M tokens
+        "filter_caps": ["chat", "completion"],
+    },
+    "grok-vision-beta": {
+        "backend": "litellm",
+        "provider": "xai",
+        "tokenizer_path": "Xenova/grok-1-tokenizer",
+        "resolve_as": "xai/grok-vision-beta",
+        "T": 8_192,
+        "T_out": 4096,
+        "pp1000t_prompt": 5_000,
+        "pp1000t_generated": 15_000,  # $15.00 / 1M tokens
+        "filter_caps": ["chat", "completion", "vision"],
+    },
+    "grok-2-vision-1212": {
+        "backend": "litellm",
+        "provider": "xai",
+        "tokenizer_path": "Xenova/grok-1-tokenizer",
+        "resolve_as": "xai/grok-2-vision-1212",
+        "T": 32_000,
+        "T_out": 4096,
+        "pp1000t_prompt": 2_000,
+        "pp1000t_generated": 10_000,  # $10.00 / 1M tokens
+        "filter_caps": ["chat", "completion", "vision"],
+    },
+    "grok-2-1212": {
+        "backend": "litellm",
+        "provider": "xai",
+        "tokenizer_path": "Xenova/grok-1-tokenizer",
+        "resolve_as": "xai/grok-2-1212",
+        "T": 128_000,
+        "T_out": 4096,
+        "pp1000t_prompt": 2_000,
+        "pp1000t_generated": 10_000,  # $10.00 / 1M tokens
+        "filter_caps": ["chat", "completion"],
     }
 }
diff --git a/refact_utils/finetune/utils.py b/refact_utils/finetune/utils.py
index 81b753e0..69558235 100644
--- a/refact_utils/finetune/utils.py
+++ b/refact_utils/finetune/utils.py
@@ -118,6 +118,9 @@ def _add_results_for_passthrough_provider(provider: str) -> None:
     if data.get('groq_api_enable'):
         _add_results_for_passthrough_provider('groq')
 
+    if data.get('xai_api_enable'):
+        _add_results_for_passthrough_provider('xai')
+
     for k, v in data.get("model_assign", {}).items():
         if model_dict := [d for d in data['models'] if d['name'] == k]:
             model_dict = model_dict[0]
diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py
index 4f9bb8c4..6fbfa0ec 100644
--- a/refact_webgui/webgui/selfhost_fastapi_completions.py
+++ b/refact_webgui/webgui/selfhost_fastapi_completions.py
@@ -234,6 +234,7 @@ def _integrations_env_setup(env_var_name: str, api_key_name: str, api_enable_nam
         _integrations_env_setup("GROQ_API_KEY", "groq_api_key", "groq_api_enable")
         _integrations_env_setup("CEREBRAS_API_KEY", "cerebras_api_key", "cerebras_api_enable")
         _integrations_env_setup("GEMINI_API_KEY", "gemini_api_key", "gemini_api_enable")
+        _integrations_env_setup("XAI_API_KEY", "xai_api_key", "xai_api_enable")
 
     def _models_available_dict_rewrite(self, models_available: List[str]) -> Dict[str, Any]:
         rewrite_dict = {}
diff --git a/refact_webgui/webgui/selfhost_model_assigner.py b/refact_webgui/webgui/selfhost_model_assigner.py
index 0569c65a..0a6a9fa4 100644
--- a/refact_webgui/webgui/selfhost_model_assigner.py
+++ b/refact_webgui/webgui/selfhost_model_assigner.py
@@ -187,6 +187,7 @@ def first_run(self):
             "groq_api_enable": False,
             "cerebras_api_enable": False,
             "gemini_api_enable": False,
+            "xai_api_enable": False,
         }
         self.models_to_watchdog_configs(default_config)
 
@@ -261,6 +262,7 @@ def model_assignment(self):
             j["groq_api_enable"] = j.get("groq_api_enable", False)
             j["cerebras_api_enable"] = j.get("cerebras_api_enable", False)
             j["gemini_api_enable"] = j.get("gemini_api_enable", False)
+            j["xai_api_enable"] = j.get("xai_api_enable", False)
         else:
             j = {"model_assign": {}}
 
diff --git a/refact_webgui/webgui/selfhost_queue.py b/refact_webgui/webgui/selfhost_queue.py
index 13b4a3b0..e401bfe3 100644
--- a/refact_webgui/webgui/selfhost_queue.py
+++ b/refact_webgui/webgui/selfhost_queue.py
@@ -70,6 +70,8 @@ def _add_models_for_passthrough_provider(provider):
                 _add_models_for_passthrough_provider('cerebras')
             if j.get("gemini_api_enable"):
                 _add_models_for_passthrough_provider('gemini')
+            if j.get("xai_api_enable"):
+                _add_models_for_passthrough_provider('xai')
 
         return self._models_available
 
diff --git a/refact_webgui/webgui/static/tab-model-hosting.html b/refact_webgui/webgui/static/tab-model-hosting.html
index ea4a19cc..9d765dda 100644
--- a/refact_webgui/webgui/static/tab-model-hosting.html
+++ b/refact_webgui/webgui/static/tab-model-hosting.html
@@ -50,6 +50,10 @@ <h3>3rd Party APIs</h3>
       <input class="form-check-input" type="checkbox" role="switch" id="enable_gemini">
       <label class="form-check-label" for="enable_gemini">Enable Gemini API</label>
     </div>
+    <div class="form-check form-switch">
+      <input class="form-check-input" type="checkbox" role="switch" id="enable_xai">
+      <label class="form-check-label" for="enable_xai">Enable XAI API</label>
+    </div>
 
     <div class="chat-enabler-status">
       To enable Chat GPT add your API key in the <span id="redirect2credentials" class="main-tab-button fake-link" data-tab="settings">API Keys tab</span>.
diff --git a/refact_webgui/webgui/static/tab-model-hosting.js b/refact_webgui/webgui/static/tab-model-hosting.js
index ed32070a..b1f4dbc1 100644
--- a/refact_webgui/webgui/static/tab-model-hosting.js
+++ b/refact_webgui/webgui/static/tab-model-hosting.js
@@ -120,7 +120,7 @@ function get_models()
         integration_switch_init('enable_groq', models_data['groq_api_enable']);
         integration_switch_init('enable_cerebras', models_data['cerebras_api_enable']);
         integration_switch_init('enable_gemini', models_data['gemini_api_enable']);
-
+        integration_switch_init('enable_xai', models_data['xai_api_enable']);
 
         const more_gpus_notification = document.querySelector('.model-hosting-error');
         if(data.hasOwnProperty('more_models_than_gpus') && data.more_models_than_gpus) {
@@ -147,6 +147,7 @@ function save_model_assigned() {
     const groq_enable = document.querySelector('#enable_groq');
     const cerebras_enable = document.querySelector('#enable_cerebras');
     const gemini_enable = document.querySelector('#enable_gemini');
+    const xai_enable = document.querySelector('#enable_xai');
 
     const data = {
         model_assign: {
@@ -157,6 +158,7 @@ function save_model_assigned() {
         groq_api_enable: groq_enable.checked,
         cerebras_api_enable: cerebras_enable.checked,
         gemini_api_enable: gemini_enable.checked,
+        xai_api_enable: xai_enable.checked,
     };
     console.log(data);
     fetch("/tab-host-models-assign", {
diff --git a/refact_webgui/webgui/static/tab-settings.html b/refact_webgui/webgui/static/tab-settings.html
index 511ac395..916595e0 100644
--- a/refact_webgui/webgui/static/tab-settings.html
+++ b/refact_webgui/webgui/static/tab-settings.html
@@ -12,6 +12,8 @@ <h2>API Integrations</h2>
     <input type="text" name="cerebras_api_key" value="" class="form-control" id="cerebras_api_key">
     <label for="gemini_api_key" class="form-label mt-4">Gemini API Key</label>
     <input type="text" name="gemini_api_key" value="" class="form-control" id="gemini_api_key">
+    <label for="xai_api_key" class="form-label mt-4">XAI API Key</label>
+    <input type="text" name="xai_api_key" value="" class="form-control" id="xai_api_key">
 
 <!--    <div class="d-flex flex-row-reverse mt-3"><button type="button" class="btn btn-primary" id="integrations-save">Save</button></div>-->
   </div>
diff --git a/refact_webgui/webgui/static/tab-settings.js b/refact_webgui/webgui/static/tab-settings.js
index fa4ddc45..802a7b0d 100644
--- a/refact_webgui/webgui/static/tab-settings.js
+++ b/refact_webgui/webgui/static/tab-settings.js
@@ -175,6 +175,7 @@ function save_integration_api_keys() {
     const groq_api_key = document.getElementById('groq_api_key');
     const cerebras_api_key = document.getElementById('cerebras_api_key');
     const gemini_api_key = document.getElementById("gemini_api_key");
+    const xai_api_key = document.getElementById('xai_api_key');
 
     const huggingface_api_key = document.getElementById('huggingface_api_key');
     fetch("/tab-settings-integrations-save", {
@@ -188,6 +189,7 @@ function save_integration_api_keys() {
             groq_api_key: groq_api_key.getAttribute('data-value'),
             cerebras_api_key: cerebras_api_key.getAttribute('data-value'),
             gemini_api_key: gemini_api_key.getAttribute("data-value"),
+            xai_api_key: xai_api_key.getAttribute('data-value'),
 
             huggingface_api_key: huggingface_api_key.getAttribute('data-value'),
         })
@@ -200,6 +202,7 @@ function save_integration_api_keys() {
         groq_api_key.setAttribute('data-saved-value', groq_api_key.getAttribute('data-value'))
         cerebras_api_key.setAttribute('data-saved-value', cerebras_api_key.getAttribute('data-value'))
         gemini_api_key.setAttribute('data-saved-value', gemini_api_key.getAttribute('data-value'))
+        xai_api_key.setAttribute('data-saved-value', xai_api_key.getAttribute('data-value'))
 
         huggingface_api_key.setAttribute('data-saved-value', huggingface_api_key.getAttribute('data-value'))
     });
@@ -237,6 +240,7 @@ export function tab_settings_integrations_get() {
             integrations_input_init(document.getElementById('groq_api_key'), data['groq_api_key']);
             integrations_input_init(document.getElementById('cerebras_api_key'), data['cerebras_api_key']);
             integrations_input_init(document.getElementById('gemini_api_key'), data['gemini_api_key']);
+            integrations_input_init(document.getElementById('xai_api_key'), data['xai_api_key']);
 
             integrations_input_init(document.getElementById('huggingface_api_key'), data['huggingface_api_key']);
         });
diff --git a/refact_webgui/webgui/tab_models_host.py b/refact_webgui/webgui/tab_models_host.py
index 08dda4b3..43f1b776 100644
--- a/refact_webgui/webgui/tab_models_host.py
+++ b/refact_webgui/webgui/tab_models_host.py
@@ -45,6 +45,7 @@ class TabHostModelsAssign(BaseModel):
     groq_api_enable: bool = False
     cerebras_api_enable: bool = False
     gemini_api_enable: bool = False
+    xai_api_enable: bool = False
 
     model_config = ConfigDict(protected_namespaces=())  # avoiding model_ namespace protection
 
diff --git a/refact_webgui/webgui/tab_settings.py b/refact_webgui/webgui/tab_settings.py
index 814c2f0a..23a74aae 100644
--- a/refact_webgui/webgui/tab_settings.py
+++ b/refact_webgui/webgui/tab_settings.py
@@ -25,6 +25,7 @@ class Integrations(BaseModel):
         groq_api_key: Optional[str] = None
         cerebras_api_key: Optional[str] = None
         gemini_api_key: Optional[str] = None
+        xai_api_key: Optional[str] = None
 
         huggingface_api_key: Optional[str] = None