diff --git a/refact_known_models/passthrough.py b/refact_known_models/passthrough.py index 4ed43cb7..d94f8af2 100644 --- a/refact_known_models/passthrough.py +++ b/refact_known_models/passthrough.py @@ -260,5 +260,55 @@ "pp1000t_prompt": 1250, # $1.25 / 1M tokens "pp1000t_generated": 5000, # $5.00 / 1M tokens "filter_caps": ["chat", "tools", "completion", "vision"], + }, + # XAI Models + # WARNING: token counts are approximate because there is no publicly available tokenizer for these models + # XAI says that different tokenizers may be used even for the exact same model + # therefore, the grok-1 tokenizer is used here, which may or may not give close enough results + # T is reduced to avoid token overflow + + "grok-beta": { + "backend": "litellm", + "provider": "xai", + "tokenizer_path": "Xenova/grok-1-tokenizer", + "resolve_as": "xai/grok-beta", + "T": 128_000, + "T_out": 4096, + "pp1000t_prompt": 5_000, + "pp1000t_generated": 15_000, # $15.00 / 1M tokens + "filter_caps": ["chat", "completion"], + }, + "grok-vision-beta": { + "backend": "litellm", + "provider": "xai", + "tokenizer_path": "Xenova/grok-1-tokenizer", + "resolve_as": "xai/grok-vision-beta", + "T": 8_192, + "T_out": 4096, + "pp1000t_prompt": 5_000, + "pp1000t_generated": 15_000, # $15.00 / 1M tokens + "filter_caps": ["chat", "completion", "vision"], + }, + "grok-2-vision-1212": { + "backend": "litellm", + "provider": "xai", + "tokenizer_path": "Xenova/grok-1-tokenizer", + "resolve_as": "xai/grok-2-vision-1212", + "T": 32_000, + "T_out": 4096, + "pp1000t_prompt": 2_000, + "pp1000t_generated": 10_000, # $10.00 / 1M tokens + "filter_caps": ["chat", "completion", "vision"], + }, + "grok-2-1212": { + "backend": "litellm", + "provider": "xai", + "tokenizer_path": "Xenova/grok-1-tokenizer", + "resolve_as": "xai/grok-2-1212", + "T": 128_000, + "T_out": 4096, + "pp1000t_prompt": 2_000, + "pp1000t_generated": 10_000, # $10.00 / 1M tokens + "filter_caps": ["chat", "completion"], } } diff --git 
a/refact_utils/finetune/utils.py b/refact_utils/finetune/utils.py index 81b753e0..69558235 100644 --- a/refact_utils/finetune/utils.py +++ b/refact_utils/finetune/utils.py @@ -118,6 +118,9 @@ def _add_results_for_passthrough_provider(provider: str) -> None: if data.get('groq_api_enable'): _add_results_for_passthrough_provider('groq') + if data.get('xai_api_enable'): + _add_results_for_passthrough_provider('xai') + for k, v in data.get("model_assign", {}).items(): if model_dict := [d for d in data['models'] if d['name'] == k]: model_dict = model_dict[0] diff --git a/refact_webgui/webgui/selfhost_fastapi_completions.py b/refact_webgui/webgui/selfhost_fastapi_completions.py index 4f9bb8c4..6fbfa0ec 100644 --- a/refact_webgui/webgui/selfhost_fastapi_completions.py +++ b/refact_webgui/webgui/selfhost_fastapi_completions.py @@ -234,6 +234,7 @@ def _integrations_env_setup(env_var_name: str, api_key_name: str, api_enable_nam _integrations_env_setup("GROQ_API_KEY", "groq_api_key", "groq_api_enable") _integrations_env_setup("CEREBRAS_API_KEY", "cerebras_api_key", "cerebras_api_enable") _integrations_env_setup("GEMINI_API_KEY", "gemini_api_key", "gemini_api_enable") + _integrations_env_setup("XAI_API_KEY", "xai_api_key", "xai_api_enable") def _models_available_dict_rewrite(self, models_available: List[str]) -> Dict[str, Any]: rewrite_dict = {} diff --git a/refact_webgui/webgui/selfhost_model_assigner.py b/refact_webgui/webgui/selfhost_model_assigner.py index 0569c65a..0a6a9fa4 100644 --- a/refact_webgui/webgui/selfhost_model_assigner.py +++ b/refact_webgui/webgui/selfhost_model_assigner.py @@ -187,6 +187,7 @@ def first_run(self): "groq_api_enable": False, "cerebras_api_enable": False, "gemini_api_enable": False, + "xai_api_enable": False, } self.models_to_watchdog_configs(default_config) @@ -261,6 +262,7 @@ def model_assignment(self): j["groq_api_enable"] = j.get("groq_api_enable", False) j["cerebras_api_enable"] = j.get("cerebras_api_enable", False) j["gemini_api_enable"] = 
j.get("gemini_api_enable", False) + j["xai_api_enable"] = j.get("xai_api_enable", False) else: j = {"model_assign": {}} diff --git a/refact_webgui/webgui/selfhost_queue.py b/refact_webgui/webgui/selfhost_queue.py index 13b4a3b0..e401bfe3 100644 --- a/refact_webgui/webgui/selfhost_queue.py +++ b/refact_webgui/webgui/selfhost_queue.py @@ -70,6 +70,8 @@ def _add_models_for_passthrough_provider(provider): _add_models_for_passthrough_provider('cerebras') if j.get("gemini_api_enable"): _add_models_for_passthrough_provider('gemini') + if j.get("xai_api_enable"): + _add_models_for_passthrough_provider('xai') return self._models_available diff --git a/refact_webgui/webgui/static/tab-model-hosting.html b/refact_webgui/webgui/static/tab-model-hosting.html index ea4a19cc..9d765dda 100644 --- a/refact_webgui/webgui/static/tab-model-hosting.html +++ b/refact_webgui/webgui/static/tab-model-hosting.html @@ -50,6 +50,10 @@

3rd Party APIs

+
+ + +
To enable Chat GPT add your API key in the API Keys tab. diff --git a/refact_webgui/webgui/static/tab-model-hosting.js b/refact_webgui/webgui/static/tab-model-hosting.js index ed32070a..b1f4dbc1 100644 --- a/refact_webgui/webgui/static/tab-model-hosting.js +++ b/refact_webgui/webgui/static/tab-model-hosting.js @@ -120,7 +120,7 @@ function get_models() integration_switch_init('enable_groq', models_data['groq_api_enable']); integration_switch_init('enable_cerebras', models_data['cerebras_api_enable']); integration_switch_init('enable_gemini', models_data['gemini_api_enable']); - + integration_switch_init('enable_xai', models_data['xai_api_enable']); const more_gpus_notification = document.querySelector('.model-hosting-error'); if(data.hasOwnProperty('more_models_than_gpus') && data.more_models_than_gpus) { @@ -147,6 +147,7 @@ function save_model_assigned() { const groq_enable = document.querySelector('#enable_groq'); const cerebras_enable = document.querySelector('#enable_cerebras'); const gemini_enable = document.querySelector('#enable_gemini'); + const xai_enable = document.querySelector('#enable_xai'); const data = { model_assign: { @@ -157,6 +158,7 @@ function save_model_assigned() { groq_api_enable: groq_enable.checked, cerebras_api_enable: cerebras_enable.checked, gemini_api_enable: gemini_enable.checked, + xai_api_enable: xai_enable.checked, }; console.log(data); fetch("/tab-host-models-assign", { diff --git a/refact_webgui/webgui/static/tab-settings.html b/refact_webgui/webgui/static/tab-settings.html index 511ac395..916595e0 100644 --- a/refact_webgui/webgui/static/tab-settings.html +++ b/refact_webgui/webgui/static/tab-settings.html @@ -12,6 +12,8 @@

API Integrations

+ +
diff --git a/refact_webgui/webgui/static/tab-settings.js b/refact_webgui/webgui/static/tab-settings.js index fa4ddc45..802a7b0d 100644 --- a/refact_webgui/webgui/static/tab-settings.js +++ b/refact_webgui/webgui/static/tab-settings.js @@ -175,6 +175,7 @@ function save_integration_api_keys() { const groq_api_key = document.getElementById('groq_api_key'); const cerebras_api_key = document.getElementById('cerebras_api_key'); const gemini_api_key = document.getElementById("gemini_api_key"); + const xai_api_key = document.getElementById('xai_api_key'); const huggingface_api_key = document.getElementById('huggingface_api_key'); fetch("/tab-settings-integrations-save", { @@ -188,6 +189,7 @@ function save_integration_api_keys() { groq_api_key: groq_api_key.getAttribute('data-value'), cerebras_api_key: cerebras_api_key.getAttribute('data-value'), gemini_api_key: gemini_api_key.getAttribute("data-value"), + xai_api_key: xai_api_key.getAttribute('data-value'), huggingface_api_key: huggingface_api_key.getAttribute('data-value'), }) @@ -200,6 +202,7 @@ function save_integration_api_keys() { groq_api_key.setAttribute('data-saved-value', groq_api_key.getAttribute('data-value')) cerebras_api_key.setAttribute('data-saved-value', cerebras_api_key.getAttribute('data-value')) gemini_api_key.setAttribute('data-saved-value', gemini_api_key.getAttribute('data-value')) + xai_api_key.setAttribute('data-saved-value', xai_api_key.getAttribute('data-value')) huggingface_api_key.setAttribute('data-saved-value', huggingface_api_key.getAttribute('data-value')) }); @@ -237,6 +240,7 @@ export function tab_settings_integrations_get() { integrations_input_init(document.getElementById('groq_api_key'), data['groq_api_key']); integrations_input_init(document.getElementById('cerebras_api_key'), data['cerebras_api_key']); integrations_input_init(document.getElementById('gemini_api_key'), data['gemini_api_key']); + integrations_input_init(document.getElementById('xai_api_key'), data['xai_api_key']); 
integrations_input_init(document.getElementById('huggingface_api_key'), data['huggingface_api_key']); }); diff --git a/refact_webgui/webgui/tab_models_host.py b/refact_webgui/webgui/tab_models_host.py index 08dda4b3..43f1b776 100644 --- a/refact_webgui/webgui/tab_models_host.py +++ b/refact_webgui/webgui/tab_models_host.py @@ -45,6 +45,7 @@ class TabHostModelsAssign(BaseModel): groq_api_enable: bool = False cerebras_api_enable: bool = False gemini_api_enable: bool = False + xai_api_enable: bool = False model_config = ConfigDict(protected_namespaces=()) # avoiding model_ namespace protection diff --git a/refact_webgui/webgui/tab_settings.py b/refact_webgui/webgui/tab_settings.py index 814c2f0a..23a74aae 100644 --- a/refact_webgui/webgui/tab_settings.py +++ b/refact_webgui/webgui/tab_settings.py @@ -25,6 +25,7 @@ class Integrations(BaseModel): groq_api_key: Optional[str] = None cerebras_api_key: Optional[str] = None gemini_api_key: Optional[str] = None + xai_api_key: Optional[str] = None huggingface_api_key: Optional[str] = None