Skip to content

Commit

Permalink
feat: add deepseek-ai/DeepSeek-R1-Distill-Qwen-32B to HuggingChat config (#1660)
Browse files Browse the repository at this point in the history

* feat: add `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B` to HuggingChat config

* feat: hide reasoning modal when it's empty

* fix: put r1 model higher in list
  • Loading branch information
nsarrazin authored Jan 21, 2025
1 parent a8b1563 commit 18bf2e7
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 44 deletions.
108 changes: 73 additions & 35 deletions chart/env/prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -136,23 +136,15 @@ envVars:
]
},
{
"name": "Qwen/QwQ-32B-Preview",
"preprompt": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
"modelUrl": "https://huggingface.co/Qwen/QwQ-32B-Preview",
"websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
"description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
"name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"modelUrl": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"websiteUrl": "https://deepseek.com/",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/deepseek-logo.png",
"description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
"reasoning": {
"type": "summarize"
},
"parameters": {
"stop": ["<|im_end|>"],
"truncate": 12288,
"max_new_tokens": 4096,
"temperature": 0.7,
"top_k": 20,
"top_p": 0.8,
"repetition_penalty": 1.05
"type": "tokens",
"beginToken": "<think>",
"endToken": "</think>"
},
"promptExamples": [
{
Expand All @@ -167,6 +159,12 @@ envVars:
"title": "Measuring 6 liters",
"prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
}
],
"endpoints": [
{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
}
]
},
{
Expand Down Expand Up @@ -196,10 +194,46 @@ envVars:
"prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
}
],
"endpoints": [{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
}]
"endpoints": [
{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
}
]
},
{
"name": "Qwen/QwQ-32B-Preview",
"preprompt": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
"modelUrl": "https://huggingface.co/Qwen/QwQ-32B-Preview",
"websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
      "description": "QwQ is an experimental model from the Qwen Team with advanced reasoning capabilities.",
"reasoning": {
"type": "summarize"
},
"parameters": {
"stop": ["<|im_end|>"],
"truncate": 12288,
"max_new_tokens": 4096,
"temperature": 0.7,
"top_k": 20,
"top_p": 0.8,
"repetition_penalty": 1.05
},
"promptExamples": [
{
"title": "Rs in strawberry",
"prompt": "how many R in strawberry?"
},
{
"title": "Larger number",
"prompt": "9.11 or 9.9 which number is larger?"
},
{
"title": "Measuring 6 liters",
"prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
}
]
},
{
"name": "Qwen/Qwen2.5-Coder-32B-Instruct",
Expand Down Expand Up @@ -228,10 +262,12 @@ envVars:
"prompt": "Generate a snazzy static landing page for a local coffee shop using HTML and CSS. You can use tailwind using <script src='https://cdn.tailwindcss.com'></script>."
}
],
"endpoints": [{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
}]
"endpoints": [
{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
}
]
},
{
"name": "meta-llama/Llama-3.2-11B-Vision-Instruct",
Expand All @@ -245,19 +281,21 @@ envVars:
"truncate": 14336,
"max_new_tokens": 1536
},
"endpoints": [{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
"multimodal": {
"image": {
"maxSizeInMB": 10,
"maxWidth": 560,
"maxHeight": 560,
"supportedMimeTypes": ["image/png", "image/jpeg", "image/webp"],
"preferredMimeType": "image/webp"
"endpoints": [
{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
"multimodal": {
"image": {
"maxSizeInMB": 10,
"maxWidth": 560,
"maxHeight": 560,
"supportedMimeTypes": ["image/png", "image/jpeg", "image/webp"],
"preferredMimeType": "image/webp"
}
}
}
}]
]
},
{
"name": "NousResearch/Hermes-3-Llama-3.1-8B",
Expand Down
2 changes: 1 addition & 1 deletion src/lib/components/chat/ChatMessage.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@
{#if searchUpdates && searchUpdates.length > 0}
<OpenWebSearchResults webSearchMessages={searchUpdates} />
{/if}
{#if reasoningUpdates && reasoningUpdates.length > 0}
{#if reasoningUpdates && reasoningUpdates.length > 0 && message.reasoning && message.reasoning.trim().length > 0}
{@const summaries = reasoningUpdates
.filter((u) => u.subtype === MessageReasoningUpdateType.Status)
.map((u) => u.status)}
Expand Down
29 changes: 21 additions & 8 deletions src/lib/server/textGeneration/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,26 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
finalAnswer = text;
logger.error(e);
}
}
} else if (model.reasoning && model.reasoning.type === "tokens") {
// make sure to remove the content of the reasoning buffer from
// the final answer to avoid duplication
const beginIndex = reasoningBuffer.indexOf(model.reasoning.beginToken);
const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);

yield {
type: MessageUpdateType.FinalAnswer,
text: finalAnswer,
interrupted,
webSources: output.webSources,
};
continue;
if (beginIndex !== -1 && endIndex !== -1) {
// Remove the reasoning section (including tokens) from final answer
finalAnswer =
text.slice(0, beginIndex) + text.slice(endIndex + model.reasoning.endToken.length);
}

yield {
type: MessageUpdateType.FinalAnswer,
text: finalAnswer,
interrupted,
webSources: output.webSources,
};
continue;
}
}

if (model.reasoning && model.reasoning.type === "tokens") {
Expand All @@ -121,6 +132,7 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
subtype: MessageReasoningUpdateType.Status,
status: "Started thinking...",
};
continue;
} else if (output.token.text === model.reasoning.endToken) {
reasoning = false;
reasoningBuffer += output.token.text;
Expand All @@ -129,6 +141,7 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
subtype: MessageReasoningUpdateType.Status,
status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
};
continue;
}
}
// ignore special tokens
Expand Down

0 comments on commit 18bf2e7

Please sign in to comment.