Skip to content

Commit

Permalink
feat: add deepseek-ai/DeepSeek-R1-Distill-Qwen-32B to HuggingChat config (#1660)
Browse files Browse the repository at this point in the history

* feat: add `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B` to HuggingChat config

* feat: hide reasoning modal when it's empty

* fix: put r1 model higher in list
  • Loading branch information
nsarrazin authored Jan 21, 2025
1 parent a8b1563 commit 18bf2e7
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 44 deletions.
108 changes: 73 additions & 35 deletions chart/env/prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -136,23 +136,15 @@ envVars:
]
},
{
"name": "Qwen/QwQ-32B-Preview",
"preprompt": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
"modelUrl": "https://huggingface.co/Qwen/QwQ-32B-Preview",
"websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
"description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
"name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"modelUrl": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"websiteUrl": "https://deepseek.com/",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/deepseek-logo.png",
"description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
"reasoning": {
"type": "summarize"
},
"parameters": {
"stop": ["<|im_end|>"],
"truncate": 12288,
"max_new_tokens": 4096,
"temperature": 0.7,
"top_k": 20,
"top_p": 0.8,
"repetition_penalty": 1.05
"type": "tokens",
"beginToken": "<think>",
"endToken": "</think>"
},
"promptExamples": [
{
Expand All @@ -167,6 +159,12 @@ envVars:
"title": "Measuring 6 liters",
"prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
}
],
"endpoints": [
{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
}
]
},
{
Expand Down Expand Up @@ -196,10 +194,46 @@ envVars:
"prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
}
],
"endpoints": [{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
}]
"endpoints": [
{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
}
]
},
{
"name": "Qwen/QwQ-32B-Preview",
"preprompt": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
"modelUrl": "https://huggingface.co/Qwen/QwQ-32B-Preview",
"websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
      "description": "QwQ is an experimental model from the Qwen Team with advanced reasoning capabilities.",
"reasoning": {
"type": "summarize"
},
"parameters": {
"stop": ["<|im_end|>"],
"truncate": 12288,
"max_new_tokens": 4096,
"temperature": 0.7,
"top_k": 20,
"top_p": 0.8,
"repetition_penalty": 1.05
},
"promptExamples": [
{
"title": "Rs in strawberry",
"prompt": "how many R in strawberry?"
},
{
"title": "Larger number",
"prompt": "9.11 or 9.9 which number is larger?"
},
{
"title": "Measuring 6 liters",
"prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
}
]
},
{
"name": "Qwen/Qwen2.5-Coder-32B-Instruct",
Expand Down Expand Up @@ -228,10 +262,12 @@ envVars:
"prompt": "Generate a snazzy static landing page for a local coffee shop using HTML and CSS. You can use tailwind using <script src='https://cdn.tailwindcss.com'></script>."
}
],
"endpoints": [{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
}]
"endpoints": [
{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
}
]
},
{
"name": "meta-llama/Llama-3.2-11B-Vision-Instruct",
Expand All @@ -245,19 +281,21 @@ envVars:
"truncate": 14336,
"max_new_tokens": 1536
},
"endpoints": [{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
"multimodal": {
"image": {
"maxSizeInMB": 10,
"maxWidth": 560,
"maxHeight": 560,
"supportedMimeTypes": ["image/png", "image/jpeg", "image/webp"],
"preferredMimeType": "image/webp"
"endpoints": [
{
"type": "openai",
"baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
"multimodal": {
"image": {
"maxSizeInMB": 10,
"maxWidth": 560,
"maxHeight": 560,
"supportedMimeTypes": ["image/png", "image/jpeg", "image/webp"],
"preferredMimeType": "image/webp"
}
}
}
}]
]
},
{
"name": "NousResearch/Hermes-3-Llama-3.1-8B",
Expand Down
2 changes: 1 addition & 1 deletion src/lib/components/chat/ChatMessage.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@
{#if searchUpdates && searchUpdates.length > 0}
<OpenWebSearchResults webSearchMessages={searchUpdates} />
{/if}
{#if reasoningUpdates && reasoningUpdates.length > 0}
{#if reasoningUpdates && reasoningUpdates.length > 0 && message.reasoning && message.reasoning.trim().length > 0}
{@const summaries = reasoningUpdates
.filter((u) => u.subtype === MessageReasoningUpdateType.Status)
.map((u) => u.status)}
Expand Down
29 changes: 21 additions & 8 deletions src/lib/server/textGeneration/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,26 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
finalAnswer = text;
logger.error(e);
}
}
} else if (model.reasoning && model.reasoning.type === "tokens") {
// make sure to remove the content of the reasoning buffer from
// the final answer to avoid duplication
const beginIndex = reasoningBuffer.indexOf(model.reasoning.beginToken);
const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);

yield {
type: MessageUpdateType.FinalAnswer,
text: finalAnswer,
interrupted,
webSources: output.webSources,
};
continue;
if (beginIndex !== -1 && endIndex !== -1) {
// Remove the reasoning section (including tokens) from final answer
finalAnswer =
text.slice(0, beginIndex) + text.slice(endIndex + model.reasoning.endToken.length);
}

yield {
type: MessageUpdateType.FinalAnswer,
text: finalAnswer,
interrupted,
webSources: output.webSources,
};
continue;
}
}

if (model.reasoning && model.reasoning.type === "tokens") {
Expand All @@ -121,6 +132,7 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
subtype: MessageReasoningUpdateType.Status,
status: "Started thinking...",
};
continue;
} else if (output.token.text === model.reasoning.endToken) {
reasoning = false;
reasoningBuffer += output.token.text;
Expand All @@ -129,6 +141,7 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
subtype: MessageReasoningUpdateType.Status,
status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
};
continue;
}
}
// ignore special tokens
Expand Down

0 comments on commit 18bf2e7

Please sign in to comment.