diff --git a/README.md b/README.md
index 1eec8de..2969127 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,11 @@ Vercel AI Provider for running Large Language Models locally using Ollama
 
 > **Note: This module is under development and may contain errors and frequent incompatible changes.**
+>
+> All releases will be of type MAJOR, following the 0.MAJOR.MINOR scheme. Only bug fixes and model updates will be
+> released as MINOR. Please read the [Tested models and capabilities](#tested-models-and-capabilities) section to
+> learn about the features implemented in this provider.
+
 
 ## Installation
 
@@ -50,28 +55,65 @@ The first argument is the model id, e.g. `phi3`.
 const model = ollama('phi3');
 ```
 
-### Tested models and capabilities
+## Examples
+
+The `examples` folder contains example projects that show how the provider works. Each folder has its own README
+describing its usage.
+
+## Tested models and capabilities
 
-This provider is capable of generating and streaming text and objects. It does not
-support function calling (tools). Object generation may fail depending
+This provider is capable of generating and streaming text and objects. Object generation may fail depending
 on the model used and the schema used.
 
-At least it has been verified to work on the following models:
-
-| Model      | Image input        | Object generation  | Tool usage         | Tool streaming |
-|------------|--------------------|--------------------|--------------------|----------------|
-| llama2     | :x:                | :white_check_mark: | :x:                | :x:            |
-| llama3     | :x:                | :white_check_mark: | :x:                | :x:            |
-| llava      | :white_check_mark: | :white_check_mark: | :x:                | :x:            |
-| mistral    | :x:                | :white_check_mark: | :x:                | :x:            |
-| mixtral    | :x:                | :white_check_mark: | :white_check_mark: | :x:            |
-| openhermes | :x:                | :white_check_mark: | :white_check_mark: | :x:            |
-| phi3       | :x:                | :white_check_mark: | :x:                | :x:            |
-
-### Caveats
-
-* Some models have been found to be slow when streaming objects. See https://github.com/ollama/ollama/issues/3851
-* The use of tools is not supported by the Ollama API and has been simulated with system prompt injection, so the behavior
-depending on the model can be erratic.
-* This library is highly experimental and can change constantly. All releases will be of type MAJOR following the
-0.MAJOR.MINOR scheme. Only bugs and model updates will be released as MINOR.
+It has been tested with the following features:
+
+| Image input        | Object generation  | Tool usage | Tool streaming |
+|--------------------|--------------------|------------|----------------|
+| :white_check_mark: | :white_check_mark: | :warning:  | :warning:      |
+
+### Image input
+
+You need a model with visual understanding. These have been tested:
+
+* llava
+* llava-llama3
+* llava-phi3
+* moondream
+
+### Object generation
+
+> This feature is unstable with some models
+
+Some models handle this better than others. There is also a bug in Ollama that sometimes makes JSON generation slow
+or makes it end with an error. In my tests, I saw this behavior with llama3 and phi3 more often than with models like
+`openhermes` and `mistral`, but you can experiment with them too.
+
+More information about these bugs:
+
+* https://github.com/ollama/ollama/issues/3851
+* https://github.com/ollama/ollama/pull/3785
+
+Remember that Ollama and this module are free software, so be patient.
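+
+As a minimal sketch of object generation with this provider (the model name, prompt, and schema are only examples,
+based on the scripts in `examples/ai-core/src/stream-object`):
+
+```ts
+import { streamObject } from 'ai'
+import { ollama } from 'ollama-ai-provider'
+import { z } from 'zod'
+
+const result = await streamObject({
+  model: ollama('mistral'), // example model; try the others listed above
+  prompt: 'Generate a fantasy character.',
+  schema: z.object({ class: z.string(), name: z.string() }),
+})
+
+// Partial objects are emitted as the model streams more JSON.
+for await (const partialObject of result.partialObjectStream) {
+  console.log(partialObject)
+}
+```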
+
+### Tool usage (no streaming)
+
+> This feature is incomplete and unstable
+
+Ollama does not support tool calling, so this provider simulates tool usage with prompt injection. That means this
+feature can fail quite often. Again, it depends on the model you use, and it is closely related to the object
+generation issues explained in the previous section.
+
+I recommend using `openhermes` or `mistral`, or experimenting with your preferred models. A usage sketch follows.
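+
+As a rough sketch (the tool, its return values, and the model name are only examples; see
+`examples/ai-core/src/generate-text` for the real example scripts):
+
+```ts
+import { generateText } from 'ai'
+import { ollama } from 'ollama-ai-provider'
+import { z } from 'zod'
+
+const { text, toolCalls, toolResults } = await generateText({
+  model: ollama('openhermes'), // example model; mistral also works well here
+  prompt: 'What is the weather in San Francisco?',
+  tools: {
+    weather: {
+      description: 'Get the weather for a location',
+      // Example implementation that returns a fixed temperature.
+      execute: async ({ location }: { location: string }) => ({
+        location,
+        temperature: 22,
+      }),
+      parameters: z.object({ location: z.string() }),
+    },
+  },
+})
+```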
+
+### Tool streaming
+
+> This feature is incomplete and unstable
+
+Again, since Ollama does not support tool calling, the feature has to be simulated. In this case, the problem is
+worse than with non-streaming tool usage: we do not have the full response before deciding whether the model is
+making a function call, so the provider waits for the first characters of the stream to detect a tool-call flow
+before sending any deltas.
+
+Obviously, this is fragile and should be used with caution. Right now, you cannot use it in chats or with more than
+one tool.
diff --git a/examples/ai-core/Makefile b/examples/ai-core/Makefile
index fa594af..f3952c1 100644
--- a/examples/ai-core/Makefile
+++ b/examples/ai-core/Makefile
@@ -2,11 +2,6 @@
 .DEFAULT_GOAL := all
 
 define RUN_EXAMPLE_TARGET
-	echo -- examples/$(subst _,/,$(1))
-	pnpm tsx src/$(subst _,/,$(1)).ts > /dev/null
-endef
-
-define RUN_EXAMPLE_CHAT
 	echo -- examples/$(subst _,/,$(1))
 	pnpm tsx src/$(subst _,/,$(1)).ts
 endef
@@ -88,17 +83,19 @@ generate-text_ollama-tool-call:
 
 # stream-object
 
-.PHONY: stream-object stream-object-run stream-object-all stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json
+.PHONY: stream-object stream-object-run stream-object-all stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json stream-object_ollama-tool
 stream-object: stream-object-run stream-object-all
 stream-object-run:
 	echo - examples/stream-object:
-stream-object-all: stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json
+stream-object-all: stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json stream-object_ollama-tool
 stream-object_ollama:
 	$(call RUN_EXAMPLE_TARGET,$@)
 stream-object_ollama-fullstream:
 	$(call RUN_EXAMPLE_TARGET,$@)
 stream-object_ollama-json:
 	$(call RUN_EXAMPLE_TARGET,$@)
+stream-object_ollama-tool:
+	$(call RUN_EXAMPLE_TARGET,$@)
 
 # stream-text
 
@@ -106,17 +103,19 @@ stream-object_ollama-json:
 stream-text: stream-text-run stream-text-all
 stream-text-run:
 	echo - examples/stream-text:
-stream-text-all: stream-text_ollama stream-text_ollama-abort stream-text_ollama-completion stream-text_ollama-completion-chat stream-text_ollama-reader
+stream-text-all: stream-text_ollama stream-text_ollama-abort stream-text_ollama-completion stream-text_ollama-completion-chat stream-text_ollama-fullstream stream-text_ollama-reader
 stream-text_ollama:
 	$(call RUN_EXAMPLE_TARGET,$@)
 stream-text_ollama-abort:
 	$(call RUN_EXAMPLE_TARGET,$@)
 stream-text_ollama-chatbot: # manual
-	$(call RUN_EXAMPLE_CHAT,$@)
+	$(call RUN_EXAMPLE_TARGET,$@)
 stream-text_ollama-completion:
 	$(call RUN_EXAMPLE_TARGET,$@)
 stream-text_ollama-completion-chat:
-	$(call RUN_EXAMPLE_CHAT,$@)
+	$(call RUN_EXAMPLE_TARGET,$@)
+stream-text_ollama-fullstream:
+	$(call RUN_EXAMPLE_TARGET,$@)
 stream-text_ollama-reader:
 	$(call RUN_EXAMPLE_TARGET,$@)
diff --git a/examples/ai-core/src/stream-object/ollama-fullstream.ts b/examples/ai-core/src/stream-object/ollama-fullstream.ts
index 1fa762e..0b0a918 100755
--- a/examples/ai-core/src/stream-object/ollama-fullstream.ts
+++ b/examples/ai-core/src/stream-object/ollama-fullstream.ts
@@ -48,4 +48,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
   }
 }
 
-buildProgram('llama3', main).catch(console.error)
+buildProgram('mistral', main).catch(console.error)
diff --git a/examples/ai-core/src/stream-object/ollama-json.ts b/examples/ai-core/src/stream-object/ollama-json.ts
index 7f25a69..54b33b4 100755
--- a/examples/ai-core/src/stream-object/ollama-json.ts
+++ b/examples/ai-core/src/stream-object/ollama-json.ts
@@ -32,4 +32,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
   }
 }
 
-buildProgram('llama3', main).catch(console.error)
+buildProgram('mistral', main).catch(console.error)
diff --git a/examples/ai-core/src/stream-object/ollama-tool.ts b/examples/ai-core/src/stream-object/ollama-tool.ts
new file mode 100644
index 0000000..d74e5bf
--- /dev/null
+++ b/examples/ai-core/src/stream-object/ollama-tool.ts
@@ -0,0 +1,35 @@
+#! /usr/bin/env -S pnpm tsx
+
+import { streamObject } from 'ai'
+import { ollama } from 'ollama-ai-provider'
+import { z } from 'zod'
+
+import { buildProgram } from '../tools/command'
+
+async function main(model: Parameters<typeof ollama>[0]) {
+  const result = await streamObject({
+    maxTokens: 2000,
+    mode: 'tool',
+    model: ollama(model),
+    prompt:
+      'Generate 3 character descriptions for a fantasy role playing game.',
+    schema: z.object({
+      characters: z.array(
+        z.object({
+          class: z
+            .string()
+            .describe('Character class, e.g. warrior, mage, or thief.'),
+          description: z.string(),
+          name: z.string(),
+        }),
+      ),
+    }),
+  })
+
+  for await (const partialObject of result.partialObjectStream) {
+    console.clear()
+    console.log(partialObject)
+  }
+}
+
+buildProgram('mistral', main).catch(console.error)
diff --git a/examples/ai-core/src/stream-object/ollama.ts b/examples/ai-core/src/stream-object/ollama.ts
index e6a3115..3bae8b1 100755
--- a/examples/ai-core/src/stream-object/ollama.ts
+++ b/examples/ai-core/src/stream-object/ollama.ts
@@ -31,4 +31,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
   }
 }
 
-buildProgram('llama3', main).catch(console.error)
+buildProgram('mistral', main).catch(console.error)
diff --git a/examples/ai-core/src/stream-text/ollama-fullstream.ts b/examples/ai-core/src/stream-text/ollama-fullstream.ts
new file mode 100755
index 0000000..3c5aff6
--- /dev/null
+++ b/examples/ai-core/src/stream-text/ollama-fullstream.ts
@@ -0,0 +1,82 @@
+#! /usr/bin/env -S pnpm tsx
+
+import { streamText } from 'ai'
+import { ollama } from 'ollama-ai-provider'
+import { z } from 'zod'
+
+import { buildProgram } from '../tools/command'
+import { weatherTool } from '../tools/weather-tool'
+
+async function main(model: Parameters<typeof ollama>[0]) {
+  const result = await streamText({
+    model: ollama(model),
+    prompt: 'What is the weather in San Francisco?',
+    tools: {
+      cityAttractions: {
+        parameters: z.object({ city: z.string() }),
+      },
+      weather: weatherTool,
+    },
+  })
+
+  for await (const part of result.fullStream) {
+    switch (part.type) {
+      case 'text-delta': {
+        console.log('Text delta:', part.textDelta)
+        break
+      }
+
+      case 'tool-call': {
+        switch (part.toolName) {
+          case 'cityAttractions': {
+            console.log('TOOL CALL cityAttractions')
+            console.log(`city: ${part.args.city}`) // string
+            break
+          }
+
+          case 'weather': {
+            console.log('TOOL CALL weather')
+            console.log(`location: ${part.args.location}`) // string
+            break
+          }
+        }
+
+        break
+      }
+
+      case 'tool-result': {
+        switch (part.toolName) {
+          // NOT AVAILABLE (NO EXECUTE METHOD)
+          // case 'cityAttractions': {
+          //   console.log('TOOL RESULT cityAttractions');
+          //   console.log(`city: ${part.args.city}`); // string
+          //   console.log(`result: ${part.result}`);
+          //   break;
+          // }
+
+          case 'weather': {
+            console.log('TOOL RESULT weather')
+            console.log(`location: ${part.args.location}`) // string
+            console.log(`temperature: ${part.result.temperature}`) // number
+            break
+          }
+        }
+
+        break
+      }
+
+      case 'finish': {
+        console.log('Finish reason:', part.finishReason)
+        console.log('Usage:', part.usage)
+        break
+      }
+
+      case 'error': {
+        console.error('Error:', part.error)
+        break
+      }
+    }
+  }
+}
+
+buildProgram('openhermes', main).catch(console.error)
diff --git a/packages/ollama/package.json b/packages/ollama/package.json
index 38a5dd0..b21ab2b 100644
--- a/packages/ollama/package.json
+++ b/packages/ollama/package.json
@@ -27,7 +27,8 @@
   "license": "Apache-2.0",
   "dependencies": {
     "@ai-sdk/provider": "0.0.5",
-    "@ai-sdk/provider-utils": "0.0.8"
+    "@ai-sdk/provider-utils": "0.0.8",
+    "partial-json": "^0.1.7"
   },
   "devDependencies": {
     "@edge-runtime/vm": "^3.2.0",
diff --git a/packages/ollama/src/generate-tool/infer-tool-calls-from-stream.test.ts b/packages/ollama/src/generate-tool/infer-tool-calls-from-stream.test.ts
new file mode 100644
index 0000000..2f31013
--- /dev/null
+++ b/packages/ollama/src/generate-tool/infer-tool-calls-from-stream.test.ts
@@ -0,0 +1,188 @@
+import type { LanguageModelV1StreamPart } from '@ai-sdk/provider'
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
+
+import {
+  type CallModeType,
+  InferToolCallsFromStream,
+} from '@/generate-tool/infer-tool-calls-from-stream'
+
+describe('InferToolCallsFromStream', () => {
+  let controller: TransformStreamDefaultController<LanguageModelV1StreamPart>
+
+  beforeEach(() => {
+    controller = {
+      desiredSize: 0,
+      enqueue: vi.fn(),
+      error: vi.fn(),
+      terminate: vi.fn(),
+    }
+  })
+
+  afterEach(() => {
+    vi.resetAllMocks()
+  })
+
+  describe.each<CallModeType[]>([
+    ['object-json'],
+    ['object-grammar'],
+    ['regular'],
+  ])('should ignore no tooling %s mode types', (type) => {
+    it('should return is not a call stream', () => {
+      // Arrange
+      const inferToolCallsFromStream = new InferToolCallsFromStream({
+        type,
+      })
+      const delta = 'Hi!'
+
+      // Act
+      const isToolCallStream = inferToolCallsFromStream.parse({
+        controller,
+        delta,
+      })
+
+      // Assert
+      expect(isToolCallStream).toBeFalsy()
+      expect(inferToolCallsFromStream.detectedToolCall).toBeFalsy()
+      expect(controller.enqueue).not.toBeCalled()
+    })
+
+    it('should return stop finish reason', () => {
+      // Arrange
+      const inferToolCallsFromStream = new InferToolCallsFromStream({
+        type,
+      })
+      const delta = 'Hi!'
+
+      // Act
+      inferToolCallsFromStream.parse({
+        controller,
+        delta,
+      })
+
+      // Assert
+      expect(inferToolCallsFromStream.finish({ controller })).toEqual('stop')
+      expect(controller.enqueue).not.toBeCalled()
+    })
+  })
+
+  describe('should parse object-tool mode calls', () => {
+    it('should detect is a tool call stream', () => {
+      // Arrange
+      const inferToolCallsFromStream = new InferToolCallsFromStream({
+        type: 'object-tool',
+      })
+      const delta = '{'
+
+      // Act
+      const isToolCallStream = inferToolCallsFromStream.parse({
+        controller,
+        delta,
+      })
+
+      // Assert
+      expect(isToolCallStream).toBeTruthy()
+    })
+
+    it('should wait until function name is present', () => {
+      // Arrange
+      const inferToolCallsFromStream = new InferToolCallsFromStream({
+        type: 'object-tool',
+      })
+      const delta = '{ "name":'
+
+      // Act
+      inferToolCallsFromStream.parse({
+        controller,
+        delta,
+      })
+
+      // Assert
+      expect(controller.enqueue).not.toBeCalled()
+    })
+
+    it('should enqueue function name', () => {
+      // Arrange
+      const inferToolCallsFromStream = new InferToolCallsFromStream({
+        type: 'object-tool',
+      })
+      const deltas = ['{ "name":', '"json"', ', "arguments": {']
+
+      // Act
+      deltas.map((delta: string) =>
+        inferToolCallsFromStream.parse({
+          controller,
+          delta,
+        }),
+      )
+
+      // Assert
+      expect(controller.enqueue).toBeCalledWith({
+        argsTextDelta: expect.any(String),
+        toolCallId: expect.any(String),
+        toolCallType: 'function',
+        toolName: 'json',
+        type: 'tool-call-delta',
+      })
+    })
+
+    it('should enqueue tool-call-deltas', () => {
+      // Arrange
+      const inferToolCallsFromStream = new InferToolCallsFromStream({
+        type: 'object-tool',
+      })
+      const deltas = [
+        '{"name":',
+        '"json"',
+        ',"arguments":{',
+        '"foo":"bar"',
+        '}',
+        '}',
+      ]
+
+      // Act
+      deltas.map((delta: string) =>
+        inferToolCallsFromStream.parse({
+          controller,
+          delta,
+        }),
+      )
+
+      // Assert
+      expect(controller.enqueue).toBeCalledTimes(4)
+    })
+
+    it('should enqueue complete tool call at end', () => {
+      // Arrange
+      const inferToolCallsFromStream = new InferToolCallsFromStream({
+        type: 'object-tool',
+      })
+      const deltas = [
+        '{"name":',
+        '"json"',
+        ',"arguments":{',
+        '"foo":"bar"',
+        '}',
+        '}',
+      ]
+
+      // Act
+      deltas.map((delta: string) =>
+        inferToolCallsFromStream.parse({
+          controller,
+          delta,
+        }),
+      )
+      const finishReason = inferToolCallsFromStream.finish({ controller })
+
+      // Assert
+      expect(controller.enqueue).toBeCalledWith({
+        args: JSON.stringify({ foo: 'bar' }),
+        toolCallId: expect.any(String),
+        toolCallType: 'function',
+        toolName: 'json',
+        type: 'tool-call',
+      })
+      expect(finishReason).toEqual('stop')
+    })
+  })
+})
diff --git a/packages/ollama/src/generate-tool/infer-tool-calls-from-stream.ts b/packages/ollama/src/generate-tool/infer-tool-calls-from-stream.ts
new file mode 100644
index 0000000..0df7353
--- /dev/null
+++ b/packages/ollama/src/generate-tool/infer-tool-calls-from-stream.ts
@@ -0,0 +1,136 @@
+import type {
+  LanguageModelV1CallOptions,
+  LanguageModelV1FinishReason,
+  LanguageModelV1StreamPart,
+} from '@ai-sdk/provider'
+import { generateId } from '@ai-sdk/provider-utils'
+import { parse } from 'partial-json'
+
+type ToolCall = {
+  function: {
+    arguments: string
+    name: string
+  }
+  id: string
+  type: 'function'
+}
+
+export type CallModeType = LanguageModelV1CallOptions['mode']['type']
+
+export class InferToolCallsFromStream {
+  private _firstMessage: boolean
+  private readonly _toolCalls: ToolCall[]
+  private _toolPartial: string
+  private readonly _type: CallModeType
+  private _detectedToolCall: boolean
+
+  constructor({ type }: { type: CallModeType }) {
+    this._firstMessage = true
+    this._toolPartial = ''
+    this._toolCalls = []
+    this._type = type
+    this._detectedToolCall = false
+  }
+
+  get toolCalls(): ToolCall[] {
+    return this._toolCalls
+  }
+
+  get detectedToolCall(): boolean {
+    return this._detectedToolCall
+  }
+
+  parse({
+    controller,
+    delta,
+  }: {
+    controller: TransformStreamDefaultController<LanguageModelV1StreamPart>
+    delta: string
+  }): boolean {
+    this.detectToolCall(delta)
+
+    if (!this._detectedToolCall) {
+      return false
+    }
+
+    this._toolPartial += delta
+
+    let parsedFunctions = parse(this._toolPartial)
+    if (!Array.isArray(parsedFunctions)) {
+      parsedFunctions = [parsedFunctions]
+    }
+
+    for (const [index, parsedFunction] of parsedFunctions.entries()) {
+      const parsedArguments = JSON.stringify(parsedFunction?.arguments) ?? ''
+
+      if (parsedArguments === '') {
+        continue
+      }
+
+      if (!this._toolCalls[index]) {
+        this._toolCalls[index] = {
+          function: {
+            arguments: '',
+            name: parsedFunction.name,
+          },
+          id: generateId(),
+          type: 'function',
+        }
+      }
+
+      const toolCall = this._toolCalls[index]
+      toolCall.function.arguments = parsedArguments
+
+      controller.enqueue({
+        argsTextDelta: delta,
+        toolCallId: toolCall.id,
+        toolCallType: 'function',
+        toolName: toolCall.function.name,
+        type: 'tool-call-delta',
+      })
+    }
+
+    return true
+  }
+
+  finish({
+    controller,
+  }: {
+    controller: TransformStreamDefaultController<LanguageModelV1StreamPart>
+  }): LanguageModelV1FinishReason {
+    for (const toolCall of this.toolCalls) {
+      controller.enqueue({
+        args: toolCall.function.arguments,
+        toolCallId: toolCall.id,
+        toolCallType: 'function',
+        toolName: toolCall.function.name,
+        type: 'tool-call',
+      })
+    }
+
+    return this.finishReason()
+  }
+
+  private detectToolCall(delta: string) {
+    if (this._firstMessage) {
+      if (this._type === 'object-tool') {
+        this._detectedToolCall = true
+      } else if (
+        this._type === 'regular' &&
+        (delta.trim().startsWith('{') || delta.trim().startsWith('['))
+      ) {
+        this._detectedToolCall = true
+      }
+
+      this._firstMessage = false
+    }
+  }
+
+  private finishReason(): LanguageModelV1FinishReason {
+    if (!this.detectedToolCall) {
+      return 'stop'
+    }
+
+    return this._type === 'object-tool' ? 'stop' : 'tool-calls'
+  }
+}
diff --git a/packages/ollama/src/ollama-chat-language-model.test.ts b/packages/ollama/src/ollama-chat-language-model.test.ts
index 7d34737..8cd0657 100644
--- a/packages/ollama/src/ollama-chat-language-model.test.ts
+++ b/packages/ollama/src/ollama-chat-language-model.test.ts
@@ -180,6 +180,89 @@ describe('doStream', () => {
     ])
   })
 
+  it('should stream tool deltas', async () => {
+    // Arrange
+
+    server.responseChunks = [
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.077465Z","message":{"role":"assistant","content":"{\\"name\\":"},"done":false}\n`,
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.077465Z","message":{"role":"assistant","content":"\\"json\\","},"done":false}\n`,
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.077465Z","message":{"role":"assistant","content":"\\"argum"},"done":false}\n`,
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.077465Z","message":{"role":"assistant","content":"ents\\":"},"done":false}\n`,
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.077465Z","message":{"role":"assistant","content":"{\\"numb"},"done":false}\n`,
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.077465Z","message":{"role":"assistant","content":"ers\\":"},"done":false}\n`,
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.077465Z","message":{"role":"assistant","content":"[1,2]}"},"done":false}\n`,
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.077465Z","message":{"role":"assistant","content":"}"},"done":false}\n`,
+      `{"model":"llama3","created_at":"2024-05-04T01:59:32.137913Z","message":{"role":"assistant","content":""},"done":true,"total_duration":1820013000,"load_duration":5921416,"prompt_eval_count":10,"prompt_eval_duration":1750224000,"eval_count":10,"eval_duration":60669000}\n`,
+    ]
+    // Act
+    const { stream } = await model.doStream({
+      inputFormat: 'prompt',
+      mode: {
+        tool: {
+          description: 'Test tool',
+          name: 'test-tool',
+          parameters: {
+            $schema: 'http://json-schema.org/draft-07/schema#',
+            additionalProperties: false,
+            properties: { value: { type: 'string' } },
+            required: ['value'],
+            type: 'object',
+          },
+          type: 'function',
+        },
+        type: 'object-tool',
+      },
+      prompt: TEST_PROMPT,
+    })
+
+    // Assert
+    expect(await convertStreamToArray(stream)).toStrictEqual([
+      {
+        argsTextDelta: '{"numb',
+        toolCallId: expect.any(String),
+        toolCallType: 'function',
+        toolName: 'json',
+        type: 'tool-call-delta',
+      },
+      {
+        argsTextDelta: 'ers":',
+        toolCallId: expect.any(String),
+        toolCallType: 'function',
+        toolName: 'json',
+        type: 'tool-call-delta',
+      },
+      {
+        argsTextDelta: '[1,2]}',
+        toolCallId: expect.any(String),
+        toolCallType: 'function',
+        toolName: 'json',
+        type: 'tool-call-delta',
+      },
+      {
+        argsTextDelta: '}',
+        toolCallId: expect.any(String),
+        toolCallType: 'function',
+        toolName: 'json',
+        type: 'tool-call-delta',
+      },
+      {
+        args: '{"numbers":[1,2]}',
+        toolCallId: expect.any(String),
+        toolCallType: 'function',
+        toolName: 'json',
+        type: 'tool-call',
+      },
+      {
+        finishReason: 'stop',
+        type: 'finish',
+        usage: {
+          completionTokens: 10,
+          promptTokens: Number.NaN,
+        },
+      },
+    ])
+  })
+
   it('should expose the raw response headers', async () => {
     prepareStreamResponse({ content: [] })
 
diff --git a/packages/ollama/src/ollama-chat-language-model.ts b/packages/ollama/src/ollama-chat-language-model.ts
index 05e9334..af1b307 100644
--- a/packages/ollama/src/ollama-chat-language-model.ts
+++ b/packages/ollama/src/ollama-chat-language-model.ts
@@ -16,6 +16,7 @@ import { z } from 'zod'
 
 import { convertToOllamaChatMessages } from '@/convert-to-ollama-chat-messages'
 import { inferToolCallsFromResponse } from '@/generate-tool/infer-tool-calls-from-response'
+import { InferToolCallsFromStream } from '@/generate-tool/infer-tool-calls-from-stream'
 import { mapOllamaFinishReason } from '@/map-ollama-finish-reason'
 import { OllamaChatModelId, OllamaChatSettings } from '@/ollama-chat-settings'
 import { ollamaFailedResponseHandler } from '@/ollama-error'
@@ -93,6 +94,7 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
             type: 'function',
           })),
         },
+        type,
         warnings,
       }
     }
@@ -104,6 +106,7 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
           format: 'json',
           messages: convertToOllamaChatMessages(prompt),
         },
+        type,
         warnings,
       }
     }
@@ -133,6 +136,7 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
             },
           ],
         },
+        type,
         warnings,
       }
     }
@@ -194,7 +198,7 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
   async doStream(
     options: Parameters<LanguageModelV1['doStream']>[0],
   ): Promise<Awaited<ReturnType<LanguageModelV1['doStream']>>> {
-    const { args, warnings } = this.getArguments(options)
+    const { args, type, warnings } = this.getArguments(options)
 
     const { responseHeaders, value: response } = await postJsonToApi({
       abortSignal: options.abortSignal,
@@ -208,6 +212,9 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
     })
 
     const { messages: rawPrompt, ...rawSettings } = args
+
+    const inferToolCallsFromStream = new InferToolCallsFromStream({ type })
+
     let finishReason: LanguageModelV1FinishReason = 'other'
     let usage: { completionTokens: number; promptTokens: number } = {
       completionTokens: Number.NaN,
@@ -238,11 +245,21 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
           const value = chunk.value
 
           if (value.done) {
-            finishReason = mapOllamaFinishReason('stop')
+            finishReason = inferToolCallsFromStream.finish({ controller })
             usage = {
               completionTokens: value.eval_count,
               promptTokens: Number.NaN,
             }
+
+            return
+          }
+
+          const isToolCallStream = inferToolCallsFromStream.parse({
+            controller,
+            delta: value.message.content,
+          })
+
+          if (isToolCallStream) {
             return
           }
 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index f2a5160..603772d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -105,6 +105,9 @@ importers:
       '@ai-sdk/provider-utils':
         specifier: 0.0.8
         version: 0.0.8(zod@3.22.4)
+      partial-json:
+        specifier: ^0.1.7
+        version: 0.1.7
     devDependencies:
       '@edge-runtime/vm':
         specifier: ^3.2.0
@@ -2373,6 +2376,9 @@ packages:
     resolution: {integrity: sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==}
     engines: {node: '>=8'}
 
+  partial-json@0.1.7:
+    resolution: {integrity: sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA==}
+
   path-exists@4.0.0:
     resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==}
     engines: {node: '>=8'}
@@ -5664,6 +5670,8 @@ snapshots:
     json-parse-even-better-errors: 2.3.1
     lines-and-columns: 1.2.4
 
+  partial-json@0.1.7: {}
+
   path-exists@4.0.0: {}
 
   path-exists@5.0.0: {}