Merge pull request #6 from sgomez/stream-tool
Experimental stream tool support
sgomez authored May 19, 2024
2 parents 08ddec5 + 01991f6 commit 9369baf
Showing 13 changed files with 629 additions and 38 deletions.
86 changes: 64 additions & 22 deletions README.md
@@ -3,6 +3,11 @@
Vercel AI Provider for running Large Language Models locally using Ollama

> **Note: This module is under development and may contain errors and frequent incompatible changes.**
>
> All releases will be of type MAJOR following the 0.MAJOR.MINOR scheme. Only bugs and model updates will be released as MINOR.
> Please read the [Tested models and capabilities](#tested-models-and-capabilities) section to learn which features
> this provider implements.

## Installation

@@ -50,28 +55,65 @@ The first argument is the model id, e.g. `phi3`.
const model = ollama('phi3');
```
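
You can then pass the model to the AI SDK functions. As a minimal, untested sketch (assuming the `phi3` model has
been pulled locally with `ollama pull phi3`):

```ts
import { generateText } from 'ai';
import { ollama } from 'ollama-ai-provider';

const { text } = await generateText({
  model: ollama('phi3'),
  prompt: 'Why is the sky blue?',
});

console.log(text);
```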

### Tested models and capabilities
## Examples

Inside the `examples` folder, you will find example projects that show how the provider works. Each folder
has its own README describing its usage.

## Tested models and capabilities

This provider is capable of generating and streaming text and objects. It does not
support function calling (tools). Object generation may fail depending
This provider is capable of generating and streaming text and objects. Object generation may fail depending
on the model and the schema used.

It has been verified to work with at least the following models:

| Model | Image input | Object generation | Tool usage | Tool streaming |
|------------|--------------------|--------------------|--------------------|----------------|
| llama2 | :x: | :white_check_mark: | :x: | :x: |
| llama3 | :x: | :white_check_mark: | :x: | :x: |
| llava | :white_check_mark: | :white_check_mark: | :x: | :x: |
| mistral | :x: | :white_check_mark: | :x: | :x: |
| mixtral | :x: | :white_check_mark: | :white_check_mark: | :x: |
| openhermes | :x: | :white_check_mark: | :white_check_mark: | :x: |
| phi3 | :x: | :white_check_mark: | :x: | :x: |

### Caveats

* Some models have been found to be slow when streaming objects. See https://github.com/ollama/ollama/issues/3851
* Tool use is not supported by the Ollama API and has been simulated with system prompt injection, so behavior
  can be erratic depending on the model.
* This library is highly experimental and can change constantly. All releases will be of type MAJOR following the
0.MAJOR.MINOR scheme. Only bugs and model updates will be released as MINOR.
It has been tested with at least the following features:

| Image input | Object generation | Tool usage | Tool streaming |
|--------------------|--------------------|------------|----------------|
| :white_check_mark: | :white_check_mark: | :warning: | :warning: |

### Image input

You need a model with visual understanding. These have been tested:

* llava
* llava-llama3
* llava-phi3
* moondream
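
As a rough sketch, image input uses the AI SDK's multimodal content parts. The image path here is hypothetical,
and the image is passed as a buffer:

```ts
import { readFileSync } from 'node:fs';

import { generateText } from 'ai';
import { ollama } from 'ollama-ai-provider';

const { text } = await generateText({
  model: ollama('llava'),
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Describe this image in one sentence.' },
        // Hypothetical local file; URLs and base64 strings are other options.
        { type: 'image', image: readFileSync('./example.png') },
      ],
    },
  ],
});

console.log(text);
```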

### Object generation

> This feature is unstable with some models

Some models are better than others. Also, there is a bug in Ollama that sometimes causes JSON generation to be slow or
end with an error. In my tests, I saw this behavior more often with `llama3` and `phi3` than with models like
`openhermes` and `mistral`, but you can experiment with them too.

More info about the bugs:

* https://github.com/ollama/ollama/issues/3851
* https://github.com/ollama/ollama/pull/3785

Remember that Ollama and this module are free software, so be patient.
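
A minimal, untested sketch of object generation (assuming `mistral` is available locally; simpler schemas tend to
fail less often):

```ts
import { generateObject } from 'ai';
import { ollama } from 'ollama-ai-provider';
import { z } from 'zod';

const { object } = await generateObject({
  model: ollama('mistral'),
  prompt: 'Generate a lasagna recipe.',
  schema: z.object({
    recipe: z.object({
      ingredients: z.array(z.string()),
      name: z.string(),
    }),
  }),
});

console.log(object.recipe.name);
```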

### Tool usage (no streaming)

> This feature is incomplete and unstable

Ollama does not support tool calling, so this provider simulates it with prompt injection. That means this
feature can fail often. Again, it depends on the model you use, and it is closely related to the object
generation issues explained in the previous section.

I recommend `openhermes` or `mistral`, or experiment with your preferred models.
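
A sketch of simulated tool usage. The `weather` tool and its stubbed `execute` are illustrative only; because the
feature is prompt-injection based, the model may not always produce a call:

```ts
import { generateText } from 'ai';
import { ollama } from 'ollama-ai-provider';
import { z } from 'zod';

const { text, toolCalls, toolResults } = await generateText({
  model: ollama('openhermes'),
  prompt: 'What is the weather in San Francisco?',
  tools: {
    weather: {
      description: 'Get the weather for a location.',
      // Stubbed result for illustration; a real tool would call a weather API.
      execute: async ({ location }) => ({ location, temperature: 21 }),
      parameters: z.object({ location: z.string() }),
    },
  },
});

console.log(toolCalls, toolResults, text);
```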


### Tool streaming

> This feature is incomplete and unstable

Again, since Ollama does not support tool calling, we have to simulate the feature. In this case, the problem is worse than
with non-streaming tool usage: we don't have the full response up front to know whether the model has detected a function call,
so we buffer the first characters of the stream before emitting deltas, to detect whether we are in a tool-call flow.

Obviously, this is very buggy and should be used with caution. Right now, you cannot use it in chats or with more than
one tool.
19 changes: 9 additions & 10 deletions examples/ai-core/Makefile
@@ -2,11 +2,6 @@
.DEFAULT_GOAL := all

define RUN_EXAMPLE_TARGET
	echo -- examples/$(subst _,/,$(1))
	pnpm tsx src/$(subst _,/,$(1)).ts > /dev/null
endef

define RUN_EXAMPLE_CHAT
	echo -- examples/$(subst _,/,$(1))
	pnpm tsx src/$(subst _,/,$(1)).ts
endef
@@ -88,35 +83,39 @@ generate-text_ollama-tool-call:


# stream-object
.PHONY: stream-object stream-object-run stream-object-all stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json
.PHONY: stream-object stream-object-run stream-object-all stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json stream-object_ollama-tool
stream-object: stream-object-run stream-object-all
stream-object-run:
	echo - examples/stream-object:
stream-object-all: stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json
stream-object-all: stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json stream-object_ollama-tool
stream-object_ollama:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-object_ollama-fullstream:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-object_ollama-json:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-object_ollama-tool:
	$(call RUN_EXAMPLE_TARGET,$@)


# stream-text
.PHONY: stream-text stream-text-run stream-text-all stream-text_ollama stream-text_ollama-abort stream-text_ollama-completion stream-text_ollama-completion-chat stream-text_ollama-reader
stream-text: stream-text-run stream-text-all
stream-text-run:
	echo - examples/stream-text:
stream-text-all: stream-text_ollama stream-text_ollama-abort stream-text_ollama-completion stream-text_ollama-completion-chat stream-text_ollama-reader
stream-text-all: stream-text_ollama stream-text_ollama-abort stream-text_ollama-completion stream-text_ollama-completion-chat stream-text_ollama-fullstream stream-text_ollama-reader
stream-text_ollama:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-abort:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-chatbot: # manual
	$(call RUN_EXAMPLE_CHAT,$@)
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-completion:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-completion-chat:
	$(call RUN_EXAMPLE_CHAT,$@)
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-fullstream:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-reader:
	$(call RUN_EXAMPLE_TARGET,$@)

2 changes: 1 addition & 1 deletion examples/ai-core/src/stream-object/ollama-fullstream.ts
@@ -48,4 +48,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
}
}

buildProgram('llama3', main).catch(console.error)
buildProgram('mistral', main).catch(console.error)
2 changes: 1 addition & 1 deletion examples/ai-core/src/stream-object/ollama-json.ts
@@ -32,4 +32,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
}
}

buildProgram('llama3', main).catch(console.error)
buildProgram('mistral', main).catch(console.error)
35 changes: 35 additions & 0 deletions examples/ai-core/src/stream-object/ollama-tool.ts
@@ -0,0 +1,35 @@
#! /usr/bin/env -S pnpm tsx

import { streamObject } from 'ai'
import { ollama } from 'ollama-ai-provider'
import { z } from 'zod'

import { buildProgram } from '../tools/command'

async function main(model: Parameters<typeof ollama>[0]) {
  const result = await streamObject({
    maxTokens: 2000,
    mode: 'tool',
    model: ollama(model),
    prompt:
      'Generate 3 character descriptions for a fantasy role playing game.',
    schema: z.object({
      characters: z.array(
        z.object({
          class: z
            .string()
            .describe('Character class, e.g. warrior, mage, or thief.'),
          description: z.string(),
          name: z.string(),
        }),
      ),
    }),
  })

  for await (const partialObject of result.partialObjectStream) {
    console.clear()
    console.log(partialObject)
  }
}

buildProgram('mistral', main).catch(console.error)
2 changes: 1 addition & 1 deletion examples/ai-core/src/stream-object/ollama.ts
@@ -31,4 +31,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
}
}

buildProgram('llama3', main).catch(console.error)
buildProgram('mistral', main).catch(console.error)
82 changes: 82 additions & 0 deletions examples/ai-core/src/stream-text/ollama-fullstream.ts
@@ -0,0 +1,82 @@
#! /usr/bin/env -S pnpm tsx

import { streamText } from 'ai'
import { ollama } from 'ollama-ai-provider'
import { z } from 'zod'

import { buildProgram } from '../tools/command'
import { weatherTool } from '../tools/weather-tool'

async function main(model: Parameters<typeof ollama>[0]) {
  const result = await streamText({
    model: ollama(model),
    prompt: 'What is the weather in San Francisco?',
    tools: {
      cityAttractions: {
        parameters: z.object({ city: z.string() }),
      },
      weather: weatherTool,
    },
  })

  for await (const part of result.fullStream) {
    switch (part.type) {
      case 'text-delta': {
        console.log('Text delta:', part.textDelta)
        break
      }

      case 'tool-call': {
        switch (part.toolName) {
          case 'cityAttractions': {
            console.log('TOOL CALL cityAttractions')
            console.log(`city: ${part.args.city}`) // string
            break
          }

          case 'weather': {
            console.log('TOOL CALL weather')
            console.log(`location: ${part.args.location}`) // string
            break
          }
        }

        break
      }

      case 'tool-result': {
        switch (part.toolName) {
          // NOT AVAILABLE (NO EXECUTE METHOD)
          // case 'cityAttractions': {
          //   console.log('TOOL RESULT cityAttractions');
          //   console.log(`city: ${part.args.city}`); // string
          //   console.log(`result: ${part.result}`);
          //   break;
          // }

          case 'weather': {
            console.log('TOOL RESULT weather')
            console.log(`location: ${part.args.location}`) // string
            console.log(`temperature: ${part.result.temperature}`) // number
            break
          }
        }

        break
      }

      case 'finish': {
        console.log('Finish reason:', part.finishReason)
        console.log('Usage:', part.usage)
        break
      }

      case 'error': {
        console.error('Error:', part.error)
        break
      }
    }
  }
}

buildProgram('openhermes', main).catch(console.error)
3 changes: 2 additions & 1 deletion packages/ollama/package.json
@@ -27,7 +27,8 @@
"license": "Apache-2.0",
"dependencies": {
"@ai-sdk/provider": "0.0.5",
"@ai-sdk/provider-utils": "0.0.8"
"@ai-sdk/provider-utils": "0.0.8",
"partial-json": "^0.1.7"
},
"devDependencies": {
"@edge-runtime/vm": "^3.2.0",