Merge pull request #6 from sgomez/stream-tool
Experimental stream tool support
sgomez authored May 19, 2024
2 parents 08ddec5 + 01991f6 commit 9369baf
Showing 13 changed files with 629 additions and 38 deletions.
86 changes: 64 additions & 22 deletions README.md
@@ -3,6 +3,11 @@
Vercel AI Provider for running Large Language Models locally using Ollama

> **Note: This module is under development and may contain errors and frequent incompatible changes.**
>
> All releases will be of type MAJOR following the 0.MAJOR.MINOR scheme. Only bugs and model updates will be released as MINOR.
> Please read the [Tested models and capabilities](#tested-models-and-capabilities) section to learn which features
> this provider implements.

## Installation

@@ -50,28 +55,65 @@ The first argument is the model id, e.g. `phi3`.
const model = ollama('phi3');
```
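
You can then pass the model to the AI SDK functions. As a minimal, untested sketch (assuming the `phi3` model has
been pulled locally with `ollama pull phi3`):

```ts
import { generateText } from 'ai';
import { ollama } from 'ollama-ai-provider';

const { text } = await generateText({
  model: ollama('phi3'),
  prompt: 'Why is the sky blue?',
});

console.log(text);
```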

### Tested models and capabilities
## Examples

Inside the `examples` folder, you will find example projects that show how the provider works. Each folder
has its own README describing its usage.

## Tested models and capabilities

This provider is capable of generating and streaming text and objects. It does not
support function calling (tools). Object generation may fail depending
This provider is capable of generating and streaming text and objects. Object generation may fail depending
on the model and the schema used.

It has been verified to work with at least the following models:

| Model | Image input | Object generation | Tool usage | Tool streaming |
|------------|--------------------|--------------------|--------------------|----------------|
| llama2 | :x: | :white_check_mark: | :x: | :x: |
| llama3 | :x: | :white_check_mark: | :x: | :x: |
| llava | :white_check_mark: | :white_check_mark: | :x: | :x: |
| mistral | :x: | :white_check_mark: | :x: | :x: |
| mixtral | :x: | :white_check_mark: | :white_check_mark: | :x: |
| openhermes | :x: | :white_check_mark: | :white_check_mark: | :x: |
| phi3 | :x: | :white_check_mark: | :x: | :x: |

### Caveats

* Some models have been found to be slow when streaming objects. See https://github.com/ollama/ollama/issues/3851
* Tool use is not supported by the Ollama API and has been simulated with system prompt injection, so behavior
  can be erratic depending on the model.
* This library is highly experimental and can change constantly. All releases will be of type MAJOR following the
0.MAJOR.MINOR scheme. Only bugs and model updates will be released as MINOR.
It has been tested with at least the following features:

| Image input | Object generation | Tool usage | Tool streaming |
|--------------------|--------------------|------------|----------------|
| :white_check_mark: | :white_check_mark: | :warning: | :warning: |

### Image input

You need a model with visual understanding. These have been tested:

* llava
* llava-llama3
* llava-phi3
* moondream
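
As a rough sketch, image input uses the AI SDK's multimodal content parts. The image path here is hypothetical,
and the image is passed as a buffer:

```ts
import { readFileSync } from 'node:fs';

import { generateText } from 'ai';
import { ollama } from 'ollama-ai-provider';

const { text } = await generateText({
  model: ollama('llava'),
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Describe this image in one sentence.' },
        // Hypothetical local file; URLs and base64 strings are other options.
        { type: 'image', image: readFileSync('./example.png') },
      ],
    },
  ],
});

console.log(text);
```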

### Object generation

> This feature is unstable with some models

Some models are better than others. Also, there is a bug in Ollama that sometimes causes JSON generation to be slow or
end with an error. In my tests, I saw this behavior more often with `llama3` and `phi3` than with models like
`openhermes` and `mistral`, but you can experiment with them too.

More info about the bugs:

* https://github.com/ollama/ollama/issues/3851
* https://github.com/ollama/ollama/pull/3785

Remember that Ollama and this module are free software, so be patient.
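
A minimal, untested sketch of object generation (assuming `mistral` is available locally; simpler schemas tend to
fail less often):

```ts
import { generateObject } from 'ai';
import { ollama } from 'ollama-ai-provider';
import { z } from 'zod';

const { object } = await generateObject({
  model: ollama('mistral'),
  prompt: 'Generate a lasagna recipe.',
  schema: z.object({
    recipe: z.object({
      ingredients: z.array(z.string()),
      name: z.string(),
    }),
  }),
});

console.log(object.recipe.name);
```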

### Tool usage (no streaming)

> This feature is incomplete and unstable

Ollama does not support tool calling, so this provider simulates it with prompt injection. That means this
feature can fail often. Again, it depends on the model you use, and it is closely related to the object
generation issues explained in the previous section.

I recommend `openhermes` or `mistral`, or experiment with your preferred models.
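
A sketch of simulated tool usage. The `weather` tool and its stubbed `execute` are illustrative only; because the
feature is prompt-injection based, the model may not always produce a call:

```ts
import { generateText } from 'ai';
import { ollama } from 'ollama-ai-provider';
import { z } from 'zod';

const { text, toolCalls, toolResults } = await generateText({
  model: ollama('openhermes'),
  prompt: 'What is the weather in San Francisco?',
  tools: {
    weather: {
      description: 'Get the weather for a location.',
      // Stubbed result for illustration; a real tool would call a weather API.
      execute: async ({ location }) => ({ location, temperature: 21 }),
      parameters: z.object({ location: z.string() }),
    },
  },
});

console.log(toolCalls, toolResults, text);
```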


### Tool streaming

> This feature is incomplete and unstable

Again, since Ollama does not support tool calling, we have to simulate the feature. In this case, the problem is worse than
with non-streaming tool usage: we don't have the full response up front to know whether the model has detected a function call,
so we buffer the first characters of the stream before emitting deltas, to detect whether we are in a tool-call flow.

Obviously, this is very buggy and should be used with caution. Right now, you cannot use it in chats or with more than
one tool.
19 changes: 9 additions & 10 deletions examples/ai-core/Makefile
@@ -2,11 +2,6 @@
.DEFAULT_GOAL := all

define RUN_EXAMPLE_TARGET
	echo -- examples/$(subst _,/,$(1))
	pnpm tsx src/$(subst _,/,$(1)).ts > /dev/null
endef

define RUN_EXAMPLE_CHAT
	echo -- examples/$(subst _,/,$(1))
	pnpm tsx src/$(subst _,/,$(1)).ts
endef
@@ -88,35 +83,39 @@ generate-text_ollama-tool-call:


# stream-object
.PHONY: stream-object stream-object-run stream-object-all stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json
.PHONY: stream-object stream-object-run stream-object-all stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json stream-object_ollama-tool
stream-object: stream-object-run stream-object-all
stream-object-run:
	echo - examples/stream-object:
stream-object-all: stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json
stream-object-all: stream-object_ollama stream-object_ollama-fullstream stream-object_ollama-json stream-object_ollama-tool
stream-object_ollama:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-object_ollama-fullstream:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-object_ollama-json:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-object_ollama-tool:
	$(call RUN_EXAMPLE_TARGET,$@)


# stream-text
.PHONY: stream-text stream-text-run stream-text-all stream-text_ollama stream-text_ollama-abort stream-text_ollama-completion stream-text_ollama-completion-chat stream-text_ollama-reader
stream-text: stream-text-run stream-text-all
stream-text-run:
	echo - examples/stream-text:
stream-text-all: stream-text_ollama stream-text_ollama-abort stream-text_ollama-completion stream-text_ollama-completion-chat stream-text_ollama-reader
stream-text-all: stream-text_ollama stream-text_ollama-abort stream-text_ollama-completion stream-text_ollama-completion-chat stream-text_ollama-fullstream stream-text_ollama-reader
stream-text_ollama:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-abort:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-chatbot: # manual
	$(call RUN_EXAMPLE_CHAT,$@)
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-completion:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-completion-chat:
	$(call RUN_EXAMPLE_CHAT,$@)
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-fullstream:
	$(call RUN_EXAMPLE_TARGET,$@)
stream-text_ollama-reader:
	$(call RUN_EXAMPLE_TARGET,$@)

2 changes: 1 addition & 1 deletion examples/ai-core/src/stream-object/ollama-fullstream.ts
@@ -48,4 +48,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
}
}

buildProgram('llama3', main).catch(console.error)
buildProgram('mistral', main).catch(console.error)
2 changes: 1 addition & 1 deletion examples/ai-core/src/stream-object/ollama-json.ts
@@ -32,4 +32,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
}
}

buildProgram('llama3', main).catch(console.error)
buildProgram('mistral', main).catch(console.error)
35 changes: 35 additions & 0 deletions examples/ai-core/src/stream-object/ollama-tool.ts
@@ -0,0 +1,35 @@
#! /usr/bin/env -S pnpm tsx

import { streamObject } from 'ai'
import { ollama } from 'ollama-ai-provider'
import { z } from 'zod'

import { buildProgram } from '../tools/command'

async function main(model: Parameters<typeof ollama>[0]) {
  const result = await streamObject({
    maxTokens: 2000,
    mode: 'tool',
    model: ollama(model),
    prompt:
      'Generate 3 character descriptions for a fantasy role playing game.',
    schema: z.object({
      characters: z.array(
        z.object({
          class: z
            .string()
            .describe('Character class, e.g. warrior, mage, or thief.'),
          description: z.string(),
          name: z.string(),
        }),
      ),
    }),
  })

  for await (const partialObject of result.partialObjectStream) {
    console.clear()
    console.log(partialObject)
  }
}

buildProgram('mistral', main).catch(console.error)
2 changes: 1 addition & 1 deletion examples/ai-core/src/stream-object/ollama.ts
@@ -31,4 +31,4 @@ async function main(model: Parameters<typeof ollama>[0]) {
}
}

buildProgram('llama3', main).catch(console.error)
buildProgram('mistral', main).catch(console.error)
82 changes: 82 additions & 0 deletions examples/ai-core/src/stream-text/ollama-fullstream.ts
@@ -0,0 +1,82 @@
#! /usr/bin/env -S pnpm tsx

import { streamText } from 'ai'
import { ollama } from 'ollama-ai-provider'
import { z } from 'zod'

import { buildProgram } from '../tools/command'
import { weatherTool } from '../tools/weather-tool'

async function main(model: Parameters<typeof ollama>[0]) {
  const result = await streamText({
    model: ollama(model),
    prompt: 'What is the weather in San Francisco?',
    tools: {
      cityAttractions: {
        parameters: z.object({ city: z.string() }),
      },
      weather: weatherTool,
    },
  })

  for await (const part of result.fullStream) {
    switch (part.type) {
      case 'text-delta': {
        console.log('Text delta:', part.textDelta)
        break
      }

      case 'tool-call': {
        switch (part.toolName) {
          case 'cityAttractions': {
            console.log('TOOL CALL cityAttractions')
            console.log(`city: ${part.args.city}`) // string
            break
          }

          case 'weather': {
            console.log('TOOL CALL weather')
            console.log(`location: ${part.args.location}`) // string
            break
          }
        }

        break
      }

      case 'tool-result': {
        switch (part.toolName) {
          // NOT AVAILABLE (NO EXECUTE METHOD)
          // case 'cityAttractions': {
          //   console.log('TOOL RESULT cityAttractions');
          //   console.log(`city: ${part.args.city}`); // string
          //   console.log(`result: ${part.result}`);
          //   break;
          // }

          case 'weather': {
            console.log('TOOL RESULT weather')
            console.log(`location: ${part.args.location}`) // string
            console.log(`temperature: ${part.result.temperature}`) // number
            break
          }
        }

        break
      }

      case 'finish': {
        console.log('Finish reason:', part.finishReason)
        console.log('Usage:', part.usage)
        break
      }

      case 'error': {
        console.error('Error:', part.error)
        break
      }
    }
  }
}

buildProgram('openhermes', main).catch(console.error)
3 changes: 2 additions & 1 deletion packages/ollama/package.json
@@ -27,7 +27,8 @@
"license": "Apache-2.0",
"dependencies": {
"@ai-sdk/provider": "0.0.5",
"@ai-sdk/provider-utils": "0.0.8"
"@ai-sdk/provider-utils": "0.0.8",
"partial-json": "^0.1.7"
},
"devDependencies": {
"@edge-runtime/vm": "^3.2.0",