Commit
Implement mock streaming for UI support
stwiname committed Oct 9, 2024
1 parent 8575073 commit 829e6c7
Showing 3 changed files with 109 additions and 9 deletions.
27 changes: 22 additions & 5 deletions docker-compose.yml
@@ -1,20 +1,37 @@
services:
  subql-ai:
-    build:
-      context: .
-      dockerfile: ./Dockerfile
+    image: subquerynetwork/subql-ai-app
+    # build:
+    #   context: .
+    #   dockerfile: ./Dockerfile
    ports:
      - 7827:7827
    restart: unless-stopped
    volumes:
      - ./subquery-delegator:/app
    command:
      - ${SUB_COMMAND:-} # set SUB_COMMAND env variable to "test" to run tests
-      # - -p=/app/index.ts
-      - -p=ipfs://QmXtvuU63gtv9fbEUNh7cKY3cb9U3BwxsFtKuqzNVDdYpt
+      # - -p=/app/index.ts # TODO this doesn't work because dependencies are not copied
+      - -p=ipfs://QmNaNBhXJoFpRJeNQcnTH8Yh6Rf4pzJy6VSnfnQSZHysdZ
      - -h=http://host.docker.internal:11434
    # healthcheck:
    #   test: ["CMD", "curl", "-f", "http://subql-ai:7827/health"]
    #   interval: 3s
    #   timeout: 5s
    #   retries: 10

+  # A simple chat UI
+  ui:
+    image: ghcr.io/open-webui/open-webui:main
+    ports:
+      - 8080:8080
+    restart: always
+    environment:
+      - 'OPENAI_API_BASE_URLS=http://subql-ai:7827/v1'
+      - 'OPENAI_API_KEYS=foobar'
+      - 'WEBUI_AUTH=false'
+    volumes:
+      - open-webui:/app/backend/data
+
+volumes:
+  open-webui:
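
The ui service talks to subql-ai through the OpenAI-compatible API on port 7827. As a quick sanity check outside Open WebUI, the endpoint can be queried directly. Below is a minimal TypeScript sketch, assuming the stack is running with the default port mapping and the "subql-ai-0" model id reported by the new /v1/models route; the prompt text is illustrative only.

// Minimal sketch: verify the OpenAI-compatible endpoint that Open WebUI consumes.
// Assumes the compose stack is up and port 7827 is mapped to localhost.
const res = await fetch("http://localhost:7827/v1/chat/completions", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "subql-ai-0", // id reported by GET /v1/models
    n: 1, // the handler rejects any other value of `n`
    messages: [{ role: "user", content: "How much SQT do I have delegated?" }],
  }),
});
console.log(await res.json()); // a ChatResponse object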
90 changes: 86 additions & 4 deletions src/http.ts
@@ -1,6 +1,7 @@
import { type Static, Type } from "@sinclair/typebox";
import { Value } from "@sinclair/typebox/value";
import { Hono } from "hono";
+import { streamSSE } from "hono/streaming";
import { HTTPException } from "hono/http-exception";
import type { RunnerHost } from "./runnerHost.ts";

@@ -18,7 +19,14 @@ const CompletionChoice = Type.Object({
  index: Type.Integer(),
  message: Message,
  logprobs: Type.Null(),
-  finish_reason: Type.String(),
+  finish_reason: Type.Union([Type.String(), Type.Null()]),
});

+const CompletionChunkChoice = Type.Object({
+  index: Type.Integer(),
+  delta: Message, // OpenAI's delta has more fields, but we're not using them
+  logprobs: Type.Null(),
+  finish_reason: Type.Union([Type.String(), Type.Null()]),
+});

const ChatUsage = Type.Object({
@@ -45,7 +53,17 @@ const ChatResponse = Type.Object({
  usage: ChatUsage,
});

+const ChatChunkResponse = Type.Object({
+  id: Type.String(),
+  model: Type.String(),
+  choices: Type.Array(CompletionChunkChoice),
+  created: Type.Number({ description: "Unix timestamp in seconds" }),
+  object: Type.Literal("chat.completion.chunk"),
+  // usage: ChatUsage, // TODO enable only if stream_options: {"include_usage": true}
+});
+
export type ChatResponse = Static<typeof ChatResponse>;
+export type ChatChunkResponse = Static<typeof ChatChunkResponse>;

/**
* A minimal implementation of https://platform.openai.com/docs/api-reference/chat/create interface
@@ -60,21 +78,62 @@ export function http(
    return c.text("ok");
  });

+  app.get("/v1/models", (c) => {
+    return c.json({
+      object: "list",
+      data: [
+        {
+          id: "subql-ai-0",
+          object: "model",
+          created: new Date().getTime(),
+          owner: "SubQuery",
+        },
+      ],
+    });
+  });

  app.post("/v1/chat/completions", async (c) => {
    try {
      const body = await c.req.json();
      const req = Value.Parse(ChatRequest, body);

-      if (req.stream) {
-        throw new HTTPException(400, { message: "Streaming is not supported" });
-      }
      if (req.n != 1) {
        throw new HTTPException(400, { message: "Only `n` of 1 is supported" });
      }

      const runner = await runnerHost.getAnonymousRunner();
      const chatRes = await runner.promptMessages(req.messages);
+      // Mock streaming: Ollama doesn't currently support streaming with tools. See https://github.com/subquery/subql-ai-app-framework/issues/3
+      if (req.stream) {
+        const parts = chatRes.message.content.split(" ");
+        return streamSSE(c, async (stream) => {
+          for (const [i, part] of parts.entries()) {
+            const last = i == parts.length - 1;
+
+            const res = createChatChunkResponse(
+              part,
+              chatRes.model,
+              chatRes.created_at,
+              last ? "stop" : null,
+            );
+            await stream.writeSSE({ data: JSON.stringify(res) });
+            await stream.sleep(20);
+
+            // Reinsert the whitespace removed by the split
+            if (!last) {
+              const res_space = createChatChunkResponse(
+                " ",
+                chatRes.model,
+                chatRes.created_at,
+              );
+              await stream.writeSSE({ data: JSON.stringify(res_space) });
+              await stream.sleep(20);
+            }
+          }
+        });
+      }

      const response: ChatResponse = {
        id: "0",
        model: chatRes.model,
@@ -100,6 +159,7 @@ export function http(
      };

      Value.Assert(ChatResponse, response);
+
      return c.json(response);
    } catch (e) {
      if (e instanceof HTTPException) {
@@ -112,3 +172,25 @@

  return Deno.serve({ port }, app.fetch);
}

+function createChatChunkResponse(
+  message: string,
+  model: string,
+  createdAt: Date,
+  finish_reason: string | null = null,
+): ChatChunkResponse {
+  const res: ChatChunkResponse = {
+    id: "0",
+    object: "chat.completion.chunk",
+    model,
+    created: new Date(createdAt).getTime() / 1000,
+    choices: [{
+      index: 0,
+      delta: { role: "assistant", content: message },
+      logprobs: null,
+      finish_reason,
+    }],
+  };
+  Value.Assert(ChatChunkResponse, res);
+  return res;
+}
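
Each SSE event above carries a single word, or the whitespace between words, as a chat.completion.chunk, so a client rebuilds the full message by concatenating delta.content until finish_reason is set. Below is a minimal consumer sketch, assuming the server from this commit is listening on localhost:7827 and using the "subql-ai-0" model id from /v1/models.

// Minimal sketch of a client reassembling the mock-streamed response.
// Assumes the server is running locally with the mock streaming above.
const res = await fetch("http://localhost:7827/v1/chat/completions", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "subql-ai-0",
    n: 1,
    stream: true,
    messages: [{ role: "user", content: "Hello" }],
  }),
});

let message = "";
let buffer = "";
const decoder = new TextDecoder();
for await (const chunk of res.body!) {
  buffer += decoder.decode(chunk, { stream: true });
  const lines = buffer.split("\n");
  buffer = lines.pop() ?? ""; // keep any partial line for the next read
  for (const line of lines) {
    if (!line.startsWith("data: ")) continue;
    const payload = JSON.parse(line.slice("data: ".length));
    message += payload.choices[0].delta.content ?? "";
  }
}
console.log(message); // the reassembled completion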
1 change: 1 addition & 0 deletions subquery-delegator/index.ts
@@ -30,6 +30,7 @@ const PROMPT = `
You are an agent designed to help a user with their token delegation on the SubQuery Network.
Given an input question, use the available tools to answer the user's question quickly and concisely.
Your answer must use the result of the tools available.
+Do not mention that you used a tool or the name of a tool.
If you need more information to answer the question, ask the user for more details.
All token amounts are in SQT.
