diff --git a/docker-compose.yml b/docker-compose.yml index 562da24..7f57bcf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,9 @@ services: subql-ai: - build: - context: . - dockerfile: ./Dockerfile + image: subquerynetwork/subql-ai-app + # build: + # context: . + # dockerfile: ./Dockerfile ports: - 7827:7827 restart: unless-stopped @@ -10,11 +11,27 @@ services: - ./subquery-delegator:/app command: - ${SUB_COMMAND:-} # set SUB_COMMAND env variable to "test" to run tests - # - -p=/app/index.ts - - -p=ipfs://QmXtvuU63gtv9fbEUNh7cKY3cb9U3BwxsFtKuqzNVDdYpt + # - -p=/app/index.ts # TODO this doesn't work because dependencies are not copied + - -p=ipfs://QmNaNBhXJoFpRJeNQcnTH8Yh6Rf4pzJy6VSnfnQSZHysdZ - -h=http://host.docker.internal:11434 # healthcheck: # test: ["CMD", "curl", "-f", "http://subql-ai:7827/health"] # interval: 3s # timeout: 5s # retries: 10 + + # A simple chat UI + ui: + image: ghcr.io/open-webui/open-webui:main + ports: + - 8080:8080 + restart: always + environment: + - 'OPENAI_API_BASE_URLS=http://subql-ai:7827/v1' + - 'OPENAI_API_KEYS=foobar' + - 'WEBUI_AUTH=false' + volumes: + - open-webui:/app/backend/data + +volumes: + open-webui: diff --git a/src/http.ts b/src/http.ts index 1affcdb..bc114da 100644 --- a/src/http.ts +++ b/src/http.ts @@ -1,6 +1,7 @@ import { type Static, Type } from "@sinclair/typebox"; import { Value } from "@sinclair/typebox/value"; import { Hono } from "hono"; +import { streamSSE } from "hono/streaming"; import { HTTPException } from "hono/http-exception"; import type { RunnerHost } from "./runnerHost.ts"; @@ -18,7 +19,14 @@ const CompletionChoice = Type.Object({ index: Type.Integer(), message: Message, logprobs: Type.Null(), - finish_reason: Type.String(), + finish_reason: Type.Union([Type.String(), Type.Null()]), +}); + +const CompletionChunkChoice = Type.Object({ + index: Type.Integer(), + delta: Message, // OpenAI has more types to this but we're not using them + logprobs: Type.Null(), + finish_reason: 
Type.Union([Type.String(), Type.Null()]), }); const ChatUsage = Type.Object({ @@ -45,7 +53,17 @@ const ChatResponse = Type.Object({ usage: ChatUsage, }); +const ChatChunkResponse = Type.Object({ + id: Type.String(), + model: Type.String(), + choices: Type.Array(CompletionChunkChoice), + created: Type.Number({ description: "Unix timestamp in seconds" }), + object: Type.Literal("chat.completion.chunk"), + // usage: ChatUsage, // TODO enable only if stream_options: {"include_usage": true} +}); + export type ChatResponse = Static; +export type ChatChunkResponse = Static; /** * A minimal implementation of https://platform.openai.com/docs/api-reference/chat/create interface @@ -60,14 +78,25 @@ export function http( return c.text("ok"); }); + app.get("/v1/models", (c) => { + return c.json({ + object: "list", + data: [ + { + id: "subql-ai-0", + object: "model", + created: new Date().getTime(), + owner: "SubQuery", + }, + ], + }); + }); + app.post("/v1/chat/completions", async (c) => { try { const body = await c.req.json(); const req = Value.Parse(ChatRequest, body); - if (req.stream) { - throw new HTTPException(400, { message: "Streaming is not supported" }); - } if (req.n != 1) { throw new HTTPException(400, { message: "Only `n` of 1 is supported" }); } @@ -75,6 +104,36 @@ export function http( const runner = await runnerHost.getAnonymousRunner(); const chatRes = await runner.promptMessages(req.messages); + // Mock streaming, current Ollama doesn't support streaming with tools. See https://github.com/subquery/subql-ai-app-framework/issues/3 + if (req.stream) { + const parts = chatRes.message.content.split(" "); + return streamSSE(c, async (stream) => { + for (const [i, part] of parts.entries()) { + const last = i == parts.length - 1; + + const res = createChatChunkResponse( + part, + chatRes.model, + chatRes.created_at, + last ? 
"stop" : null, + ); + await stream.writeSSE({ data: JSON.stringify(res) }); + await stream.sleep(20); + + // Bring back white space + if (!last) { + const res_space = createChatChunkResponse( + " ", + chatRes.model, + chatRes.created_at, + ); + await stream.writeSSE({ data: JSON.stringify(res_space) }); + await stream.sleep(20); + } + } + }); + } + const response: ChatResponse = { id: "0", model: chatRes.model, @@ -100,6 +159,7 @@ export function http( }; Value.Assert(ChatResponse, response); + return c.json(response); } catch (e) { if (e instanceof HTTPException) { @@ -112,3 +172,25 @@ export function http( return Deno.serve({ port }, app.fetch); } + +function createChatChunkResponse( + message: string, + model: string, + createdAt: Date, + finish_reason: string | null = null, +): ChatChunkResponse { + const res: ChatChunkResponse = { + id: "0", + object: "chat.completion.chunk", + model, + created: new Date(createdAt).getTime() / 1000, + choices: [{ + index: 0, + delta: { role: "assistant", content: message }, + logprobs: null, + finish_reason, + }], + }; + Value.Assert(ChatChunkResponse, res); + return res; +} diff --git a/subquery-delegator/index.ts b/subquery-delegator/index.ts index 5734a4e..5458108 100644 --- a/subquery-delegator/index.ts +++ b/subquery-delegator/index.ts @@ -30,6 +30,7 @@ const PROMPT = ` You are an agent designed to help a user with their token delegation on the SubQuery Network. Given an input question, use the available tools to answer the users question quickly and concisely. You answer must use the result of the tools available. +Do not mention that you used a tool or the name of a tool. If you need more information to answer the question, ask the user for more details. All token amounts are in SQT.