From fdbd1f372a908659864480b2daf357bca8368f89 Mon Sep 17 00:00:00 2001 From: johnd0e <1838643+johnd0e@users.noreply.github.com> Date: Mon, 8 Apr 2024 01:39:21 +0200 Subject: [PATCH 1/3] Enable the stream callback function to get complete chunk information And do not demand `object: "chat.completion.chunk"` within the chunk object, to support services that are not fully OpenAI-compliant --- openai/init.moon | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openai/init.moon b/openai/init.moon index 072242d..25402e3 100644 --- a/openai/init.moon +++ b/openai/init.moon @@ -83,7 +83,7 @@ parse_chat_response = types.partial { parse_completion_chunk = types.partial { - object: "chat.completion.chunk" + --object: "chat.completion.chunk" -- not sure of the whole range of chunks, so for now we strictly parse an append choices: types.shape { types.partial { @@ -190,7 +190,8 @@ class ChatSession parts = {} f = @client\create_stream_filter (c) -> - table.insert parts, c.content + if c = parse_completion_chunk c + table.insert parts, c.content f response message = { @@ -250,7 +251,7 @@ class OpenAI break accumulation_buffer = rest - if chunk = parse_completion_chunk cjson.decode json_blob + if chunk = cjson.decode json_blob chunk_callback chunk ... From 4a359bf6e41042a418f736b3d397c7ad3c079340 Mon Sep 17 00:00:00 2001 From: johnd0e <1838643+johnd0e@users.noreply.github.com> Date: Mon, 8 Apr 2024 20:45:08 +0200 Subject: [PATCH 2/3] refactor OpenAI\_request Make _request response more useful when stream: true. 
Collect chunks during streaming, and instead of bare text, return a synthetic response object, similar to the one returned when stream: false. Simplify ChatSession\generate_response, as there is no longer a need to re-parse the response strings --- openai/init.moon | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/openai/init.moon b/openai/init.moon index 25402e3..f943c34 100644 --- a/openai/init.moon +++ b/openai/init.moon @@ -183,21 +183,8 @@ class ChatSession return nil, err_msg, response - -- if we are streaming we need to pase the entire fragmented response if stream_callback - assert type(response) == "string", - "Expected string response from streaming output" - - parts = {} - f = @client\create_stream_filter (c) -> - if c = parse_completion_chunk c - table.insert parts, c.content - - f response - message = { - role: "assistant" - content: table.concat parts - } + message = response.choices[1].message if append_response @append_message message @@ -274,10 +261,7 @@ class OpenAI for k,v in pairs opts payload[k] = v - stream_filter = if payload.stream - @create_stream_filter chunk_callback - - @_request "POST", "/chat/completions", payload, nil, stream_filter + @_request "POST", "/chat/completions", payload, nil, if payload.stream then chunk_callback else nil -- call /completions -- opts: additional parameters as described in https://platform.openai.com/docs/api-reference/completions @@ -363,7 +347,7 @@ class OpenAI image_generation: (params) => @_request "POST", "/images/generations", params - _request: (method, path, payload, more_headers, stream_fn) => + _request: (method, path, payload, more_headers, chunk_callback) => assert path, "missing path" assert method, "missing method" @@ -393,7 +377,13 @@ class OpenAI sink = ltn12.sink.table out - if stream_fn + parts = {} + if chunk_callback + stream_fn = @create_stream_filter (c) -> + if parsed = parse_completion_chunk c + parts[parsed.index] = 
parts[parsed.index] or {} + table.insert parts[parsed.index], parsed.content + chunk_callback(c) sink = ltn12.sink.chain stream_fn, sink _, status, out_headers = @get_http!.request { @@ -404,6 +394,22 @@ class OpenAI :headers } + if status == 200 and chunk_callback + choices = {} + data = { + object: "chat.completion" + :choices + } + index = 0 + while parts[index] + message = { + role: "assistant" + content: table.concat parts[index] + } + choices[index+1] = { :index, :message } + index += 1 + return status, data, out_headers + response = table.concat out pcall -> response = cjson.decode response status, response, out_headers From 88a02ff059f1e6453da7e568bd58054a412eab8a Mon Sep 17 00:00:00 2001 From: johnd0e <1838643+johnd0e@users.noreply.github.com> Date: Fri, 19 Apr 2024 22:24:27 +0200 Subject: [PATCH 3/3] Construct a unified, comprehensive object as the return value of the streamed response. Remove parse_completion_chunk, as it is no longer used --- openai/init.moon | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/openai/init.moon b/openai/init.moon index f943c34..60fc3e4 100644 --- a/openai/init.moon +++ b/openai/init.moon @@ -82,19 +82,6 @@ parse_chat_response = types.partial { -- } -parse_completion_chunk = types.partial { - --object: "chat.completion.chunk" - -- not sure of the whole range of chunks, so for now we strictly parse an append - choices: types.shape { - types.partial { - delta: types.shape { - "content": types.string\tag "content" - } - index: types.number\tag "index" - } - } -} - -- lpeg pattern to read a json data block from the front of a string, returns -- the json blob and the rest of the string if it could parse one consume_json_head = do @@ -380,9 +367,13 @@ class OpenAI parts = {} if chunk_callback stream_fn = @create_stream_filter (c) -> - if parsed = parse_completion_chunk c - parts[parsed.index] = parts[parsed.index] or {} - table.insert parts[parsed.index], parsed.content + c0 = c.choices[1] + 
part = parts[c0.index] or {} + part.data = c + part.finish_reason = c0.finish_reason + parts[c0.index] = part + if c0.delta.content and c0.delta.content ~= cjson.null + table.insert part, c0.delta.content chunk_callback(c) sink = ltn12.sink.chain stream_fn, sink @@ -396,18 +387,19 @@ class OpenAI if status == 200 and chunk_callback choices = {} - data = { - object: "chat.completion" - :choices - } index = 0 + local data while parts[index] + part = parts[index] + data = part.data message = { role: "assistant" - content: table.concat parts[index] + content: table.concat part } - choices[index+1] = { :index, :message } + choices[index+1] = { :index, :message, finish_reason: part.finish_reason } index += 1 + data.object = "chat.completion" + data.choices = choices return status, data, out_headers response = table.concat out