From fdbd1f372a908659864480b2daf357bca8368f89 Mon Sep 17 00:00:00 2001 From: johnd0e <1838643+johnd0e@users.noreply.github.com> Date: Mon, 8 Apr 2024 01:39:21 +0200 Subject: [PATCH 1/3] Enable the stream callback function to get complete chunk information And do not demand `object: "chat.completion.chunk"` within the chunk object, to support services that are not fully OpenAI-compliant --- openai/init.moon | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openai/init.moon b/openai/init.moon index 072242d..25402e3 100644 --- a/openai/init.moon +++ b/openai/init.moon @@ -83,7 +83,7 @@ parse_chat_response = types.partial { parse_completion_chunk = types.partial { - object: "chat.completion.chunk" + --object: "chat.completion.chunk" -- not sure of the whole range of chunks, so for now we strictly parse an append choices: types.shape { types.partial { @@ -190,7 +190,8 @@ class ChatSession parts = {} f = @client\create_stream_filter (c) -> - table.insert parts, c.content + if c = parse_completion_chunk c + table.insert parts, c.content f response message = { @@ -250,7 +251,7 @@ class OpenAI break accumulation_buffer = rest - if chunk = parse_completion_chunk cjson.decode json_blob + if chunk = cjson.decode json_blob chunk_callback chunk ... From 4a359bf6e41042a418f736b3d397c7ad3c079340 Mon Sep 17 00:00:00 2001 From: johnd0e <1838643+johnd0e@users.noreply.github.com> Date: Mon, 8 Apr 2024 20:45:08 +0200 Subject: [PATCH 2/3] refactor OpenAI\_request Make _request response more useful when stream: true. 
Collect chunks during streaming, and instead of bare text, return a synthetic response object, similar to the one returned when stream: false. Simplify ChatSession\generate_response, as there is no longer a need to re-parse the response strings --- openai/init.moon | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/openai/init.moon b/openai/init.moon index 25402e3..f943c34 100644 --- a/openai/init.moon +++ b/openai/init.moon @@ -183,21 +183,8 @@ class ChatSession return nil, err_msg, response - -- if we are streaming we need to pase the entire fragmented response if stream_callback - assert type(response) == "string", - "Expected string response from streaming output" - - parts = {} - f = @client\create_stream_filter (c) -> - if c = parse_completion_chunk c - table.insert parts, c.content - - f response - message = { - role: "assistant" - content: table.concat parts - } + message = response.choices[1].message if append_response @append_message message @@ -274,10 +261,7 @@ class OpenAI for k,v in pairs opts payload[k] = v - stream_filter = if payload.stream - @create_stream_filter chunk_callback - - @_request "POST", "/chat/completions", payload, nil, stream_filter + @_request "POST", "/chat/completions", payload, nil, if payload.stream then chunk_callback else nil -- call /completions -- opts: additional parameters as described in https://platform.openai.com/docs/api-reference/completions @@ -363,7 +347,7 @@ class OpenAI image_generation: (params) => @_request "POST", "/images/generations", params - _request: (method, path, payload, more_headers, stream_fn) => + _request: (method, path, payload, more_headers, chunk_callback) => assert path, "missing path" assert method, "missing method" @@ -393,7 +377,13 @@ class OpenAI sink = ltn12.sink.table out - if stream_fn + parts = {} + if chunk_callback + stream_fn = @create_stream_filter (c) -> + if parsed = parse_completion_chunk c + parts[parsed.index] = 
parts[parsed.index] or {} + table.insert parts[parsed.index], parsed.content + chunk_callback(c) sink = ltn12.sink.chain stream_fn, sink _, status, out_headers = @get_http!.request { @@ -404,6 +394,22 @@ class OpenAI :headers } + if status == 200 and chunk_callback + choices = {} + data = { + object: "chat.completion" + :choices + } + index = 0 + while parts[index] + message = { + role: "assistant" + content: table.concat parts[index] + } + choices[index+1] = { :index, :message } + index += 1 + return status, data, out_headers + response = table.concat out pcall -> response = cjson.decode response status, response, out_headers From 88a02ff059f1e6453da7e568bd58054a412eab8a Mon Sep 17 00:00:00 2001 From: johnd0e <1838643+johnd0e@users.noreply.github.com> Date: Fri, 19 Apr 2024 22:24:27 +0200 Subject: [PATCH 3/3] Construct a unified, comprehensive object as the return value of the streamed response. Remove parse_completion_chunk, as it is no longer used --- openai/init.moon | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/openai/init.moon b/openai/init.moon index f943c34..60fc3e4 100644 --- a/openai/init.moon +++ b/openai/init.moon @@ -82,19 +82,6 @@ parse_chat_response = types.partial { -- } -parse_completion_chunk = types.partial { - --object: "chat.completion.chunk" - -- not sure of the whole range of chunks, so for now we strictly parse an append - choices: types.shape { - types.partial { - delta: types.shape { - "content": types.string\tag "content" - } - index: types.number\tag "index" - } - } -} - -- lpeg pattern to read a json data block from the front of a string, returns -- the json blob and the rest of the string if it could parse one consume_json_head = do @@ -380,9 +367,13 @@ class OpenAI parts = {} if chunk_callback stream_fn = @create_stream_filter (c) -> - if parsed = parse_completion_chunk c - parts[parsed.index] = parts[parsed.index] or {} - table.insert parts[parsed.index], parsed.content + c0 = c.choices[1] + 
part = parts[c0.index] or {} + part.data = c + part.finish_reason = c0.finish_reason + parts[c0.index] = part + if c0.delta.content and c0.delta.content ~= cjson.null + table.insert part, c0.delta.content chunk_callback(c) sink = ltn12.sink.chain stream_fn, sink @@ -396,18 +387,19 @@ class OpenAI if status == 200 and chunk_callback choices = {} - data = { - object: "chat.completion" - :choices - } index = 0 + local data while parts[index] + part = parts[index] + data = part.data message = { role: "assistant" - content: table.concat parts[index] + content: table.concat part } - choices[index+1] = { :index, :message } + choices[index+1] = { :index, :message, finish_reason: part.finish_reason } index += 1 + data.object = "chat.completion" + data.choices = choices return status, data, out_headers response = table.concat out