feat: Migrate from legacy completions to chat completions (#158)
gphorvath authored Mar 20, 2024 · 1 parent 747018a · commit e20ba3f
Showing 4 changed files with 32 additions and 20 deletions.
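At a high level, the migration replaces single-string prompts with role-tagged message arrays. A minimal before/after sketch of the two OpenAI Node client calls (model name taken from .env.example; prompt text is illustrative, not from the commit):

// Before: legacy completions — one prompt string, result in choices[0].text
const legacy = await openaiClient.completions.create({
  model: "llama-cpp-python",
  prompt: "Summarize the following notes: ...",
  max_tokens: 512,
});
console.log(legacy.choices[0].text);

// After: chat completions — a system/user message array, result in choices[0].message.content
const chat = await openaiClient.chat.completions.create({
  model: "llama-cpp-python",
  messages: [
    { role: "system", content: "You are a summarizer." },
    { role: "user", content: "Summarize the following notes: ..." },
  ],
  max_tokens: 512,
});
console.log(chat.choices[0].message.content);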
1 change: 1 addition & 0 deletions .env.example
@@ -8,6 +8,7 @@ PUBLIC_AI4NS_BRANDING=false

ORIGIN=http://localhost:3000
SUMMARIZATION_MODEL=llama-cpp-python
+ MAX_TOKENS=16384
INTERMEDIATE_SUMMARIZATION_PROMPT="You are a summarizer tasked with creating summaries. Your key activities include identifying the main points and key details in the given text, and condensing the information into a concise summary that accurately reflects the original text. It is important to avoid any risks such as misinterpreting the text, omitting crucial information, or distorting the original meaning. Use clear and specific language, ensuring that the summary is coherent, well-organized, and effectively communicates the main ideas of the original text."
FINAL_SUMMARIZATION_PROMPT="You are a summarizer tasked with creating summaries. You will return a coherent and concise summary using 3 concise sections that are each separated by a newline character: 1) BOTTOM LINE UP FRONT: this section will be a concise paragraph containing an overarching, executive summary of all the notes. 2) NOTES: this section will be bullet points highlighting and summarizing key points, risks, issues, and opportunities. 3) ACTION ITEMS: this section will focus on listing any action items, unanswered questions, or issues present in the text; if there are none that can be identified from the notes, just return 'None' for ACTION ITEMS; if possible, also include the individual or team assigned to each item in ACTION ITEMS."
OPENAI_API_KEY=my-test-key
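The new MAX_TOKENS value arrives in the server as a string and is parsed with Number(env.MAX_TOKENS) in the TypeScript diff below; a hedged sketch of a more defensive parse (a hypothetical variant, not part of this commit):

// Fall back to the zarf.yaml default of 8192 when the variable is unset or malformed
const parsed = Number(env.MAX_TOKENS);
const MAX_TOKENS = Number.isFinite(parsed) && parsed > 0 ? parsed : 8192;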
2 changes: 2 additions & 0 deletions chart/templates/ui/deployment.yaml
@@ -43,6 +43,8 @@ spec:
value: "###ZARF_VAR_INTERMEDIATE_SUMMARY_PROMPT###"
- name: PUBLIC_DEFAULT_TEMPERATURE
value: "###ZARF_VAR_TEMPERATURE###"
- name: MAX_TOKENS
value: "###ZARF_VAR_MAX_TOKENS###"
- name: OPENAI_API_KEY
value: "###ZARF_VAR_LEAPFROGAI_API_KEY###"
resources:
44 changes: 24 additions & 20 deletions src/routes/upload/+page.server.ts
@@ -15,22 +15,23 @@ import { clearTmp } from "$lib/cleanup";

  const TEMPORARY_DIRECTORY = tmpdir();
  const REQUEST_TIMEOUT = 36000 * 1000; // 10 hours
+ const MAX_TOKENS = Number(env.MAX_TOKENS);

- const createCompletion = async (
+ const createChatCompletion = async (
    openaiClient: OpenAI,
    model: string,
-   prompt: string,
+   messages: Message[],
    maxTokens: number
  ) => {
-   const completion = await openaiClient.completions.create({
+   const completion = await openaiClient.chat.completions.create({
+     messages: messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
      model: model,
-     temperature: 0,
      max_tokens: maxTokens,
+     temperature: 0.1,
      frequency_penalty: 0.5,
      presence_penalty: 0.0,
-     prompt,
+     stream: false,
    });
-   return completion.choices[0].text.trim();
+
+   return completion.choices[0].message.content.trim();
  };

  export const actions = {
@@ -58,8 +58,7 @@ export const actions = {
  const uid = uuidv4();

  console.log(
-   `Started new workflow for ${filename} (${audioFile.type}) of size ${
-     audioFile.size / 1000000
+   `Started new workflow for ${filename} (${audioFile.type}) of size ${audioFile.size / 1000000
    }MB.`
  );

@@ -117,27 +117,31 @@

  // batching method only occurs at high token counts
  let intermediateSummary = "";
- if (tokenizedTranscript.length > 7500) {
+ if (tokenizedTranscript.length > MAX_TOKENS) {
    console.log(`\tUsing batching method for ${filename}`);
-   const transcriptBatches = batchTranscript(tokenizedTranscript, 1500);
+   const maxBatchSize = MAX_TOKENS / 8;
+   const transcriptBatches = batchTranscript(tokenizedTranscript, maxBatchSize);

    for (let i = 0; i < transcriptBatches.length; i++) {
      const chunk = transcriptBatches[i];
-     const prompt = generateSummarizationPrompt(model, chunk);
-     const text = createCompletion(openaiClient, model, prompt, 500);
+     const message: Message[] = [
+       { role: "system", content: env.INTERMEDIATE_SUMMARIZATION_PROMPT },
+       { role: "user", content: chunk },
+     ];
+     const maxMessageSize = MAX_TOKENS / 16;
+     const text = await createChatCompletion(openaiClient, model, message, maxMessageSize);
      intermediateSummary += text;
    }
  } else {
    intermediateSummary = tokenizedTranscript.join(" ");
  }

- const prompt = generateSummarizationPrompt(
-   model,
-   intermediateSummary,
-   true // finalSummary
- );
+ const message: Message[] = [
+   { role: "system", content: env.FINAL_SUMMARIZATION_PROMPT },
+   { role: "user", content: intermediateSummary },
+ ];

- const summary = await createCompletion(openaiClient, model, prompt, 8192);
+ const summary = await createChatCompletion(openaiClient, model, message, MAX_TOKENS);

  await unlink(transcriptionFile);
  console.log(`\tSuccessfully summarized ${filename}`);
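Every batching threshold above is derived from MAX_TOKENS; a worked example of the budget, assuming the .env.example value of 16384:

// Hypothetical walkthrough of the arithmetic in the diff above
const MAX_TOKENS = 16384;
const maxBatchSize = MAX_TOKENS / 8;    // 2048 transcript tokens per batch
const maxMessageSize = MAX_TOKENS / 16; // up to 1024 tokens per intermediate summary
// A 20,000-token transcript exceeds MAX_TOKENS, so it is split into
// Math.ceil(20000 / 2048) = 10 batches; their summaries total at most
// 10 * 1024 = 10,240 tokens, leaving headroom for the final call capped at MAX_TOKENS.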
5 changes: 5 additions & 0 deletions zarf.yaml
@@ -72,6 +72,11 @@ variables:
default: "0.1"
prompt: true
sensitive: false
- name: MAX_TOKENS
description: The default max tokens for the LLM
default: "8192"
prompt: true
sensitive: false
- name: PREFIX
description: Prefix for requests to the application
default: ""
