feat: Migrate from legacy completions to chat completions (#158)
gphorvath authored Mar 20, 2024 · 1 parent 747018a · commit e20ba3f
Showing 4 changed files with 32 additions and 20 deletions.
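At a high level, the migration replaces single-string prompts with role-tagged message arrays. A minimal before/after sketch of the two OpenAI Node client calls (model name taken from .env.example; prompt text is illustrative, not from the commit):

// Before: legacy completions — one prompt string, result in choices[0].text
const legacy = await openaiClient.completions.create({
  model: "llama-cpp-python",
  prompt: "Summarize the following notes: ...",
  max_tokens: 512,
});
console.log(legacy.choices[0].text);

// After: chat completions — a system/user message array, result in choices[0].message.content
const chat = await openaiClient.chat.completions.create({
  model: "llama-cpp-python",
  messages: [
    { role: "system", content: "You are a summarizer." },
    { role: "user", content: "Summarize the following notes: ..." },
  ],
  max_tokens: 512,
});
console.log(chat.choices[0].message.content);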
1 change: 1 addition & 0 deletions .env.example
@@ -8,6 +8,7 @@ PUBLIC_AI4NS_BRANDING=false

ORIGIN=http://localhost:3000
SUMMARIZATION_MODEL=llama-cpp-python
+ MAX_TOKENS=16384
INTERMEDIATE_SUMMARIZATION_PROMPT="You are a summarizer tasked with creating summaries. Your key activities include identifying the main points and key details in the given text, and condensing the information into a concise summary that accurately reflects the original text. It is important to avoid any risks such as misinterpreting the text, omitting crucial information, or distorting the original meaning. Use clear and specific language, ensuring that the summary is coherent, well-organized, and effectively communicates the main ideas of the original text."
FINAL_SUMMARIZATION_PROMPT="You are a summarizer tasked with creating summaries. You will return a coherent and concise summary using 3 concise sections that are each separated by a newline character: 1) BOTTOM LINE UP FRONT: this section will be a concise paragraph containing an overarching, executive summary of all the notes. 2) NOTES: this section will be bullet points highlighting and summarizing key points, risks, issues, and opportunities. 3) ACTION ITEMS: this section will focus on listing any action items, unanswered questions, or issues present in the text; if there are none that can be identified from the notes, just return 'None' for ACTION ITEMS; if possible, also include the individual or team assigned to each item in ACTION ITEMS."
OPENAI_API_KEY=my-test-key
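The new MAX_TOKENS value arrives in the server as a string and is parsed with Number(env.MAX_TOKENS) in the TypeScript diff below; a hedged sketch of a more defensive parse (a hypothetical variant, not part of this commit):

// Fall back to the zarf.yaml default of 8192 when the variable is unset or malformed
const parsed = Number(env.MAX_TOKENS);
const MAX_TOKENS = Number.isFinite(parsed) && parsed > 0 ? parsed : 8192;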
2 changes: 2 additions & 0 deletions chart/templates/ui/deployment.yaml
@@ -43,6 +43,8 @@ spec:
value: "###ZARF_VAR_INTERMEDIATE_SUMMARY_PROMPT###"
- name: PUBLIC_DEFAULT_TEMPERATURE
value: "###ZARF_VAR_TEMPERATURE###"
- name: MAX_TOKENS
value: "###ZARF_VAR_MAX_TOKENS###"
- name: OPENAI_API_KEY
value: "###ZARF_VAR_LEAPFROGAI_API_KEY###"
resources:
44 changes: 24 additions & 20 deletions src/routes/upload/+page.server.ts
@@ -15,22 +15,23 @@ import { clearTmp } from "$lib/cleanup";

  const TEMPORARY_DIRECTORY = tmpdir();
  const REQUEST_TIMEOUT = 36000 * 1000; // 10 hours
+ const MAX_TOKENS = Number(env.MAX_TOKENS);

- const createCompletion = async (
+ const createChatCompletion = async (
    openaiClient: OpenAI,
    model: string,
-   prompt: string,
+   messages: Message[],
    maxTokens: number
  ) => {
-   const completion = await openaiClient.completions.create({
+   const completion = await openaiClient.chat.completions.create({
+     messages: messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
      model: model,
-     temperature: 0,
      max_tokens: maxTokens,
+     temperature: 0.1,
      frequency_penalty: 0.5,
      presence_penalty: 0.0,
-     prompt,
+     stream: false,
    });
-   return completion.choices[0].text.trim();
+
+   return completion.choices[0].message.content.trim();
  };

  export const actions = {
@@ -58,8 +58,7 @@ export const actions = {
  const uid = uuidv4();

  console.log(
-   `Started new workflow for ${filename} (${audioFile.type}) of size ${
-     audioFile.size / 1000000
+   `Started new workflow for ${filename} (${audioFile.type}) of size ${audioFile.size / 1000000
    }MB.`
  );

@@ -117,27 +117,31 @@

  // batching method only occurs at high token counts
  let intermediateSummary = "";
- if (tokenizedTranscript.length > 7500) {
+ if (tokenizedTranscript.length > MAX_TOKENS) {
    console.log(`\tUsing batching method for ${filename}`);
-   const transcriptBatches = batchTranscript(tokenizedTranscript, 1500);
+   const maxBatchSize = MAX_TOKENS / 8;
+   const transcriptBatches = batchTranscript(tokenizedTranscript, maxBatchSize);

    for (let i = 0; i < transcriptBatches.length; i++) {
      const chunk = transcriptBatches[i];
-     const prompt = generateSummarizationPrompt(model, chunk);
-     const text = createCompletion(openaiClient, model, prompt, 500);
+     const message: Message[] = [
+       { role: "system", content: env.INTERMEDIATE_SUMMARIZATION_PROMPT },
+       { role: "user", content: chunk },
+     ];
+     const maxMessageSize = MAX_TOKENS / 16;
+     const text = await createChatCompletion(openaiClient, model, message, maxMessageSize);
      intermediateSummary += text;
    }
  } else {
    intermediateSummary = tokenizedTranscript.join(" ");
  }

- const prompt = generateSummarizationPrompt(
-   model,
-   intermediateSummary,
-   true // finalSummary
- );
+ const message: Message[] = [
+   { role: "system", content: env.FINAL_SUMMARIZATION_PROMPT },
+   { role: "user", content: intermediateSummary },
+ ];

- const summary = await createCompletion(openaiClient, model, prompt, 8192);
+ const summary = await createChatCompletion(openaiClient, model, message, MAX_TOKENS);

  await unlink(transcriptionFile);
  console.log(`\tSuccessfully summarized ${filename}`);
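Every batching threshold above is derived from MAX_TOKENS; a worked example of the budget, assuming the .env.example value of 16384:

// Hypothetical walkthrough of the arithmetic in the diff above
const MAX_TOKENS = 16384;
const maxBatchSize = MAX_TOKENS / 8;    // 2048 transcript tokens per batch
const maxMessageSize = MAX_TOKENS / 16; // up to 1024 tokens per intermediate summary
// A 20,000-token transcript exceeds MAX_TOKENS, so it is split into
// Math.ceil(20000 / 2048) = 10 batches; their summaries total at most
// 10 * 1024 = 10,240 tokens, leaving headroom for the final call capped at MAX_TOKENS.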
5 changes: 5 additions & 0 deletions zarf.yaml
@@ -72,6 +72,11 @@ variables:
default: "0.1"
prompt: true
sensitive: false
- name: MAX_TOKENS
description: The default max tokens for the LLM
default: "8192"
prompt: true
sensitive: false
- name: PREFIX
description: Prefix for requests to the application
default: ""
