Implement chat_vllm() (#148)
Fixes #140
hadley authored Dec 18, 2024
1 parent f8a8bec commit 06c4720
Showing 8 changed files with 176 additions and 0 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -76,6 +76,7 @@ Collate:
'provider-groq.R'
'provider-ollama.R'
'provider-perplexity.R'
'provider-vllm.R'
'shiny.R'
'tokens.R'
'tools-def-auto.R'
1 change: 1 addition & 0 deletions NAMESPACE
@@ -25,6 +25,7 @@ export(chat_groq)
export(chat_ollama)
export(chat_openai)
export(chat_perplexity)
export(chat_vllm)
export(content_image_file)
export(content_image_plot)
export(content_image_url)
2 changes: 2 additions & 0 deletions NEWS.md
@@ -1,5 +1,7 @@
# elmer (development version)

* New `chat_vllm()` to chat with models served by vLLM (#140).

* The default `chat_openai()` model is now GPT-4o.

* New `Chat$set_turns()` to set turns. `Chat$turns()` is now `Chat$get_turns()`. `Chat$system_prompt()` is replaced with `Chat$set_system_prompt()` and `Chat$get_system_prompt()`.
84 changes: 84 additions & 0 deletions R/provider-vllm.R
@@ -0,0 +1,84 @@
#' @include provider-openai.R
#' @include content.R
NULL

#' Chat with a model hosted by vLLM
#'
#' @description
#' [vLLM](https://docs.vllm.ai/en/latest/) is an open-source library that
#' provides an efficient and convenient model server for LLMs. You can use
#' `chat_vllm()` to connect to endpoints powered by vLLM.
#'
#' @inheritParams chat_openai
#' @inherit chat_openai return
#' @export
chat_vllm <- function(base_url,
                      system_prompt = NULL,
                      turns = NULL,
                      model,
                      seed = NULL,
                      api_args = list(),
                      api_key = vllm_key(),
                      echo = NULL) {

  check_string(base_url)
  turns <- normalize_turns(turns, system_prompt)
  check_string(api_key)
  if (missing(model)) {
    models <- vllm_models(base_url, api_key)
    cli::cli_abort(c(
      "Must specify {.arg model}.",
      i = "Available models: {.str {models}}."
    ))
  }
  echo <- check_echo(echo)

  provider <- ProviderVllm(
    base_url = base_url,
    model = model,
    seed = seed,
    extra_args = api_args,
    api_key = api_key
  )
  Chat$new(provider = provider, turns = turns, echo = echo)
}
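
# A usage sketch (not part of the commit; the server URL, model id, and
# key below are hypothetical):
#
#   Sys.setenv(VLLM_KEY = "my-secret-key")
#   chat <- chat_vllm(
#     base_url = "https://my-vllm.example.com/",
#     model = "llama3",
#     system_prompt = "Be as terse as possible; no punctuation"
#   )
#   chat$chat("What is 1 + 1?")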

chat_vllm_test <- function(...) {
  chat_vllm(
    base_url = "https://llm.nrp-nautilus.io/",
    ...,
    model = "llama3"
  )
}

ProviderVllm <- new_class(
  "ProviderVllm",
  parent = ProviderOpenAI,
  package = "elmer"
)

# Just like OpenAI, but without the `strict` field
method(as_json, list(ProviderVllm, ToolDef)) <- function(provider, x) {
  list(
    type = "function",
    "function" = compact(list(
      name = x@name,
      description = x@description,
      parameters = as_json(provider, x@arguments)
    ))
  )
}
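
# For reference, the OpenAI method additionally sets a `strict` field inside
# `"function"`; a serialized vLLM tool looks roughly like this (a sketch,
# with a hypothetical tool name):
#
#   list(
#     type = "function",
#     "function" = list(
#       name = "get_time",
#       description = "Get the current time",
#       parameters = <JSON schema built from x@arguments>
#     )
#   )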

# Reads the API key from the VLLM_KEY environment variable.
vllm_key <- function() {
  key_get("VLLM_KEY")
}

vllm_models <- function(base_url, key = vllm_key()) {
  req <- request(base_url)
  req <- req_auth_bearer_token(req, key)
  req <- req_url_path(req, "/v1/models")
  resp <- req_perform(req)
  json <- resp_body_json(resp)

  # Pull the id of each model the server advertises
  map_chr(json$data, "[[", "id")
}
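
# A quick sanity check (hypothetical URL; the ids returned depend on what
# the server is serving):
#
#   vllm_models("https://my-vllm.example.com/")
#   #> c("llama3", "mistral-7b")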
1 change: 1 addition & 0 deletions README.Rmd
@@ -49,6 +49,7 @@ elmer supports a wide variety of model providers:
* OpenAI: `chat_openai()`.
* perplexity.ai: `chat_perplexity()`.
* Snowflake Cortex: `chat_cortex()`.
* vLLM: `chat_vllm()`.

## Model choice

1 change: 1 addition & 0 deletions README.md
@@ -43,6 +43,7 @@ elmer supports a wide variety of model providers:
- OpenAI: `chat_openai()`.
- perplexity.ai: `chat_perplexity()`.
- Snowflake Cortex: `chat_cortex()`.
- vLLM: `chat_vllm()`.

## Model choice

63 changes: 63 additions & 0 deletions man/chat_vllm.Rd

(Generated documentation file; contents not shown.)

23 changes: 23 additions & 0 deletions tests/testthat/test-provider-vllm.R
@@ -0,0 +1,23 @@
# Getting started --------------------------------------------------------

test_that("can make simple request", {
  chat <- chat_vllm_test("Be as terse as possible; no punctuation")
  resp <- chat$chat("What is 1 + 1?", echo = FALSE)
  expect_match(resp, "2")
  expect_equal(chat$last_turn()@tokens, c(64, 2))
})

test_that("can make simple streaming request", {
  chat <- chat_vllm_test("Be as terse as possible; no punctuation")
  resp <- coro::collect(chat$stream("What is 1 + 1?"))
  expect_match(paste0(unlist(resp), collapse = ""), "2")
})

# Common provider interface -----------------------------------------------

test_that("respects turns interface", {
  chat_fun <- chat_vllm_test

  test_turns_system(chat_fun)
  test_turns_existing(chat_fun)
})
