Document new server flags
jart committed Nov 30, 2024
1 parent 6d89f8f commit 4f88da6
Showing 9 changed files with 149 additions and 15 deletions.
16 changes: 16 additions & 0 deletions llama.cpp/main/main.1
@@ -616,6 +616,22 @@ mode to print the llamafile logo in ASCII rather than UNICODE.
.It Fl Fl verbose
Enables verbose logger output in chatbot. This can be helpful for
troubleshooting issues.
.It Fl Fl chat-template Ar NAME
Specifies or overrides chat template for model.
.Pp
Normally the GGUF metadata tokenizer.chat_template will specify this
value for instruct models. This flag may be used to either override the
chat template, or specify one when the GGUF metadata field is absent,
which effectively forces the web ui to enable chatbot mode.
.Pp
Supported chat template names are: chatml, llama2, llama3, mistral
(alias for llama2), phi3, zephyr, monarch, gemma, gemma2 (alias for
gemma), orion, openchat, vicuna, vicuna-orca, deepseek, command-r,
chatglm3, chatglm4, minicpm, deepseek2, or exaone3.
.Pp
It is also possible to pass the jinja2 template itself to this argument.
Since llamafile doesn't currently support jinja2, a heuristic will be
used to guess which of the above templates the template represents.
.El
.Sh CLI OPTIONS
The following options may be specified when
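The template-guessing heuristic mentioned above is not shown in this commit. As a rough sketch only, the C++ fragment below illustrates how such a guess could work by matching distinctive substrings of a raw jinja2 template; the function name and marker strings are assumptions made for this example, not code taken from llamafile.

// Hypothetical sketch: guess a known chat template name from a raw
// jinja2 template string by looking for distinctive markers. The
// markers and the function name are illustrative assumptions only.
#include <string>
#include <string_view>

static std::string guess_chat_template(std::string_view tmpl) {
    auto has = [&](std::string_view needle) {
        return tmpl.find(needle) != std::string_view::npos;
    };
    if (has("<|im_start|>"))
        return "chatml";
    if (has("<|start_header_id|>"))
        return "llama3";
    if (has("[INST]"))
        return "llama2"; // also covers the mistral alias
    if (has("<|assistant|>") && has("<|end|>"))
        return "phi3";
    if (has("<start_of_turn>"))
        return "gemma"; // gemma2 is an alias for gemma
    return ""; // unknown: caller can report an error or pick a default
}

A real implementation would need a marker for every template name listed above; the point here is only that detection is substring-based guessing rather than jinja2 evaluation.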
20 changes: 20 additions & 0 deletions llama.cpp/main/main.1.asc
@@ -592,6 +592,26 @@
Enables verbose logger output in chatbot. This can be helpful
for troubleshooting issues.
--chat-template NAME
Specifies or overrides chat template for model.
Normally the GGUF metadata tokenizer.chat_template will specify
this value for instruct models. This flag may be used to either
override the chat template, or specify one when the GGUF meta‐
data field is absent, which effectively forces the web ui to
enable chatbot mode.
Supported chat template names are: chatml, llama2, llama3, mis‐
tral (alias for llama2), phi3, zephyr, monarch, gemma, gemma2
(alias for gemma), orion, openchat, vicuna, vicuna-orca,
deepseek, command-r, chatglm3, chatglm4, minicpm, deepseek2, or
exaone3.
It is also possible to pass the jinja2 template itself to this
argument. Since llamafile doesn't currently support jinja2, a
heuristic will be used to guess which of the above templates
the template represents.
CLI OPTIONS
The following options may be specified when llamafile is running in
--cli mode.
11 changes: 3 additions & 8 deletions llamafile/db.cpp
@@ -86,14 +86,9 @@ static sqlite3 *open_impl() {
return nullptr;
}
char *errmsg = nullptr;
if (sqlite3_exec(db, "PRAGMA journal_mode=WAL;", nullptr, nullptr, &errmsg) != SQLITE_OK) {
fprintf(stderr, "%s: failed to set journal mode to wal: %s\n", path.c_str(), errmsg);
sqlite3_free(errmsg);
sqlite3_close(db);
return nullptr;
}
if (sqlite3_exec(db, "PRAGMA synchronous=NORMAL;", nullptr, nullptr, &errmsg) != SQLITE_OK) {
fprintf(stderr, "%s: failed to set synchronous to normal: %s\n", path.c_str(), errmsg);
if (sqlite3_exec(db, FLAG_db_startup_sql, nullptr, nullptr, &errmsg) != SQLITE_OK) {
fprintf(stderr, "%s: failed to execute startup SQL (%s) because: %s", path.c_str(),
FLAG_db_startup_sql, errmsg);
sqlite3_free(errmsg);
sqlite3_close(db);
return nullptr;
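For context on the db.cpp hunk above, here is a minimal standalone sketch, assuming hypothetical names, of the pattern it adopts: one sqlite3_exec() call runs a configurable, possibly multi-statement startup SQL string right after the database is opened. Only the sqlite3 API calls mirror the diff; the function and variable names are illustrative.

// Minimal sketch of executing configurable startup SQL on open.
// sqlite3_exec() runs every semicolon-separated statement in the
// string, which is why the default "PRAGMA journal_mode=WAL;
// PRAGMA synchronous=NORMAL;" can replace the two separate calls
// that this commit removes.
#include <sqlite3.h>
#include <stdio.h>

static sqlite3 *open_with_startup_sql(const char *path, const char *startup_sql) {
    sqlite3 *db = nullptr;
    if (sqlite3_open(path, &db) != SQLITE_OK) {
        fprintf(stderr, "%s: %s\n", path, sqlite3_errmsg(db));
        sqlite3_close(db);
        return nullptr;
    }
    char *errmsg = nullptr;
    if (sqlite3_exec(db, startup_sql, nullptr, nullptr, &errmsg) != SQLITE_OK) {
        fprintf(stderr, "%s: startup SQL (%s) failed: %s\n", path, startup_sql, errmsg);
        sqlite3_free(errmsg);
        sqlite3_close(db);
        return nullptr;
    }
    return db;
}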
9 changes: 9 additions & 0 deletions llamafile/flags.cpp
@@ -54,6 +54,8 @@ bool FLAG_trace = false;
bool FLAG_unsecure = false;
const char *FLAG_chat_template = "";
const char *FLAG_db = nullptr;
const char *FLAG_db_startup_sql = "PRAGMA journal_mode=WAL;"
"PRAGMA synchronous=NORMAL;";
const char *FLAG_file = nullptr;
const char *FLAG_ip_header = nullptr;
const char *FLAG_listen = "127.0.0.1:8080";
@@ -193,6 +195,13 @@ void llamafile_get_flags(int argc, char **argv) {
continue;
}

if (!strcmp(flag, "--db-startup-sql")) {
if (i == argc)
missing("--db-startup-sql");
FLAG_db_startup_sql = argv[i++];
continue;
}

//////////////////////////////////////////////////////////////////////
// server flags

1 change: 1 addition & 0 deletions llamafile/llamafile.h
@@ -25,6 +25,7 @@ extern bool FLAG_trap;
extern bool FLAG_unsecure;
extern const char *FLAG_chat_template;
extern const char *FLAG_db;
extern const char *FLAG_db_startup_sql;
extern const char *FLAG_file;
extern const char *FLAG_ip_header;
extern const char *FLAG_listen;
44 changes: 42 additions & 2 deletions llamafile/server/main.1
@@ -1,4 +1,4 @@
.Dd August 17, 2024
.Dd November 30, 2024
.Dt LLAMAFILER 1
.Os Mozilla Ocho
.Sh NAME
@@ -30,6 +30,11 @@ recommended that you run multiple instances of llamafiler behind a
reverse proxy such as NGINX or Redbean.
.It Fl mm Ar FNAME , Fl Fl mmproj Ar FNAME
Path of vision model weights.
.It Fl Fl db Ar FILE
Specifies path of sqlite3 database.
.Pp
The default is
.Pa ~/.llamafile/llamafile.sqlite3
.It Fl l Ar HOSTPORT , Fl Fl listen Ar HOSTPORT
Specifies the local [HOST:]PORT on which the HTTP server should listen.
By default this is 0.0.0.0:8080 which means llamafiler will bind to port
@@ -55,8 +60,10 @@ Please note that
has a strong influence on how many slots can be created.
.It Fl p Ar TEXT , Fl Fl prompt Ar TEXT
Specifies system prompt. This value is passed along to the web frontend.
.It Fl Fl no-display-prompt Ar TEXT
.It Fl Fl no-display-prompt
Hide system prompt from web user interface.
.It Fl Fl nologo
Hide llamafile logo icon from web ui.
.It Fl Fl url-prefix Ar URLPREFIX
Specifies a URL prefix (subdirectory) under which the HTTP server will
make the API accessible, e.g. /lamafiler. Useful when running llamafiler
@@ -130,6 +137,39 @@ supported by the host operating system. The default keepalive is 5.
Size of HTTP output buffer size, in bytes. Default is 1048576.
.It Fl Fl http-ibuf-size Ar N
Size of HTTP input buffer size, in bytes. Default is 1048576.
.It Fl Fl chat-template Ar NAME
Specifies or overrides chat template for model.
.Pp
Normally the GGUF metadata tokenizer.chat_template will specify this
value for instruct models. This flag may be used to either override the
chat template, or specify one when the GGUF metadata field is absent,
which effectively forces the web ui to enable chatbot mode.
.Pp
Supported chat template names are: chatml, llama2, llama3, mistral
(alias for llama2), phi3, zephyr, monarch, gemma, gemma2 (alias for
gemma), orion, openchat, vicuna, vicuna-orca, deepseek, command-r,
chatglm3, chatglm4, minicpm, deepseek2, or exaone3.
.Pp
It is also possible to pass the jinja2 template itself to this argument.
Since llamafiler doesn't currently support jinja2, a heuristic will be
used to guess which of the above templates the template represents.
.It Fl Fl completion-mode
Forces web ui to operate in completion mode, rather than chat mode.
Normally the web ui chooses its mode based on the GGUF metadata. Base
models normally don't define tokenizer.chat_template whereas instruct
models do. If it's a base model, then the web ui will automatically use
completion mode only, without needing to specify this flag. This flag is
useful in cases where a prompt template is defined by the gguf, but it
is desirable for the chat interface to be disabled.
.It Fl Fl db-startup-sql Ar SQL
Specifies SQL code that should be executed whenever connecting to the
SQLite database. The default is the following code, which enables the
write-ahead log.
.Bd -literal -offset indent
PRAGMA journal_mode=WAL;
PRAGMA synchronous=NORMAL;
.Ed
.El
.Sh EXAMPLE
Here's an example of how you might start this server:
.Pp
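To make the interplay of tokenizer.chat_template, --chat-template, and --completion-mode documented above easier to follow, here is a small hypothetical C++ sketch of the mode decision; the type and function names are assumptions for illustration, and only the decision order follows the man page text.

// Hypothetical sketch of how the web UI mode could be selected from
// the GGUF metadata and the flags documented above. All identifiers
// are illustrative; only the precedence follows the man page.
#include <string>

enum class WebUiMode { kChat, kCompletion };

static WebUiMode choose_web_ui_mode(const std::string &gguf_chat_template, // tokenizer.chat_template, "" if absent
                                    const std::string &flag_chat_template, // --chat-template, "" if not given
                                    bool flag_completion_mode) {           // --completion-mode
    if (flag_completion_mode)
        return WebUiMode::kCompletion; // explicit override: chat UI disabled
    if (!flag_chat_template.empty())
        return WebUiMode::kChat;       // --chat-template forces chatbot mode
    if (!gguf_chat_template.empty())
        return WebUiMode::kChat;       // instruct model: metadata defines a template
    return WebUiMode::kCompletion;     // base model: completion mode only
}

In words: --completion-mode always wins, --chat-template forces the chatbot even when the GGUF metadata has no template, and otherwise the presence of tokenizer.chat_template decides.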
52 changes: 49 additions & 3 deletions llamafile/server/main.1.asc
@@ -32,6 +32,11 @@
-mm FNAME, --mmproj FNAME
Path of vision model weights.

--db FILE
Specifies path of sqlite3 database.

The default is ~/.llamafile/llamafile.sqlite3

-l HOSTPORT, --listen HOSTPORT
Specifies the local [HOST:]PORT on which the HTTP server should
listen. By default this is 0.0.0.0:8080 which means llamafiler
@@ -63,9 +68,12 @@
Specifies system prompt. This value is passed along to the web
frontend.

--no-display-prompt TEXT
--no-display-prompt
Hide system prompt from web user interface.

--nologo
Hide llamafile logo icon from web ui.

--url-prefix URLPREFIX
Specifies a URL prefix (subdirectory) under which the HTTP
server will make the API accessible, e.g. /lamafiler. Useful
@@ -158,6 +166,44 @@
--http-ibuf-size N
Size of HTTP input buffer size, in bytes. Default is 1048576.

--chat-template NAME
Specifies or overrides chat template for model.

Normally the GGUF metadata tokenizer.chat_template will specify
this value for instruct models. This flag may be used to either
override the chat template, or specify one when the GGUF meta‐
data field is absent, which effectively forces the web ui to
enable chatbot mode.

Supported chat template names are: chatml, llama2, llama3, mis‐
tral (alias for llama2), phi3, zephyr, monarch, gemma, gemma2
(alias for gemma), orion, openchat, vicuna, vicuna-orca,
deepseek, command-r, chatglm3, chatglm4, minicpm, deepseek2, or
exaone3.

It is also possible to pass the jinja2 template itself to this
argument. Since llamafiler doesn't currently support jinja2, a
heuristic will be used to guess which of the above templates
the template represents.

--completion-mode
Forces web ui to operate in completion mode, rather than chat
mode. Normally the web ui chooses its mode based on the GGUF
metadata. Base models normally don't define tokenizer.chat_tem‐
plate whereas instruct models do. If it's a base model, then
the web ui will automatically use completion mode only, without
needing to specify this flag. This flag is useful in cases
where a prompt template is defined by the gguf, but it is de‐
sirable for the chat interface to be disabled.

--db-startup-sql SQL
Specifies SQL code that should be executed whenever connecting
to the SQLite database. The default is the following code,
which enables the write-ahead log.

PRAGMA journal_mode=WAL;
PRAGMA synchronous=NORMAL;

EXAMPLE
Here's an example of how you might start this server:
@@ -172,10 +218,10 @@
curl -v http://127.0.0.1:8080/embedding?content=hello+world
DOCUMENTATION
Read our Markdown documentation for additional help and tutorials. See
Read our Markdown documentation for additional help and tutorials. See
llamafile/server/doc/index.md in the source repository on GitHub.
SEE ALSO
llamafile(1), whisperfile(1)
Mozilla Ocho                    August 17, 2024                  LLAMAFILER(1)
Mozilla Ocho                   November 30, 2024                 LLAMAFILER(1)
4 changes: 4 additions & 0 deletions llamafile/server/www/chatbot.js
@@ -22,6 +22,7 @@ const DEFAULT_SYSTEM_PROMPT =
const DEFAULT_FLAGZ = {
"model": null,
"prompt": null,
"nologo": false,
"no_display_prompt": false,
"frequency_penalty": 0,
"presence_penalty": 0,
@@ -369,6 +370,9 @@ function updateModelInfo() {
document.getElementById("model").textContent = modelName;
document.getElementById("model-completions").textContent = modelName;
}
if (!flagz.nologo) {
document.getElementById("logo").style.display = "inline-block";
}
}

function startChat(history) {
7 changes: 5 additions & 2 deletions llamafile/server/www/index.html
@@ -9,8 +9,11 @@
<div class="chat-container" id="chat-interface">
<div class="chat-header">
<h1>
<img src="chatbot.png" alt="[llamafile]" title="llamafile" width="64" height="64">
<span id="model">llamafile</span>
<img src="chatbot.png"
width="64" height="64"
id="logo" style="display:none"
alt="[llamafile]" title="llamafile">
<span id="model">loading...</span>
</h1>
<div class="menu-dropdown">
<button class="menu-trigger"></button>
