Skip to content

Commit

Permalink
Embed man page into --help flag of each program
Browse files Browse the repository at this point in the history
See #166
  • Loading branch information
jart committed Jan 5, 2024
1 parent ce4aac6 commit 156f0a6
Show file tree
Hide file tree
Showing 25 changed files with 391 additions and 97 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ o/$(MODE)/: o/$(MODE)/llama.cpp o/$(MODE)/llamafile
# for installing to `make PREFIX=/usr/local`
.PHONY: install
install: llamafile/zipalign.1 \
llama.cpp/main/main.1 \
llamafile/llamafile.1 \
llama.cpp/quantize/quantize.1 \
llama.cpp/perplexity/perplexity.1 \
llama.cpp/llava/llava-quantize.1 \
Expand All @@ -40,7 +40,7 @@ install: llamafile/zipalign.1 \
$(INSTALL) o/$(MODE)/llama.cpp/llava/llava-quantize $(PREFIX)/bin/llava-quantize
mkdir -p $(PREFIX)/share/man/man1
$(INSTALL) -m 0644 llamafile/zipalign.1 $(PREFIX)/share/man/man1/zipalign.1
$(INSTALL) -m 0644 llama.cpp/main/main.1 $(PREFIX)/share/man/man1/llamafile.1
$(INSTALL) -m 0644 llamafile/llamafile.1 $(PREFIX)/share/man/man1/llamafile.1
$(INSTALL) -m 0644 llama.cpp/quantize/quantize.1 $(PREFIX)/share/man/man1/llamafile-quantize.1
$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1
Expand Down
5 changes: 5 additions & 0 deletions build/rules.mk
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ o/$(MODE)/%.o: %.cpp $(COSMOCC)
o/$(MODE)/%: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@

# Render a man page source (foo.1) into formatted text (foo.1.asc) by running
# `man` on it and capturing its standard output.
#
# .PRECIOUS on the pattern keeps make from deleting the generated .1.asc files
# when it treats them as intermediate products of a rule chain, or when a
# build is interrupted.
#
# The output goes to $@.tmp and is only renamed onto the target when `man`
# exits successfully, so a failed run never replaces an existing .1.asc. The
# leading `-` tells make to ignore a failure of that command line.
# NOTE(review): presumably this lets hosts without a working `man` keep using
# an already-present .1.asc -- confirm. The final `rm -f` removes the
# temporary file that is left behind when `man` fails.
.PRECIOUS: %.1.asc
%.1.asc: %.1
-man $< >$@.tmp && mv -f $@.tmp $@
@rm -f $@.tmp

o/$(MODE)/%.zip.o: % $(COSMOCC)
@mkdir -p $(dir $@)/.aarch64
$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a x86_64 -o $@ $<
Expand Down
13 changes: 7 additions & 6 deletions llama.cpp/llava/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@ LLAMA_CPP_LLAVA_OBJS = $(LLAMA_CPP_LLAVA_SRCS:%.cpp=o/$(MODE)/%.o)

.PHONY: tool/args/args.h

o/$(MODE)/llama.cpp/llava/llava.a: \
o/$(MODE)/llama.cpp/llava/llava.a: \
$(LLAMA_CPP_LLAVA_OBJS)

o/$(MODE)/llama.cpp/llava/llava-quantize: \
o/$(MODE)/llama.cpp/llava/llava-quantize.o \
o/$(MODE)/llama.cpp/llava/llava.a \
o/$(MODE)/llama.cpp/llava/llava-quantize: \
o/$(MODE)/llama.cpp/llava/llava-quantize.o \
o/$(MODE)/llama.cpp/llava/llava-quantize.1.asc.zip.o \
o/$(MODE)/llama.cpp/llava/llava.a \
o/$(MODE)/llama.cpp/llama.cpp.a

.PHONY: o/$(MODE)/llama.cpp/llava
o/$(MODE)/llama.cpp/llava: \
o/$(MODE)/llama.cpp/llava/llava.a \
o/$(MODE)/llama.cpp/llava: \
o/$(MODE)/llama.cpp/llava/llava.a \
o/$(MODE)/llama.cpp/llava/llava-quantize
4 changes: 2 additions & 2 deletions llama.cpp/llava/llava-quantize.1
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.Dd December 5, 2023
.Dt llava-quantize 1
.Os
.Dt LLAVA-QUANTIZE 1
.Os Llamafile Manual
.Sh NAME
.Nm llava-quantize
.Nd CLIP model quantizer
Expand Down
46 changes: 46 additions & 0 deletions llama.cpp/llava/llava-quantize.1.asc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
LLAVA-QUANTIZE(1) General Commands Manual LLAVA-QUANTIZE(1)

NAME
llava-quantize - CLIP model quantizer

SYNOPSIS
llava-quantize [options...] INPUT OUTPUT FORMAT

DESCRIPTION
llava-quantize makes LLaVA mmproj files smaller.

ARGUMENTS
The following positional arguments are accepted:

INPUT Is the input file, which should be a CLIP model in the GGUF
format using float16 values.

OUTPUT Is the output file, which will be a CLIP model in the GGUF format
using the desired number type.

FORMAT Is the desired quantization format, which may be the integer id
of a supported quantization type. See the quantization types
section below for acceptable formats.

OPTIONS
The following options are accepted:

-h, --help
Show help message and exit.

--version
Print llamafile version.

QUANTIZATION TYPES
The following quantization types are available:

- 2 is Q4_0
- 3 is Q4_1
- 6 is Q5_0
- 7 is Q5_1
- 8 is Q8_0

SEE ALSO
llamafile(1)

Llamafile Manual December 5, 2023 Llamafile Manual
26 changes: 15 additions & 11 deletions llama.cpp/llava/llava-quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,27 @@
#include <string.h>

int main(int argc, char *argv[]) {
if (argc == 2 && !strcmp(argv[1], "--version")) {
printf("llamafile v" LLAMAFILE_VERSION_STRING " llava-quantize\n");
exit(0);

if (llamafile_has(argv, "--version")) {
puts("llava-quantize v" LLAMAFILE_VERSION_STRING);
return 0;
}

if (llamafile_has(argv, "-h") ||
llamafile_has(argv, "-help") ||
llamafile_has(argv, "--help")) {
llamafile_help("/zip/llama.cpp/llava/llava-quantize.1.asc");
__builtin_unreachable();
}

llamafile_init();
llamafile_check_cpu();

if (argc != 4) {
fprintf(stderr,
"Usage: %s INPUT OUTPUT FORMAT\n"
" - 2 is Q4_0\n"
" - 3 is Q4_1\n"
" - 6 is Q5_0\n"
" - 7 is Q5_1\n"
" - 8 is Q8_0\n",
argv[0]);
fprintf(stderr, "%s: missing argument\n", argv[0]);
return 1;
}

if (!clip_model_quantize(argv[1], argv[2], atoi(argv[3]))) {
exit(1);
}
Expand Down
6 changes: 1 addition & 5 deletions llama.cpp/main/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@ o/$(MODE)/llama.cpp/main/main: \
o/$(MODE)/llama.cpp/server/server.a \
o/$(MODE)/llama.cpp/llava/llava.a \
o/$(MODE)/llama.cpp/llama.cpp.a \
o/$(MODE)/llama.cpp/main/main.1.asc.zip.o \
o/$(MODE)/llamafile/llamafile.1.asc.zip.o \
$(LLAMA_CPP_SERVER_ASSETS:%=o/$(MODE)/%.zip.o)

llama.cpp/main/main.1.asc: llama.cpp/main/main.1
-man $< >$@.tmp && mv -f $@.tmp $@
@rm -f $@.tmp

.PHONY: o/$(MODE)/llama.cpp/main
o/$(MODE)/llama.cpp/main: \
o/$(MODE)/llama.cpp/main/main
46 changes: 20 additions & 26 deletions llama.cpp/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,47 +99,41 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v
LOG_TEE("%s", text);
}

// Reports whether `arg` occurs verbatim among the command-line arguments.
// argv[0] (the program name) is never considered; only indices 1..argc-1
// are compared, using an exact strcmp() match.
static bool has_argument(int argc, char ** argv, const char * arg) {
    int idx = 1;
    while (idx < argc) {
        if (strcmp(argv[idx], arg) == 0) {
            return true;
        }
        ++idx;
    }
    return false;
}

int main(int argc, char ** argv) {
llamafile_init();
llamafile_check_cpu();
ShowCrashReports();
LoadZipArgs(&argc, &argv);

if (has_argument(argc, argv, "--version")) {
printf("llamafile v" LLAMAFILE_VERSION_STRING "\n");
if (llamafile_has(argv, "--version")) {
puts("llamafile v" LLAMAFILE_VERSION_STRING);
return 0;
}

if (has_argument(argc, argv, "--help")) {
llamafile_help("/zip/llama.cpp/main/main.1.asc");
if (llamafile_has(argv, "-h") ||
llamafile_has(argv, "-help") ||
llamafile_has(argv, "--help")) {
llamafile_help("/zip/llamafile/llamafile.1.asc");
__builtin_unreachable();
}

llamafile_init();
llamafile_check_cpu();
ShowCrashReports();
LoadZipArgs(&argc, &argv);

if (!IsXnuSilicon() &&
(!has_argument(argc, argv, "-ngl") &&
!has_argument(argc, argv, "--gpu-layers") &&
!has_argument(argc, argv, "--n-gpu-layers"))) {
(!llamafile_has(argv, "-ngl") &&
!llamafile_has(argv, "--gpu-layers") &&
!llamafile_has(argv, "--n-gpu-layers"))) {
FLAG_gpu = LLAMAFILE_GPU_DISABLE;
}

if (!has_argument(argc, argv, "--cli") &&
(has_argument(argc, argv, "--server") ||
(!has_argument(argc, argv, "-p") &&
!has_argument(argc, argv, "-f") &&
!has_argument(argc, argv, "--random-prompt")))) {
if (!llamafile_has(argv, "--cli") &&
(llamafile_has(argv, "--server") ||
(!llamafile_has(argv, "-p") &&
!llamafile_has(argv, "-f") &&
!llamafile_has(argv, "--random-prompt")))) {
return server_cli(argc, argv);
}

if (has_argument(argc, argv, "--image")) {
if (llamafile_has(argv, "--image")) {
return llava_cli(argc, argv);
}

Expand Down
7 changes: 4 additions & 3 deletions llama.cpp/perplexity/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ LLAMA_CPP_PERPLEXITY_SRCS = $(filter %.cpp,$(LLAMA_CPP_PERPLEXITY_FILES))
LLAMA_CPP_PERPLEXITY_OBJS = $(LLAMA_CPP_PERPLEXITY_SRCS:%.cpp=o/$(MODE)/%.o)

.PHONY: o/$(MODE)/llama.cpp/perplexity
o/$(MODE)/llama.cpp/perplexity: \
o/$(MODE)/llama.cpp/perplexity: \
o/$(MODE)/llama.cpp/perplexity/perplexity

o/$(MODE)/llama.cpp/perplexity/perplexity: \
o/$(MODE)/llama.cpp/perplexity/perplexity.o \
o/$(MODE)/llama.cpp/perplexity/perplexity: \
o/$(MODE)/llama.cpp/perplexity/perplexity.o \
o/$(MODE)/llama.cpp/perplexity/perplexity.1.asc.zip.o \
o/$(MODE)/llama.cpp/llama.cpp.a
4 changes: 2 additions & 2 deletions llama.cpp/perplexity/perplexity.1
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.Dd December 5, 2023
.Dt llamafile-perplexity 1
.Os
.Dt LLAMAFILE-PERPLEXITY 1
.Os Llamafile Manual
.Sh NAME
.Nm llamafile-perplexity
.Nd LLM benchmarking tool
Expand Down
35 changes: 35 additions & 0 deletions llama.cpp/perplexity/perplexity.1.asc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
LLAMAFILE-PERPLEXITY(1) General Commands Manual LLAMAFILE-PERPLEXITY(1)

NAME
llamafile-perplexity - LLM benchmarking tool

SYNOPSIS
llamafile-perplexity [flags...]

DESCRIPTION
llamafile-perplexity can be used to gauge the quality of an LLM
implementation.

OPTIONS
The following options are available:

-h, --help
Show help message and exit.

-m FNAME, --model FNAME
Model path (default: models/7B/ggml-model-f16.gguf)

-f FNAME, --file FNAME
Raw data input file.

-t N, --threads N
Number of threads to use during generation (default: nproc/2)

-s SEED, --seed SEED
Random Number Generator (RNG) seed (default: -1, use random seed
for < 0)

SEE ALSO
llamafile(1)

Llamafile Manual December 5, 2023 Llamafile Manual
14 changes: 13 additions & 1 deletion llama.cpp/perplexity/perplexity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -680,11 +680,23 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
}

int main(int argc, char ** argv) {
gpt_params params;

if (llamafile_has(argv, "--version")) {
puts("llamafile-perplexity v" LLAMAFILE_VERSION_STRING);
return 0;
}

if (llamafile_has(argv, "-h") ||
llamafile_has(argv, "-help") ||
llamafile_has(argv, "--help")) {
llamafile_help("/zip/llama.cpp/perplexity/perplexity.1.asc");
__builtin_unreachable();
}

llamafile_init();
llamafile_check_cpu();

gpt_params params;
params.n_batch = 512;
if (!gpt_params_parse(argc, argv, params)) {
return 1;
Expand Down
7 changes: 4 additions & 3 deletions llama.cpp/quantize/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ LLAMA_CPP_QUANTIZE_SRCS = $(filter %.cpp,$(LLAMA_CPP_QUANTIZE_FILES))
LLAMA_CPP_QUANTIZE_OBJS = $(LLAMA_CPP_QUANTIZE_SRCS:%.cpp=o/$(MODE)/%.o)

.PHONY: o/$(MODE)/llama.cpp/quantize
o/$(MODE)/llama.cpp/quantize: \
o/$(MODE)/llama.cpp/quantize: \
o/$(MODE)/llama.cpp/quantize/quantize

o/$(MODE)/llama.cpp/quantize/quantize: \
o/$(MODE)/llama.cpp/quantize/quantize.o \
o/$(MODE)/llama.cpp/quantize/quantize: \
o/$(MODE)/llama.cpp/quantize/quantize.o \
o/$(MODE)/llama.cpp/quantize/quantize.1.asc.zip.o \
o/$(MODE)/llama.cpp/llama.cpp.a
4 changes: 2 additions & 2 deletions llama.cpp/quantize/quantize.1
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.Dd December 5, 2023
.Dt llamafile-quantize 1
.Os
.Dt LLAMAFILE-QUANTIZE 1
.Os Llamafile Manual
.Sh NAME
.Nm llamafile-quantize
.Nd large language model quantizer
Expand Down
Loading

0 comments on commit 156f0a6

Please sign in to comment.