diff --git a/Makefile b/Makefile
index 69543a1ab1..614ac39d4c 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ o/$(MODE)/: o/$(MODE)/llama.cpp o/$(MODE)/llamafile
 # for installing to `make PREFIX=/usr/local`
 .PHONY: install
 install:	llamafile/zipalign.1			\
-		llama.cpp/main/main.1			\
+		llamafile/llamafile.1			\
 		llama.cpp/quantize/quantize.1		\
 		llama.cpp/perplexity/perplexity.1	\
 		llama.cpp/llava/llava-quantize.1	\
@@ -40,7 +40,7 @@ install:	llamafile/zipalign.1 \
 	$(INSTALL) o/$(MODE)/llama.cpp/llava/llava-quantize $(PREFIX)/bin/llava-quantize
 	mkdir -p $(PREFIX)/share/man/man1
 	$(INSTALL) -m 0644 llamafile/zipalign.1 $(PREFIX)/share/man/man1/zipalign.1
-	$(INSTALL) -m 0644 llama.cpp/main/main.1 $(PREFIX)/share/man/man1/llamafile.1
+	$(INSTALL) -m 0644 llamafile/llamafile.1 $(PREFIX)/share/man/man1/llamafile.1
 	$(INSTALL) -m 0644 llama.cpp/quantize/quantize.1 $(PREFIX)/share/man/man1/llamafile-quantize.1
 	$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
 	$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1
diff --git a/build/rules.mk b/build/rules.mk
index 4ddfc212af..c33522765e 100644
--- a/build/rules.mk
+++ b/build/rules.mk
@@ -23,6 +23,11 @@ o/$(MODE)/%.o: %.cpp $(COSMOCC)
 o/$(MODE)/%: o/$(MODE)/%.o
 	$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
 
+.PRECIOUS: %.1.asc
+%.1.asc: %.1
+	-man $< >$@.tmp && mv -f $@.tmp $@
+	@rm -f $@.tmp
+
 o/$(MODE)/%.zip.o: % $(COSMOCC)
 	@mkdir -p $(dir $@)/.aarch64
 	$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a x86_64 -o $@ $<
diff --git a/llama.cpp/llava/BUILD.mk b/llama.cpp/llava/BUILD.mk
index 0451bcf2d2..4308066cc9 100644
--- a/llama.cpp/llava/BUILD.mk
+++ b/llama.cpp/llava/BUILD.mk
@@ -10,15 +10,16 @@ LLAMA_CPP_LLAVA_OBJS = $(LLAMA_CPP_LLAVA_SRCS:%.cpp=o/$(MODE)/%.o)
 
 .PHONY: tool/args/args.h
 
-o/$(MODE)/llama.cpp/llava/llava.a:				\
+o/$(MODE)/llama.cpp/llava/llava.a:					\
 		$(LLAMA_CPP_LLAVA_OBJS)
 
-o/$(MODE)/llama.cpp/llava/llava-quantize:			\
-		o/$(MODE)/llama.cpp/llava/llava-quantize.o	\
-		o/$(MODE)/llama.cpp/llava/llava.a		\
+o/$(MODE)/llama.cpp/llava/llava-quantize:				\
+		o/$(MODE)/llama.cpp/llava/llava-quantize.o		\
+		o/$(MODE)/llama.cpp/llava/llava-quantize.1.asc.zip.o	\
+		o/$(MODE)/llama.cpp/llava/llava.a			\
 		o/$(MODE)/llama.cpp/llama.cpp.a
 
 .PHONY: o/$(MODE)/llama.cpp/llava
-o/$(MODE)/llama.cpp/llava:					\
-		o/$(MODE)/llama.cpp/llava/llava.a		\
+o/$(MODE)/llama.cpp/llava:						\
+		o/$(MODE)/llama.cpp/llava/llava.a			\
 		o/$(MODE)/llama.cpp/llava/llava-quantize
diff --git a/llama.cpp/llava/llava-quantize.1 b/llama.cpp/llava/llava-quantize.1
index 3c39a1040d..dac4ada629 100644
--- a/llama.cpp/llava/llava-quantize.1
+++ b/llama.cpp/llava/llava-quantize.1
@@ -1,6 +1,6 @@
 .Dd December 5, 2023
-.Dt llava-quantize 1
-.Os
+.Dt LLAVA-QUANTIZE 1
+.Os Llamafile Manual
 .Sh NAME
 .Nm llava-quantize
 .Nd CLIP model quantizer
diff --git a/llama.cpp/llava/llava-quantize.1.asc b/llama.cpp/llava/llava-quantize.1.asc
new file mode 100644
index 0000000000..3c242974ae
--- /dev/null
+++ b/llama.cpp/llava/llava-quantize.1.asc
@@ -0,0 +1,46 @@
+LLAVA-QUANTIZE(1)           General Commands Manual          LLAVA-QUANTIZE(1)
+
+NNAAMMEE
+     llllaavvaa--qquuaannttiizzee - CLIP model quantizer
+
+SSYYNNOOPPSSIISS
+     llllaavvaa--qquuaannttiizzee [options...] _I_N_P_U_T _O_U_T_P_U_T _F_O_R_M_A_T
+
+DDEESSCCRRIIPPTTIIOONN
+     llllaavvaa--qquuaannttiizzee makes LLaVA mmproj files smaller.
+
+AARRGGUUMMEENNTTSS
+     The following positional arguments are accepted:
+
+     _I_N_P_U_T  Is the input file, which should be a CLIP model in the GGUF
+            format using float16 values.
+
+     _O_U_T_P_U_T Is the output file, which will be a CLIP model in the GGUF format
+            using the desired number type.
+
+     _F_O_R_M_A_T Is the desired quantization format, which may be the integer id
+            of a supported quantization type. See the quantization types
+            section below for acceptable formats.
+
+OOPPTTIIOONNSS
+     The following options are accepted:
+
+     --hh, ----hheellpp
+             Show help message and exit.
+
+     ----vveerrssiioonn
+             Print llamafile version.
+
+QQUUAANNTTIIZZAATTIIOONN TTYYPPEESS
+     The following quantization types are available:
+
+     --   2 is Q4_0
+     --   3 is Q4_1
+     --   6 is Q5_0
+     --   7 is Q5_1
+     --   8 is Q8_0
+
+SSEEEE AALLSSOO
+     llamafile(1)
+
+Llamafile Manual               December 5, 2023               Llamafile Manual
diff --git a/llama.cpp/llava/llava-quantize.cpp b/llama.cpp/llava/llava-quantize.cpp
index a15446e941..7b5350adac 100644
--- a/llama.cpp/llava/llava-quantize.cpp
+++ b/llama.cpp/llava/llava-quantize.cpp
@@ -8,23 +8,27 @@
 #include
 
 int main(int argc, char *argv[]) {
-    if (argc == 2 && !strcmp(argv[1], "--version")) {
-        printf("llamafile v" LLAMAFILE_VERSION_STRING " llava-quantize\n");
-        exit(0);
+
+    if (llamafile_has(argv, "--version")) {
+        puts("llava-quantize v" LLAMAFILE_VERSION_STRING);
+        return 0;
+    }
+
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llama.cpp/llava/llava-quantize.1.asc");
+        __builtin_unreachable();
     }
+
     llamafile_init();
     llamafile_check_cpu();
+
     if (argc != 4) {
-        fprintf(stderr,
-                "Usage: %s INPUT OUTPUT FORMAT\n"
-                "    - 2 is Q4_0\n"
-                "    - 3 is Q4_1\n"
-                "    - 6 is Q5_0\n"
-                "    - 7 is Q5_1\n"
-                "    - 8 is Q8_0\n",
-                argv[0]);
+        fprintf(stderr, "%s: missing argument\n", argv[0]);
         return 1;
     }
+
     if (!clip_model_quantize(argv[1], argv[2], atoi(argv[3]))) {
         exit(1);
     }
diff --git a/llama.cpp/main/BUILD.mk b/llama.cpp/main/BUILD.mk
index 6e255e06c1..13e63c7240 100644
--- a/llama.cpp/main/BUILD.mk
+++ b/llama.cpp/main/BUILD.mk
@@ -13,13 +13,9 @@ o/$(MODE)/llama.cpp/main/main:				\
 		o/$(MODE)/llama.cpp/server/server.a		\
 		o/$(MODE)/llama.cpp/llava/llava.a		\
 		o/$(MODE)/llama.cpp/llama.cpp.a			\
-		o/$(MODE)/llama.cpp/main/main.1.asc.zip.o	\
+		o/$(MODE)/llamafile/llamafile.1.asc.zip.o	\
 		$(LLAMA_CPP_SERVER_ASSETS:%=o/$(MODE)/%.zip.o)
 
-llama.cpp/main/main.1.asc: llama.cpp/main/main.1
-	-man $< >$@.tmp && mv -f $@.tmp $@
-	@rm -f $@.tmp
-
 .PHONY: o/$(MODE)/llama.cpp/main
 o/$(MODE)/llama.cpp/main:					\
 		o/$(MODE)/llama.cpp/main/main
diff --git a/llama.cpp/main/main.cpp b/llama.cpp/main/main.cpp
index 1fea2e9e37..b4b6334d99 100644
--- a/llama.cpp/main/main.cpp
+++ b/llama.cpp/main/main.cpp
@@ -99,47 +99,41 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, void * user_data) {
     LOG_TEE("%s", text);
 }
 
-static bool has_argument(int argc, char ** argv, const char * arg) {
-    for (int i = 1; i < argc; ++i) {
-        if (!strcmp(argv[i], arg)) {
-            return true;
-        }
-    }
-    return false;
-}
-
 int main(int argc, char ** argv) {
-    llamafile_init();
-    llamafile_check_cpu();
-    ShowCrashReports();
-    LoadZipArgs(&argc, &argv);
 
-    if (has_argument(argc, argv, "--version")) {
-        printf("llamafile v" LLAMAFILE_VERSION_STRING "\n");
+    if (llamafile_has(argv, "--version")) {
+        puts("llamafile v" LLAMAFILE_VERSION_STRING);
         return 0;
     }
 
-    if (has_argument(argc, argv, "--help")) {
-        llamafile_help("/zip/llama.cpp/main/main.1.asc");
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llamafile/llamafile.1.asc");
         __builtin_unreachable();
     }
 
+    llamafile_init();
+    llamafile_check_cpu();
+    ShowCrashReports();
+    LoadZipArgs(&argc, &argv);
+
     if (!IsXnuSilicon() &&
-        (!has_argument(argc, argv, "-ngl") &&
-         !has_argument(argc, argv, "--gpu-layers") &&
-         !has_argument(argc, argv, "--n-gpu-layers"))) {
+        (!llamafile_has(argv, "-ngl") &&
+         !llamafile_has(argv, "--gpu-layers") &&
+         !llamafile_has(argv, "--n-gpu-layers"))) {
         FLAG_gpu = LLAMAFILE_GPU_DISABLE;
     }
 
-    if (!has_argument(argc, argv, "--cli") &&
-        (has_argument(argc, argv, "--server") ||
-         (!has_argument(argc, argv, "-p") &&
-          !has_argument(argc, argv, "-f") &&
-          !has_argument(argc, argv, "--random-prompt")))) {
+    if (!llamafile_has(argv, "--cli") &&
+        (llamafile_has(argv, "--server") ||
+         (!llamafile_has(argv, "-p") &&
+          !llamafile_has(argv, "-f") &&
+          !llamafile_has(argv, "--random-prompt")))) {
         return server_cli(argc, argv);
     }
 
-    if (has_argument(argc, argv, "--image")) {
+    if (llamafile_has(argv, "--image")) {
         return llava_cli(argc, argv);
     }
diff --git a/llama.cpp/perplexity/BUILD.mk b/llama.cpp/perplexity/BUILD.mk
index fa8987b788..b64258800a 100644
--- a/llama.cpp/perplexity/BUILD.mk
+++ b/llama.cpp/perplexity/BUILD.mk
@@ -9,9 +9,10 @@ LLAMA_CPP_PERPLEXITY_SRCS = $(filter %.cpp,$(LLAMA_CPP_PERPLEXITY_FILES))
 LLAMA_CPP_PERPLEXITY_OBJS = $(LLAMA_CPP_PERPLEXITY_SRCS:%.cpp=o/$(MODE)/%.o)
 
 .PHONY: o/$(MODE)/llama.cpp/perplexity
-o/$(MODE)/llama.cpp/perplexity:				\
+o/$(MODE)/llama.cpp/perplexity:					\
 		o/$(MODE)/llama.cpp/perplexity/perplexity
 
-o/$(MODE)/llama.cpp/perplexity/perplexity:		\
-		o/$(MODE)/llama.cpp/perplexity/perplexity.o	\
+o/$(MODE)/llama.cpp/perplexity/perplexity:			\
+		o/$(MODE)/llama.cpp/perplexity/perplexity.o	\
+		o/$(MODE)/llama.cpp/perplexity/perplexity.1.asc.zip.o \
 		o/$(MODE)/llama.cpp/llama.cpp.a
diff --git a/llama.cpp/perplexity/perplexity.1 b/llama.cpp/perplexity/perplexity.1
index 68fcdf47ca..f53ee5dcb6 100644
--- a/llama.cpp/perplexity/perplexity.1
+++ b/llama.cpp/perplexity/perplexity.1
@@ -1,6 +1,6 @@
 .Dd December 5, 2023
-.Dt llamafile-perplexity 1
-.Os
+.Dt LLAMAFILE-PERPLEXITY 1
+.Os Llamafile Manual
 .Sh NAME
 .Nm llamafile-perplexity
 .Nd LLM benchmarking tool
diff --git a/llama.cpp/perplexity/perplexity.1.asc b/llama.cpp/perplexity/perplexity.1.asc
new file mode 100644
index 0000000000..1962e774bd
--- /dev/null
+++ b/llama.cpp/perplexity/perplexity.1.asc
@@ -0,0 +1,35 @@
+LLAMAFILE-PERPLEXITY(1)     General Commands Manual    LLAMAFILE-PERPLEXITY(1)
+
+NNAAMMEE
+     llllaammaaffiillee--ppeerrpplleexxiittyy - LLM benchmarking tool
+
+SSYYNNOOPPSSIISS
+     llllaammaaffiillee--ppeerrpplleexxiittyy [flags...]
+
+DDEESSCCRRIIPPTTIIOONN
+     llllaammaaffiillee--ppeerrpplleexxiittyy can be used to gauge the quality of an LLM
+     implementation.
+
+OOPPTTIIOONNSS
+     The following options are available:
+
+     --hh, ----hheellpp
+             Show help message and exit.
+
+     --mm _F_N_A_M_E, ----mmooddeell _F_N_A_M_E
+             Model path (default: models/7B/ggml-model-f16.gguf)
+
+     --ff _F_N_A_M_E, ----ffiillee _F_N_A_M_E
+             Raw data input file.
+
+     --tt _N, ----tthhrreeaaddss _N
+             Number of threads to use during generation (default: nproc/2)
+
+     --ss _S_E_E_D, ----sseeeedd _S_E_E_D
+             Random Number Generator (RNG) seed (default: -1, use random seed
+             for < 0)
+
+SSEEEE AALLSSOO
+     llamafile(1)
+
+Llamafile Manual               December 5, 2023               Llamafile Manual
diff --git a/llama.cpp/perplexity/perplexity.cpp b/llama.cpp/perplexity/perplexity.cpp
index 8d91db37e6..161e855174 100644
--- a/llama.cpp/perplexity/perplexity.cpp
+++ b/llama.cpp/perplexity/perplexity.cpp
@@ -680,11 +680,23 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_params params;
+
+    if (llamafile_has(argv, "--version")) {
+        puts("llamafile-perplexity v" LLAMAFILE_VERSION_STRING);
+        return 0;
+    }
+
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llama.cpp/perplexity/perplexity.1.asc");
+        __builtin_unreachable();
+    }
 
     llamafile_init();
     llamafile_check_cpu();
+    gpt_params params;
     params.n_batch = 512;
     if (!gpt_params_parse(argc, argv, params)) {
         return 1;
diff --git a/llama.cpp/quantize/BUILD.mk b/llama.cpp/quantize/BUILD.mk
index ec2bff1054..fb58d809df 100644
--- a/llama.cpp/quantize/BUILD.mk
+++ b/llama.cpp/quantize/BUILD.mk
@@ -9,9 +9,10 @@ LLAMA_CPP_QUANTIZE_SRCS = $(filter %.cpp,$(LLAMA_CPP_QUANTIZE_FILES))
 LLAMA_CPP_QUANTIZE_OBJS = $(LLAMA_CPP_QUANTIZE_SRCS:%.cpp=o/$(MODE)/%.o)
 
 .PHONY: o/$(MODE)/llama.cpp/quantize
-o/$(MODE)/llama.cpp/quantize:				\
+o/$(MODE)/llama.cpp/quantize:					\
 		o/$(MODE)/llama.cpp/quantize/quantize
 
-o/$(MODE)/llama.cpp/quantize/quantize:			\
-		o/$(MODE)/llama.cpp/quantize/quantize.o	\
+o/$(MODE)/llama.cpp/quantize/quantize:				\
+		o/$(MODE)/llama.cpp/quantize/quantize.o		\
+		o/$(MODE)/llama.cpp/quantize/quantize.1.asc.zip.o \
 		o/$(MODE)/llama.cpp/llama.cpp.a
diff --git a/llama.cpp/quantize/quantize.1 b/llama.cpp/quantize/quantize.1
index 50068d0e4d..1afe57e458 100644
--- a/llama.cpp/quantize/quantize.1
+++ b/llama.cpp/quantize/quantize.1
@@ -1,6 +1,6 @@
 .Dd December 5, 2023
-.Dt llamafile-quantize 1
-.Os
+.Dt LLAMAFILE-QUANTIZE 1
+.Os Llamafile Manual
 .Sh NAME
 .Nm llamafile-quantize
 .Nd large language model quantizer
diff --git a/llama.cpp/quantize/quantize.1.asc b/llama.cpp/quantize/quantize.1.asc
new file mode 100644
index 0000000000..cb975dbdb8
--- /dev/null
+++ b/llama.cpp/quantize/quantize.1.asc
@@ -0,0 +1,76 @@
+LLAMAFILE-QUANTIZE(1)       General Commands Manual      LLAMAFILE-QUANTIZE(1)
+
+NNAAMMEE
+     llllaammaaffiillee--qquuaannttiizzee - large language model quantizer
+
+SSYYNNOOPPSSIISS
+     llllaammaaffiillee--qquuaannttiizzee [flags...] _m_o_d_e_l_-_f_3_2_._g_g_u_f [_m_o_d_e_l_-_q_u_a_n_t_._g_g_u_f] _t_y_p_e
+     [_n_t_h_r_e_a_d_s]
+
+DDEESSCCRRIIPPTTIIOONN
+     llllaammaaffiillee--qquuaannttiizzee converts large language model weights from the float32
+     or float16 formats into smaller data types from 2 to 8 bits in size.
+
+OOPPTTIIOONNSS
+     The following flags are available:
+
+     ----aallllooww--rreeqquuaannttiizzee
+             Allows requantizing tensors that have already been quantized.
+             Warning: This can severely reduce quality compared to quantizing
+             from 16-bit or 32-bit.
+
+     ----lleeaavvee--oouuttppuutt--tteennssoorr
+             Will leave output.weight un(re)quantized.
+             Increases model size but may also increase quality, especially
+             when requantizing
+
+     ----ppuurree  Disable k-quant mixtures and quantize all tensors to the same
+             type
+
+AARRGGUUMMEENNTTSS
+     The following positional arguments are accepted:
+
+     _m_o_d_e_l_-_f_3_2_._g_g_u_f
+             Is the input file, which contains the unquantized model weights
+             in either the float32 or float16 format.
+
+     _m_o_d_e_l_-_q_u_a_n_t_._g_g_u_f
+             Is the output file, which will contain quantized weights in the
+             desired format. If this path isn't specified, it'll default to
+             [inp path]/ggml-model-[ftype].gguf.
+
+     _t_y_p_e    Is the desired quantization format, which may be the integer id
+             of a supported quantization type, or its name. See the
+             quantization types section below for acceptable formats.
+
+     _n_t_h_r_e_a_d_s
+             Number of threads to use during computation (default: nproc/2)
+
+QQUUAANNTTIIZZAATTIIOONN TTYYPPEESS
+     The following quantization types are available:
+
+     --    2  Q4_0    3.56G  +0.2166 ppl @ LLaMA-v1-7B
+     --    3  Q4_1    3.90G  +0.1585 ppl @ LLaMA-v1-7B
+     --    8  Q5_0    4.33G  +0.0683 ppl @ LLaMA-v1-7B
+     --    9  Q5_1    4.70G  +0.0349 ppl @ LLaMA-v1-7B
+     --   10  Q2_K    2.63G  +0.6717 ppl @ LLaMA-v1-7B
+     --   12  Q3_K    alias for Q3_K_M
+     --   11  Q3_K_S  2.75G  +0.5551 ppl @ LLaMA-v1-7B
+     --   12  Q3_K_M  3.07G  +0.2496 ppl @ LLaMA-v1-7B
+     --   13  Q3_K_L  3.35G  +0.1764 ppl @ LLaMA-v1-7B
+     --   15  Q4_K    alias for Q4_K_M
+     --   14  Q4_K_S  3.59G  +0.0992 ppl @ LLaMA-v1-7B
+     --   15  Q4_K_M  3.80G  +0.0532 ppl @ LLaMA-v1-7B
+     --   17  Q5_K    alias for Q5_K_M
+     --   16  Q5_K_S  4.33G  +0.0400 ppl @ LLaMA-v1-7B
+     --   17  Q5_K_M  4.45G  +0.0122 ppl @ LLaMA-v1-7B
+     --   18  Q6_K    5.15G  -0.0008 ppl @ LLaMA-v1-7B
+     --    7  Q8_0    6.70G  +0.0004 ppl @ LLaMA-v1-7B
+     --    1  F16    13.00G  @ 7B
+     --    0  F32    26.00G  @ 7B
+     --  COPY  Only copy tensors, no quantizing.
+
+SSEEEE AALLSSOO
+     llamafile(1), llamafile-perplexity(1), llava-quantize(1), zipalign(1),
+     unzip(1)
+
+Llamafile Manual               December 5, 2023               Llamafile Manual
diff --git a/llama.cpp/quantize/quantize.cpp b/llama.cpp/quantize/quantize.cpp
index 3ab628a7ca..b46d3b18c7 100644
--- a/llama.cpp/quantize/quantize.cpp
+++ b/llama.cpp/quantize/quantize.cpp
@@ -96,9 +96,17 @@ static void usage(const char * executable) {
 }
 
 int main(int argc, char ** argv) {
-    if (argc == 2 && !strcmp(argv[1], "--version")) {
-        printf("llamafile v" LLAMAFILE_VERSION_STRING " quantize\n");
-        exit(0);
+
+    if (llamafile_has(argv, "--version")) {
+        puts("llamafile-quantize v" LLAMAFILE_VERSION_STRING);
+        return 0;
+    }
+
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llama.cpp/quantize/quantize.1.asc");
+        __builtin_unreachable();
     }
 
     llamafile_init();
diff --git a/llamafile/BUILD.mk b/llamafile/BUILD.mk
index 28c7d5750e..a3cf257f79 100644
--- a/llamafile/BUILD.mk
+++ b/llamafile/BUILD.mk
@@ -6,13 +6,17 @@ PKGS += LLAMAFILE
 LLAMAFILE_FILES := $(wildcard llamafile/*.*)
 LLAMAFILE_HDRS = $(filter %.h,$(LLAMAFILE_FILES))
 LLAMAFILE_SRCS = $(filter %.c,$(LLAMAFILE_FILES))
+LLAMAFILE_DOCS = $(filter %.1,$(LLAMAFILE_FILES))
 
 LLAMAFILE_OBJS =					\
 	$(LLAMAFILE_SRCS:%.c=o/$(MODE)/%.o)		\
 	$(LLAMAFILE_FILES:%=o/$(MODE)/%.zip.o)
 
 o/$(MODE)/llamafile/zipalign:				\
-		o/$(MODE)/llamafile/zipalign.o
+		o/$(MODE)/llamafile/zipalign.o		\
+		o/$(MODE)/llamafile/help.o		\
+		o/$(MODE)/llamafile/has.o		\
+		o/$(MODE)/llamafile/zipalign.1.asc.zip.o
 
 o/$(MODE)/llamafile/zipcheck:				\
 		o/$(MODE)/llamafile/zipcheck.o		\
diff --git a/llamafile/has.c b/llamafile/has.c
new file mode 100644
index 0000000000..df882f0b81
--- /dev/null
+++ b/llamafile/has.c
@@ -0,0 +1,28 @@
+// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
+// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
+//
+// Copyright 2024 Mozilla Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "llamafile.h"
+#include <string.h>
+
+bool llamafile_has(char **a, const char *x) {
+    for (int i = 0; a[i]; ++i) {
+        if (!strcmp(a[i], x)) {
+            return true;
+        }
+    }
+    return false;
+}
diff --git a/llama.cpp/main/main.1 b/llamafile/llamafile.1
similarity index 97%
rename from llama.cpp/main/main.1
rename to llamafile/llamafile.1
index 579d0d438f..5517161198 100644
--- a/llama.cpp/main/main.1
+++ b/llamafile/llamafile.1
@@ -1,3 +1,16 @@
+.\" Copyright 2024 Mozilla Foundation
+.\"
+.\" Licensed under the Apache License, Version 2.0 (the "License");
+.\" you may not use this file except in compliance with the License.
+.\" You may obtain a copy of the License at
+.\"
+.\"     http://www.apache.org/licenses/LICENSE-2.0
+.\"
+.\" Unless required by applicable law or agreed to in writing, software
+.\" distributed under the License is distributed on an "AS IS" BASIS,
+.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+.\" See the License for the specific language governing permissions and +.\" limitations under the License. .Dd January 1, 2024 .Dt LLAMAFILE 1 .Os Mozilla Ocho diff --git a/llama.cpp/main/main.1.asc b/llamafile/llamafile.1.asc similarity index 100% rename from llama.cpp/main/main.1.asc rename to llamafile/llamafile.1.asc diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h index 62524fab7d..a401a7df33 100644 --- a/llamafile/llamafile.h +++ b/llamafile/llamafile.h @@ -21,6 +21,7 @@ void llamafile_init(void); void llamafile_check_cpu(void); void llamafile_help(const char *); const char *llamafile_get_tmp_dir(void); +bool llamafile_has(char **, const char *); bool llamafile_extract(const char *, const char *); int llamafile_is_file_newer_than(const char *, const char *); void llamafile_schlep(const void *, size_t); diff --git a/llamafile/metal.c b/llamafile/metal.c index 1ffb82aa31..022cfc9247 100644 --- a/llamafile/metal.c +++ b/llamafile/metal.c @@ -28,6 +28,7 @@ #include #include #include +#include "llamafile/log.h" #include "llama.cpp/ggml-metal.h" __static_yoink("llama.cpp/ggml.h"); diff --git a/llamafile/zipalign.1 b/llamafile/zipalign.1 index cd6bfc0afe..91b57c0435 100644 --- a/llamafile/zipalign.1 +++ b/llamafile/zipalign.1 @@ -1,6 +1,19 @@ +.\" Copyright 2023 Mozilla Foundation +.\" +.\" Licensed under the Apache License, Version 2.0 (the "License"); +.\" you may not use this file except in compliance with the License. +.\" You may obtain a copy of the License at +.\" +.\" http://www.apache.org/licenses/LICENSE-2.0 +.\" +.\" Unless required by applicable law or agreed to in writing, software +.\" distributed under the License is distributed on an "AS IS" BASIS, +.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.\" See the License for the specific language governing permissions and +.\" limitations under the License. .Dd December 5, 2023 -.Dt zipalign 1 -.Os +.Dt ZIPALIGN 1 +.Os Llamafile Manual .Sh NAME .Nm zipalign .Nd PKZIP for LLMs diff --git a/llamafile/zipalign.1.asc b/llamafile/zipalign.1.asc new file mode 100644 index 0000000000..91bf36aff1 --- /dev/null +++ b/llamafile/zipalign.1.asc @@ -0,0 +1,73 @@ +ZIPALIGN(1) General Commands Manual ZIPALIGN(1) + +NNAAMMEE + zziippaalliiggnn - PKZIP for LLMs + +SSYYNNOOPPSSIISS + zziippaalliiggnn [FLAG...] _Z_I_P _F_I_L_E_._._. + +DDEESSCCRRIIPPTTIIOONN + zziippaalliiggnn adds aligned uncompressed files to a PKZIP archive. + + This tool is designed to concatenate gigabytes of LLM weights to an + executable. This command goes 10x faster than `zip -j0`. Unlike zip you + are not required to use the .com file extension for it to work. But most + importantly, this tool has a flag that lets you insert zip files that are + aligned on a specific boundary. The result is things like GPUs that have + specific memory alignment requirements will now be able to perform math + directly on the zip file's mmap()'d weights. + + This tool always operates in an append-only manner. Unlike the InfoZIP + zip(1) command, zziippaalliiggnn does not reflow existing assets to shave away + space. For example, if zziippaalliiggnn is used on an existing PKZIP archive to + replace an existing asset, then the bytes for the old revision of the + asset will still be there, along with any alignment gaps that currently + exist in the file between assets. + + The same concept also applies to the central directory listing that's + stored at the end of the file. When changes are made, the old central + directory is left behind as junk data. 
+     Therefore it's important, when adding multiple files to an archive at
+     once, that the files all be passed as arguments in a single invocation,
+     rather than by calling this command multiple times.
+
+OOPPTTIIOONNSS
+     The following options are available:
+
+     --hh      Show help.
+
+     --vv      Operate in verbose mode.
+
+     --NN      Run in nondeterministic mode. This will cause the date/time of
+             inserted assets to reflect the file modified time.
+
+     --aa _I_N_T  Byte alignment for inserted zip assets. This must be a power of
+             two. It defaults to 65536, since that ensures your asset will be
+             page-aligned on all conceivable platforms, both now and in the
+             future.
+
+     --jj      Strip directory components. The filename of each input filepath
+             will be used as the zip asset name. This is otherwise known as
+             the basename. An error will be raised if the same zip asset name
+             ends up being specified multiple times.
+
+     --00      Store zip assets without compression. This is the default. This
+             option must be chosen when adding weights to a llamafile,
+             otherwise it won't be possible to map them into memory. Using --00
+             goes orders of magnitude faster than using --66 compression.
+
+     --66      Store zip assets with sweet-spot compression. Any value between
+             --00 and --99 is accepted as a compression level choice. Using --66
+             will oftentimes go 10x faster than --99 with only a marginal
+             increase in size. Note that decompression speeds are unaffected.
+
+     --99      Store zip assets with maximum compression. This takes a very
+             long time to compress. Decompression will go just as fast. This
+             might be a good idea when publishing archives that'll be widely
+             consumed via the Internet for a long time.
+
+SSEEEE AALLSSOO
+     unzip(1), llamafile(1)
+
+AAUUTTHHOORRSS
+     Justine Alexandra Roberts Tunney
+
+Llamafile Manual               December 5, 2023               Llamafile Manual
diff --git a/llamafile/zipalign.c b/llamafile/zipalign.c
index 64ebaf31ad..cddbcecaaf 100644
--- a/llamafile/zipalign.c
+++ b/llamafile/zipalign.c
@@ -29,27 +29,9 @@
 #include
 #include
 #include
+#include "llamafile.h"
 #include
 
-#define USAGE \
-    " ZIP FILE...\n\
-\n\
-DESCRIPTION\n\
-\n\
-  Adds aligned uncompressed files to PKZIP archive\n\
-\n\
-FLAGS\n\
-\n\
-  -h        help\n\
-  -v        verbose\n\
-  -N        nondeterministic mode\n\
-  -a INT    alignment (default 65536)\n\
-  -j        strip directory components\n\
-  -0        store uncompressed (default)\n\
-  -6        store with faster compression\n\
-  -9        store with maximum compression\n\
-\n"
-
 #define CHUNK 2097152
 
 #define Min(a, b) ((a) < (b) ? (a) : (b))
@@ -96,11 +78,6 @@ static void *Realloc(void *p, size_t n) {
     return p;
 }
 
-static wontreturn void PrintUsage(int fd, int rc) {
-    tinyprint(fd, "SYNOPSIS\n\n  ", prog, USAGE, NULL);
-    exit(rc);
-}
-
 static void GetDosLocalTime(int64_t utcunixts,
                             uint16_t *out_time,
                             uint16_t *out_date) {
@@ -112,13 +89,20 @@ static void GetDosLocalTime(int64_t utcunixts,
 
 int main(int argc, char *argv[]) {
 
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llamafile/zipalign.1.asc");
+        __builtin_unreachable();
+    }
+
     // get name of program
     prog = argv[0];
    	if (!prog) prog = "zipalign";
 
     // parse flags
     int opt;
-    while ((opt = getopt(argc, argv, "0123456789hvjNa:")) != -1) {
+    while ((opt = getopt(argc, argv, "0123456789vjNa:")) != -1) {
         switch (opt) {
         case '0':
         case '1':
@@ -150,10 +134,8 @@ int main(int argc, char *argv[]) {
                 Die(prog, "FLAG_alignment must be two power");
             }
             break;
-        case 'h':
-            PrintUsage(1, 0);
         default:
-            PrintUsage(2, 1);
+            return 1;
         }
     }
     if (optind == argc) {
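
The new llamafile_has() helper replaces the per-program has_argument() scanners
above: it walks the NULL-terminated argv array itself, so argc no longer has to
be threaded through every flag check (note the loop starts at a[0], so the
program name is scanned too). Below is a minimal standalone sketch of that
behavior; the main() driver is hypothetical and not part of this patch, while
llamafile_has() itself is copied from llamafile/has.c above.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    // Scan a NULL-terminated argv-style array for an exact string match.
    static bool llamafile_has(char **a, const char *x) {
        for (int i = 0; a[i]; ++i) {
            if (!strcmp(a[i], x)) {
                return true;
            }
        }
        return false;
    }

    int main(int argc, char **argv) {
        // Mirrors how the tools above dispatch on --version before doing
        // any other initialization. The version string here is made up.
        if (llamafile_has(argv, "--version")) {
            puts("demo v0.0");
            return 0;
        }
        puts("run again with --version");
        return 0;
    }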