diff --git a/Makefile b/Makefile
index 69543a1ab1..614ac39d4c 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ o/$(MODE)/: o/$(MODE)/llama.cpp o/$(MODE)/llamafile
 # for installing to `make PREFIX=/usr/local`
 .PHONY: install
 install:	llamafile/zipalign.1			\
-		llama.cpp/main/main.1			\
+		llamafile/llamafile.1			\
 		llama.cpp/quantize/quantize.1		\
 		llama.cpp/perplexity/perplexity.1	\
 		llama.cpp/llava/llava-quantize.1	\
@@ -40,7 +40,7 @@ install:	llamafile/zipalign.1 \
 	$(INSTALL) o/$(MODE)/llama.cpp/llava/llava-quantize $(PREFIX)/bin/llava-quantize
 	mkdir -p $(PREFIX)/share/man/man1
 	$(INSTALL) -m 0644 llamafile/zipalign.1 $(PREFIX)/share/man/man1/zipalign.1
-	$(INSTALL) -m 0644 llama.cpp/main/main.1 $(PREFIX)/share/man/man1/llamafile.1
+	$(INSTALL) -m 0644 llamafile/llamafile.1 $(PREFIX)/share/man/man1/llamafile.1
 	$(INSTALL) -m 0644 llama.cpp/quantize/quantize.1 $(PREFIX)/share/man/man1/llamafile-quantize.1
 	$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
 	$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1
diff --git a/build/rules.mk b/build/rules.mk
index 4ddfc212af..c33522765e 100644
--- a/build/rules.mk
+++ b/build/rules.mk
@@ -23,6 +23,11 @@ o/$(MODE)/%.o: %.cpp $(COSMOCC)
 o/$(MODE)/%: o/$(MODE)/%.o
 	$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
 
+.PRECIOUS: %.1.asc
+%.1.asc: %.1
+	-man $< >$@.tmp && mv -f $@.tmp $@
+	@rm -f $@.tmp
+
 o/$(MODE)/%.zip.o: % $(COSMOCC)
 	@mkdir -p $(dir $@)/.aarch64
 	$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a x86_64 -o $@ $<
diff --git a/llama.cpp/llava/BUILD.mk b/llama.cpp/llava/BUILD.mk
index 0451bcf2d2..4308066cc9 100644
--- a/llama.cpp/llava/BUILD.mk
+++ b/llama.cpp/llava/BUILD.mk
@@ -10,15 +10,16 @@ LLAMA_CPP_LLAVA_OBJS = $(LLAMA_CPP_LLAVA_SRCS:%.cpp=o/$(MODE)/%.o)
 
 .PHONY: tool/args/args.h
 
-o/$(MODE)/llama.cpp/llava/llava.a:				\
+o/$(MODE)/llama.cpp/llava/llava.a:					\
 		$(LLAMA_CPP_LLAVA_OBJS)
 
-o/$(MODE)/llama.cpp/llava/llava-quantize:			\
-		o/$(MODE)/llama.cpp/llava/llava-quantize.o	\
-		o/$(MODE)/llama.cpp/llava/llava.a		\
+o/$(MODE)/llama.cpp/llava/llava-quantize:				\
+		o/$(MODE)/llama.cpp/llava/llava-quantize.o		\
+		o/$(MODE)/llama.cpp/llava/llava-quantize.1.asc.zip.o	\
+		o/$(MODE)/llama.cpp/llava/llava.a			\
 		o/$(MODE)/llama.cpp/llama.cpp.a
 
 .PHONY: o/$(MODE)/llama.cpp/llava
-o/$(MODE)/llama.cpp/llava:					\
-		o/$(MODE)/llama.cpp/llava/llava.a		\
+o/$(MODE)/llama.cpp/llava:						\
+		o/$(MODE)/llama.cpp/llava/llava.a			\
 		o/$(MODE)/llama.cpp/llava/llava-quantize
diff --git a/llama.cpp/llava/llava-quantize.1 b/llama.cpp/llava/llava-quantize.1
index 3c39a1040d..dac4ada629 100644
--- a/llama.cpp/llava/llava-quantize.1
+++ b/llama.cpp/llava/llava-quantize.1
@@ -1,6 +1,6 @@
 .Dd December 5, 2023
-.Dt llava-quantize 1
-.Os
+.Dt LLAVA-QUANTIZE 1
+.Os Llamafile Manual
 .Sh NAME
 .Nm llava-quantize
 .Nd CLIP model quantizer
diff --git a/llama.cpp/llava/llava-quantize.1.asc b/llama.cpp/llava/llava-quantize.1.asc
new file mode 100644
index 0000000000..3c242974ae
--- /dev/null
+++ b/llama.cpp/llava/llava-quantize.1.asc
@@ -0,0 +1,46 @@
+LLAVA-QUANTIZE(1)           General Commands Manual          LLAVA-QUANTIZE(1)
+
+NNAAMMEE
+     llllaavvaa--qquuaannttiizzee - CLIP model quantizer
+
+SSYYNNOOPPSSIISS
+     llllaavvaa--qquuaannttiizzee [options...] _I_N_P_U_T _O_U_T_P_U_T _F_O_R_M_A_T
+
+DDEESSCCRRIIPPTTIIOONN
+     llllaavvaa--qquuaannttiizzee makes LLaVA mmproj files smaller.
+
+AARRGGUUMMEENNTTSS
+     The following positional arguments are accepted:
+
+     _I_N_P_U_T  Is the input file, which should be a CLIP model in the GGUF
+            format using float16 values.
+
+     _O_U_T_P_U_T Is the output file, which will be a CLIP model in the GGUF format
+            using the desired number type.
+
+     _F_O_R_M_A_T Is the desired quantization format, which may be the integer id
+            of a supported quantization type. See the quantization types
+            section below for acceptable formats.
+
+OOPPTTIIOONNSS
+     The following options are accepted:
+
+     --hh, ----hheellpp
+             Show help message and exit.
+
+     ----vveerrssiioonn
+             Print llamafile version.
+
+QQUUAANNTTIIZZAATTIIOONN TTYYPPEESS
+     The following quantization types are available:
+
+     --   2 is Q4_0
+     --   3 is Q4_1
+     --   6 is Q5_0
+     --   7 is Q5_1
+     --   8 is Q8_0
+
+SSEEEE AALLSSOO
+     llamafile(1)
+
+Llamafile Manual               December 5, 2023               Llamafile Manual
diff --git a/llama.cpp/llava/llava-quantize.cpp b/llama.cpp/llava/llava-quantize.cpp
index a15446e941..7b5350adac 100644
--- a/llama.cpp/llava/llava-quantize.cpp
+++ b/llama.cpp/llava/llava-quantize.cpp
@@ -8,23 +8,27 @@
 #include
 
 int main(int argc, char *argv[]) {
-    if (argc == 2 && !strcmp(argv[1], "--version")) {
-        printf("llamafile v" LLAMAFILE_VERSION_STRING " llava-quantize\n");
-        exit(0);
+
+    if (llamafile_has(argv, "--version")) {
+        puts("llava-quantize v" LLAMAFILE_VERSION_STRING);
+        return 0;
+    }
+
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llama.cpp/llava/llava-quantize.1.asc");
+        __builtin_unreachable();
     }
+
     llamafile_init();
     llamafile_check_cpu();
+
     if (argc != 4) {
-        fprintf(stderr,
-                "Usage: %s INPUT OUTPUT FORMAT\n"
-                "    - 2 is Q4_0\n"
-                "    - 3 is Q4_1\n"
-                "    - 6 is Q5_0\n"
-                "    - 7 is Q5_1\n"
-                "    - 8 is Q8_0\n",
-                argv[0]);
+        fprintf(stderr, "%s: missing argument\n", argv[0]);
         return 1;
     }
+
     if (!clip_model_quantize(argv[1], argv[2], atoi(argv[3]))) {
         exit(1);
     }
diff --git a/llama.cpp/main/BUILD.mk b/llama.cpp/main/BUILD.mk
index 6e255e06c1..13e63c7240 100644
--- a/llama.cpp/main/BUILD.mk
+++ b/llama.cpp/main/BUILD.mk
@@ -13,13 +13,9 @@ o/$(MODE)/llama.cpp/main/main:				\
 		o/$(MODE)/llama.cpp/server/server.a		\
 		o/$(MODE)/llama.cpp/llava/llava.a		\
 		o/$(MODE)/llama.cpp/llama.cpp.a			\
-		o/$(MODE)/llama.cpp/main/main.1.asc.zip.o	\
+		o/$(MODE)/llamafile/llamafile.1.asc.zip.o	\
 		$(LLAMA_CPP_SERVER_ASSETS:%=o/$(MODE)/%.zip.o)
 
-llama.cpp/main/main.1.asc: llama.cpp/main/main.1
-	-man $< >$@.tmp && mv -f $@.tmp $@
-	@rm -f $@.tmp
-
 .PHONY: o/$(MODE)/llama.cpp/main
 o/$(MODE)/llama.cpp/main:					\
 		o/$(MODE)/llama.cpp/main/main
diff --git a/llama.cpp/main/main.cpp b/llama.cpp/main/main.cpp
index 1fea2e9e37..b4b6334d99 100644
--- a/llama.cpp/main/main.cpp
+++ b/llama.cpp/main/main.cpp
@@ -99,47 +99,41 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, void * user_data) {
     LOG_TEE("%s", text);
 }
 
-static bool has_argument(int argc, char ** argv, const char * arg) {
-    for (int i = 1; i < argc; ++i) {
-        if (!strcmp(argv[i], arg)) {
-            return true;
-        }
-    }
-    return false;
-}
-
 int main(int argc, char ** argv) {
-    llamafile_init();
-    llamafile_check_cpu();
-    ShowCrashReports();
-    LoadZipArgs(&argc, &argv);
 
-    if (has_argument(argc, argv, "--version")) {
-        printf("llamafile v" LLAMAFILE_VERSION_STRING "\n");
+    if (llamafile_has(argv, "--version")) {
+        puts("llamafile v" LLAMAFILE_VERSION_STRING);
         return 0;
     }
 
-    if (has_argument(argc, argv, "--help")) {
-        llamafile_help("/zip/llama.cpp/main/main.1.asc");
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llamafile/llamafile.1.asc");
         __builtin_unreachable();
     }
 
+    llamafile_init();
+    llamafile_check_cpu();
+    ShowCrashReports();
+    LoadZipArgs(&argc, &argv);
+
     if (!IsXnuSilicon() &&
-        (!has_argument(argc, argv, "-ngl") &&
-         !has_argument(argc, argv, "--gpu-layers") &&
-         !has_argument(argc, argv, "--n-gpu-layers"))) {
+        (!llamafile_has(argv, "-ngl") &&
+         !llamafile_has(argv, "--gpu-layers") &&
+         !llamafile_has(argv, "--n-gpu-layers"))) {
         FLAG_gpu = LLAMAFILE_GPU_DISABLE;
     }
 
-    if (!has_argument(argc, argv, "--cli") &&
-        (has_argument(argc, argv, "--server") ||
-         (!has_argument(argc, argv, "-p") &&
-          !has_argument(argc, argv, "-f") &&
-          !has_argument(argc, argv, "--random-prompt")))) {
+    if (!llamafile_has(argv, "--cli") &&
+        (llamafile_has(argv, "--server") ||
+         (!llamafile_has(argv, "-p") &&
+          !llamafile_has(argv, "-f") &&
+          !llamafile_has(argv, "--random-prompt")))) {
         return server_cli(argc, argv);
     }
 
-    if (has_argument(argc, argv, "--image")) {
+    if (llamafile_has(argv, "--image")) {
         return llava_cli(argc, argv);
     }
diff --git a/llama.cpp/perplexity/BUILD.mk b/llama.cpp/perplexity/BUILD.mk
index fa8987b788..b64258800a 100644
--- a/llama.cpp/perplexity/BUILD.mk
+++ b/llama.cpp/perplexity/BUILD.mk
@@ -9,9 +9,10 @@ LLAMA_CPP_PERPLEXITY_SRCS = $(filter %.cpp,$(LLAMA_CPP_PERPLEXITY_FILES))
 LLAMA_CPP_PERPLEXITY_OBJS = $(LLAMA_CPP_PERPLEXITY_SRCS:%.cpp=o/$(MODE)/%.o)
 
 .PHONY: o/$(MODE)/llama.cpp/perplexity
-o/$(MODE)/llama.cpp/perplexity:				\
+o/$(MODE)/llama.cpp/perplexity:					\
 		o/$(MODE)/llama.cpp/perplexity/perplexity
 
-o/$(MODE)/llama.cpp/perplexity/perplexity:		\
-		o/$(MODE)/llama.cpp/perplexity/perplexity.o	\
+o/$(MODE)/llama.cpp/perplexity/perplexity:			\
+		o/$(MODE)/llama.cpp/perplexity/perplexity.o	\
+		o/$(MODE)/llama.cpp/perplexity/perplexity.1.asc.zip.o \
 		o/$(MODE)/llama.cpp/llama.cpp.a
diff --git a/llama.cpp/perplexity/perplexity.1 b/llama.cpp/perplexity/perplexity.1
index 68fcdf47ca..f53ee5dcb6 100644
--- a/llama.cpp/perplexity/perplexity.1
+++ b/llama.cpp/perplexity/perplexity.1
@@ -1,6 +1,6 @@
 .Dd December 5, 2023
-.Dt llamafile-perplexity 1
-.Os
+.Dt LLAMAFILE-PERPLEXITY 1
+.Os Llamafile Manual
 .Sh NAME
 .Nm llamafile-perplexity
 .Nd LLM benchmarking tool
diff --git a/llama.cpp/perplexity/perplexity.1.asc b/llama.cpp/perplexity/perplexity.1.asc
new file mode 100644
index 0000000000..1962e774bd
--- /dev/null
+++ b/llama.cpp/perplexity/perplexity.1.asc
@@ -0,0 +1,35 @@
+LLAMAFILE-PERPLEXITY(1)     General Commands Manual    LLAMAFILE-PERPLEXITY(1)
+
+NNAAMMEE
+     llllaammaaffiillee--ppeerrpplleexxiittyy - LLM benchmarking tool
+
+SSYYNNOOPPSSIISS
+     llllaammaaffiillee--ppeerrpplleexxiittyy [flags...]
+
+DDEESSCCRRIIPPTTIIOONN
+     llllaammaaffiillee--ppeerrpplleexxiittyy can be used to gauge the quality of an LLM
+     implementation.
+
+OOPPTTIIOONNSS
+     The following options are available:
+
+     --hh, ----hheellpp
+             Show help message and exit.
+
+     --mm _F_N_A_M_E, ----mmooddeell _F_N_A_M_E
+             Model path (default: models/7B/ggml-model-f16.gguf)
+
+     --ff _F_N_A_M_E, ----ffiillee _F_N_A_M_E
+             Raw data input file.
+
+     --tt _N, ----tthhrreeaaddss _N
+             Number of threads to use during generation (default: nproc/2)
+
+     --ss _S_E_E_D, ----sseeeedd _S_E_E_D
+             Random Number Generator (RNG) seed (default: -1, use random seed
+             for < 0)
+
+SSEEEE AALLSSOO
+     llamafile(1)
+
+Llamafile Manual               December 5, 2023               Llamafile Manual
diff --git a/llama.cpp/perplexity/perplexity.cpp b/llama.cpp/perplexity/perplexity.cpp
index 8d91db37e6..161e855174 100644
--- a/llama.cpp/perplexity/perplexity.cpp
+++ b/llama.cpp/perplexity/perplexity.cpp
@@ -680,11 +680,23 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_params params;
+
+    if (llamafile_has(argv, "--version")) {
+        puts("llamafile-perplexity v" LLAMAFILE_VERSION_STRING);
+        return 0;
+    }
+
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llama.cpp/perplexity/perplexity.1.asc");
+        __builtin_unreachable();
+    }
 
     llamafile_init();
     llamafile_check_cpu();
+    gpt_params params;
     params.n_batch = 512;
     if (!gpt_params_parse(argc, argv, params)) {
         return 1;
diff --git a/llama.cpp/quantize/BUILD.mk b/llama.cpp/quantize/BUILD.mk
index ec2bff1054..fb58d809df 100644
--- a/llama.cpp/quantize/BUILD.mk
+++ b/llama.cpp/quantize/BUILD.mk
@@ -9,9 +9,10 @@ LLAMA_CPP_QUANTIZE_SRCS = $(filter %.cpp,$(LLAMA_CPP_QUANTIZE_FILES))
 LLAMA_CPP_QUANTIZE_OBJS = $(LLAMA_CPP_QUANTIZE_SRCS:%.cpp=o/$(MODE)/%.o)
 
 .PHONY: o/$(MODE)/llama.cpp/quantize
-o/$(MODE)/llama.cpp/quantize:				\
+o/$(MODE)/llama.cpp/quantize:					\
 		o/$(MODE)/llama.cpp/quantize/quantize
 
-o/$(MODE)/llama.cpp/quantize/quantize:			\
-		o/$(MODE)/llama.cpp/quantize/quantize.o	\
+o/$(MODE)/llama.cpp/quantize/quantize:				\
+		o/$(MODE)/llama.cpp/quantize/quantize.o		\
+		o/$(MODE)/llama.cpp/quantize/quantize.1.asc.zip.o \
 		o/$(MODE)/llama.cpp/llama.cpp.a
diff --git a/llama.cpp/quantize/quantize.1 b/llama.cpp/quantize/quantize.1
index 50068d0e4d..1afe57e458 100644
--- a/llama.cpp/quantize/quantize.1
+++ b/llama.cpp/quantize/quantize.1
@@ -1,6 +1,6 @@
 .Dd December 5, 2023
-.Dt llamafile-quantize 1
-.Os
+.Dt LLAMAFILE-QUANTIZE 1
+.Os Llamafile Manual
 .Sh NAME
 .Nm llamafile-quantize
 .Nd large language model quantizer
diff --git a/llama.cpp/quantize/quantize.1.asc b/llama.cpp/quantize/quantize.1.asc
new file mode 100644
index 0000000000..cb975dbdb8
--- /dev/null
+++ b/llama.cpp/quantize/quantize.1.asc
@@ -0,0 +1,76 @@
+LLAMAFILE-QUANTIZE(1)       General Commands Manual      LLAMAFILE-QUANTIZE(1)
+
+NNAAMMEE
+     llllaammaaffiillee--qquuaannttiizzee - large language model quantizer
+
+SSYYNNOOPPSSIISS
+     llllaammaaffiillee--qquuaannttiizzee [flags...] _m_o_d_e_l_-_f_3_2_._g_g_u_f [_m_o_d_e_l_-_q_u_a_n_t_._g_g_u_f] _t_y_p_e
+     [_n_t_h_r_e_a_d_s]
+
+DDEESSCCRRIIPPTTIIOONN
+     llllaammaaffiillee--qquuaannttiizzee converts large language model weights from the float32
+     or float16 formats into smaller data types from 2 to 8 bits in size.
+
+OOPPTTIIOONNSS
+     The following flags are available:
+
+     ----aallllooww--rreeqquuaannttiizzee
+             Allows requantizing tensors that have already been quantized.
+             Warning: This can severely reduce quality compared to quantizing
+             from 16-bit or 32-bit.
+
+     ----lleeaavvee--oouuttppuutt--tteennssoorr
+             Will leave output.weight un(re)quantized.
+             Increases model size but may also increase quality, especially
+             when requantizing
+
+     ----ppuurree  Disable k-quant mixtures and quantize all tensors to the same
+             type
+
+AARRGGUUMMEENNTTSS
+     The following positional arguments are accepted:
+
+     _m_o_d_e_l_-_f_3_2_._g_g_u_f
+             Is the input file, which contains the unquantized model weights
+             in either the float32 or float16 format.
+
+     _m_o_d_e_l_-_q_u_a_n_t_._g_g_u_f
+             Is the output file, which will contain quantized weights in the
+             desired format. If this path isn't specified, it'll default to
+             [inp path]/ggml-model-[ftype].gguf.
+
+     _t_y_p_e    Is the desired quantization format, which may be the integer id
+             of a supported quantization type, or its name. See the
+             quantization types section below for acceptable formats.
+
+     _n_t_h_r_e_a_d_s
+             Number of threads to use during computation (default: nproc/2)
+
+QQUUAANNTTIIZZAATTIIOONN TTYYPPEESS
+     The following quantization types are available:
+
+     --    2  Q4_0    3.56G  +0.2166 ppl @ LLaMA-v1-7B
+     --    3  Q4_1    3.90G  +0.1585 ppl @ LLaMA-v1-7B
+     --    8  Q5_0    4.33G  +0.0683 ppl @ LLaMA-v1-7B
+     --    9  Q5_1    4.70G  +0.0349 ppl @ LLaMA-v1-7B
+     --   10  Q2_K    2.63G  +0.6717 ppl @ LLaMA-v1-7B
+     --   12  Q3_K    alias for Q3_K_M
+     --   11  Q3_K_S  2.75G  +0.5551 ppl @ LLaMA-v1-7B
+     --   12  Q3_K_M  3.07G  +0.2496 ppl @ LLaMA-v1-7B
+     --   13  Q3_K_L  3.35G  +0.1764 ppl @ LLaMA-v1-7B
+     --   15  Q4_K    alias for Q4_K_M
+     --   14  Q4_K_S  3.59G  +0.0992 ppl @ LLaMA-v1-7B
+     --   15  Q4_K_M  3.80G  +0.0532 ppl @ LLaMA-v1-7B
+     --   17  Q5_K    alias for Q5_K_M
+     --   16  Q5_K_S  4.33G  +0.0400 ppl @ LLaMA-v1-7B
+     --   17  Q5_K_M  4.45G  +0.0122 ppl @ LLaMA-v1-7B
+     --   18  Q6_K    5.15G  -0.0008 ppl @ LLaMA-v1-7B
+     --    7  Q8_0    6.70G  +0.0004 ppl @ LLaMA-v1-7B
+     --    1  F16    13.00G  @ 7B
+     --    0  F32    26.00G  @ 7B
+     --  COPY  Only copy tensors, no quantizing.
+
+SSEEEE AALLSSOO
+     llamafile(1), llamafile-perplexity(1), llava-quantize(1), zipalign(1),
+     unzip(1)
+
+Llamafile Manual               December 5, 2023               Llamafile Manual
diff --git a/llama.cpp/quantize/quantize.cpp b/llama.cpp/quantize/quantize.cpp
index 3ab628a7ca..b46d3b18c7 100644
--- a/llama.cpp/quantize/quantize.cpp
+++ b/llama.cpp/quantize/quantize.cpp
@@ -96,9 +96,17 @@ static void usage(const char * executable) {
 }
 
 int main(int argc, char ** argv) {
-    if (argc == 2 && !strcmp(argv[1], "--version")) {
-        printf("llamafile v" LLAMAFILE_VERSION_STRING " quantize\n");
-        exit(0);
+
+    if (llamafile_has(argv, "--version")) {
+        puts("llamafile-quantize v" LLAMAFILE_VERSION_STRING);
+        return 0;
+    }
+
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llama.cpp/quantize/quantize.1.asc");
+        __builtin_unreachable();
     }
 
     llamafile_init();
diff --git a/llamafile/BUILD.mk b/llamafile/BUILD.mk
index 28c7d5750e..a3cf257f79 100644
--- a/llamafile/BUILD.mk
+++ b/llamafile/BUILD.mk
@@ -6,13 +6,17 @@ PKGS += LLAMAFILE
 LLAMAFILE_FILES := $(wildcard llamafile/*.*)
 LLAMAFILE_HDRS = $(filter %.h,$(LLAMAFILE_FILES))
 LLAMAFILE_SRCS = $(filter %.c,$(LLAMAFILE_FILES))
+LLAMAFILE_DOCS = $(filter %.1,$(LLAMAFILE_FILES))
 
 LLAMAFILE_OBJS =					\
 	$(LLAMAFILE_SRCS:%.c=o/$(MODE)/%.o)		\
 	$(LLAMAFILE_FILES:%=o/$(MODE)/%.zip.o)
 
 o/$(MODE)/llamafile/zipalign:				\
-		o/$(MODE)/llamafile/zipalign.o
+		o/$(MODE)/llamafile/zipalign.o		\
+		o/$(MODE)/llamafile/help.o		\
+		o/$(MODE)/llamafile/has.o		\
+		o/$(MODE)/llamafile/zipalign.1.asc.zip.o
 
 o/$(MODE)/llamafile/zipcheck:				\
 		o/$(MODE)/llamafile/zipcheck.o		\
diff --git a/llamafile/has.c b/llamafile/has.c
new file mode 100644
index 0000000000..df882f0b81
--- /dev/null
+++ b/llamafile/has.c
@@ -0,0 +1,28 @@
+// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
+// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
+//
+// Copyright 2024 Mozilla Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "llamafile.h"
+#include <string.h>
+
+bool llamafile_has(char **a, const char *x) {
+    for (int i = 0; a[i]; ++i) {
+        if (!strcmp(a[i], x)) {
+            return true;
+        }
+    }
+    return false;
+}
diff --git a/llama.cpp/main/main.1 b/llamafile/llamafile.1
similarity index 97%
rename from llama.cpp/main/main.1
rename to llamafile/llamafile.1
index 579d0d438f..5517161198 100644
--- a/llama.cpp/main/main.1
+++ b/llamafile/llamafile.1
@@ -1,3 +1,16 @@
+.\" Copyright 2024 Mozilla Foundation
+.\"
+.\" Licensed under the Apache License, Version 2.0 (the "License");
+.\" you may not use this file except in compliance with the License.
+.\" You may obtain a copy of the License at
+.\"
+.\"     http://www.apache.org/licenses/LICENSE-2.0
+.\"
+.\" Unless required by applicable law or agreed to in writing, software
+.\" distributed under the License is distributed on an "AS IS" BASIS,
+.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+.\" See the License for the specific language governing permissions and +.\" limitations under the License. .Dd January 1, 2024 .Dt LLAMAFILE 1 .Os Mozilla Ocho diff --git a/llama.cpp/main/main.1.asc b/llamafile/llamafile.1.asc similarity index 100% rename from llama.cpp/main/main.1.asc rename to llamafile/llamafile.1.asc diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h index 62524fab7d..a401a7df33 100644 --- a/llamafile/llamafile.h +++ b/llamafile/llamafile.h @@ -21,6 +21,7 @@ void llamafile_init(void); void llamafile_check_cpu(void); void llamafile_help(const char *); const char *llamafile_get_tmp_dir(void); +bool llamafile_has(char **, const char *); bool llamafile_extract(const char *, const char *); int llamafile_is_file_newer_than(const char *, const char *); void llamafile_schlep(const void *, size_t); diff --git a/llamafile/metal.c b/llamafile/metal.c index 1ffb82aa31..022cfc9247 100644 --- a/llamafile/metal.c +++ b/llamafile/metal.c @@ -28,6 +28,7 @@ #include #include #include +#include "llamafile/log.h" #include "llama.cpp/ggml-metal.h" __static_yoink("llama.cpp/ggml.h"); diff --git a/llamafile/zipalign.1 b/llamafile/zipalign.1 index cd6bfc0afe..91b57c0435 100644 --- a/llamafile/zipalign.1 +++ b/llamafile/zipalign.1 @@ -1,6 +1,19 @@ +.\" Copyright 2023 Mozilla Foundation +.\" +.\" Licensed under the Apache License, Version 2.0 (the "License"); +.\" you may not use this file except in compliance with the License. +.\" You may obtain a copy of the License at +.\" +.\" http://www.apache.org/licenses/LICENSE-2.0 +.\" +.\" Unless required by applicable law or agreed to in writing, software +.\" distributed under the License is distributed on an "AS IS" BASIS, +.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.\" See the License for the specific language governing permissions and +.\" limitations under the License. .Dd December 5, 2023 -.Dt zipalign 1 -.Os +.Dt ZIPALIGN 1 +.Os Llamafile Manual .Sh NAME .Nm zipalign .Nd PKZIP for LLMs diff --git a/llamafile/zipalign.1.asc b/llamafile/zipalign.1.asc new file mode 100644 index 0000000000..91bf36aff1 --- /dev/null +++ b/llamafile/zipalign.1.asc @@ -0,0 +1,73 @@ +ZIPALIGN(1) General Commands Manual ZIPALIGN(1) + +NNAAMMEE + zziippaalliiggnn - PKZIP for LLMs + +SSYYNNOOPPSSIISS + zziippaalliiggnn [FLAG...] _Z_I_P _F_I_L_E_._._. + +DDEESSCCRRIIPPTTIIOONN + zziippaalliiggnn adds aligned uncompressed files to a PKZIP archive. + + This tool is designed to concatenate gigabytes of LLM weights to an + executable. This command goes 10x faster than `zip -j0`. Unlike zip you + are not required to use the .com file extension for it to work. But most + importantly, this tool has a flag that lets you insert zip files that are + aligned on a specific boundary. The result is things like GPUs that have + specific memory alignment requirements will now be able to perform math + directly on the zip file's mmap()'d weights. + + This tool always operates in an append-only manner. Unlike the InfoZIP + zip(1) command, zziippaalliiggnn does not reflow existing assets to shave away + space. For example, if zziippaalliiggnn is used on an existing PKZIP archive to + replace an existing asset, then the bytes for the old revision of the + asset will still be there, along with any alignment gaps that currently + exist in the file between assets. + + The same concept also applies to the central directory listing that's + stored at the end of the file. When changes are made, the old central + directory is left behind as junk data. 
+     Therefore it's important, when adding multiple files to an archive at
+     once, that the files all be passed as arguments in a single invocation,
+     rather than by calling this command multiple times.
+
+OOPPTTIIOONNSS
+     The following options are available:
+
+     --hh      Show help.
+
+     --vv      Operate in verbose mode.
+
+     --NN      Run in nondeterministic mode. This will cause the date/time of
+             inserted assets to reflect the file modified time.
+
+     --aa _I_N_T  Byte alignment for inserted zip assets. This must be a power of
+             two. It defaults to 65536, since that ensures your asset will be
+             page-aligned on all conceivable platforms, both now and in the
+             future.
+
+     --jj      Strip directory components. The filename of each input filepath
+             will be used as the zip asset name. This is otherwise known as
+             the basename. An error will be raised if the same zip asset name
+             ends up being specified multiple times.
+
+     --00      Store zip assets without compression. This is the default. This
+             option must be chosen when adding weights to a llamafile,
+             otherwise it won't be possible to map them into memory. Using --00
+             goes orders of magnitude faster than using --66 compression.
+
+     --66      Store zip assets with sweet-spot compression. Any value between
+             --00 and --99 is accepted as a compression level choice. Using --66
+             will oftentimes go 10x faster than --99 with only a marginal
+             increase in size. Note that decompression speeds are unaffected.
+
+     --99      Store zip assets with maximum compression. This takes a very
+             long time to compress. Decompression will go just as fast. This
+             might be a good idea when publishing archives that'll be widely
+             consumed via the Internet for a long time.
+
+SSEEEE AALLSSOO
+     unzip(1), llamafile(1)
+
+AAUUTTHHOORRSS
+     Justine Alexandra Roberts Tunney
+
+Llamafile Manual               December 5, 2023               Llamafile Manual
diff --git a/llamafile/zipalign.c b/llamafile/zipalign.c
index 64ebaf31ad..cddbcecaaf 100644
--- a/llamafile/zipalign.c
+++ b/llamafile/zipalign.c
@@ -29,27 +29,9 @@
 #include
 #include
 #include
+#include "llamafile.h"
 #include
 
-#define USAGE \
-    " ZIP FILE...\n\
-\n\
-DESCRIPTION\n\
-\n\
-  Adds aligned uncompressed files to PKZIP archive\n\
-\n\
-FLAGS\n\
-\n\
-  -h        help\n\
-  -v        verbose\n\
-  -N        nondeterministic mode\n\
-  -a INT    alignment (default 65536)\n\
-  -j        strip directory components\n\
-  -0        store uncompressed (default)\n\
-  -6        store with faster compression\n\
-  -9        store with maximum compression\n\
-\n"
-
 #define CHUNK 2097152
 
 #define Min(a, b) ((a) < (b) ? (a) : (b))
@@ -96,11 +78,6 @@ static void *Realloc(void *p, size_t n) {
     return p;
 }
 
-static wontreturn void PrintUsage(int fd, int rc) {
-    tinyprint(fd, "SYNOPSIS\n\n  ", prog, USAGE, NULL);
-    exit(rc);
-}
-
 static void GetDosLocalTime(int64_t utcunixts,
                             uint16_t *out_time,
                             uint16_t *out_date) {
@@ -112,13 +89,20 @@ static void GetDosLocalTime(int64_t utcunixts,
 
 int main(int argc, char *argv[]) {
 
+    if (llamafile_has(argv, "-h") ||
+        llamafile_has(argv, "-help") ||
+        llamafile_has(argv, "--help")) {
+        llamafile_help("/zip/llamafile/zipalign.1.asc");
+        __builtin_unreachable();
+    }
+
     // get name of program
     prog = argv[0];
    	if (!prog) prog = "zipalign";
 
     // parse flags
     int opt;
-    while ((opt = getopt(argc, argv, "0123456789hvjNa:")) != -1) {
+    while ((opt = getopt(argc, argv, "0123456789vjNa:")) != -1) {
         switch (opt) {
         case '0':
         case '1':
@@ -150,10 +134,8 @@ int main(int argc, char *argv[]) {
                 Die(prog, "FLAG_alignment must be two power");
             }
             break;
-        case 'h':
-            PrintUsage(1, 0);
         default:
-            PrintUsage(2, 1);
+            return 1;
         }
     }
     if (optind == argc) {
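
The new llamafile_has() helper replaces the per-program has_argument() scanners
above: it walks the NULL-terminated argv array itself, so argc no longer has to
be threaded through every flag check (note the loop starts at a[0], so the
program name is scanned too). Below is a minimal standalone sketch of that
behavior; the main() driver is hypothetical and not part of this patch, while
llamafile_has() itself is copied from llamafile/has.c above.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    // Scan a NULL-terminated argv-style array for an exact string match.
    static bool llamafile_has(char **a, const char *x) {
        for (int i = 0; a[i]; ++i) {
            if (!strcmp(a[i], x)) {
                return true;
            }
        }
        return false;
    }

    int main(int argc, char **argv) {
        // Mirrors how the tools above dispatch on --version before doing
        // any other initialization. The version string here is made up.
        if (llamafile_has(argv, "--version")) {
            puts("demo v0.0");
            return 0;
        }
        puts("run again with --version");
        return 0;
    }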