# .env_gpt4all

# model_kwargs for GPT4All or llama-cpp-python models

# GPT4All GPT-J type model, chosen from the model explorer, so it gets downloaded
model_name_gptj=ggml-gpt4all-j-v1.3-groovy.bin

# llama-cpp-python type, supporting version 3 (ggmlv3) quantization, here from a locally built llama.cpp quantization
# NOTE(review): this comment used to say "q4", but the file name below says q8_0 -- confirm which is intended
# the model below uses prompt_type=llama2
model_path_llama=llama-2-7b-chat.ggmlv3.q8_0.bin
# below assumes max_new_tokens=256 and max_seq_len=2048 (presumably 2048 - 256 = 1792; adjust together)
max_tokens=1792
# The n_gpu_layers setting below applies when using llama-cpp-python with cuBLAS built in.
# For maximum GPU performance, it is automatically set to the maximum number of layers for the model
# (e.g. llama2 tends to have 35, and we force it to 35).
# Systems with low GPU memory may not handle that; in that case uncomment below and choose (say) 15.
# n_gpu_layers=15

# GPT4All LLaMa type model, supporting version 2 quantization, chosen from the model explorer, so it gets downloaded
model_name_gpt4all_llama=ggml-wizardLM-7B.q4_2.bin

# Optional setting, disabled here; uncomment to set PDF_CLASS_NAME to UnstructuredPDFLoader.
# NOTE(review): presumably selects the class used to load PDFs -- confirm against the code that reads this file.
# PDF_CLASS_NAME=UnstructuredPDFLoader

# NOTE(review): going by the key name, presumably the exllama model to use when no config is provided;
# the value looks like an "owner/repo" model id -- confirm against the code that reads this file.
model_name_exllama_if_no_config=TheBloke/Nous-Hermes-Llama2-GPTQ