From 94b01828bc3a8dbed0de085d247a2947e5c73105 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Thu, 4 Apr 2024 11:03:20 -0400
Subject: [PATCH 1/2] Just exclude failing version

---
 .github/workflows/unit_tests.yml     | 2 +-
 .github/workflows/unit_tests_gpu.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 5da14cafb..704169f26 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -37,7 +37,7 @@ jobs:
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
       - name: Install model-specific dependencies
         run: |
-          pip install "llama-cpp-python<0.2.58"
+          pip install "llama-cpp-python!=0.2.58"
      - name: Run tests (except server)
        run: |
          pytest --cov=guidance --cov-report=xml --cov-report=term-missing --selected_model ${{ matrix.model }} -m "not server" -m "not needs_credentials" ./tests/

diff --git a/.github/workflows/unit_tests_gpu.yml b/.github/workflows/unit_tests_gpu.yml
index 20e73b061..cb3dec4e2 100644
--- a/.github/workflows/unit_tests_gpu.yml
+++ b/.github/workflows/unit_tests_gpu.yml
@@ -44,7 +44,7 @@ jobs:
       - name: GPU pip installs
         run: |
           pip install accelerate
-          CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python<0.2.58"
+          CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python!=0.2.58"
      - name: Check GPU available
        run: |
          python -c "import torch; assert torch.cuda.is_available()"

From d4eb224c0ce4eadd8bd4c844747d315c3e4751f0 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 5 Apr 2024 11:01:53 -0400
Subject: [PATCH 2/2] Put logits_all into Llama constructor

---
 guidance/models/llama_cpp/_llama_cpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/guidance/models/llama_cpp/_llama_cpp.py b/guidance/models/llama_cpp/_llama_cpp.py
index 4390f9f5f..06f758c40 100644
--- a/guidance/models/llama_cpp/_llama_cpp.py
+++ b/guidance/models/llama_cpp/_llama_cpp.py
@@ -100,7 +100,7 @@ def __init__(self, model, compute_log_probs, **kwargs):
                 kwargs["verbose"] = True  # llama-cpp-python can't hide output in this case
 
             with normalize_notebook_stdout_stderr():
-                self.model_obj = llama_cpp.Llama(model_path=model, **kwargs)
+                self.model_obj = llama_cpp.Llama(model_path=model, logits_all=True, **kwargs)
         elif isinstance(model, llama_cpp.Llama):
             self.model = model.__class__.__name__
             self.model_obj = model
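
Note (not part of the patch itself): the first commit replaces the upper-bound pin "llama-cpp-python<0.2.58" with the exclusion "llama-cpp-python!=0.2.58", so CI skips only the known-failing release and automatically picks up newer ones. The second commit moves logits_all into the llama_cpp.Llama constructor, where the flag tells llama.cpp to retain logits for every evaluated token rather than only the last; compute_log_probs depends on those per-token logits. Below is a minimal sketch of the behavior the patch relies on, assuming a locally available GGUF file (the model path is a placeholder, not from the patch):

    import llama_cpp

    # logits_all must be set at construction time: it is part of the
    # llama.cpp context parameters and cannot be toggled afterwards.
    llm = llama_cpp.Llama(
        model_path="models/example.gguf",  # placeholder path
        logits_all=True,
        verbose=False,
    )

    # Requesting per-token logprobs only works when all logits were kept;
    # llama-cpp-python raises an error for models built with logits_all=False.
    out = llm.create_completion("Hello", max_tokens=4, logprobs=2)
    print(out["choices"][0]["logprobs"])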