From 94b01828bc3a8dbed0de085d247a2947e5c73105 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Thu, 4 Apr 2024 11:03:20 -0400
Subject: [PATCH 1/2] Just exclude failing version

---
 .github/workflows/unit_tests.yml     | 2 +-
 .github/workflows/unit_tests_gpu.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 5da14cafb..704169f26 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -37,7 +37,7 @@ jobs:
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
       - name: Install model-specific dependencies
         run: |
-          pip install "llama-cpp-python<0.2.58"
+          pip install "llama-cpp-python!=0.2.58"
      - name: Run tests (except server)
        run: |
          pytest --cov=guidance --cov-report=xml --cov-report=term-missing --selected_model ${{ matrix.model }} -m "not server" -m "not needs_credentials" ./tests/

diff --git a/.github/workflows/unit_tests_gpu.yml b/.github/workflows/unit_tests_gpu.yml
index 20e73b061..cb3dec4e2 100644
--- a/.github/workflows/unit_tests_gpu.yml
+++ b/.github/workflows/unit_tests_gpu.yml
@@ -44,7 +44,7 @@ jobs:
       - name: GPU pip installs
         run: |
           pip install accelerate
-          CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python<0.2.58"
+          CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python!=0.2.58"
      - name: Check GPU available
        run: |
          python -c "import torch; assert torch.cuda.is_available()"

From d4eb224c0ce4eadd8bd4c844747d315c3e4751f0 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 5 Apr 2024 11:01:53 -0400
Subject: [PATCH 2/2] Put logits_all into Llama constructor

---
 guidance/models/llama_cpp/_llama_cpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/guidance/models/llama_cpp/_llama_cpp.py b/guidance/models/llama_cpp/_llama_cpp.py
index 4390f9f5f..06f758c40 100644
--- a/guidance/models/llama_cpp/_llama_cpp.py
+++ b/guidance/models/llama_cpp/_llama_cpp.py
@@ -100,7 +100,7 @@ def __init__(self, model, compute_log_probs, **kwargs):
                 kwargs["verbose"] = True  # llama-cpp-python can't hide output in this case
 
             with normalize_notebook_stdout_stderr():
-                self.model_obj = llama_cpp.Llama(model_path=model, **kwargs)
+                self.model_obj = llama_cpp.Llama(model_path=model, logits_all=True, **kwargs)
         elif isinstance(model, llama_cpp.Llama):
             self.model = model.__class__.__name__
             self.model_obj = model
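
Note (not part of the patch itself): the first commit replaces the upper-bound pin "llama-cpp-python<0.2.58" with the exclusion "llama-cpp-python!=0.2.58", so CI skips only the known-failing release and automatically picks up newer ones. The second commit moves logits_all into the llama_cpp.Llama constructor, where the flag tells llama.cpp to retain logits for every evaluated token rather than only the last; compute_log_probs depends on those per-token logits. Below is a minimal sketch of the behavior the patch relies on, assuming a locally available GGUF file (the model path is a placeholder, not from the patch):

    import llama_cpp

    # logits_all must be set at construction time: it is part of the
    # llama.cpp context parameters and cannot be toggled afterwards.
    llm = llama_cpp.Llama(
        model_path="models/example.gguf",  # placeholder path
        logits_all=True,
        verbose=False,
    )

    # Requesting per-token logprobs only works when all logits were kept;
    # llama-cpp-python raises an error for models built with logits_all=False.
    out = llm.create_completion("Hello", max_tokens=4, logprobs=2)
    print(out["choices"][0]["logprobs"])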