Skip to content

Commit

Permalink
refactor: improve RAG (#982)
Browse files Browse the repository at this point in the history
- set max default_chunk_size to 2000
- set default rag_top_k to 4
- change reciprocal_rank_fusion weights
  • Loading branch information
sigoden authored Nov 11, 2024
1 parent b2dbd73 commit e18aa01
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ summary_prompt: 'This is a summary of the chat history as a recap: '
# See [RAG-Guide](https://github.com/sigoden/aichat/wiki/RAG-Guide) for more details.
rag_embedding_model: null # Specifies the embedding model to use
rag_reranker_model: null # Specifies the rerank model to use
rag_top_k: 4 # Specifies the number of documents to retrieve
rag_top_k: 5 # Specifies the number of documents to retrieve
rag_chunk_size: null # Specifies the chunk size
rag_chunk_overlap: null # Specifies the chunk overlap
rag_min_score_vector_search: 0 # Specifies the minimum relevance score for vector-based searching
Expand Down
10 changes: 5 additions & 5 deletions models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@
type: embedding
input_price: 0.13
max_tokens_per_chunk: 8191
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
input_price: 0.02
max_tokens_per_chunk: 8191
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 100

# Links:
Expand Down Expand Up @@ -1004,12 +1004,12 @@
- name: text-embedding-3-large
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 100
- name: meta-llama-3.1-405b-instruct
max_input_tokens: 128000
Expand Down Expand Up @@ -1714,7 +1714,7 @@
max_input_tokens: 120000
input_price: 0.12
max_tokens_per_chunk: 16000
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 128
- name: rerank-2
type: reranker
Expand Down
2 changes: 1 addition & 1 deletion src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl Default for Config {

rag_embedding_model: None,
rag_reranker_model: None,
rag_top_k: 4,
rag_top_k: 5,
rag_chunk_size: None,
rag_chunk_overlap: None,
rag_min_score_vector_search: 0.0,
Expand Down
2 changes: 1 addition & 1 deletion src/rag/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ impl Rag {
None => {
let ids = reciprocal_rank_fusion(
vec![vector_search_ids, keyword_search_ids],
vec![1.0, 1.0],
vec![1.125, 1.0],
top_k,
);
debug!("rrf_ids: {ids:?}");
Expand Down

0 comments on commit e18aa01

Please sign in to comment.