Skip to content

Commit

Permalink
refactor: improve RAG (#982)
Browse files Browse the repository at this point in the history
- set max default_chunk_size to 2000
- set default rag_top_k to 4
- change reciprocal_rank_fusion weights
  • Loading branch information
sigoden authored Nov 11, 2024
1 parent b2dbd73 commit e18aa01
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ summary_prompt: 'This is a summary of the chat history as a recap: '
# See [RAG-Guide](https://github.com/sigoden/aichat/wiki/RAG-Guide) for more details.
rag_embedding_model: null # Specifies the embedding model to use
rag_reranker_model: null # Specifies the rerank model to use
rag_top_k: 4 # Specifies the number of documents to retrieve
rag_top_k: 5 # Specifies the number of documents to retrieve
rag_chunk_size: null # Specifies the chunk size
rag_chunk_overlap: null # Specifies the chunk overlap
rag_min_score_vector_search: 0 # Specifies the minimum relevance score for vector-based searching
Expand Down
10 changes: 5 additions & 5 deletions models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@
type: embedding
input_price: 0.13
max_tokens_per_chunk: 8191
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
input_price: 0.02
max_tokens_per_chunk: 8191
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 100

# Links:
Expand Down Expand Up @@ -1004,12 +1004,12 @@
- name: text-embedding-3-large
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 100
- name: meta-llama-3.1-405b-instruct
max_input_tokens: 128000
Expand Down Expand Up @@ -1714,7 +1714,7 @@
max_input_tokens: 120000
input_price: 0.12
max_tokens_per_chunk: 16000
default_chunk_size: 3000
default_chunk_size: 2000
max_batch_size: 128
- name: rerank-2
type: reranker
Expand Down
2 changes: 1 addition & 1 deletion src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl Default for Config {

rag_embedding_model: None,
rag_reranker_model: None,
rag_top_k: 4,
rag_top_k: 5,
rag_chunk_size: None,
rag_chunk_overlap: None,
rag_min_score_vector_search: 0.0,
Expand Down
2 changes: 1 addition & 1 deletion src/rag/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ impl Rag {
None => {
let ids = reciprocal_rank_fusion(
vec![vector_search_ids, keyword_search_ids],
vec![1.0, 1.0],
vec![1.125, 1.0],
top_k,
);
debug!("rrf_ids: {ids:?}");
Expand Down

0 comments on commit e18aa01

Please sign in to comment.