diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index f00d7b2917..e7e9fc6d2a 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -1887,7 +1887,72 @@
]
}
},
- "/v1/vector-io/insert": {
+ "/v1/tool-runtime/rag-tool/documents": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RAGQueryResult"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "ToolRuntime"
+ ],
+ "summary": "Query the RAG system for context; typically invoked by the agent",
+ "parameters": [
+ {
+ "name": "content",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ }
+ },
+ {
+ "name": "vector_db_ids",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ {
+ "name": "query_config",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "$ref": "#/components/schemas/RAGQueryConfig"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
"post": {
"responses": {
"200": {
@@ -1895,8 +1960,9 @@
}
},
"tags": [
- "VectorIO"
+ "ToolRuntime"
],
+ "summary": "Index documents so they can be used by the RAG system",
"parameters": [
{
"name": "X-LlamaStack-Provider-Data",
@@ -1921,7 +1987,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/InsertChunksRequest"
+ "$ref": "#/components/schemas/InsertRequest"
}
}
},
@@ -1929,7 +1995,90 @@
}
}
},
- "/v1/tool-runtime/rag-tool/insert-documents": {
+ "/v1/vector-io/chunks": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/QueryChunksResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "VectorIO"
+ ],
+ "parameters": [
+ {
+ "name": "vector_db_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "query",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ }
+ },
+ {
+ "name": "params",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
"post": {
"responses": {
"200": {
@@ -1937,9 +2086,8 @@
}
},
"tags": [
- "ToolRuntime"
+ "VectorIO"
],
- "summary": "Index documents so they can be used by the RAG system",
"parameters": [
{
"name": "X-LlamaStack-Provider-Data",
@@ -1964,7 +2112,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/InsertDocumentsRequest"
+ "$ref": "#/components/schemas/InsertChunksRequest"
}
}
},
@@ -3033,105 +3181,6 @@
}
}
},
- "/v1/vector-io/query": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/QueryChunksResponse"
- }
- }
- }
- }
- },
- "tags": [
- "VectorIO"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-Provider-Data",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Client-Version",
- "in": "header",
- "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/QueryChunksRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/v1/tool-runtime/rag-tool/query-context": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/RAGQueryResult"
- }
- }
- }
- }
- },
- "tags": [
- "ToolRuntime"
- ],
- "summary": "Query the RAG system for context; typically invoked by the agent",
- "parameters": [
- {
- "name": "X-LlamaStack-Provider-Data",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Client-Version",
- "in": "header",
- "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/QueryContextRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/v1/telemetry/spans": {
"get": {
"responses": {
@@ -5256,11 +5305,8 @@
"const": "memory_retrieval",
"default": "memory_retrieval"
},
- "memory_bank_ids": {
- "type": "array",
- "items": {
- "type": "string"
- }
+ "vector_db_ids": {
+ "type": "string"
},
"inserted_context": {
"$ref": "#/components/schemas/InterleavedContent"
@@ -5271,7 +5317,7 @@
"turn_id",
"step_id",
"step_type",
- "memory_bank_ids",
+ "vector_db_ids",
"inserted_context"
]
},
@@ -6976,67 +7022,10 @@
"status"
]
},
- "InsertChunksRequest": {
+ "RAGDocument": {
"type": "object",
"properties": {
- "vector_db_id": {
- "type": "string"
- },
- "chunks": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "content": {
- "$ref": "#/components/schemas/InterleavedContent"
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "content",
- "metadata"
- ]
- }
- },
- "ttl_seconds": {
- "type": "integer"
- }
- },
- "additionalProperties": false,
- "required": [
- "vector_db_id",
- "chunks"
- ]
- },
- "RAGDocument": {
- "type": "object",
- "properties": {
- "document_id": {
+ "document_id": {
"type": "string"
},
"content": {
@@ -7094,7 +7083,7 @@
"metadata"
]
},
- "InsertDocumentsRequest": {
+ "InsertRequest": {
"type": "object",
"properties": {
"documents": {
@@ -7117,6 +7106,63 @@
"chunk_size_in_tokens"
]
},
+ "InsertChunksRequest": {
+ "type": "object",
+ "properties": {
+ "vector_db_id": {
+ "type": "string"
+ },
+ "chunks": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "metadata"
+ ]
+ }
+ },
+ "ttl_seconds": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "vector_db_id",
+ "chunks"
+ ]
+ },
"InvokeToolRequest": {
"type": "object",
"properties": {
@@ -7883,104 +7929,6 @@
"job_uuid"
]
},
- "QueryChunksRequest": {
- "type": "object",
- "properties": {
- "vector_db_id": {
- "type": "string"
- },
- "query": {
- "$ref": "#/components/schemas/InterleavedContent"
- },
- "params": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "vector_db_id",
- "query"
- ]
- },
- "QueryChunksResponse": {
- "type": "object",
- "properties": {
- "chunks": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "content": {
- "$ref": "#/components/schemas/InterleavedContent"
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "content",
- "metadata"
- ]
- }
- },
- "scores": {
- "type": "array",
- "items": {
- "type": "number"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "chunks",
- "scores"
- ]
- },
"DefaultRAGQueryGeneratorConfig": {
"type": "object",
"properties": {
@@ -8054,38 +8002,72 @@
}
]
},
- "QueryContextRequest": {
+ "RAGQueryResult": {
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
+ }
+ },
+ "additionalProperties": false
+ },
+ "QueryChunksResponse": {
+ "type": "object",
+ "properties": {
+ "chunks": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "metadata"
+ ]
+ }
},
- "query_config": {
- "$ref": "#/components/schemas/RAGQueryConfig"
- },
- "vector_db_ids": {
+ "scores": {
"type": "array",
"items": {
- "type": "string"
+ "type": "number"
}
}
},
"additionalProperties": false,
"required": [
- "content",
- "query_config",
- "vector_db_ids"
+ "chunks",
+ "scores"
]
},
- "RAGQueryResult": {
- "type": "object",
- "properties": {
- "content": {
- "$ref": "#/components/schemas/InterleavedContent"
- }
- },
- "additionalProperties": false
- },
"QueryCondition": {
"type": "object",
"properties": {
@@ -9246,8 +9228,8 @@
"description": ""
},
{
- "name": "InsertDocumentsRequest",
- "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertDocumentsRequest\" />"
+ "name": "InsertRequest",
+ "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertRequest\" />"
},
{
"name": "Inspect"
@@ -9418,10 +9400,6 @@
"name": "QATFinetuningConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QATFinetuningConfig\" />"
},
- {
- "name": "QueryChunksRequest",
- "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryChunksRequest\" />"
- },
{
"name": "QueryChunksResponse",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryChunksResponse\" />"
@@ -9434,10 +9412,6 @@
"name": "QueryConditionOp",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryConditionOp\" />"
},
- {
- "name": "QueryContextRequest",
- "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryContextRequest\" />"
- },
{
"name": "QuerySpanTreeResponse",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QuerySpanTreeResponse\" />"
@@ -9858,7 +9832,7 @@
"ImageDelta",
"InferenceStep",
"InsertChunksRequest",
- "InsertDocumentsRequest",
+ "InsertRequest",
"InterleavedContent",
"InterleavedContentItem",
"InvokeToolRequest",
@@ -9899,11 +9873,9 @@
"PreferenceOptimizeRequest",
"ProviderInfo",
"QATFinetuningConfig",
- "QueryChunksRequest",
"QueryChunksResponse",
"QueryCondition",
"QueryConditionOp",
- "QueryContextRequest",
"QuerySpanTreeResponse",
"QuerySpansResponse",
"QueryTracesResponse",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index e1ae07c45f..0e14d296ff 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1009,7 +1009,7 @@ components:
- vector_db_id
- chunks
type: object
- InsertDocumentsRequest:
+ InsertRequest:
additionalProperties: false
properties:
chunk_size_in_tokens:
@@ -1299,10 +1299,6 @@ components:
type: string
inserted_context:
$ref: '#/components/schemas/InterleavedContent'
- memory_bank_ids:
- items:
- type: string
- type: array
started_at:
format: date-time
type: string
@@ -1314,11 +1310,13 @@ components:
type: string
turn_id:
type: string
+ vector_db_ids:
+ type: string
required:
- turn_id
- step_id
- step_type
- - memory_bank_ids
+ - vector_db_ids
- inserted_context
type: object
Message:
@@ -1630,27 +1628,6 @@ components:
- quantizer_name
- group_size
type: object
- QueryChunksRequest:
- additionalProperties: false
- properties:
- params:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- query:
- $ref: '#/components/schemas/InterleavedContent'
- vector_db_id:
- type: string
- required:
- - vector_db_id
- - query
- type: object
QueryChunksResponse:
additionalProperties: false
properties:
@@ -1710,22 +1687,6 @@ components:
- gt
- lt
type: string
- QueryContextRequest:
- additionalProperties: false
- properties:
- content:
- $ref: '#/components/schemas/InterleavedContent'
- query_config:
- $ref: '#/components/schemas/RAGQueryConfig'
- vector_db_ids:
- items:
- type: string
- type: array
- required:
- - content
- - query_config
- - vector_db_ids
- type: object
QuerySpanTreeResponse:
additionalProperties: false
properties:
@@ -5176,9 +5137,26 @@ paths:
description: OK
tags:
- ToolRuntime
- /v1/tool-runtime/rag-tool/insert-documents:
- post:
+ /v1/tool-runtime/rag-tool/documents:
+ get:
parameters:
+ - in: query
+ name: content
+ required: true
+ schema:
+ $ref: '#/components/schemas/InterleavedContent'
+ - in: query
+ name: vector_db_ids
+ required: true
+ schema:
+ items:
+ type: string
+ type: array
+ - in: query
+ name: query_config
+ required: false
+ schema:
+ $ref: '#/components/schemas/RAGQueryConfig'
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
@@ -5193,19 +5171,16 @@ paths:
required: false
schema:
type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/InsertDocumentsRequest'
- required: true
responses:
'200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/RAGQueryResult'
description: OK
- summary: Index documents so they can be used by the RAG system
+ summary: Query the RAG system for context; typically invoked by the agent
tags:
- ToolRuntime
- /v1/tool-runtime/rag-tool/query-context:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -5226,16 +5201,12 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/QueryContextRequest'
+ $ref: '#/components/schemas/InsertRequest'
required: true
responses:
'200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/RAGQueryResult'
description: OK
- summary: Query the RAG system for context; typically invoked by the agent
+ summary: Index documents so they can be used by the RAG system
tags:
- ToolRuntime
/v1/toolgroups:
@@ -5530,9 +5501,32 @@ paths:
description: OK
tags:
- VectorDBs
- /v1/vector-io/insert:
- post:
+ /v1/vector-io/chunks:
+ get:
parameters:
+ - in: query
+ name: vector_db_id
+ required: true
+ schema:
+ type: string
+ - in: query
+ name: query
+ required: true
+ schema:
+ $ref: '#/components/schemas/InterleavedContent'
+ - in: query
+ name: params
+ required: false
+ schema:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
@@ -5547,18 +5541,15 @@ paths:
required: false
schema:
type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/InsertChunksRequest'
- required: true
responses:
'200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryChunksResponse'
description: OK
tags:
- VectorIO
- /v1/vector-io/query:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -5579,14 +5570,10 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/QueryChunksRequest'
+ $ref: '#/components/schemas/InsertChunksRequest'
required: true
responses:
'200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/QueryChunksResponse'
description: OK
tags:
- VectorIO
@@ -5814,9 +5801,8 @@ tags:
- description:
name: InsertChunksRequest
-- description:
- name: InsertDocumentsRequest
+- description:
+ name: InsertRequest
- name: Inspect
- description:
@@ -5932,9 +5918,6 @@ tags:
- description:
name: QATFinetuningConfig
-- description:
- name: QueryChunksRequest
- description:
name: QueryChunksResponse
@@ -5943,9 +5926,6 @@ tags:
- description:
name: QueryConditionOp
-- description:
- name: QueryContextRequest
- description:
name: QuerySpanTreeResponse
@@ -6245,7 +6225,7 @@ x-tagGroups:
- ImageDelta
- InferenceStep
- InsertChunksRequest
- - InsertDocumentsRequest
+ - InsertRequest
- InterleavedContent
- InterleavedContentItem
- InvokeToolRequest
@@ -6286,11 +6266,9 @@ x-tagGroups:
- PreferenceOptimizeRequest
- ProviderInfo
- QATFinetuningConfig
- - QueryChunksRequest
- QueryChunksResponse
- QueryCondition
- QueryConditionOp
- - QueryContextRequest
- QuerySpanTreeResponse
- QuerySpansResponse
- QueryTracesResponse
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index 0247bb384c..3674e7f86f 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -74,8 +74,8 @@ class RAGQueryConfig(BaseModel):
@runtime_checkable
@trace_protocol
class RAGToolRuntime(Protocol):
- @webmethod(route="/tool-runtime/rag-tool/insert-documents", method="POST")
- async def insert_documents(
+ @webmethod(route="/tool-runtime/rag-tool/documents", method="POST")
+ async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
@@ -84,12 +84,12 @@ async def insert_documents(
"""Index documents so they can be used by the RAG system"""
...
- @webmethod(route="/tool-runtime/rag-tool/query-context", method="POST")
- async def query_context(
+ @webmethod(route="/tool-runtime/rag-tool/documents", method="GET")
+ async def query(
self,
content: InterleavedContent,
- query_config: RAGQueryConfig,
vector_db_ids: List[str],
+ query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
"""Query the RAG system for context; typically invoked by the agent"""
...
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 5371b89186..8a7117187b 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -38,9 +38,9 @@ def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ...
class VectorIO(Protocol):
vector_db_store: VectorDBStore
- # this will just block now until documents are inserted, but it should
+ # this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
- @webmethod(route="/vector-io/insert", method="POST")
+ @webmethod(route="/vector-io/chunks", method="POST")
async def insert_chunks(
self,
vector_db_id: str,
@@ -48,7 +48,7 @@ async def insert_chunks(
ttl_seconds: Optional[int] = None,
) -> None: ...
- @webmethod(route="/vector-io/query", method="POST")
+ @webmethod(route="/vector-io/chunks", method="GET")
async def query_chunks(
self,
vector_db_id: str,
diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
index 3ae9833dc1..6bb2045bd5 100644
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@@ -414,25 +414,25 @@ def __init__(
) -> None:
self.routing_table = routing_table
- async def query_context(
+ async def query(
self,
content: InterleavedContent,
- query_config: RAGQueryConfig,
vector_db_ids: List[str],
+ query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
return await self.routing_table.get_provider_impl(
- "rag_tool.query_context"
- ).query_context(content, query_config, vector_db_ids)
+ "query_from_memory"
+ ).query(content, vector_db_ids, query_config)
- async def insert_documents(
+ async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
chunk_size_in_tokens: int = 512,
) -> None:
return await self.routing_table.get_provider_impl(
- "rag_tool.insert_documents"
- ).insert_documents(documents, vector_db_id, chunk_size_in_tokens)
+ "insert_into_memory"
+ ).insert(documents, vector_db_id, chunk_size_in_tokens)
def __init__(
self,
@@ -441,10 +441,9 @@ def __init__(
self.routing_table = routing_table
# HACK ALERT this should be in sync with "get_all_api_endpoints()"
- # TODO: make sure rag_tool vs builtin::memory is correct everywhere
self.rag_tool = self.RagToolImpl(routing_table)
- setattr(self, "rag_tool.query_context", self.rag_tool.query_context)
- setattr(self, "rag_tool.insert_documents", self.rag_tool.insert_documents)
+ for method in ("query", "insert"):
+ setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))
async def initialize(self) -> None:
pass
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 2d0ad137b9..75fd75afc4 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -84,7 +84,7 @@ def make_random_string(length: int = 8):
TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
-MEMORY_QUERY_TOOL = "rag_tool.query_context"
+MEMORY_QUERY_TOOL = "query_from_memory"
WEB_SEARCH_TOOL = "web_search"
MEMORY_GROUP = "builtin::memory"
@@ -432,16 +432,16 @@ async def _run(
)
)
)
- result = await self.tool_runtime_api.rag_tool.query_context(
+ result = await self.tool_runtime_api.rag_tool.query(
content=concat_interleaved_content(
[msg.content for msg in input_messages]
),
+ vector_db_ids=vector_db_ids,
query_config=RAGQueryConfig(
query_generator_config=DefaultRAGQueryGeneratorConfig(),
max_tokens_in_context=4096,
max_chunks=5,
),
- vector_db_ids=vector_db_ids,
)
retrieved_context = result.content
@@ -882,7 +882,7 @@ async def add_to_session_vector_db(
)
for a in data
]
- await self.tool_runtime_api.rag_tool.insert_documents(
+ await self.tool_runtime_api.rag_tool.insert(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,
diff --git a/llama_stack/providers/inline/tool_runtime/memory/memory.py b/llama_stack/providers/inline/tool_runtime/memory/memory.py
index d3f8b07dc0..7798ed7118 100644
--- a/llama_stack/providers/inline/tool_runtime/memory/memory.py
+++ b/llama_stack/providers/inline/tool_runtime/memory/memory.py
@@ -61,7 +61,7 @@ async def initialize(self):
async def shutdown(self):
pass
- async def insert_documents(
+ async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
@@ -87,15 +87,16 @@ async def insert_documents(
vector_db_id=vector_db_id,
)
- async def query_context(
+ async def query(
self,
content: InterleavedContent,
- query_config: RAGQueryConfig,
vector_db_ids: List[str],
+ query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
if not vector_db_ids:
return RAGQueryResult(content=None)
+ query_config = query_config or RAGQueryConfig()
query = await generate_rag_query(
query_config.query_generator_config,
content,
@@ -159,11 +160,11 @@ async def list_runtime_tools(
# encountering fatals.
return [
ToolDef(
- name="rag_tool.query_context",
+ name="query_from_memory",
description="Retrieve context from memory",
),
ToolDef(
- name="rag_tool.insert_documents",
+ name="insert_into_memory",
description="Insert documents into memory",
),
]
diff --git a/llama_stack/providers/tests/tools/test_tools.py b/llama_stack/providers/tests/tools/test_tools.py
index 62b18ea664..bb4265f942 100644
--- a/llama_stack/providers/tests/tools/test_tools.py
+++ b/llama_stack/providers/tests/tools/test_tools.py
@@ -96,14 +96,14 @@ async def test_rag_tool(self, tools_stack, sample_documents):
)
# Insert documents into memory
- await tools_impl.rag_tool.insert_documents(
+ await tools_impl.rag_tool.insert(
documents=sample_documents,
vector_db_id="test_bank",
chunk_size_in_tokens=512,
)
# Execute the memory tool
- response = await tools_impl.rag_tool.query_context(
+ response = await tools_impl.rag_tool.query(
content="What are the main topics covered in the documentation?",
vector_db_ids=["test_bank"],
)
diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py
index 940b7b8985..12970ce08a 100644
--- a/tests/client-sdk/agents/test_agents.py
+++ b/tests/client-sdk/agents/test_agents.py
@@ -292,7 +292,7 @@ def test_rag_agent(llama_stack_client, agent_config):
embedding_model="all-MiniLM-L6-v2",
embedding_dimension=384,
)
- llama_stack_client.tool_runtime.rag_tool.insert_documents(
+ llama_stack_client.tool_runtime.insert_into_memory(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,
@@ -321,4 +321,4 @@ def test_rag_agent(llama_stack_client, agent_config):
)
logs = [str(log) for log in EventLogger().log(response) if log is not None]
logs_str = "".join(logs)
- assert "Tool:rag_tool.query_context" in logs_str
+ assert "Tool:query_from_memory" in logs_str
diff --git a/tests/client-sdk/tool_runtime/test_rag_tool.py b/tests/client-sdk/tool_runtime/test_rag_tool.py
index bce0672681..ac2058754b 100644
--- a/tests/client-sdk/tool_runtime/test_rag_tool.py
+++ b/tests/client-sdk/tool_runtime/test_rag_tool.py
@@ -73,7 +73,7 @@ def test_vector_db_insert_inline_and_query(
llama_stack_client, single_entry_vector_db_registry, sample_documents
):
vector_db_id = single_entry_vector_db_registry[0]
- llama_stack_client.tool_runtime.rag_tool.insert_documents(
+ llama_stack_client.tool_runtime.insert_into_memory(
documents=sample_documents,
chunk_size_in_tokens=512,
vector_db_id=vector_db_id,
@@ -157,7 +157,7 @@ def test_vector_db_insert_from_url_and_query(
for i, url in enumerate(urls)
]
- llama_stack_client.tool_runtime.rag_tool.insert_documents(
+ llama_stack_client.tool_runtime.insert_into_memory(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,