diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index f00d7b2917..e7e9fc6d2a 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -1887,7 +1887,72 @@ ] } }, - "/v1/vector-io/insert": { + "/v1/tool-runtime/rag-tool/documents": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RAGQueryResult" + } + } + } + } + }, + "tags": [ + "ToolRuntime" + ], + "summary": "Query the RAG system for context; typically invoked by the agent", + "parameters": [ + { + "name": "content", + "in": "query", + "required": true, + "schema": { + "$ref": "#/components/schemas/InterleavedContent" + } + }, + { + "name": "vector_db_ids", + "in": "query", + "required": true, + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + }, + { + "name": "query_config", + "in": "query", + "required": false, + "schema": { + "$ref": "#/components/schemas/RAGQueryConfig" + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, "post": { "responses": { "200": { @@ -1895,8 +1960,9 @@ } }, "tags": [ - "VectorIO" + "ToolRuntime" ], + "summary": "Index documents so they can be used by the RAG system", "parameters": [ { "name": "X-LlamaStack-Provider-Data", @@ -1921,7 +1987,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InsertChunksRequest" + "$ref": "#/components/schemas/InsertRequest" } } }, @@ -1929,7 +1995,90 @@ } } }, - "/v1/tool-runtime/rag-tool/insert-documents": { + "/v1/vector-io/chunks": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QueryChunksResponse" + } + } + } + } + }, + "tags": [ + "VectorIO" + ], + "parameters": [ + { + "name": "vector_db_id", + "in": "query", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "query", + "in": "query", + "required": true, + "schema": { + "$ref": "#/components/schemas/InterleavedContent" + } + }, + { + "name": "params", + "in": "query", + "required": false, + "schema": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, "post": { "responses": { "200": { @@ -1937,9 +2086,8 @@ } }, "tags": [ - "ToolRuntime" + "VectorIO" ], - "summary": "Index documents so they can be used by the RAG system", "parameters": [ { "name": "X-LlamaStack-Provider-Data", @@ -1964,7 +2112,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InsertDocumentsRequest" + "$ref": "#/components/schemas/InsertChunksRequest" } } }, @@ -3033,105 +3181,6 @@ } } }, - "/v1/vector-io/query": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryChunksResponse" - } - } - } - } - }, - "tags": [ - "VectorIO" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryChunksRequest" - } - } - }, - "required": true - } - } - }, - "/v1/tool-runtime/rag-tool/query-context": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RAGQueryResult" - } - } - } - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Query the RAG system for context; typically invoked by the agent", - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryContextRequest" - } - } - }, - "required": true - } - } - }, "/v1/telemetry/spans": { "get": { "responses": { @@ -5256,11 +5305,8 @@ "const": "memory_retrieval", "default": "memory_retrieval" }, - "memory_bank_ids": { - "type": "array", - "items": { - "type": "string" - } + "vector_db_ids": { + "type": "string" }, "inserted_context": { "$ref": "#/components/schemas/InterleavedContent" @@ -5271,7 +5317,7 @@ "turn_id", "step_id", "step_type", - "memory_bank_ids", + "vector_db_ids", "inserted_context" ] }, @@ -6976,67 +7022,10 @@ "status" ] }, - "InsertChunksRequest": { + "RAGDocument": { "type": "object", "properties": { - "vector_db_id": { - "type": "string" - }, - "chunks": { - "type": "array", - "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ] - } - }, - "ttl_seconds": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "chunks" - ] - }, - "RAGDocument": { - "type": "object", - "properties": { - "document_id": { + "document_id": { "type": "string" }, "content": { @@ -7094,7 +7083,7 @@ "metadata" ] }, - "InsertDocumentsRequest": { + "InsertRequest": { "type": "object", "properties": { "documents": { @@ -7117,6 +7106,63 @@ "chunk_size_in_tokens" ] }, + "InsertChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string" + }, + "chunks": { + 
"type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ] + } + }, + "ttl_seconds": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "chunks" + ] + }, "InvokeToolRequest": { "type": "object", "properties": { @@ -7883,104 +7929,6 @@ "job_uuid" ] }, - "QueryChunksRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string" - }, - "query": { - "$ref": "#/components/schemas/InterleavedContent" - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "query" - ] - }, - "QueryChunksResponse": { - "type": "object", - "properties": { - "chunks": { - "type": "array", - "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ] - } - }, - "scores": { - "type": "array", - "items": { - "type": "number" - } - } - }, - "additionalProperties": false, - "required": [ - "chunks", - "scores" - ] - }, 
"DefaultRAGQueryGeneratorConfig": { "type": "object", "properties": { @@ -8054,38 +8002,72 @@ } ] }, - "QueryContextRequest": { + "RAGQueryResult": { "type": "object", "properties": { "content": { "$ref": "#/components/schemas/InterleavedContent" + } + }, + "additionalProperties": false + }, + "QueryChunksResponse": { + "type": "object", + "properties": { + "chunks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ] + } }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig" - }, - "vector_db_ids": { + "scores": { "type": "array", "items": { - "type": "string" + "type": "number" } } }, "additionalProperties": false, "required": [ - "content", - "query_config", - "vector_db_ids" + "chunks", + "scores" ] }, - "RAGQueryResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent" - } - }, - "additionalProperties": false - }, "QueryCondition": { "type": "object", "properties": { @@ -9246,8 +9228,8 @@ "description": "" }, { - "name": "InsertDocumentsRequest", - "description": "" + "name": "InsertRequest", + "description": "" }, { "name": "Inspect" @@ -9418,10 +9400,6 @@ "name": "QATFinetuningConfig", "description": "" }, - { - "name": "QueryChunksRequest", - "description": "" - }, { "name": "QueryChunksResponse", "description": "" @@ -9434,10 +9412,6 @@ "name": "QueryConditionOp", "description": "" }, - { - "name": "QueryContextRequest", - "description": "" - }, { "name": "QuerySpanTreeResponse", "description": "" @@ -9858,7 +9832,7 @@ "ImageDelta", "InferenceStep", 
"InsertChunksRequest", - "InsertDocumentsRequest", + "InsertRequest", "InterleavedContent", "InterleavedContentItem", "InvokeToolRequest", @@ -9899,11 +9873,9 @@ "PreferenceOptimizeRequest", "ProviderInfo", "QATFinetuningConfig", - "QueryChunksRequest", "QueryChunksResponse", "QueryCondition", "QueryConditionOp", - "QueryContextRequest", "QuerySpanTreeResponse", "QuerySpansResponse", "QueryTracesResponse", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index e1ae07c45f..0e14d296ff 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -1009,7 +1009,7 @@ components: - vector_db_id - chunks type: object - InsertDocumentsRequest: + InsertRequest: additionalProperties: false properties: chunk_size_in_tokens: @@ -1299,10 +1299,6 @@ components: type: string inserted_context: $ref: '#/components/schemas/InterleavedContent' - memory_bank_ids: - items: - type: string - type: array started_at: format: date-time type: string @@ -1314,11 +1310,13 @@ components: type: string turn_id: type: string + vector_db_ids: + type: string required: - turn_id - step_id - step_type - - memory_bank_ids + - vector_db_ids - inserted_context type: object Message: @@ -1630,27 +1628,6 @@ components: - quantizer_name - group_size type: object - QueryChunksRequest: - additionalProperties: false - properties: - params: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - query: - $ref: '#/components/schemas/InterleavedContent' - vector_db_id: - type: string - required: - - vector_db_id - - query - type: object QueryChunksResponse: additionalProperties: false properties: @@ -1710,22 +1687,6 @@ components: - gt - lt type: string - QueryContextRequest: - additionalProperties: false - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - 
vector_db_ids: - items: - type: string - type: array - required: - - content - - query_config - - vector_db_ids - type: object QuerySpanTreeResponse: additionalProperties: false properties: @@ -5176,9 +5137,26 @@ paths: description: OK tags: - ToolRuntime - /v1/tool-runtime/rag-tool/insert-documents: - post: + /v1/tool-runtime/rag-tool/documents: + get: parameters: + - in: query + name: content + required: true + schema: + $ref: '#/components/schemas/InterleavedContent' + - in: query + name: vector_db_ids + required: true + schema: + items: + type: string + type: array + - in: query + name: query_config + required: false + schema: + $ref: '#/components/schemas/RAGQueryConfig' - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -5193,19 +5171,16 @@ paths: required: false schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertDocumentsRequest' - required: true responses: '200': + content: + application/json: + schema: + $ref: '#/components/schemas/RAGQueryResult' description: OK - summary: Index documents so they can be used by the RAG system + summary: Query the RAG system for context; typically invoked by the agent tags: - ToolRuntime - /v1/tool-runtime/rag-tool/query-context: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5226,16 +5201,12 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/QueryContextRequest' + $ref: '#/components/schemas/InsertRequest' required: true responses: '200': - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' description: OK - summary: Query the RAG system for context; typically invoked by the agent + summary: Index documents so they can be used by the RAG system tags: - ToolRuntime /v1/toolgroups: @@ -5530,9 +5501,32 @@ paths: description: OK tags: - VectorDBs - /v1/vector-io/insert: - post: + 
/v1/vector-io/chunks: + get: parameters: + - in: query + name: vector_db_id + required: true + schema: + type: string + - in: query + name: query + required: true + schema: + $ref: '#/components/schemas/InterleavedContent' + - in: query + name: params + required: false + schema: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -5547,18 +5541,15 @@ paths: required: false schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertChunksRequest' - required: true responses: '200': + content: + application/json: + schema: + $ref: '#/components/schemas/QueryChunksResponse' description: OK tags: - VectorIO - /v1/vector-io/query: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5579,14 +5570,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/QueryChunksRequest' + $ref: '#/components/schemas/InsertChunksRequest' required: true responses: '200': - content: - application/json: - schema: - $ref: '#/components/schemas/QueryChunksResponse' description: OK tags: - VectorIO @@ -5814,9 +5801,8 @@ tags: - description: name: InsertChunksRequest -- description: - name: InsertDocumentsRequest +- description: + name: InsertRequest - name: Inspect - description: @@ -5932,9 +5918,6 @@ tags: - description: name: QATFinetuningConfig -- description: - name: QueryChunksRequest - description: name: QueryChunksResponse @@ -5943,9 +5926,6 @@ tags: - description: name: QueryConditionOp -- description: - name: QueryContextRequest - description: name: QuerySpanTreeResponse @@ -6245,7 +6225,7 @@ x-tagGroups: - ImageDelta - InferenceStep - InsertChunksRequest - - InsertDocumentsRequest + - InsertRequest - InterleavedContent - InterleavedContentItem - 
InvokeToolRequest @@ -6286,11 +6266,9 @@ x-tagGroups: - PreferenceOptimizeRequest - ProviderInfo - QATFinetuningConfig - - QueryChunksRequest - QueryChunksResponse - QueryCondition - QueryConditionOp - - QueryContextRequest - QuerySpanTreeResponse - QuerySpansResponse - QueryTracesResponse diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index 0247bb384c..3674e7f86f 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -74,8 +74,8 @@ class RAGQueryConfig(BaseModel): @runtime_checkable @trace_protocol class RAGToolRuntime(Protocol): - @webmethod(route="/tool-runtime/rag-tool/insert-documents", method="POST") - async def insert_documents( + @webmethod(route="/tool-runtime/rag-tool/documents", method="POST") + async def insert( self, documents: List[RAGDocument], vector_db_id: str, @@ -84,12 +84,12 @@ async def insert_documents( """Index documents so they can be used by the RAG system""" ... - @webmethod(route="/tool-runtime/rag-tool/query-context", method="POST") - async def query_context( + @webmethod(route="/tool-runtime/rag-tool/documents", method="GET") + async def query( self, content: InterleavedContent, - query_config: RAGQueryConfig, vector_db_ids: List[str], + query_config: Optional[RAGQueryConfig] = None, ) -> RAGQueryResult: """Query the RAG system for context; typically invoked by the agent""" ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 5371b89186..8a7117187b 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -38,9 +38,9 @@ def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ... 
class VectorIO(Protocol): vector_db_store: VectorDBStore - # this will just block now until documents are inserted, but it should + # this will just block now until chunks are inserted, but it should # probably return a Job instance which can be polled for completion - @webmethod(route="/vector-io/insert", method="POST") + @webmethod(route="/vector-io/chunks", method="POST") async def insert_chunks( self, vector_db_id: str, @@ -48,7 +48,7 @@ async def insert_chunks( ttl_seconds: Optional[int] = None, ) -> None: ... - @webmethod(route="/vector-io/query", method="POST") + @webmethod(route="/vector-io/chunks", method="GET") async def query_chunks( self, vector_db_id: str, diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 3ae9833dc1..6bb2045bd5 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -414,25 +414,25 @@ def __init__( ) -> None: self.routing_table = routing_table - async def query_context( + async def query( self, content: InterleavedContent, - query_config: RAGQueryConfig, vector_db_ids: List[str], + query_config: Optional[RAGQueryConfig] = None, ) -> RAGQueryResult: return await self.routing_table.get_provider_impl( - "rag_tool.query_context" - ).query_context(content, query_config, vector_db_ids) + "query_from_memory" + ).query(content, vector_db_ids, query_config) - async def insert_documents( + async def insert( self, documents: List[RAGDocument], vector_db_id: str, chunk_size_in_tokens: int = 512, ) -> None: return await self.routing_table.get_provider_impl( - "rag_tool.insert_documents" - ).insert_documents(documents, vector_db_id, chunk_size_in_tokens) + "insert_into_memory" + ).insert(documents, vector_db_id, chunk_size_in_tokens) def __init__( self, @@ -441,10 +441,9 @@ def __init__( self.routing_table = routing_table # HACK ALERT this should be in sync with "get_all_api_endpoints()" - # TODO: make sure rag_tool vs builtin::memory is 
correct everywhere self.rag_tool = self.RagToolImpl(routing_table) - setattr(self, "rag_tool.query_context", self.rag_tool.query_context) - setattr(self, "rag_tool.insert_documents", self.rag_tool.insert_documents) + for method in ("query", "insert"): + setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) async def initialize(self) -> None: pass diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 2d0ad137b9..75fd75afc4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -84,7 +84,7 @@ def make_random_string(length: int = 8): TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})") -MEMORY_QUERY_TOOL = "rag_tool.query_context" +MEMORY_QUERY_TOOL = "query_from_memory" WEB_SEARCH_TOOL = "web_search" MEMORY_GROUP = "builtin::memory" @@ -432,16 +432,16 @@ async def _run( ) ) ) - result = await self.tool_runtime_api.rag_tool.query_context( + result = await self.tool_runtime_api.rag_tool.query( content=concat_interleaved_content( [msg.content for msg in input_messages] ), + vector_db_ids=vector_db_ids, query_config=RAGQueryConfig( query_generator_config=DefaultRAGQueryGeneratorConfig(), max_tokens_in_context=4096, max_chunks=5, ), - vector_db_ids=vector_db_ids, ) retrieved_context = result.content @@ -882,7 +882,7 @@ async def add_to_session_vector_db( ) for a in data ] - await self.tool_runtime_api.rag_tool.insert_documents( + await self.tool_runtime_api.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, chunk_size_in_tokens=512, diff --git a/llama_stack/providers/inline/tool_runtime/memory/memory.py b/llama_stack/providers/inline/tool_runtime/memory/memory.py index d3f8b07dc0..7798ed7118 100644 --- a/llama_stack/providers/inline/tool_runtime/memory/memory.py +++ 
b/llama_stack/providers/inline/tool_runtime/memory/memory.py @@ -61,7 +61,7 @@ async def initialize(self): async def shutdown(self): pass - async def insert_documents( + async def insert( self, documents: List[RAGDocument], vector_db_id: str, @@ -87,15 +87,16 @@ async def insert_documents( vector_db_id=vector_db_id, ) - async def query_context( + async def query( self, content: InterleavedContent, - query_config: RAGQueryConfig, vector_db_ids: List[str], + query_config: Optional[RAGQueryConfig] = None, ) -> RAGQueryResult: if not vector_db_ids: return RAGQueryResult(content=None) + query_config = query_config or RAGQueryConfig() query = await generate_rag_query( query_config.query_generator_config, content, @@ -159,11 +160,11 @@ async def list_runtime_tools( # encountering fatals. return [ ToolDef( - name="rag_tool.query_context", + name="query_from_memory", description="Retrieve context from memory", ), ToolDef( - name="rag_tool.insert_documents", + name="insert_into_memory", description="Insert documents into memory", ), ] diff --git a/llama_stack/providers/tests/tools/test_tools.py b/llama_stack/providers/tests/tools/test_tools.py index 62b18ea664..bb4265f942 100644 --- a/llama_stack/providers/tests/tools/test_tools.py +++ b/llama_stack/providers/tests/tools/test_tools.py @@ -96,14 +96,14 @@ async def test_rag_tool(self, tools_stack, sample_documents): ) # Insert documents into memory - await tools_impl.rag_tool.insert_documents( + await tools_impl.rag_tool.insert( documents=sample_documents, vector_db_id="test_bank", chunk_size_in_tokens=512, ) # Execute the memory tool - response = await tools_impl.rag_tool.query_context( + response = await tools_impl.rag_tool.query( content="What are the main topics covered in the documentation?", vector_db_ids=["test_bank"], ) diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 940b7b8985..12970ce08a 100644 --- a/tests/client-sdk/agents/test_agents.py +++ 
b/tests/client-sdk/agents/test_agents.py
@@ -292,7 +292,7 @@ def test_rag_agent(llama_stack_client, agent_config):
         embedding_model="all-MiniLM-L6-v2",
         embedding_dimension=384,
     )
-    llama_stack_client.tool_runtime.rag_tool.insert_documents(
+    llama_stack_client.tool_runtime.rag_tool.insert(
         documents=documents,
         vector_db_id=vector_db_id,
         chunk_size_in_tokens=512,
@@ -321,4 +321,4 @@ def test_rag_agent(llama_stack_client, agent_config):
     )
     logs = [str(log) for log in EventLogger().log(response) if log is not None]
     logs_str = "".join(logs)
-    assert "Tool:rag_tool.query_context" in logs_str
+    assert "Tool:query_from_memory" in logs_str
diff --git a/tests/client-sdk/tool_runtime/test_rag_tool.py b/tests/client-sdk/tool_runtime/test_rag_tool.py
index bce0672681..ac2058754b 100644
--- a/tests/client-sdk/tool_runtime/test_rag_tool.py
+++ b/tests/client-sdk/tool_runtime/test_rag_tool.py
@@ -73,7 +73,7 @@ def test_vector_db_insert_inline_and_query(
     llama_stack_client, single_entry_vector_db_registry, sample_documents
 ):
     vector_db_id = single_entry_vector_db_registry[0]
-    llama_stack_client.tool_runtime.rag_tool.insert_documents(
+    llama_stack_client.tool_runtime.rag_tool.insert(
         documents=sample_documents,
         chunk_size_in_tokens=512,
         vector_db_id=vector_db_id,
@@ -157,7 +157,7 @@ def test_vector_db_insert_from_url_and_query(
         for i, url in enumerate(urls)
     ]
 
-    llama_stack_client.tool_runtime.rag_tool.insert_documents(
+    llama_stack_client.tool_runtime.rag_tool.insert(
         documents=documents,
         vector_db_id=vector_db_id,
         chunk_size_in_tokens=512,