From a63a43c646e08e7e98487d1769901db2a464570a Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 22 Jan 2025 10:39:13 -0800 Subject: [PATCH] [memory refactor][6/n] Update naming and routes (#839) Making a few small naming changes as per feedback: - RAGToolRuntime methods are called `insert` and `query` to keep them more general - The tool names are changed to non-namespaced forms `insert_into_memory` and `query_from_memory` - The REST endpoints are more REST-ful --- docs/resources/llama-stack-spec.html | 374 +++++++++--------- docs/resources/llama-stack-spec.yaml | 35 +- llama_stack/apis/tools/rag_tool.py | 10 +- llama_stack/apis/vector_io/vector_io.py | 2 +- llama_stack/distribution/routers/routers.py | 19 +- .../agents/meta_reference/agent_instance.py | 8 +- .../inline/tool_runtime/memory/memory.py | 11 +- .../providers/tests/tools/test_tools.py | 4 +- .../tests/vector_io/test_vector_store.py | 20 +- tests/client-sdk/agents/test_agents.py | 4 +- .../client-sdk/tool_runtime/test_rag_tool.py | 4 +- 11 files changed, 240 insertions(+), 251 deletions(-) diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index f00d7b2917..f6dd1c8dc3 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -1887,7 +1887,7 @@ ] } }, - "/v1/vector-io/insert": { + "/v1/tool-runtime/rag-tool/insert": { "post": { "responses": { "200": { @@ -1895,8 +1895,9 @@ } }, "tags": [ - "VectorIO" + "ToolRuntime" ], + "summary": "Index documents so they can be used by the RAG system", "parameters": [ { "name": "X-LlamaStack-Provider-Data", @@ -1921,7 +1922,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InsertChunksRequest" + "$ref": "#/components/schemas/InsertRequest" } } }, @@ -1929,7 +1930,7 @@ } } }, - "/v1/tool-runtime/rag-tool/insert-documents": { + "/v1/vector-io/insert": { "post": { "responses": { "200": { @@ -1937,9 +1938,8 @@ } }, "tags": [ - "ToolRuntime" + "VectorIO" ], - "summary": "Index documents so they can be used by the RAG system", "parameters": [ { "name": "X-LlamaStack-Provider-Data", @@ -1964,7 +1964,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InsertDocumentsRequest" + "$ref": "#/components/schemas/InsertChunksRequest" } } }, @@ -3033,7 +3033,7 @@ } } }, - "/v1/vector-io/query": { + "/v1/tool-runtime/rag-tool/query": { "post": { "responses": { "200": { @@ -3041,15 +3041,16 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/QueryChunksResponse" + "$ref": "#/components/schemas/RAGQueryResult" } } } } }, "tags": [ - "VectorIO" + "ToolRuntime" ], + "summary": "Query the RAG system for context; typically invoked by the agent", "parameters": [ { "name": "X-LlamaStack-Provider-Data", @@ -3074,7 +3075,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/QueryChunksRequest" + "$ref": "#/components/schemas/QueryRequest" } } }, @@ -3082,7 +3083,7 @@ } } }, - "/v1/tool-runtime/rag-tool/query-context": { + "/v1/vector-io/query": { "post": { "responses": { "200": { @@ -3090,16 +3091,15 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RAGQueryResult" + "$ref": "#/components/schemas/QueryChunksResponse" } } } } }, "tags": [ - "ToolRuntime" + "VectorIO" ], - "summary": "Query the RAG system for context; typically invoked by the agent", "parameters": [ { "name": "X-LlamaStack-Provider-Data", @@ -3124,7 +3124,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/QueryContextRequest" + "$ref": "#/components/schemas/QueryChunksRequest" } } }, @@ -5256,11 +5256,8 @@ "const": "memory_retrieval", "default": "memory_retrieval" }, - "memory_bank_ids": { - "type": "array", - "items": { - "type": "string" - } + "vector_db_ids": { + "type": "string" }, "inserted_context": { "$ref": "#/components/schemas/InterleavedContent" @@ -5271,7 +5268,7 @@ "turn_id", "step_id", "step_type", - "memory_bank_ids", + "vector_db_ids", "inserted_context" ] }, @@ -6976,63 +6973,6 @@ "status" ] }, - "InsertChunksRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string" - }, - "chunks": { - "type": "array", - "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ] - } - }, - "ttl_seconds": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "chunks" - ] - }, "RAGDocument": { "type": "object", "properties": { @@ -7094,7 +7034,7 @@ "metadata" ] }, - "InsertDocumentsRequest": { + "InsertRequest": { "type": "object", "properties": { "documents": { @@ -7117,6 +7057,63 @@ "chunk_size_in_tokens" ] }, + "InsertChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string" + }, + "chunks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ] + } + }, + "ttl_seconds": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "chunks" + ] + }, "InvokeToolRequest": { "type": "object", "properties": { @@ -7883,104 +7880,6 @@ "job_uuid" ] }, - "QueryChunksRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string" - }, - "query": { - "$ref": "#/components/schemas/InterleavedContent" - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "query" - ] - }, - "QueryChunksResponse": { - "type": "object", - "properties": { - "chunks": { - "type": "array", - "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ] - } - }, - "scores": { - "type": "array", - "items": { - "type": "number" - } - } - }, - "additionalProperties": false, - "required": [ - "chunks", - "scores" - ] - }, "DefaultRAGQueryGeneratorConfig": { "type": "object", "properties": { @@ -8054,26 +7953,25 @@ } ] }, - "QueryContextRequest": { + "QueryRequest": { "type": "object", "properties": { "content": { "$ref": "#/components/schemas/InterleavedContent" }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig" - }, "vector_db_ids": { "type": "array", "items": { "type": "string" } + }, + "query_config": { + "$ref": "#/components/schemas/RAGQueryConfig" } }, "additionalProperties": false, "required": [ "content", - "query_config", "vector_db_ids" ] }, @@ -8086,6 +7984,104 @@ }, "additionalProperties": false }, + "QueryChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string" + }, + "query": { + "$ref": "#/components/schemas/InterleavedContent" + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "query" + ] + }, + "QueryChunksResponse": { + "type": "object", + "properties": { + "chunks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ] + } + }, + "scores": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "additionalProperties": false, + "required": [ + "chunks", + "scores" + ] + }, "QueryCondition": { "type": "object", "properties": { @@ -9246,8 +9242,8 @@ "description": "" }, { - "name": "InsertDocumentsRequest", - "description": "" + "name": "InsertRequest", + "description": "" }, { "name": "Inspect" @@ -9435,8 +9431,8 @@ "description": "" }, { - "name": "QueryContextRequest", - "description": "" + "name": "QueryRequest", + "description": "" }, { "name": "QuerySpanTreeResponse", @@ -9858,7 +9854,7 @@ "ImageDelta", "InferenceStep", "InsertChunksRequest", - "InsertDocumentsRequest", + "InsertRequest", "InterleavedContent", "InterleavedContentItem", "InvokeToolRequest", @@ -9903,7 +9899,7 @@ "QueryChunksResponse", "QueryCondition", "QueryConditionOp", - "QueryContextRequest", + "QueryRequest", "QuerySpanTreeResponse", "QuerySpansResponse", "QueryTracesResponse", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index e1ae07c45f..6bbaadf8dd 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -1009,7 +1009,7 @@ components: - vector_db_id - chunks type: object - InsertDocumentsRequest: + InsertRequest: additionalProperties: false properties: chunk_size_in_tokens: @@ -1299,10 +1299,6 @@ components: type: string inserted_context: $ref: '#/components/schemas/InterleavedContent' - memory_bank_ids: - items: - type: string - type: array started_at: format: date-time type: string @@ -1314,11 +1310,13 @@ components: type: string turn_id: type: string + vector_db_ids: + type: string required: - turn_id - step_id - step_type - - memory_bank_ids + - vector_db_ids - inserted_context type: object Message: @@ -1710,7 +1708,7 @@ components: - gt - lt type: string - QueryContextRequest: + QueryRequest: additionalProperties: false properties: content: @@ -1723,7 +1721,6 @@ components: type: array required: - content - - query_config - vector_db_ids type: object QuerySpanTreeResponse: @@ -5176,7 +5173,7 @@ paths: description: OK tags: - ToolRuntime - /v1/tool-runtime/rag-tool/insert-documents: + /v1/tool-runtime/rag-tool/insert: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5197,7 +5194,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/InsertDocumentsRequest' + $ref: '#/components/schemas/InsertRequest' required: true responses: '200': @@ -5205,7 +5202,7 @@ paths: summary: Index documents so they can be used by the RAG system tags: - ToolRuntime - /v1/tool-runtime/rag-tool/query-context: + /v1/tool-runtime/rag-tool/query: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5226,7 +5223,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/QueryContextRequest' + $ref: '#/components/schemas/QueryRequest' required: true responses: '200': @@ -5814,9 +5811,8 @@ tags: - description: name: InsertChunksRequest -- description: - name: InsertDocumentsRequest +- description: + name: InsertRequest - name: Inspect - description: @@ -5943,9 +5939,8 @@ tags: - description: name: QueryConditionOp -- description: - name: QueryContextRequest +- description: + name: QueryRequest - description: name: QuerySpanTreeResponse @@ -6245,7 +6240,7 @@ x-tagGroups: - ImageDelta - InferenceStep - InsertChunksRequest - - InsertDocumentsRequest + - InsertRequest - InterleavedContent - InterleavedContentItem - InvokeToolRequest @@ -6290,7 +6285,7 @@ x-tagGroups: - QueryChunksResponse - QueryCondition - QueryConditionOp - - QueryContextRequest + - QueryRequest - QuerySpanTreeResponse - QuerySpansResponse - QueryTracesResponse diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index 0247bb384c..9503673043 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -74,8 +74,8 @@ class RAGQueryConfig(BaseModel): @runtime_checkable @trace_protocol class RAGToolRuntime(Protocol): - @webmethod(route="/tool-runtime/rag-tool/insert-documents", method="POST") - async def insert_documents( + @webmethod(route="/tool-runtime/rag-tool/insert", method="POST") + async def insert( self, documents: List[RAGDocument], vector_db_id: str, @@ -84,12 +84,12 @@ async def insert_documents( """Index documents so they can be used by the RAG system""" ... - @webmethod(route="/tool-runtime/rag-tool/query-context", method="POST") - async def query_context( + @webmethod(route="/tool-runtime/rag-tool/query", method="POST") + async def query( self, content: InterleavedContent, - query_config: RAGQueryConfig, vector_db_ids: List[str], + query_config: Optional[RAGQueryConfig] = None, ) -> RAGQueryResult: """Query the RAG system for context; typically invoked by the agent""" ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 5371b89186..8feeaa6d43 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -38,7 +38,7 @@ def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ... class VectorIO(Protocol): vector_db_store: VectorDBStore - # this will just block now until documents are inserted, but it should + # this will just block now until chunks are inserted, but it should # probably return a Job instance which can be polled for completion @webmethod(route="/vector-io/insert", method="POST") async def insert_chunks( diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 3ae9833dc1..6bb2045bd5 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -414,25 +414,25 @@ def __init__( ) -> None: self.routing_table = routing_table - async def query_context( + async def query( self, content: InterleavedContent, - query_config: RAGQueryConfig, vector_db_ids: List[str], + query_config: Optional[RAGQueryConfig] = None, ) -> RAGQueryResult: return await self.routing_table.get_provider_impl( - "rag_tool.query_context" - ).query_context(content, query_config, vector_db_ids) + "query_from_memory" + ).query(content, vector_db_ids, query_config) - async def insert_documents( + async def insert( self, documents: List[RAGDocument], vector_db_id: str, chunk_size_in_tokens: int = 512, ) -> None: return await self.routing_table.get_provider_impl( - "rag_tool.insert_documents" - ).insert_documents(documents, vector_db_id, chunk_size_in_tokens) + "insert_into_memory" + ).insert(documents, vector_db_id, chunk_size_in_tokens) def __init__( self, @@ -441,10 +441,9 @@ def __init__( self.routing_table = routing_table # HACK ALERT this should be in sync with "get_all_api_endpoints()" - # TODO: make sure rag_tool vs builtin::memory is correct everywhere self.rag_tool = self.RagToolImpl(routing_table) - setattr(self, "rag_tool.query_context", self.rag_tool.query_context) - setattr(self, "rag_tool.insert_documents", self.rag_tool.insert_documents) + for method in ("query", "insert"): + setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) async def initialize(self) -> None: pass diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 2d0ad137b9..75fd75afc4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -84,7 +84,7 @@ def make_random_string(length: int = 8): TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})") -MEMORY_QUERY_TOOL = "rag_tool.query_context" +MEMORY_QUERY_TOOL = "query_from_memory" WEB_SEARCH_TOOL = "web_search" MEMORY_GROUP = "builtin::memory" @@ -432,16 +432,16 @@ async def _run( ) ) ) - result = await self.tool_runtime_api.rag_tool.query_context( + result = await self.tool_runtime_api.rag_tool.query( content=concat_interleaved_content( [msg.content for msg in input_messages] ), + vector_db_ids=vector_db_ids, query_config=RAGQueryConfig( query_generator_config=DefaultRAGQueryGeneratorConfig(), max_tokens_in_context=4096, max_chunks=5, ), - vector_db_ids=vector_db_ids, ) retrieved_context = result.content @@ -882,7 +882,7 @@ async def add_to_session_vector_db( ) for a in data ] - await self.tool_runtime_api.rag_tool.insert_documents( + await self.tool_runtime_api.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, chunk_size_in_tokens=512, diff --git a/llama_stack/providers/inline/tool_runtime/memory/memory.py b/llama_stack/providers/inline/tool_runtime/memory/memory.py index d3f8b07dc0..7798ed7118 100644 --- a/llama_stack/providers/inline/tool_runtime/memory/memory.py +++ b/llama_stack/providers/inline/tool_runtime/memory/memory.py @@ -61,7 +61,7 @@ async def initialize(self): async def shutdown(self): pass - async def insert_documents( + async def insert( self, documents: List[RAGDocument], vector_db_id: str, @@ -87,15 +87,16 @@ async def insert_documents( vector_db_id=vector_db_id, ) - async def query_context( + async def query( self, content: InterleavedContent, - query_config: RAGQueryConfig, vector_db_ids: List[str], + query_config: Optional[RAGQueryConfig] = None, ) -> RAGQueryResult: if not vector_db_ids: return RAGQueryResult(content=None) + query_config = query_config or RAGQueryConfig() query = await generate_rag_query( query_config.query_generator_config, content, @@ -159,11 +160,11 @@ async def list_runtime_tools( # encountering fatals. return [ ToolDef( - name="rag_tool.query_context", + name="query_from_memory", description="Retrieve context from memory", ), ToolDef( - name="rag_tool.insert_documents", + name="insert_into_memory", description="Insert documents into memory", ), ] diff --git a/llama_stack/providers/tests/tools/test_tools.py b/llama_stack/providers/tests/tools/test_tools.py index 62b18ea664..bb4265f942 100644 --- a/llama_stack/providers/tests/tools/test_tools.py +++ b/llama_stack/providers/tests/tools/test_tools.py @@ -96,14 +96,14 @@ async def test_rag_tool(self, tools_stack, sample_documents): ) # Insert documents into memory - await tools_impl.rag_tool.insert_documents( + await tools_impl.rag_tool.insert( documents=sample_documents, vector_db_id="test_bank", chunk_size_in_tokens=512, ) # Execute the memory tool - response = await tools_impl.rag_tool.query_context( + response = await tools_impl.rag_tool.query( content="What are the main topics covered in the documentation?", vector_db_ids=["test_bank"], ) diff --git a/llama_stack/providers/tests/vector_io/test_vector_store.py b/llama_stack/providers/tests/vector_io/test_vector_store.py index ef6bfca735..2a41a89822 100644 --- a/llama_stack/providers/tests/vector_io/test_vector_store.py +++ b/llama_stack/providers/tests/vector_io/test_vector_store.py @@ -11,11 +11,9 @@ import pytest -from llama_stack.providers.utils.memory.vector_store import ( - content_from_doc, - MemoryBankDocument, - URL, -) +from llama_stack.apis.tools import RAGDocument + +from llama_stack.providers.utils.memory.vector_store import content_from_doc, URL DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf" @@ -41,33 +39,33 @@ class TestVectorStore: @pytest.mark.asyncio async def test_returns_content_from_pdf_data_uri(self): data_uri = data_url_from_file(DUMMY_PDF_PATH) - doc = MemoryBankDocument( + doc = RAGDocument( document_id="dummy", content=data_uri, mime_type="application/pdf", metadata={}, ) content = await content_from_doc(doc) - assert content == "Dummy PDF file" + assert content == "Dumm y PDF file" @pytest.mark.asyncio async def test_downloads_pdf_and_returns_content(self): # Using GitHub to host the PDF file url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" - doc = MemoryBankDocument( + doc = RAGDocument( document_id="dummy", content=url, mime_type="application/pdf", metadata={}, ) content = await content_from_doc(doc) - assert content == "Dummy PDF file" + assert content == "Dumm y PDF file" @pytest.mark.asyncio async def test_downloads_pdf_and_returns_content_with_url_object(self): # Using GitHub to host the PDF file url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" - doc = MemoryBankDocument( + doc = RAGDocument( document_id="dummy", content=URL( uri=url, @@ -76,4 +74,4 @@ async def test_downloads_pdf_and_returns_content_with_url_object(self): metadata={}, ) content = await content_from_doc(doc) - assert content == "Dummy PDF file" + assert content == "Dumm y PDF file" diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index fe80100dac..6fe0678b44 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -292,7 +292,7 @@ def test_rag_agent(llama_stack_client, agent_config): embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, ) - llama_stack_client.tool_runtime.rag_tool.insert_documents( + llama_stack_client.tool_runtime.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, chunk_size_in_tokens=512, @@ -321,4 +321,4 @@ def test_rag_agent(llama_stack_client, agent_config): ) logs = [str(log) for log in EventLogger().log(response) if log is not None] logs_str = "".join(logs) - assert "Tool:rag_tool.query_context" in logs_str + assert "Tool:query_from_memory" in logs_str diff --git a/tests/client-sdk/tool_runtime/test_rag_tool.py b/tests/client-sdk/tool_runtime/test_rag_tool.py index bce0672681..baf5b6b406 100644 --- a/tests/client-sdk/tool_runtime/test_rag_tool.py +++ b/tests/client-sdk/tool_runtime/test_rag_tool.py @@ -73,7 +73,7 @@ def test_vector_db_insert_inline_and_query( llama_stack_client, single_entry_vector_db_registry, sample_documents ): vector_db_id = single_entry_vector_db_registry[0] - llama_stack_client.tool_runtime.rag_tool.insert_documents( + llama_stack_client.tool_runtime.rag_tool.insert( documents=sample_documents, chunk_size_in_tokens=512, vector_db_id=vector_db_id, @@ -157,7 +157,7 @@ def test_vector_db_insert_from_url_and_query( for i, url in enumerate(urls) ] - llama_stack_client.tool_runtime.rag_tool.insert_documents( + llama_stack_client.tool_runtime.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, chunk_size_in_tokens=512,