[memory refactor][6/n] Update naming and routes

meta-llama · Jan 22, 2025 · c295a2b
1 parent 1de1587
commit c295a2b
Show file tree

Hide file tree

Showing 10 changed files with 369 additions and 419 deletions.
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
@@ -1009,7 +1009,7 @@ components:
       - vector_db_id
       - chunks
       type: object
-    InsertDocumentsRequest:
+    InsertRequest:
       additionalProperties: false
       properties:
         chunk_size_in_tokens:
@@ -1299,10 +1299,6 @@ components:
           type: string
         inserted_context:
           $ref: '#/components/schemas/InterleavedContent'
-        memory_bank_ids:
-          items:
-            type: string
-          type: array
         started_at:
           format: date-time
           type: string
@@ -1314,11 +1310,13 @@ components:
           type: string
         turn_id:
           type: string
+        vector_db_ids:
+          type: string
       required:
       - turn_id
       - step_id
       - step_type
-      - memory_bank_ids
+      - vector_db_ids
       - inserted_context
       type: object
     Message:
@@ -1630,27 +1628,6 @@ components:
       - quantizer_name
       - group_size
       type: object
-    QueryChunksRequest:
-      additionalProperties: false
-      properties:
-        params:
-          additionalProperties:
-            oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-          type: object
-        query:
-          $ref: '#/components/schemas/InterleavedContent'
-        vector_db_id:
-          type: string
-      required:
-      - vector_db_id
-      - query
-      type: object
     QueryChunksResponse:
       additionalProperties: false
       properties:
@@ -1710,22 +1687,6 @@ components:
       - gt
       - lt
       type: string
-    QueryContextRequest:
-      additionalProperties: false
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-        query_config:
-          $ref: '#/components/schemas/RAGQueryConfig'
-        vector_db_ids:
-          items:
-            type: string
-          type: array
-      required:
-      - content
-      - query_config
-      - vector_db_ids
-      type: object
     QuerySpanTreeResponse:
       additionalProperties: false
       properties:
@@ -5176,9 +5137,26 @@ paths:
           description: OK
       tags:
       - ToolRuntime
-  /v1/tool-runtime/rag-tool/insert-documents:
-    post:
+  /v1/tool-runtime/rag-tool/documents:
+    get:
       parameters:
+      - in: query
+        name: content
+        required: true
+        schema:
+          $ref: '#/components/schemas/InterleavedContent'
+      - in: query
+        name: vector_db_ids
+        required: true
+        schema:
+          items:
+            type: string
+          type: array
+      - in: query
+        name: query_config
+        required: false
+        schema:
+          $ref: '#/components/schemas/RAGQueryConfig'
       - description: JSON-encoded provider data which will be made available to the
           adapter servicing the API
         in: header
@@ -5193,19 +5171,16 @@ paths:
         required: false
         schema:
           type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InsertDocumentsRequest'
-        required: true
       responses:
         '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RAGQueryResult'
           description: OK
-      summary: Index documents so they can be used by the RAG system
+      summary: Query the RAG system for context; typically invoked by the agent
       tags:
       - ToolRuntime
-  /v1/tool-runtime/rag-tool/query-context:
     post:
       parameters:
       - description: JSON-encoded provider data which will be made available to the
@@ -5226,16 +5201,12 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/QueryContextRequest'
+              $ref: '#/components/schemas/InsertRequest'
         required: true
       responses:
         '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RAGQueryResult'
           description: OK
-      summary: Query the RAG system for context; typically invoked by the agent
+      summary: Index documents so they can be used by the RAG system
       tags:
       - ToolRuntime
   /v1/toolgroups:
@@ -5530,9 +5501,32 @@ paths:
           description: OK
       tags:
       - VectorDBs
-  /v1/vector-io/insert:
-    post:
+  /v1/vector-io/chunks:
+    get:
       parameters:
+      - in: query
+        name: vector_db_id
+        required: true
+        schema:
+          type: string
+      - in: query
+        name: query
+        required: true
+        schema:
+          $ref: '#/components/schemas/InterleavedContent'
+      - in: query
+        name: params
+        required: false
+        schema:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
       - description: JSON-encoded provider data which will be made available to the
           adapter servicing the API
         in: header
@@ -5547,18 +5541,15 @@ paths:
         required: false
         schema:
           type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InsertChunksRequest'
-        required: true
       responses:
         '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/QueryChunksResponse'
           description: OK
       tags:
       - VectorIO
-  /v1/vector-io/query:
     post:
       parameters:
       - description: JSON-encoded provider data which will be made available to the
@@ -5579,14 +5570,10 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/QueryChunksRequest'
+              $ref: '#/components/schemas/InsertChunksRequest'
         required: true
       responses:
         '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/QueryChunksResponse'
           description: OK
       tags:
       - VectorIO
@@ -5814,9 +5801,8 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/InsertChunksRequest"
     />
   name: InsertChunksRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/InsertDocumentsRequest"
-    />
-  name: InsertDocumentsRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/InsertRequest" />
+  name: InsertRequest
 - name: Inspect
 - description: <SchemaDefinition schemaRef="#/components/schemas/InterleavedContent"
     />
@@ -5932,9 +5918,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/QATFinetuningConfig"
     />
   name: QATFinetuningConfig
-- description: <SchemaDefinition schemaRef="#/components/schemas/QueryChunksRequest"
-    />
-  name: QueryChunksRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/QueryChunksResponse"
     />
   name: QueryChunksResponse
@@ -5943,9 +5926,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/QueryConditionOp"
     />
   name: QueryConditionOp
-- description: <SchemaDefinition schemaRef="#/components/schemas/QueryContextRequest"
-    />
-  name: QueryContextRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/QuerySpanTreeResponse"
     />
   name: QuerySpanTreeResponse
@@ -6245,7 +6225,7 @@ x-tagGroups:
   - ImageDelta
   - InferenceStep
   - InsertChunksRequest
-  - InsertDocumentsRequest
+  - InsertRequest
   - InterleavedContent
   - InterleavedContentItem
   - InvokeToolRequest
@@ -6286,11 +6266,9 @@ x-tagGroups:
   - PreferenceOptimizeRequest
   - ProviderInfo
   - QATFinetuningConfig
-  - QueryChunksRequest
   - QueryChunksResponse
   - QueryCondition
   - QueryConditionOp
-  - QueryContextRequest
   - QuerySpanTreeResponse
   - QuerySpansResponse
   - QueryTracesResponse

diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
@@ -74,8 +74,8 @@ class RAGQueryConfig(BaseModel):
 @runtime_checkable
 @trace_protocol
 class RAGToolRuntime(Protocol):
-    @webmethod(route="/tool-runtime/rag-tool/insert-documents", method="POST")
-    async def insert_documents(
+    @webmethod(route="/tool-runtime/rag-tool/documents", method="POST")
+    async def insert(
         self,
         documents: List[RAGDocument],
         vector_db_id: str,
@@ -84,12 +84,12 @@ async def insert_documents(
         """Index documents so they can be used by the RAG system"""
         ...
 
-    @webmethod(route="/tool-runtime/rag-tool/query-context", method="POST")
-    async def query_context(
+    @webmethod(route="/tool-runtime/rag-tool/documents", method="GET")
+    async def query(
         self,
         content: InterleavedContent,
-        query_config: RAGQueryConfig,
         vector_db_ids: List[str],
+        query_config: Optional[RAGQueryConfig] = None,
     ) -> RAGQueryResult:
         """Query the RAG system for context; typically invoked by the agent"""
         ...
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
@@ -38,17 +38,17 @@ def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ...
 class VectorIO(Protocol):
     vector_db_store: VectorDBStore
 
-    # this will just block now until documents are inserted, but it should
+    # this will just block now until chunks are inserted, but it should
     # probably return a Job instance which can be polled for completion
-    @webmethod(route="/vector-io/insert", method="POST")
+    @webmethod(route="/vector-io/chunks", method="POST")
     async def insert_chunks(
         self,
         vector_db_id: str,
         chunks: List[Chunk],
         ttl_seconds: Optional[int] = None,
     ) -> None: ...
 
-    @webmethod(route="/vector-io/query", method="POST")
+    @webmethod(route="/vector-io/chunks", method="GET")
     async def query_chunks(
         self,
         vector_db_id: str,

diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
@@ -414,25 +414,25 @@ def __init__(
         ) -> None:
             self.routing_table = routing_table
 
-        async def query_context(
+        async def query(
             self,
             content: InterleavedContent,
-            query_config: RAGQueryConfig,
             vector_db_ids: List[str],
+            query_config: Optional[RAGQueryConfig] = None,
         ) -> RAGQueryResult:
             return await self.routing_table.get_provider_impl(
-                "rag_tool.query_context"
-            ).query_context(content, query_config, vector_db_ids)
+                "query_from_memory"
+            ).query(content, vector_db_ids, query_config)
 
-        async def insert_documents(
+        async def insert(
             self,
             documents: List[RAGDocument],
             vector_db_id: str,
             chunk_size_in_tokens: int = 512,
         ) -> None:
             return await self.routing_table.get_provider_impl(
-                "rag_tool.insert_documents"
-            ).insert_documents(documents, vector_db_id, chunk_size_in_tokens)
+                "insert_into_memory"
+            ).insert(documents, vector_db_id, chunk_size_in_tokens)
 
     def __init__(
         self,
@@ -441,10 +441,9 @@ def __init__(
         self.routing_table = routing_table
 
         # HACK ALERT this should be in sync with "get_all_api_endpoints()"
-        # TODO: make sure rag_tool vs builtin::memory is correct everywhere
         self.rag_tool = self.RagToolImpl(routing_table)
-        setattr(self, "rag_tool.query_context", self.rag_tool.query_context)
-        setattr(self, "rag_tool.insert_documents", self.rag_tool.insert_documents)
+        for method in ("query", "insert"):
+            setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))
 
     async def initialize(self) -> None:
         pass