diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index f00d7b2917..e7e9fc6d2a 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -1887,7 +1887,72 @@
]
}
},
- "/v1/vector-io/insert": {
+ "/v1/tool-runtime/rag-tool/documents": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RAGQueryResult"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "ToolRuntime"
+ ],
+ "summary": "Query the RAG system for context; typically invoked by the agent",
+ "parameters": [
+ {
+ "name": "content",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ }
+ },
+ {
+ "name": "vector_db_ids",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ {
+ "name": "query_config",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "$ref": "#/components/schemas/RAGQueryConfig"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
"post": {
"responses": {
"200": {
@@ -1895,8 +1960,9 @@
}
},
"tags": [
- "VectorIO"
+ "ToolRuntime"
],
+ "summary": "Index documents so they can be used by the RAG system",
"parameters": [
{
"name": "X-LlamaStack-Provider-Data",
@@ -1921,7 +1987,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/InsertChunksRequest"
+ "$ref": "#/components/schemas/InsertRequest"
}
}
},
@@ -1929,7 +1995,90 @@
}
}
},
- "/v1/tool-runtime/rag-tool/insert-documents": {
+ "/v1/vector-io/chunks": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/QueryChunksResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "VectorIO"
+ ],
+ "parameters": [
+ {
+ "name": "vector_db_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "query",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ }
+ },
+ {
+ "name": "params",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ {
+ "name": "X-LlamaStack-Provider-Data",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-Client-Version",
+ "in": "header",
+ "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
"post": {
"responses": {
"200": {
@@ -1937,9 +2086,8 @@
}
},
"tags": [
- "ToolRuntime"
+ "VectorIO"
],
- "summary": "Index documents so they can be used by the RAG system",
"parameters": [
{
"name": "X-LlamaStack-Provider-Data",
@@ -1964,7 +2112,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/InsertDocumentsRequest"
+ "$ref": "#/components/schemas/InsertChunksRequest"
}
}
},
@@ -3033,105 +3181,6 @@
}
}
},
- "/v1/vector-io/query": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/QueryChunksResponse"
- }
- }
- }
- }
- },
- "tags": [
- "VectorIO"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-Provider-Data",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Client-Version",
- "in": "header",
- "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/QueryChunksRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/v1/tool-runtime/rag-tool/query-context": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/RAGQueryResult"
- }
- }
- }
- }
- },
- "tags": [
- "ToolRuntime"
- ],
- "summary": "Query the RAG system for context; typically invoked by the agent",
- "parameters": [
- {
- "name": "X-LlamaStack-Provider-Data",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-Client-Version",
- "in": "header",
- "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/QueryContextRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/v1/telemetry/spans": {
"get": {
"responses": {
@@ -5256,11 +5305,8 @@
"const": "memory_retrieval",
"default": "memory_retrieval"
},
- "memory_bank_ids": {
- "type": "array",
- "items": {
- "type": "string"
- }
+ "vector_db_ids": {
+ "type": "string"
},
"inserted_context": {
"$ref": "#/components/schemas/InterleavedContent"
@@ -5271,7 +5317,7 @@
"turn_id",
"step_id",
"step_type",
- "memory_bank_ids",
+ "vector_db_ids",
"inserted_context"
]
},
@@ -6976,67 +7022,10 @@
"status"
]
},
- "InsertChunksRequest": {
+ "RAGDocument": {
"type": "object",
"properties": {
- "vector_db_id": {
- "type": "string"
- },
- "chunks": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "content": {
- "$ref": "#/components/schemas/InterleavedContent"
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "content",
- "metadata"
- ]
- }
- },
- "ttl_seconds": {
- "type": "integer"
- }
- },
- "additionalProperties": false,
- "required": [
- "vector_db_id",
- "chunks"
- ]
- },
- "RAGDocument": {
- "type": "object",
- "properties": {
- "document_id": {
+ "document_id": {
"type": "string"
},
"content": {
@@ -7094,7 +7083,7 @@
"metadata"
]
},
- "InsertDocumentsRequest": {
+ "InsertRequest": {
"type": "object",
"properties": {
"documents": {
@@ -7117,6 +7106,63 @@
"chunk_size_in_tokens"
]
},
+ "InsertChunksRequest": {
+ "type": "object",
+ "properties": {
+ "vector_db_id": {
+ "type": "string"
+ },
+ "chunks": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "metadata"
+ ]
+ }
+ },
+ "ttl_seconds": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "vector_db_id",
+ "chunks"
+ ]
+ },
"InvokeToolRequest": {
"type": "object",
"properties": {
@@ -7883,104 +7929,6 @@
"job_uuid"
]
},
- "QueryChunksRequest": {
- "type": "object",
- "properties": {
- "vector_db_id": {
- "type": "string"
- },
- "query": {
- "$ref": "#/components/schemas/InterleavedContent"
- },
- "params": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "vector_db_id",
- "query"
- ]
- },
- "QueryChunksResponse": {
- "type": "object",
- "properties": {
- "chunks": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "content": {
- "$ref": "#/components/schemas/InterleavedContent"
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "content",
- "metadata"
- ]
- }
- },
- "scores": {
- "type": "array",
- "items": {
- "type": "number"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "chunks",
- "scores"
- ]
- },
"DefaultRAGQueryGeneratorConfig": {
"type": "object",
"properties": {
@@ -8054,38 +8002,72 @@
}
]
},
- "QueryContextRequest": {
+ "RAGQueryResult": {
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
+ }
+ },
+ "additionalProperties": false
+ },
+ "QueryChunksResponse": {
+ "type": "object",
+ "properties": {
+ "chunks": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "metadata"
+ ]
+ }
},
- "query_config": {
- "$ref": "#/components/schemas/RAGQueryConfig"
- },
- "vector_db_ids": {
+ "scores": {
"type": "array",
"items": {
- "type": "string"
+ "type": "number"
}
}
},
"additionalProperties": false,
"required": [
- "content",
- "query_config",
- "vector_db_ids"
+ "chunks",
+ "scores"
]
},
- "RAGQueryResult": {
- "type": "object",
- "properties": {
- "content": {
- "$ref": "#/components/schemas/InterleavedContent"
- }
- },
- "additionalProperties": false
- },
"QueryCondition": {
"type": "object",
"properties": {
@@ -9246,8 +9228,8 @@
"description": ""
},
{
- "name": "InsertDocumentsRequest",
- "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertDocumentsRequest\" />"
+ "name": "InsertRequest",
+ "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertRequest\" />"
},
{
"name": "Inspect"
@@ -9418,10 +9400,6 @@
"name": "QATFinetuningConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QATFinetuningConfig\" />"
},
- {
- "name": "QueryChunksRequest",
- "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryChunksRequest\" />"
- },
{
"name": "QueryChunksResponse",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryChunksResponse\" />"
@@ -9434,10 +9412,6 @@
"name": "QueryConditionOp",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryConditionOp\" />"
},
- {
- "name": "QueryContextRequest",
- "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryContextRequest\" />"
- },
{
"name": "QuerySpanTreeResponse",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QuerySpanTreeResponse\" />"
@@ -9858,7 +9832,7 @@
"ImageDelta",
"InferenceStep",
"InsertChunksRequest",
- "InsertDocumentsRequest",
+ "InsertRequest",
"InterleavedContent",
"InterleavedContentItem",
"InvokeToolRequest",
@@ -9899,11 +9873,9 @@
"PreferenceOptimizeRequest",
"ProviderInfo",
"QATFinetuningConfig",
- "QueryChunksRequest",
"QueryChunksResponse",
"QueryCondition",
"QueryConditionOp",
- "QueryContextRequest",
"QuerySpanTreeResponse",
"QuerySpansResponse",
"QueryTracesResponse",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index e1ae07c45f..0e14d296ff 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1009,7 +1009,7 @@ components:
- vector_db_id
- chunks
type: object
- InsertDocumentsRequest:
+ InsertRequest:
additionalProperties: false
properties:
chunk_size_in_tokens:
@@ -1299,10 +1299,6 @@ components:
type: string
inserted_context:
$ref: '#/components/schemas/InterleavedContent'
- memory_bank_ids:
- items:
- type: string
- type: array
started_at:
format: date-time
type: string
@@ -1314,11 +1310,13 @@ components:
type: string
turn_id:
type: string
+ vector_db_ids:
+ type: string
required:
- turn_id
- step_id
- step_type
- - memory_bank_ids
+ - vector_db_ids
- inserted_context
type: object
Message:
@@ -1630,27 +1628,6 @@ components:
- quantizer_name
- group_size
type: object
- QueryChunksRequest:
- additionalProperties: false
- properties:
- params:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- query:
- $ref: '#/components/schemas/InterleavedContent'
- vector_db_id:
- type: string
- required:
- - vector_db_id
- - query
- type: object
QueryChunksResponse:
additionalProperties: false
properties:
@@ -1710,22 +1687,6 @@ components:
- gt
- lt
type: string
- QueryContextRequest:
- additionalProperties: false
- properties:
- content:
- $ref: '#/components/schemas/InterleavedContent'
- query_config:
- $ref: '#/components/schemas/RAGQueryConfig'
- vector_db_ids:
- items:
- type: string
- type: array
- required:
- - content
- - query_config
- - vector_db_ids
- type: object
QuerySpanTreeResponse:
additionalProperties: false
properties:
@@ -5176,9 +5137,26 @@ paths:
description: OK
tags:
- ToolRuntime
- /v1/tool-runtime/rag-tool/insert-documents:
- post:
+ /v1/tool-runtime/rag-tool/documents:
+ get:
parameters:
+ - in: query
+ name: content
+ required: true
+ schema:
+ $ref: '#/components/schemas/InterleavedContent'
+ - in: query
+ name: vector_db_ids
+ required: true
+ schema:
+ items:
+ type: string
+ type: array
+ - in: query
+ name: query_config
+ required: false
+ schema:
+ $ref: '#/components/schemas/RAGQueryConfig'
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
@@ -5193,19 +5171,16 @@ paths:
required: false
schema:
type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/InsertDocumentsRequest'
- required: true
responses:
'200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/RAGQueryResult'
description: OK
- summary: Index documents so they can be used by the RAG system
+ summary: Query the RAG system for context; typically invoked by the agent
tags:
- ToolRuntime
- /v1/tool-runtime/rag-tool/query-context:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -5226,16 +5201,12 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/QueryContextRequest'
+ $ref: '#/components/schemas/InsertRequest'
required: true
responses:
'200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/RAGQueryResult'
description: OK
- summary: Query the RAG system for context; typically invoked by the agent
+ summary: Index documents so they can be used by the RAG system
tags:
- ToolRuntime
/v1/toolgroups:
@@ -5530,9 +5501,32 @@ paths:
description: OK
tags:
- VectorDBs
- /v1/vector-io/insert:
- post:
+ /v1/vector-io/chunks:
+ get:
parameters:
+ - in: query
+ name: vector_db_id
+ required: true
+ schema:
+ type: string
+ - in: query
+ name: query
+ required: true
+ schema:
+ $ref: '#/components/schemas/InterleavedContent'
+ - in: query
+ name: params
+ required: false
+ schema:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
@@ -5547,18 +5541,15 @@ paths:
required: false
schema:
type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/InsertChunksRequest'
- required: true
responses:
'200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryChunksResponse'
description: OK
tags:
- VectorIO
- /v1/vector-io/query:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -5579,14 +5570,10 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/QueryChunksRequest'
+ $ref: '#/components/schemas/InsertChunksRequest'
required: true
responses:
'200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/QueryChunksResponse'
description: OK
tags:
- VectorIO
@@ -5814,9 +5801,8 @@ tags:
- description:
name: InsertChunksRequest
-- description:
- name: InsertDocumentsRequest
+- description:
+ name: InsertRequest
- name: Inspect
- description:
@@ -5932,9 +5918,6 @@ tags:
- description:
name: QATFinetuningConfig
-- description:
- name: QueryChunksRequest
- description:
name: QueryChunksResponse
@@ -5943,9 +5926,6 @@ tags:
- description:
name: QueryConditionOp
-- description:
- name: QueryContextRequest
- description:
name: QuerySpanTreeResponse
@@ -6245,7 +6225,7 @@ x-tagGroups:
- ImageDelta
- InferenceStep
- InsertChunksRequest
- - InsertDocumentsRequest
+ - InsertRequest
- InterleavedContent
- InterleavedContentItem
- InvokeToolRequest
@@ -6286,11 +6266,9 @@ x-tagGroups:
- PreferenceOptimizeRequest
- ProviderInfo
- QATFinetuningConfig
- - QueryChunksRequest
- QueryChunksResponse
- QueryCondition
- QueryConditionOp
- - QueryContextRequest
- QuerySpanTreeResponse
- QuerySpansResponse
- QueryTracesResponse
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index 0247bb384c..3674e7f86f 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -74,8 +74,8 @@ class RAGQueryConfig(BaseModel):
@runtime_checkable
@trace_protocol
class RAGToolRuntime(Protocol):
- @webmethod(route="/tool-runtime/rag-tool/insert-documents", method="POST")
- async def insert_documents(
+ @webmethod(route="/tool-runtime/rag-tool/documents", method="POST")
+ async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
@@ -84,12 +84,12 @@ async def insert_documents(
"""Index documents so they can be used by the RAG system"""
...
- @webmethod(route="/tool-runtime/rag-tool/query-context", method="POST")
- async def query_context(
+ @webmethod(route="/tool-runtime/rag-tool/documents", method="GET")
+ async def query(
self,
content: InterleavedContent,
- query_config: RAGQueryConfig,
vector_db_ids: List[str],
+ query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
"""Query the RAG system for context; typically invoked by the agent"""
...
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 5371b89186..8a7117187b 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -38,9 +38,9 @@ def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ...
class VectorIO(Protocol):
vector_db_store: VectorDBStore
- # this will just block now until documents are inserted, but it should
+ # this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
- @webmethod(route="/vector-io/insert", method="POST")
+ @webmethod(route="/vector-io/chunks", method="POST")
async def insert_chunks(
self,
vector_db_id: str,
@@ -48,7 +48,7 @@ async def insert_chunks(
ttl_seconds: Optional[int] = None,
) -> None: ...
- @webmethod(route="/vector-io/query", method="POST")
+ @webmethod(route="/vector-io/chunks", method="GET")
async def query_chunks(
self,
vector_db_id: str,
diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
index 3ae9833dc1..6bb2045bd5 100644
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@@ -414,25 +414,25 @@ def __init__(
) -> None:
self.routing_table = routing_table
- async def query_context(
+ async def query(
self,
content: InterleavedContent,
- query_config: RAGQueryConfig,
vector_db_ids: List[str],
+ query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
return await self.routing_table.get_provider_impl(
- "rag_tool.query_context"
- ).query_context(content, query_config, vector_db_ids)
+ "query_from_memory"
+ ).query(content, vector_db_ids, query_config)
- async def insert_documents(
+ async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
chunk_size_in_tokens: int = 512,
) -> None:
return await self.routing_table.get_provider_impl(
- "rag_tool.insert_documents"
- ).insert_documents(documents, vector_db_id, chunk_size_in_tokens)
+ "insert_into_memory"
+ ).insert(documents, vector_db_id, chunk_size_in_tokens)
def __init__(
self,
@@ -441,10 +441,9 @@ def __init__(
self.routing_table = routing_table
# HACK ALERT this should be in sync with "get_all_api_endpoints()"
- # TODO: make sure rag_tool vs builtin::memory is correct everywhere
self.rag_tool = self.RagToolImpl(routing_table)
- setattr(self, "rag_tool.query_context", self.rag_tool.query_context)
- setattr(self, "rag_tool.insert_documents", self.rag_tool.insert_documents)
+ for method in ("query", "insert"):
+ setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))
async def initialize(self) -> None:
pass
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 2d0ad137b9..75fd75afc4 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -84,7 +84,7 @@ def make_random_string(length: int = 8):
TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
-MEMORY_QUERY_TOOL = "rag_tool.query_context"
+MEMORY_QUERY_TOOL = "query_from_memory"
WEB_SEARCH_TOOL = "web_search"
MEMORY_GROUP = "builtin::memory"
@@ -432,16 +432,16 @@ async def _run(
)
)
)
- result = await self.tool_runtime_api.rag_tool.query_context(
+ result = await self.tool_runtime_api.rag_tool.query(
content=concat_interleaved_content(
[msg.content for msg in input_messages]
),
+ vector_db_ids=vector_db_ids,
query_config=RAGQueryConfig(
query_generator_config=DefaultRAGQueryGeneratorConfig(),
max_tokens_in_context=4096,
max_chunks=5,
),
- vector_db_ids=vector_db_ids,
)
retrieved_context = result.content
@@ -882,7 +882,7 @@ async def add_to_session_vector_db(
)
for a in data
]
- await self.tool_runtime_api.rag_tool.insert_documents(
+ await self.tool_runtime_api.rag_tool.insert(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,
diff --git a/llama_stack/providers/inline/tool_runtime/memory/memory.py b/llama_stack/providers/inline/tool_runtime/memory/memory.py
index d3f8b07dc0..7798ed7118 100644
--- a/llama_stack/providers/inline/tool_runtime/memory/memory.py
+++ b/llama_stack/providers/inline/tool_runtime/memory/memory.py
@@ -61,7 +61,7 @@ async def initialize(self):
async def shutdown(self):
pass
- async def insert_documents(
+ async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
@@ -87,15 +87,16 @@ async def insert_documents(
vector_db_id=vector_db_id,
)
- async def query_context(
+ async def query(
self,
content: InterleavedContent,
- query_config: RAGQueryConfig,
vector_db_ids: List[str],
+ query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
if not vector_db_ids:
return RAGQueryResult(content=None)
+ query_config = query_config or RAGQueryConfig()
query = await generate_rag_query(
query_config.query_generator_config,
content,
@@ -159,11 +160,11 @@ async def list_runtime_tools(
# encountering fatals.
return [
ToolDef(
- name="rag_tool.query_context",
+ name="query_from_memory",
description="Retrieve context from memory",
),
ToolDef(
- name="rag_tool.insert_documents",
+ name="insert_into_memory",
description="Insert documents into memory",
),
]
diff --git a/llama_stack/providers/tests/tools/test_tools.py b/llama_stack/providers/tests/tools/test_tools.py
index 62b18ea664..bb4265f942 100644
--- a/llama_stack/providers/tests/tools/test_tools.py
+++ b/llama_stack/providers/tests/tools/test_tools.py
@@ -96,14 +96,14 @@ async def test_rag_tool(self, tools_stack, sample_documents):
)
# Insert documents into memory
- await tools_impl.rag_tool.insert_documents(
+ await tools_impl.rag_tool.insert(
documents=sample_documents,
vector_db_id="test_bank",
chunk_size_in_tokens=512,
)
# Execute the memory tool
- response = await tools_impl.rag_tool.query_context(
+ response = await tools_impl.rag_tool.query(
content="What are the main topics covered in the documentation?",
vector_db_ids=["test_bank"],
)
diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py
index 940b7b8985..12970ce08a 100644
--- a/tests/client-sdk/agents/test_agents.py
+++ b/tests/client-sdk/agents/test_agents.py
@@ -292,7 +292,7 @@ def test_rag_agent(llama_stack_client, agent_config):
embedding_model="all-MiniLM-L6-v2",
embedding_dimension=384,
)
- llama_stack_client.tool_runtime.rag_tool.insert_documents(
+ llama_stack_client.tool_runtime.insert_into_memory(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,
@@ -321,4 +321,4 @@ def test_rag_agent(llama_stack_client, agent_config):
)
logs = [str(log) for log in EventLogger().log(response) if log is not None]
logs_str = "".join(logs)
- assert "Tool:rag_tool.query_context" in logs_str
+ assert "Tool:query_from_memory" in logs_str
diff --git a/tests/client-sdk/tool_runtime/test_rag_tool.py b/tests/client-sdk/tool_runtime/test_rag_tool.py
index bce0672681..ac2058754b 100644
--- a/tests/client-sdk/tool_runtime/test_rag_tool.py
+++ b/tests/client-sdk/tool_runtime/test_rag_tool.py
@@ -73,7 +73,7 @@ def test_vector_db_insert_inline_and_query(
llama_stack_client, single_entry_vector_db_registry, sample_documents
):
vector_db_id = single_entry_vector_db_registry[0]
- llama_stack_client.tool_runtime.rag_tool.insert_documents(
+ llama_stack_client.tool_runtime.insert_into_memory(
documents=sample_documents,
chunk_size_in_tokens=512,
vector_db_id=vector_db_id,
@@ -157,7 +157,7 @@ def test_vector_db_insert_from_url_and_query(
for i, url in enumerate(urls)
]
- llama_stack_client.tool_runtime.rag_tool.insert_documents(
+ llama_stack_client.tool_runtime.insert_into_memory(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,