Skip to content

Commit

Permalink
[memory refactor][6/n] Update naming and routes
Browse files: browse the repository at this point in the history
  • Loading branch information
ashwinb committed Jan 22, 2025
1 parent 1de1587 commit c295a2b
Show file tree
Hide file tree
Showing 10 changed files with 369 additions and 419 deletions.
574 changes: 273 additions & 301 deletions docs/resources/llama-stack-spec.html

Large diffs are not rendered by default.

148 changes: 63 additions & 85 deletions docs/resources/llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ components:
- vector_db_id
- chunks
type: object
InsertDocumentsRequest:
InsertRequest:
additionalProperties: false
properties:
chunk_size_in_tokens:
Expand Down Expand Up @@ -1299,10 +1299,6 @@ components:
type: string
inserted_context:
$ref: '#/components/schemas/InterleavedContent'
memory_bank_ids:
items:
type: string
type: array
started_at:
format: date-time
type: string
Expand All @@ -1314,11 +1310,13 @@ components:
type: string
turn_id:
type: string
vector_db_ids:
type: string
required:
- turn_id
- step_id
- step_type
- memory_bank_ids
- vector_db_ids
- inserted_context
type: object
Message:
Expand Down Expand Up @@ -1630,27 +1628,6 @@ components:
- quantizer_name
- group_size
type: object
QueryChunksRequest:
additionalProperties: false
properties:
params:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
query:
$ref: '#/components/schemas/InterleavedContent'
vector_db_id:
type: string
required:
- vector_db_id
- query
type: object
QueryChunksResponse:
additionalProperties: false
properties:
Expand Down Expand Up @@ -1710,22 +1687,6 @@ components:
- gt
- lt
type: string
QueryContextRequest:
additionalProperties: false
properties:
content:
$ref: '#/components/schemas/InterleavedContent'
query_config:
$ref: '#/components/schemas/RAGQueryConfig'
vector_db_ids:
items:
type: string
type: array
required:
- content
- query_config
- vector_db_ids
type: object
QuerySpanTreeResponse:
additionalProperties: false
properties:
Expand Down Expand Up @@ -5176,9 +5137,26 @@ paths:
description: OK
tags:
- ToolRuntime
/v1/tool-runtime/rag-tool/insert-documents:
post:
/v1/tool-runtime/rag-tool/documents:
get:
parameters:
- in: query
name: content
required: true
schema:
$ref: '#/components/schemas/InterleavedContent'
- in: query
name: vector_db_ids
required: true
schema:
items:
type: string
type: array
- in: query
name: query_config
required: false
schema:
$ref: '#/components/schemas/RAGQueryConfig'
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
Expand All @@ -5193,19 +5171,16 @@ paths:
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/InsertDocumentsRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/RAGQueryResult'
description: OK
summary: Index documents so they can be used by the RAG system
summary: Query the RAG system for context; typically invoked by the agent
tags:
- ToolRuntime
/v1/tool-runtime/rag-tool/query-context:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
Expand All @@ -5226,16 +5201,12 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryContextRequest'
$ref: '#/components/schemas/InsertRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/RAGQueryResult'
description: OK
summary: Query the RAG system for context; typically invoked by the agent
summary: Index documents so they can be used by the RAG system
tags:
- ToolRuntime
/v1/toolgroups:
Expand Down Expand Up @@ -5530,9 +5501,32 @@ paths:
description: OK
tags:
- VectorDBs
/v1/vector-io/insert:
post:
/v1/vector-io/chunks:
get:
parameters:
- in: query
name: vector_db_id
required: true
schema:
type: string
- in: query
name: query
required: true
schema:
$ref: '#/components/schemas/InterleavedContent'
- in: query
name: params
required: false
schema:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
Expand All @@ -5547,18 +5541,15 @@ paths:
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/InsertChunksRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/QueryChunksResponse'
description: OK
tags:
- VectorIO
/v1/vector-io/query:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
Expand All @@ -5579,14 +5570,10 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryChunksRequest'
$ref: '#/components/schemas/InsertChunksRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/QueryChunksResponse'
description: OK
tags:
- VectorIO
Expand Down Expand Up @@ -5814,9 +5801,8 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/InsertChunksRequest"
/>
name: InsertChunksRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/InsertDocumentsRequest"
/>
name: InsertDocumentsRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/InsertRequest" />
name: InsertRequest
- name: Inspect
- description: <SchemaDefinition schemaRef="#/components/schemas/InterleavedContent"
/>
Expand Down Expand Up @@ -5932,9 +5918,6 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/QATFinetuningConfig"
/>
name: QATFinetuningConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/QueryChunksRequest"
/>
name: QueryChunksRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/QueryChunksResponse"
/>
name: QueryChunksResponse
Expand All @@ -5943,9 +5926,6 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/QueryConditionOp"
/>
name: QueryConditionOp
- description: <SchemaDefinition schemaRef="#/components/schemas/QueryContextRequest"
/>
name: QueryContextRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/QuerySpanTreeResponse"
/>
name: QuerySpanTreeResponse
Expand Down Expand Up @@ -6245,7 +6225,7 @@ x-tagGroups:
- ImageDelta
- InferenceStep
- InsertChunksRequest
- InsertDocumentsRequest
- InsertRequest
- InterleavedContent
- InterleavedContentItem
- InvokeToolRequest
Expand Down Expand Up @@ -6286,11 +6266,9 @@ x-tagGroups:
- PreferenceOptimizeRequest
- ProviderInfo
- QATFinetuningConfig
- QueryChunksRequest
- QueryChunksResponse
- QueryCondition
- QueryConditionOp
- QueryContextRequest
- QuerySpanTreeResponse
- QuerySpansResponse
- QueryTracesResponse
Expand Down
10 changes: 5 additions & 5 deletions llama_stack/apis/tools/rag_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ class RAGQueryConfig(BaseModel):
@runtime_checkable
@trace_protocol
class RAGToolRuntime(Protocol):
@webmethod(route="/tool-runtime/rag-tool/insert-documents", method="POST")
async def insert_documents(
@webmethod(route="/tool-runtime/rag-tool/documents", method="POST")
async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
Expand All @@ -84,12 +84,12 @@ async def insert_documents(
"""Index documents so they can be used by the RAG system"""
...

@webmethod(route="/tool-runtime/rag-tool/query-context", method="POST")
async def query_context(
@webmethod(route="/tool-runtime/rag-tool/documents", method="GET")
async def query(
self,
content: InterleavedContent,
query_config: RAGQueryConfig,
vector_db_ids: List[str],
query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
"""Query the RAG system for context; typically invoked by the agent"""
...
6 changes: 3 additions & 3 deletions llama_stack/apis/vector_io/vector_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ...
class VectorIO(Protocol):
vector_db_store: VectorDBStore

# this will just block now until documents are inserted, but it should
# for now this simply blocks until the chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
@webmethod(route="/vector-io/insert", method="POST")
@webmethod(route="/vector-io/chunks", method="POST")
async def insert_chunks(
self,
vector_db_id: str,
chunks: List[Chunk],
ttl_seconds: Optional[int] = None,
) -> None: ...

@webmethod(route="/vector-io/query", method="POST")
@webmethod(route="/vector-io/chunks", method="GET")
async def query_chunks(
self,
vector_db_id: str,
Expand Down
19 changes: 9 additions & 10 deletions llama_stack/distribution/routers/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,25 +414,25 @@ def __init__(
) -> None:
self.routing_table = routing_table

async def query_context(
async def query(
self,
content: InterleavedContent,
query_config: RAGQueryConfig,
vector_db_ids: List[str],
query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
return await self.routing_table.get_provider_impl(
"rag_tool.query_context"
).query_context(content, query_config, vector_db_ids)
"query_from_memory"
).query(content, vector_db_ids, query_config)

async def insert_documents(
async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
chunk_size_in_tokens: int = 512,
) -> None:
return await self.routing_table.get_provider_impl(
"rag_tool.insert_documents"
).insert_documents(documents, vector_db_id, chunk_size_in_tokens)
"insert_into_memory"
).insert(documents, vector_db_id, chunk_size_in_tokens)

def __init__(
self,
Expand All @@ -441,10 +441,9 @@ def __init__(
self.routing_table = routing_table

# HACK ALERT: this must be kept in sync with "get_all_api_endpoints()"
# TODO: make sure rag_tool vs builtin::memory is correct everywhere
self.rag_tool = self.RagToolImpl(routing_table)
setattr(self, "rag_tool.query_context", self.rag_tool.query_context)
setattr(self, "rag_tool.insert_documents", self.rag_tool.insert_documents)
for method in ("query", "insert"):
setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))

async def initialize(self) -> None:
pass
Expand Down
Loading

0 comments on commit c295a2b

Please sign in to comment.