Skip to content

Commit

Permalink
[memory refactor][1/n] Rename Memory -> VectorIO, MemoryBanks -> VectorDBs (#828)
Browse files Browse the repository at this point in the history

See #827 for the broader
design.

This is the first part:

- Delete the other kinds of memory banks (keyvalue, keyword, graph) for now;
we will introduce a keyvalue store API as part of this design, but will not
use it in the RAG tool yet.
- Rename the APIs (Memory -> VectorIO, MemoryBanks -> VectorDBs).
  • Loading branch information
ashwinb authored Jan 22, 2025
1 parent 35a00d0 commit 3ae8585
Show file tree
Hide file tree
Showing 37 changed files with 176 additions and 297 deletions.
3 changes: 0 additions & 3 deletions llama_stack/apis/agents/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
ToolResponseMessage,
UserMessage,
)
from llama_stack.apis.memory import MemoryBank
from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
Expand Down Expand Up @@ -133,8 +132,6 @@ class Session(BaseModel):
turns: List[Turn]
started_at: datetime

memory_bank: Optional[MemoryBank] = None


class AgentToolGroupWithArgs(BaseModel):
name: str
Expand Down
4 changes: 2 additions & 2 deletions llama_stack/apis/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Api(Enum):
inference = "inference"
safety = "safety"
agents = "agents"
memory = "memory"
vector_io = "vector_io"
datasetio = "datasetio"
scoring = "scoring"
eval = "eval"
Expand All @@ -25,7 +25,7 @@ class Api(Enum):

models = "models"
shields = "shields"
memory_banks = "memory_banks"
vector_dbs = "vector_dbs"
datasets = "datasets"
scoring_functions = "scoring_functions"
eval_tasks = "eval_tasks"
Expand Down
161 changes: 0 additions & 161 deletions llama_stack/apis/memory_banks/memory_banks.py

This file was deleted.

2 changes: 1 addition & 1 deletion llama_stack/apis/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
class ResourceType(Enum):
model = "model"
shield = "shield"
memory_bank = "memory_bank"
vector_db = "vector_db"
dataset = "dataset"
scoring_function = "scoring_function"
eval_task = "eval_task"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .memory_banks import * # noqa: F401 F403
from .vector_dbs import * # noqa: F401 F403
66 changes: 66 additions & 0 deletions llama_stack/apis/vector_dbs/vector_dbs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import List, Literal, Optional, Protocol, runtime_checkable

from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel

from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol


# Vector database resource: a `Resource` subclass whose `type` tag is pinned to
# the `ResourceType.vector_db` value and which additionally records the
# embedding settings declared below.
@json_schema_type
class VectorDB(Resource):
    # Discriminator field: the only permitted value, and also the default, is
    # `ResourceType.vector_db.value`.
    type: Literal[ResourceType.vector_db.value] = ResourceType.vector_db.value

    # Both fields are required (no defaults).
    # NOTE(review): presumably the model used to embed content stored in this
    # DB and the length of the resulting vectors — confirm.
    embedding_model: str
    embedding_dimension: int

    @property
    def vector_db_id(self) -> str:
        # Read-only alias for `self.identifier`. The attribute is not declared
        # in this class; presumably inherited from `Resource` — confirm.
        return self.identifier

    @property
    def provider_vector_db_id(self) -> str:
        # Read-only alias for `self.provider_resource_id`. The attribute is not
        # declared in this class; presumably inherited from `Resource` — confirm.
        return self.provider_resource_id


# Plain pydantic model carrying the values needed to describe a vector DB:
# its id, embedding model and embedding dimension, plus an optional
# provider-side id.
# NOTE(review): presumably used to declare vector DBs up front (e.g. in
# configuration) before they are registered — confirm against callers.
class VectorDBInput(BaseModel):
    vector_db_id: str
    embedding_model: str
    embedding_dimension: int
    # Optional; defaults to None when no provider-side id is supplied.
    provider_vector_db_id: Optional[str] = None


# Response envelope returned by `VectorDBs.list_vector_dbs`: wraps the list of
# `VectorDB` objects under the `data` key.
class ListVectorDBsResponse(BaseModel):
    data: List[VectorDB]


# Protocol describing the vector DB registry API: list, fetch, register and
# unregister `VectorDB` resources. Each method declares its HTTP route and
# verb through `@webmethod`; the bodies are `...` stubs to be supplied by
# implementations.
# `runtime_checkable` permits isinstance() checks against this protocol.
# NOTE(review): `trace_protocol` is a project decorator; presumably it adds
# telemetry tracing around the methods — confirm.
@runtime_checkable
@trace_protocol
class VectorDBs(Protocol):
    # GET /vector-dbs — returns the vector DBs wrapped in a
    # ListVectorDBsResponse.
    @webmethod(route="/vector-dbs", method="GET")
    async def list_vector_dbs(self) -> ListVectorDBsResponse: ...

    # GET /vector-dbs/{vector_db_id} — look up a single vector DB by id.
    # The return type is Optional, so the result may be None.
    # NOTE(review): presumably None means "not found" — confirm.
    @webmethod(route="/vector-dbs/{vector_db_id}", method="GET")
    async def get_vector_db(
        self,
        vector_db_id: str,
    ) -> Optional[VectorDB]: ...

    # POST /vector-dbs — register a vector DB and return the resulting
    # VectorDB. `vector_db_id` and `embedding_model` are required;
    # `embedding_dimension` defaults to 384; `provider_id` and
    # `provider_vector_db_id` default to None.
    # NOTE(review): `embedding_dimension` is annotated Optional here while
    # `VectorDB.embedding_dimension` is a plain int — confirm whether an
    # explicit None is actually accepted.
    @webmethod(route="/vector-dbs", method="POST")
    async def register_vector_db(
        self,
        vector_db_id: str,
        embedding_model: str,
        embedding_dimension: Optional[int] = 384,
        provider_id: Optional[str] = None,
        provider_vector_db_id: Optional[str] = None,
    ) -> VectorDB: ...

    # DELETE /vector-dbs/{vector_db_id} — unregister the vector DB with the
    # given id; returns nothing.
    @webmethod(route="/vector-dbs/{vector_db_id}", method="DELETE")
    async def unregister_vector_db(self, vector_db_id: str) -> None: ...
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .memory import * # noqa: F401 F403
from .vector_io import * # noqa: F401 F403
Original file line number Diff line number Diff line change
Expand Up @@ -13,55 +13,45 @@
from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel, Field

from llama_stack.apis.common.content_types import URL
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.memory_banks import MemoryBank
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol


@json_schema_type
class MemoryBankDocument(BaseModel):
document_id: str
content: InterleavedContent | URL
mime_type: str | None = None
metadata: Dict[str, Any] = Field(default_factory=dict)


class Chunk(BaseModel):
content: InterleavedContent
token_count: int
document_id: str
metadata: Dict[str, Any] = Field(default_factory=dict)


@json_schema_type
class QueryDocumentsResponse(BaseModel):
class QueryChunksResponse(BaseModel):
chunks: List[Chunk]
scores: List[float]


class MemoryBankStore(Protocol):
def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: ...
class VectorDBStore(Protocol):
def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ...


@runtime_checkable
@trace_protocol
class Memory(Protocol):
memory_bank_store: MemoryBankStore
class VectorIO(Protocol):
vector_db_store: VectorDBStore

# this will just block now until documents are inserted, but it should
# probably return a Job instance which can be polled for completion
@webmethod(route="/memory/insert", method="POST")
async def insert_documents(
@webmethod(route="/vector-io/insert", method="POST")
async def insert_chunks(
self,
bank_id: str,
documents: List[MemoryBankDocument],
vector_db_id: str,
chunks: List[Chunk],
ttl_seconds: Optional[int] = None,
) -> None: ...

@webmethod(route="/memory/query", method="POST")
async def query_documents(
@webmethod(route="/vector-io/query", method="POST")
async def query_chunks(
self,
bank_id: str,
vector_db_id: str,
query: InterleavedContent,
params: Optional[Dict[str, Any]] = None,
) -> QueryDocumentsResponse: ...
) -> QueryChunksResponse: ...
Loading

0 comments on commit 3ae8585

Please sign in to comment.