diff --git a/Makefile b/Makefile index 8283b96d..979a97e5 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ lint: PYTHON_FILES=. lint_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$') lint lint_diff: - poetry run black --target-version py39 -l 88 $(PYTHON_FILES) --check - poetry run ruff . + poetry run black --target-version py311 -l 88 $(PYTHON_FILES) --check + poetry run ruff check . poetry run mypy $(PYTHON_FILES) test: diff --git a/docs/encoders/aurelio-bm25.ipynb b/docs/encoders/aurelio-bm25.ipynb index e74f1e1b..9e4b7852 100644 --- a/docs/encoders/aurelio-bm25.ipynb +++ b/docs/encoders/aurelio-bm25.ipynb @@ -153,9 +153,7 @@ " \"Enter OpenAI API Key: \"\n", ")\n", "\n", - "encoder = OpenAIEncoder(\n", - " name=\"text-embedding-3-small\", score_threshold=0.3\n", - ")" + "encoder = OpenAIEncoder(name=\"text-embedding-3-small\", score_threshold=0.3)" ] }, { diff --git a/docs/examples/hybrid-router.ipynb b/docs/examples/hybrid-router.ipynb index e52c5752..71e57ca7 100644 --- a/docs/examples/hybrid-router.ipynb +++ b/docs/examples/hybrid-router.ipynb @@ -155,9 +155,7 @@ "from semantic_router.routers import HybridRouter\n", "\n", "router = HybridRouter(\n", - " encoder=dense_encoder,\n", - " sparse_encoder=sparse_encoder,\n", - " routes=routes\n", + " encoder=dense_encoder, sparse_encoder=sparse_encoder, routes=routes\n", ")" ] }, diff --git a/docs/examples/pinecone-hybrid.ipynb b/docs/examples/pinecone-hybrid.ipynb index 134b6e0d..b354907f 100644 --- a/docs/examples/pinecone-hybrid.ipynb +++ b/docs/examples/pinecone-hybrid.ipynb @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -153,9 +153,7 @@ " \"Enter OpenAI API Key: \"\n", ")\n", "\n", - "encoder = OpenAIEncoder(\n", - " name=\"text-embedding-3-small\", score_threshold=0.3\n", - ")" + "encoder = OpenAIEncoder(name=\"text-embedding-3-small\", score_threshold=0.3)" ] }, { @@ -167,16 +165,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2024-11-26 22:34:54 - pinecone_plugin_interface.logging - INFO - discover_namespace_packages.py:12 - discover_subpackages() - Discovering subpackages in _NamespacePath(['/Users/jamesbriggs/Library/Caches/pypoetry/virtualenvs/semantic-router-C1zr4a78-py3.12/lib/python3.12/site-packages/pinecone_plugins'])\n", - "2024-11-26 22:34:54 - pinecone_plugin_interface.logging - INFO - discover_plugins.py:9 - discover_plugins() - Looking for plugins in pinecone_plugins.inference\n", - "2024-11-26 22:34:54 - pinecone_plugin_interface.logging - INFO - installation.py:10 - install_plugins() - Installing plugin inference into Pinecone\n" + "2024-11-27 15:41:32 - pinecone_plugin_interface.logging - INFO - discover_namespace_packages.py:12 - discover_subpackages() - Discovering subpackages in _NamespacePath(['/Users/jamesbriggs/Library/Caches/pypoetry/virtualenvs/semantic-router-C1zr4a78-py3.12/lib/python3.12/site-packages/pinecone_plugins'])\n", + "2024-11-27 15:41:32 - pinecone_plugin_interface.logging - INFO - discover_plugins.py:9 - discover_plugins() - Looking for plugins in pinecone_plugins.inference\n", + "2024-11-27 15:41:32 - pinecone_plugin_interface.logging - INFO - installation.py:10 - install_plugins() - Installing plugin inference into Pinecone\n" ] } ], @@ -203,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -226,16 +224,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "True" + "False" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -253,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -272,7 +270,7 @@ " \" politics: why don't you tell me about your political opinions\"]" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -290,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -309,7 +307,7 @@ " Utterance(route='politics', utterance=\"why don't you tell me about your political opinions\", function_schemas=None, metadata={}, diff_tag=' ')]" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -327,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -349,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -358,7 +356,7 @@ "True" ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +367,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -388,7 +386,7 @@ " \" politics: why don't you tell me about your political opinions\"]" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -406,14 +404,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2024-11-26 22:35:56 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + "2024-11-27 15:42:03 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" ] }, { @@ -422,7 +420,7 @@ "RouteChoice(name=None, function_call=None, similarity_score=None)" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -440,7 +438,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-11-26 22:35:20 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + "2024-11-27 15:42:06 - httpx - INFO - _client.py:1013 - _send_single_request() - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" ] }, { diff --git a/semantic_router/encoders/aurelio.py b/semantic_router/encoders/aurelio.py index d226e3d3..8824b2f1 100644 --- a/semantic_router/encoders/aurelio.py +++ b/semantic_router/encoders/aurelio.py @@ -12,16 +12,17 @@ class AurelioSparseEncoder(SparseEncoder): model: Optional[Any] = None idx_mapping: Optional[Dict[int, int]] = None client: AurelioClient = Field(default_factory=AurelioClient, exclude=True) - async_client: AsyncAurelioClient = Field(default_factory=AsyncAurelioClient, exclude=True) + async_client: AsyncAurelioClient = Field( + default_factory=AsyncAurelioClient, exclude=True + ) type: str = "sparse" def __init__( self, name: str = "bm25", - score_threshold: float = 1.0, api_key: Optional[str] = None, ): - super().__init__(name=name, score_threshold=score_threshold) + super().__init__(name=name) if api_key is None: api_key = os.getenv("AURELIO_API_KEY") if api_key is None: @@ -33,9 +34,11 @@ def __call__(self, docs: list[str]) -> list[SparseEmbedding]: res: EmbeddingResponse = self.client.embedding(input=docs, model=self.name) embeds = [SparseEmbedding.from_aurelio(r.embedding) for r in res.data] return embeds - + async def acall(self, docs: list[str]) -> list[SparseEmbedding]: - res: EmbeddingResponse = await self.async_client.embedding(input=docs, model=self.name) + res: EmbeddingResponse = await self.async_client.embedding( + input=docs, model=self.name + ) embeds = [SparseEmbedding.from_aurelio(r.embedding) for r in res.data] return embeds diff --git a/semantic_router/encoders/base.py b/semantic_router/encoders/base.py index f2cee15d..1bcf3d9d 100644 --- a/semantic_router/encoders/base.py +++ b/semantic_router/encoders/base.py @@ -35,4 +35,4 @@ def __call__(self, docs: List[str]) -> List[SparseEmbedding]: raise NotImplementedError("Subclasses must implement this method") def acall(self, docs: List[str]) -> Coroutine[Any, Any, List[SparseEmbedding]]: - raise NotImplementedError("Subclasses must implement this method") \ No newline at end of file + raise NotImplementedError("Subclasses must implement this method") diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py index 873d900a..1bec7b9f 100644 --- a/semantic_router/encoders/tfidf.py +++ b/semantic_router/encoders/tfidf.py @@ -14,9 +14,8 @@ class TfidfEncoder(SparseEncoder): idf: ndarray = np.array([]) word_index: Dict = {} - def __init__(self, name: str = "tfidf", score_threshold: float = 0.82): - # TODO default score_threshold not thoroughly tested, should optimize - super().__init__(name=name, score_threshold=score_threshold) + def __init__(self, name: str = "tfidf"): + super().__init__(name=name) self.word_index = {} self.idf = np.array([]) @@ -29,7 +28,7 @@ def __call__(self, docs: List[str]) -> List[List[float]]: docs = [self._preprocess(doc) for doc in docs] tf = self._compute_tf(docs) tfidf = tf * self.idf - return tfidf.tolist() + return tfidf def fit(self, routes: List[Route]): docs = [] diff --git a/semantic_router/index/hybrid_local.py b/semantic_router/index/hybrid_local.py index 28a3d83b..f2821422 100644 --- a/semantic_router/index/hybrid_local.py +++ b/semantic_router/index/hybrid_local.py @@ -66,15 +66,19 @@ def describe(self) -> Dict: "dimensions": self.index.shape[1] if self.index is not None else 0, "vectors": self.index.shape[0] if self.index is not None else 0, } - - def _sparse_dot_product(self, vec_a: dict[int, float], vec_b: dict[int, float]) -> float: + + def _sparse_dot_product( + self, vec_a: dict[int, float], vec_b: dict[int, float] + ) -> float: # switch vecs to ensure first is smallest for more efficiency if len(vec_a) > len(vec_b): vec_a, vec_b = vec_b, vec_a return sum(vec_a[i] * vec_b.get(i, 0) for i in vec_a) - + def _sparse_index_dot_product(self, vec_a: dict[int, float]) -> list[float]: - dot_products = [self._sparse_dot_product(vec_a, vec_b) for vec_b in self.sparse_index] + dot_products = [ + self._sparse_dot_product(vec_a, vec_b) for vec_b in self.sparse_index + ] return dot_products def query( diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index 5eb0aecb..bb6ed3ef 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -652,7 +652,9 @@ async def _async_fetch_metadata(self, vector_id: str) -> dict: ) def __len__(self): - namespace_stats = self.index.describe_index_stats()["namespaces"].get(self.namespace) + namespace_stats = self.index.describe_index_stats()["namespaces"].get( + self.namespace + ) if namespace_stats: return namespace_stats["vector_count"] else: diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py index 1af3e38b..087eb57c 100644 --- a/semantic_router/routers/base.py +++ b/semantic_router/routers/base.py @@ -4,7 +4,7 @@ import random import hashlib from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pydantic.v1 import BaseModel, Field, validator +from pydantic.v1 import BaseModel, Field import numpy as np import yaml # type: ignore @@ -380,8 +380,7 @@ def _set_index(self, index: Optional[BaseIndex]): self.index = index def _init_index_state(self): - """Initializes an index (where required) and runs auto_sync if active. - """ + """Initializes an index (where required) and runs auto_sync if active.""" # initialize index now, check if we need dimensions if self.index.dimensions is None: dims = len(self.encoder(["test"])[0]) diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py index f8ec89cd..0feb8379 100644 --- a/semantic_router/routers/hybrid.py +++ b/semantic_router/routers/hybrid.py @@ -60,7 +60,7 @@ def __init__( # run initialize index now if auto sync is active if self.auto_sync: self._init_index_state() - + def _set_sparse_encoder(self, sparse_encoder: Optional[DenseEncoder]): if sparse_encoder is None: logger.warning("No sparse_encoder provided. Using default BM25Encoder.") @@ -126,7 +126,7 @@ def __call__( vector=np.array(vector) if isinstance(vector, list) else vector, top_k=self.top_k, route_filter=route_filter, - sparse_vector=sparse_vector[0] + sparse_vector=sparse_vector[0], ) top_class, top_class_scores = self._semantic_classify( list(zip(scores, route_names)) @@ -142,7 +142,9 @@ def _convex_scaling(self, dense: np.ndarray, sparse: list[dict[int, float]]): scaled_dense = np.array(dense) * self.alpha scaled_sparse = [] for sparse_dict in sparse: - scaled_sparse.append({k: v * (1 - self.alpha) for k, v in sparse_dict.items()}) + scaled_sparse.append( + {k: v * (1 - self.alpha) for k, v in sparse_dict.items()} + ) return scaled_dense, scaled_sparse def _set_aggregation_method(self, aggregation: str = "sum"): diff --git a/semantic_router/routers/semantic.py b/semantic_router/routers/semantic.py index e8a7db14..8a21fdf2 100644 --- a/semantic_router/routers/semantic.py +++ b/semantic_router/routers/semantic.py @@ -1,15 +1,12 @@ import json import random from typing import Any, Dict, List, Optional, Tuple, Union -from pydantic.v1 import validator, Field import numpy as np from tqdm.auto import tqdm -from semantic_router.encoders import AutoEncoder, DenseEncoder, OpenAIEncoder +from semantic_router.encoders import AutoEncoder, DenseEncoder from semantic_router.index.base import BaseIndex -from semantic_router.index.local import LocalIndex -from semantic_router.index.pinecone import PineconeIndex from semantic_router.llms import BaseLLM, OpenAILLM from semantic_router.route import Route from semantic_router.routers.base import BaseRouter, RouterConfig diff --git a/semantic_router/schema.py b/semantic_router/schema.py index 2d00572f..507ea349 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -1,11 +1,13 @@ from datetime import datetime from difflib import Differ from enum import Enum +import numpy as np from typing import List, Optional, Union, Any, Dict, Tuple from pydantic.v1 import BaseModel, Field from semantic_router.utils.logger import logger from aurelio_sdk.schema import BM25Embedding + class EncoderType(Enum): AURELIO = "aurelio" AZURE = "azure" @@ -406,37 +408,53 @@ class Metric(Enum): MANHATTAN = "manhattan" -class SparseValue(BaseModel): - index: int - value: float - - class SparseEmbedding(BaseModel): - embedding: List[SparseValue] + """Sparse embedding interface. Primarily uses numpy operations for faster + operations. + """ + embedding: np.ndarray - def to_dict(self): - return {x.index: x.value for x in self.embedding} + class Config: + arbitrary_types_allowed = True + + @classmethod + def from_array(cls, array: np.ndarray): + if array.ndim != 2 or array.shape[1] != 2: + raise ValueError( + f"Expected a 2D array with 2 columns, got a {array.ndim}D array with {array.shape[1]} columns. " + "Column 0 should contain index positions, and column 1 should contain respective values." + ) + return cls(embedding=array) - def to_pinecone(self): - return { - "indices": [x.index for x in self.embedding], - "values": [x.value for x in self.embedding], - } + @classmethod + def from_aurelio(cls, embedding: BM25Embedding): + arr = np.array([embedding.indices, embedding.values]).T + return cls.from_array(arr) @classmethod def from_dict(cls, sparse_dict: dict): - return cls(embedding=[SparseValue(index=i, value=v) for i, v in sparse_dict.items()]) + arr = np.array([list(sparse_dict.keys()), list(sparse_dict.values())]).T + return cls.from_array(arr) - @classmethod - def from_aurelio(cls, embedding: BM25Embedding): - return cls(embedding=[ - SparseValue( - index=i, - value=v - ) for i, v in zip(embedding.indices, embedding.values) - ]) + def to_dict(self): + return { + i: v for i, v in zip( + self.embedding[:,0].astype(int), + self.embedding[:,1] + ) + } + + def to_pinecone(self): + return { + "indices": self.embedding[:, 0].astype(int).tolist(), + "values": self.embedding[:, 1].tolist(), + } # dictionary interface def items(self): - return [(x.index, x.value) for x in self.embedding] - + return [ + (i, v) for i, v in zip( + self.embedding[:,0].astype(int), + self.embedding[:,1] + ) + ] diff --git a/tests/unit/test_sync.py b/tests/unit/test_sync.py index 8405add9..ea4b8d41 100644 --- a/tests/unit/test_sync.py +++ b/tests/unit/test_sync.py @@ -223,7 +223,9 @@ def test_second_initialization_not_synced( _ = SemanticRouter( encoder=openai_encoder, routes=routes, index=index, auto_sync="local" ) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes_2, index=index) + route_layer = SemanticRouter( + encoder=openai_encoder, routes=routes_2, index=index + ) if index_cls is PineconeIndex: time.sleep(PINECONE_SLEEP) # allow for index to be populated assert route_layer.is_synced() is False @@ -236,7 +238,9 @@ def test_utterance_diff(self, openai_encoder, routes, routes_2, index_cls): _ = SemanticRouter( encoder=openai_encoder, routes=routes, index=index, auto_sync="local" ) - route_layer_2 = SemanticRouter(encoder=openai_encoder, routes=routes_2, index=index) + route_layer_2 = SemanticRouter( + encoder=openai_encoder, routes=routes_2, index=index + ) if index_cls is PineconeIndex: time.sleep(PINECONE_SLEEP) # allow for index to be populated diff = route_layer_2.get_utterance_diff(include_metadata=True)