Skip to content

Commit

Permalink
feature/added aws bedrock (#1012)
Browse files Browse the repository at this point in the history
* added bedrock

* pinned langgraph

* update poetry

* added embedding to bedrock

* updated embedding-models

* formatting

* added models to django

* removed hack

* removed aws

* rationalised embedding model

* updated variables

* revert infra changes

* reformat

* updated .env.integration

* poetry update

* Update infrastructure/aws/variables.tf

---------

Co-authored-by: George Burton <[email protected]>
  • Loading branch information
gecBurton and George Burton authored Sep 3, 2024
1 parent 42a1c68 commit 731df58
Show file tree
Hide file tree
Showing 16 changed files with 1,261 additions and 1,096 deletions.
2 changes: 1 addition & 1 deletion .env.integration
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
UNSTRUCTURED_HOST=unstructured
EMBEDDING_BACKEND=azure
EMBEDDING_BACKEND=text-embedding-3-large

EMBEDDING_DOCUMENT_FIELD_NAME=azure_embedding
AWS_REGION=eu-west-2
Expand Down
320 changes: 172 additions & 148 deletions core-api/poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions core-api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ langchain-elasticsearch = "^0.2.2"
litellm = "^1.43.9"



[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
pytest-cov = "^5.0.0"
Expand Down
303 changes: 161 additions & 142 deletions django_app/poetry.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.1 on 2024-09-03 14:16

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated by Django (see header): widens the choices on
    # AISettings.chat_backend to add the two AWS Bedrock Anthropic Claude
    # model IDs alongside the existing Azure OpenAI options.
    # The default stays 'gpt-4o', so existing rows are unaffected.

    # Must apply after the migration that introduced chat token usage tracking.
    dependencies = [
        ('redbox_core', '0040_chatmessagetokenuse'),
    ]

    operations = [
        migrations.AlterField(
            model_name='aisettings',
            name='chat_backend',
            # Choices mirror models.ChatBackend; keep the two in sync manually.
            field=models.CharField(choices=[('gpt-35-turbo-16k', 'gpt-35-turbo-16k'), ('gpt-4-turbo-2024-04-09', 'gpt-4-turbo-2024-04-09'), ('gpt-4o', 'gpt-4o'), ('anthropic.claude-3-sonnet-20240229-v1:0', 'claude-3-sonnet'), ('anthropic.claude-3-haiku-20240307-v1:0', 'claude-3-haiku')], default='gpt-4o', help_text='LLM to use in chat', max_length=64),
        ),
    ]
2 changes: 2 additions & 0 deletions django_app/redbox_app/redbox_core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class ChatBackend(models.TextChoices):
GPT_35_TURBO = "gpt-35-turbo-16k", _("gpt-35-turbo-16k")
GPT_4_TURBO = "gpt-4-turbo-2024-04-09", _("gpt-4-turbo-2024-04-09")
GPT_4_OMNI = "gpt-4o", _("gpt-4o")
CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0", _("claude-3-sonnet")
CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0", _("claude-3-haiku")

label = models.CharField(max_length=50, unique=True)
max_document_tokens = models.PositiveIntegerField(default=1_000_000, null=True, blank=True)
Expand Down
3 changes: 2 additions & 1 deletion infrastructure/aws/data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ locals {
"AWS_REGION" : var.region,
"worker_ingest_min_chunk_size" : var.worker_ingest_min_chunk_size,
"worker_ingest_max_chunk_size" : var.worker_ingest_max_chunk_size,
"UNSTRUCTURED_HOST" : local.unstructured_host
"UNSTRUCTURED_HOST" : local.unstructured_host,
"EMBEDDING_BACKEND": var.embedding_backend
}

core_secrets = {
Expand Down
16 changes: 16 additions & 0 deletions infrastructure/aws/iam.tf
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
data "aws_iam_policy_document" "ecs_exec_role_policy" {
# checkov:skip=CKV_AWS_109:KMS policies can't be restricted
# checkov:skip=CKV_AWS_111:KMS policies can't be restricted
# checkov:skip=CKV_AWS_356:Allow for policies to not have resource limits

statement {
effect = "Allow"
actions = [
"bedrock:Invoke*",
"bedrock:Get*",
"bedrock:List*"
]
resources = [
"*"
]
}

# checkov:skip=CKV_AWS_111:Allow for write access without constraints
# checkov:skip=CKV_AWS_356:Allow for policies to not have resource limits
statement {
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/aws/rds.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module "rds" {
# checkov:skip=CKV_TF_1: We're using semantic versions instead of commit hash
# source = "../../../i-dot-ai-core-terraform-modules//modules/infrastructure/rds" # For testing local changes
# source = "../../../i-dot-ai-core-terraform-modules//modules/infrastructure/rds" # For testing local changes
source = "git::https://github.com/i-dot-ai/i-dot-ai-core-terraform-modules.git//modules/infrastructure/rds?ref=v1.0.0-rds"
name = local.name
db_name = var.project_name
Expand Down
7 changes: 7 additions & 0 deletions infrastructure/aws/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,10 @@ variable "django_queue_max_attempts" {
default = 1
description = "How many attempts to run unstructured task"
}

variable "embedding_backend" {
  type    = string
  default = "text-embedding-3-large"
  # Note: this now holds a model identifier, not a provider keyword — the old
  # "azure / aws / openai" wording predated the model-name switch in this commit.
  description = "Embedding model to use, e.g. text-embedding-3-large (Azure), text-embedding-ada-002 (OpenAI), amazon.titan-embed-text-v2:0 (AWS Bedrock)"
}

438 changes: 223 additions & 215 deletions poetry.lock

Large diffs are not rendered by default.

1,198 changes: 623 additions & 575 deletions redbox-core/poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion redbox-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ pydantic-settings = "^2.3.4"
langchain-elasticsearch = "^0.2.2"
pytest-dotenv = "^0.5.2"
kneed = "^0.8.5"
langgraph = "^0.2.3"
langgraph = "<0.2.15"
langchain-aws = "^0.1.17"


[tool.poetry.group.dev.dependencies]
Expand Down
29 changes: 21 additions & 8 deletions redbox-core/redbox/chains/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from redbox.models.chain import AISettings
from redbox.models.settings import Settings
from redbox.retriever import AllElasticsearchRetriever, ParameterisedElasticsearchRetriever
from langchain_aws import ChatBedrock
from langchain_community.embeddings import BedrockEmbeddings


def get_chat_llm(env: Settings, ai_settings: AISettings):
Expand Down Expand Up @@ -71,6 +73,12 @@ def get_chat_llm(env: Settings, ai_settings: AISettings):
)
]
)
elif ai_settings.chat_backend in (
"anthropic.claude-3-sonnet-20240229-v1:0",
"anthropic.claude-3-haiku-20240307-v1:0",
):
chat_model = ChatBedrock(model_id=ai_settings.chat_backend)

if chat_model is None:
raise Exception("%s not recognised", ai_settings.chat_backend)
else:
Expand All @@ -87,7 +95,7 @@ def get_azure_embeddings(env: Settings):
api_key=convert_to_secret_str(env.embedding_openai_api_key),
azure_endpoint=env.embedding_azure_openai_endpoint,
api_version=env.azure_api_version_embeddings,
model=env.azure_embedding_model,
model=env.embedding_backend,
max_retries=env.embedding_max_retries,
retry_min_seconds=env.embedding_retry_min_seconds,
retry_max_seconds=env.embedding_retry_max_seconds,
Expand All @@ -100,20 +108,25 @@ def get_openai_embeddings(env: Settings):
return OpenAIEmbeddings(
api_key=convert_to_secret_str(env.embedding_openai_api_key),
base_url=env.embedding_openai_base_url,
model=env.embedding_openai_model,
model=env.embedding_model,
chunk_size=env.embedding_max_batch_size,
)


def get_aws_embeddings(env: Settings):
    """Build an AWS Bedrock embeddings client for the configured region.

    The model is taken from ``env.embedding_backend`` (a Bedrock model id,
    e.g. ``amazon.titan-embed-text-v2:0``).
    """
    model_id = env.embedding_backend
    region = env.aws_region
    return BedrockEmbeddings(region_name=region, model_id=model_id)


def get_embeddings(env: Settings) -> Embeddings:
    """Return the embeddings client selected by ``env.embedding_backend``.

    The backend is identified by model name:
      * ``text-embedding-3-large``       -> Azure OpenAI
      * ``text-embedding-ada-002``       -> OpenAI
      * ``amazon.titan-embed-text-v2:0`` -> AWS Bedrock
      * ``fake``                         -> deterministic fake embeddings (tests)

    Raises:
        ValueError: if ``env.embedding_backend`` matches none of the above.
    """
    if env.embedding_backend == "text-embedding-3-large":
        return get_azure_embeddings(env)
    if env.embedding_backend == "text-embedding-ada-002":
        return get_openai_embeddings(env)
    if env.embedding_backend == "amazon.titan-embed-text-v2:0":
        return get_aws_embeddings(env)
    if env.embedding_backend == "fake":
        # Size presumably matches text-embedding-3-large's 3072-dim vectors so
        # index mappings agree — TODO confirm against the ES index settings.
        return FakeEmbeddings(size=3072)
    # Include the offending value so misconfiguration is diagnosable from logs.
    raise ValueError(f"No configured embedding model: {env.embedding_backend!r}")


def get_all_chunks_retriever(env: Settings) -> ElasticsearchRetriever:
Expand Down
8 changes: 7 additions & 1 deletion redbox-core/redbox/models/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,13 @@ class AISettings(BaseModel):
similarity_threshold: int = 0

# this is also the azure_openai_model
chat_backend: Literal["gpt-35-turbo-16k", "gpt-4-turbo-2024-04-09", "gpt-4o"] = "gpt-4o"
chat_backend: Literal[
"gpt-35-turbo-16k",
"gpt-4-turbo-2024-04-09",
"gpt-4o",
"anthropic.claude-3-sonnet-20240229-v1:0",
"anthropic.claude-3-haiku-20240307-v1:0",
] = "gpt-4o"


class DocumentState(TypedDict):
Expand Down
7 changes: 4 additions & 3 deletions redbox-core/redbox/models/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,20 @@ class Settings(BaseSettings):
embedding_openai_api_key: str = "NotAKey"
embedding_azure_openai_endpoint: str = "not an endpoint"
azure_api_version_embeddings: str = "2024-02-01"
azure_embedding_model: str = "text-embedding-3-large"

embedding_backend: Literal[
"text-embedding-ada-002", "amazon.titan-embed-text-v2:0", "text-embedding-3-large", "fake"
] = "text-embedding-3-large"

llm_max_tokens: int = 1024

embedding_backend: Literal["azure", "openai", "fake"] = "azure"
embedding_max_retries: int = 1
embedding_retry_min_seconds: int = 120 # Azure uses 60s
embedding_retry_max_seconds: int = 300
embedding_max_batch_size: int = 512
embedding_document_field_name: str = "embedding"

embedding_openai_base_url: str | None = None
embedding_openai_model: str = "text-embedding-ada-002"

partition_strategy: Literal["auto", "fast", "ocr_only", "hi_res"] = "fast"
clustering_strategy: Literal["full"] | None = None
Expand Down

0 comments on commit 731df58

Please sign in to comment.