Skip to content

Commit

Permalink
feature/added aws bedrock (#1012)
Browse files Browse the repository at this point in the history
* added bedrock

* pinned langgraph

* update poetry

* added embedding to bedrock

* updated embedding-models

* formatting

* added models to django

* removed hack

* removed aws

* rationalised embedding model

* updated variables

* revert infra changes

* reformat

* updated .env.integration

* poetry update

* Update infrastructure/aws/variables.tf

---------

Co-authored-by: George Burton <[email protected]>
  • Loading branch information
gecBurton and George Burton authored Sep 3, 2024
1 parent 42a1c68 commit 731df58
Show file tree
Hide file tree
Showing 16 changed files with 1,261 additions and 1,096 deletions.
2 changes: 1 addition & 1 deletion .env.integration
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
UNSTRUCTURED_HOST=unstructured
EMBEDDING_BACKEND=azure
EMBEDDING_BACKEND=text-embedding-3-large

EMBEDDING_DOCUMENT_FIELD_NAME=azure_embedding
AWS_REGION=eu-west-2
Expand Down
320 changes: 172 additions & 148 deletions core-api/poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions core-api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ langchain-elasticsearch = "^0.2.2"
litellm = "^1.43.9"



[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
pytest-cov = "^5.0.0"
Expand Down
303 changes: 161 additions & 142 deletions django_app/poetry.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.1 on 2024-09-03 14:16

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated by Django (see header): widens the choices on
    # AISettings.chat_backend to add the two AWS Bedrock Anthropic Claude
    # model IDs alongside the existing Azure OpenAI options.
    # The default stays 'gpt-4o', so existing rows are unaffected.

    # Must apply after the migration that introduced chat token usage tracking.
    dependencies = [
        ('redbox_core', '0040_chatmessagetokenuse'),
    ]

    operations = [
        migrations.AlterField(
            model_name='aisettings',
            name='chat_backend',
            # Choices mirror models.ChatBackend; keep the two in sync manually.
            field=models.CharField(choices=[('gpt-35-turbo-16k', 'gpt-35-turbo-16k'), ('gpt-4-turbo-2024-04-09', 'gpt-4-turbo-2024-04-09'), ('gpt-4o', 'gpt-4o'), ('anthropic.claude-3-sonnet-20240229-v1:0', 'claude-3-sonnet'), ('anthropic.claude-3-haiku-20240307-v1:0', 'claude-3-haiku')], default='gpt-4o', help_text='LLM to use in chat', max_length=64),
        ),
    ]
2 changes: 2 additions & 0 deletions django_app/redbox_app/redbox_core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class ChatBackend(models.TextChoices):
GPT_35_TURBO = "gpt-35-turbo-16k", _("gpt-35-turbo-16k")
GPT_4_TURBO = "gpt-4-turbo-2024-04-09", _("gpt-4-turbo-2024-04-09")
GPT_4_OMNI = "gpt-4o", _("gpt-4o")
CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0", _("claude-3-sonnet")
CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0", _("claude-3-haiku")

label = models.CharField(max_length=50, unique=True)
max_document_tokens = models.PositiveIntegerField(default=1_000_000, null=True, blank=True)
Expand Down
3 changes: 2 additions & 1 deletion infrastructure/aws/data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ locals {
"AWS_REGION" : var.region,
"worker_ingest_min_chunk_size" : var.worker_ingest_min_chunk_size,
"worker_ingest_max_chunk_size" : var.worker_ingest_max_chunk_size,
"UNSTRUCTURED_HOST" : local.unstructured_host
"UNSTRUCTURED_HOST" : local.unstructured_host,
"EMBEDDING_BACKEND": var.embedding_backend
}

core_secrets = {
Expand Down
16 changes: 16 additions & 0 deletions infrastructure/aws/iam.tf
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
data "aws_iam_policy_document" "ecs_exec_role_policy" {
# checkov:skip=CKV_AWS_109:KMS policies can't be restricted
# checkov:skip=CKV_AWS_111:KMS policies can't be restricted
# checkov:skip=CKV_AWS_356:Allow for policies to not have resource limits

statement {
effect = "Allow"
actions = [
"bedrock:Invoke*",
"bedrock:Get*",
"bedrock:List*"
]
resources = [
"*"
]
}

# checkov:skip=CKV_AWS_111:Allow for write access without constraints
# checkov:skip=CKV_AWS_356:Allow for policies to not have resource limits
statement {
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/aws/rds.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module "rds" {
# checkov:skip=CKV_TF_1: We're using semantic versions instead of commit hash
# source = "../../../i-dot-ai-core-terraform-modules//modules/infrastructure/rds" # For testing local changes
# source = "../../../i-dot-ai-core-terraform-modules//modules/infrastructure/rds" # For testing local changes
source = "git::https://github.com/i-dot-ai/i-dot-ai-core-terraform-modules.git//modules/infrastructure/rds?ref=v1.0.0-rds"
name = local.name
db_name = var.project_name
Expand Down
7 changes: 7 additions & 0 deletions infrastructure/aws/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,10 @@ variable "django_queue_max_attempts" {
default = 1
description = "How many attempts to run unstructured task"
}

variable "embedding_backend" {
  type    = string
  default = "text-embedding-3-large"
  # Note: this now holds a model identifier, not a provider keyword — the old
  # "azure / aws / openai" wording predated the model-name switch in this commit.
  description = "Embedding model to use, e.g. text-embedding-3-large (Azure), text-embedding-ada-002 (OpenAI), amazon.titan-embed-text-v2:0 (AWS Bedrock)"
}

438 changes: 223 additions & 215 deletions poetry.lock

Large diffs are not rendered by default.

1,198 changes: 623 additions & 575 deletions redbox-core/poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion redbox-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ pydantic-settings = "^2.3.4"
langchain-elasticsearch = "^0.2.2"
pytest-dotenv = "^0.5.2"
kneed = "^0.8.5"
langgraph = "^0.2.3"
langgraph = "<0.2.15"
langchain-aws = "^0.1.17"


[tool.poetry.group.dev.dependencies]
Expand Down
29 changes: 21 additions & 8 deletions redbox-core/redbox/chains/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from redbox.models.chain import AISettings
from redbox.models.settings import Settings
from redbox.retriever import AllElasticsearchRetriever, ParameterisedElasticsearchRetriever
from langchain_aws import ChatBedrock
from langchain_community.embeddings import BedrockEmbeddings


def get_chat_llm(env: Settings, ai_settings: AISettings):
Expand Down Expand Up @@ -71,6 +73,12 @@ def get_chat_llm(env: Settings, ai_settings: AISettings):
)
]
)
elif ai_settings.chat_backend in (
"anthropic.claude-3-sonnet-20240229-v1:0",
"anthropic.claude-3-haiku-20240307-v1:0",
):
chat_model = ChatBedrock(model_id=ai_settings.chat_backend)

if chat_model is None:
raise Exception("%s not recognised", ai_settings.chat_backend)
else:
Expand All @@ -87,7 +95,7 @@ def get_azure_embeddings(env: Settings):
api_key=convert_to_secret_str(env.embedding_openai_api_key),
azure_endpoint=env.embedding_azure_openai_endpoint,
api_version=env.azure_api_version_embeddings,
model=env.azure_embedding_model,
model=env.embedding_backend,
max_retries=env.embedding_max_retries,
retry_min_seconds=env.embedding_retry_min_seconds,
retry_max_seconds=env.embedding_retry_max_seconds,
Expand All @@ -100,20 +108,25 @@ def get_openai_embeddings(env: Settings):
return OpenAIEmbeddings(
api_key=convert_to_secret_str(env.embedding_openai_api_key),
base_url=env.embedding_openai_base_url,
model=env.embedding_openai_model,
model=env.embedding_model,
chunk_size=env.embedding_max_batch_size,
)


def get_aws_embeddings(env: Settings):
    """Build an AWS Bedrock embeddings client for the configured region.

    The model is taken from ``env.embedding_backend`` (a Bedrock model id,
    e.g. ``amazon.titan-embed-text-v2:0``).
    """
    model_id = env.embedding_backend
    region = env.aws_region
    return BedrockEmbeddings(region_name=region, model_id=model_id)


def get_embeddings(env: Settings) -> Embeddings:
    """Return the embeddings client selected by ``env.embedding_backend``.

    The backend is identified by model name:
      * ``text-embedding-3-large``       -> Azure OpenAI
      * ``text-embedding-ada-002``       -> OpenAI
      * ``amazon.titan-embed-text-v2:0`` -> AWS Bedrock
      * ``fake``                         -> deterministic fake embeddings (tests)

    Raises:
        ValueError: if ``env.embedding_backend`` matches none of the above.
    """
    if env.embedding_backend == "text-embedding-3-large":
        return get_azure_embeddings(env)
    if env.embedding_backend == "text-embedding-ada-002":
        return get_openai_embeddings(env)
    if env.embedding_backend == "amazon.titan-embed-text-v2:0":
        return get_aws_embeddings(env)
    if env.embedding_backend == "fake":
        # Size presumably matches text-embedding-3-large's 3072-dim vectors so
        # index mappings agree — TODO confirm against the ES index settings.
        return FakeEmbeddings(size=3072)
    # Include the offending value so misconfiguration is diagnosable from logs.
    raise ValueError(f"No configured embedding model: {env.embedding_backend!r}")


def get_all_chunks_retriever(env: Settings) -> ElasticsearchRetriever:
Expand Down
8 changes: 7 additions & 1 deletion redbox-core/redbox/models/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,13 @@ class AISettings(BaseModel):
similarity_threshold: int = 0

# this is also the azure_openai_model
chat_backend: Literal["gpt-35-turbo-16k", "gpt-4-turbo-2024-04-09", "gpt-4o"] = "gpt-4o"
chat_backend: Literal[
"gpt-35-turbo-16k",
"gpt-4-turbo-2024-04-09",
"gpt-4o",
"anthropic.claude-3-sonnet-20240229-v1:0",
"anthropic.claude-3-haiku-20240307-v1:0",
] = "gpt-4o"


class DocumentState(TypedDict):
Expand Down
7 changes: 4 additions & 3 deletions redbox-core/redbox/models/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,20 @@ class Settings(BaseSettings):
embedding_openai_api_key: str = "NotAKey"
embedding_azure_openai_endpoint: str = "not an endpoint"
azure_api_version_embeddings: str = "2024-02-01"
azure_embedding_model: str = "text-embedding-3-large"

embedding_backend: Literal[
"text-embedding-ada-002", "amazon.titan-embed-text-v2:0", "text-embedding-3-large", "fake"
] = "text-embedding-3-large"

llm_max_tokens: int = 1024

embedding_backend: Literal["azure", "openai", "fake"] = "azure"
embedding_max_retries: int = 1
embedding_retry_min_seconds: int = 120 # Azure uses 60s
embedding_retry_max_seconds: int = 300
embedding_max_batch_size: int = 512
embedding_document_field_name: str = "embedding"

embedding_openai_base_url: str | None = None
embedding_openai_model: str = "text-embedding-ada-002"

partition_strategy: Literal["auto", "fast", "ocr_only", "hi_res"] = "fast"
clustering_strategy: Literal["full"] | None = None
Expand Down

0 comments on commit 731df58

Please sign in to comment.