Skip to content

Commit

Permalink
Feature/remove-file-route (#1009)
Browse files Browse the repository at this point in the history
* remove unused code

* remove unused tests

* direct file control

* check index exists first

* removed http mocking

* wip

* delete unused files

* formatting

* tests passing

* remove static

* reinstated mocking unstructured

* moved s3_client to conftest

* core_file.uuid now random

* core_file.uuid now random

* remove file route

* removed storage

* remove unused code

* fix tests

* remove file status

* removed user-uuid from metadata

* remove File

* remove File from django

* update core-api

---------

Co-authored-by: George Burton <[email protected]>
  • Loading branch information
gecBurton and George Burton authored Sep 3, 2024
1 parent d78e1d4 commit 42a1c68
Show file tree
Hide file tree
Showing 26 changed files with 66 additions and 1,114 deletions.
12 changes: 9 additions & 3 deletions core-api/core_api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
from fastapi.responses import RedirectResponse

from core_api.routes.chat import chat_app
from core_api.routes.file import file_app
from redbox import __version__ as redbox_version
from redbox.models import Settings, StatusResponse
from redbox.models import Settings
from pydantic import BaseModel


# === Logging ===

Expand Down Expand Up @@ -52,6 +53,12 @@ def root():
return RedirectResponse(url="/docs")


class StatusResponse(BaseModel):
status: str
uptime_seconds: float
version: str


@app.get("/health", status_code=HTTPStatus.OK, tags=["health"])
def health(response: Response) -> StatusResponse:
"""Returns the health of the API
Expand Down Expand Up @@ -79,4 +86,3 @@ def health(response: Response) -> StatusResponse:


app.mount("/chat", chat_app)
app.mount("/file", file_app)
204 changes: 0 additions & 204 deletions core-api/core_api/routes/file.py

This file was deleted.

64 changes: 11 additions & 53 deletions core-api/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
from langchain_core.documents.base import Document
from langchain_core.embeddings.fake import FakeEmbeddings
from langchain_elasticsearch.vectorstores import ElasticsearchStore
from redbox.models import File, Settings
from redbox.models import Settings
from redbox.models.file import ChunkMetadata, ChunkResolution
from redbox.storage import ElasticsearchStorageHandler

from core_api import dependencies
from core_api.app import app as application
Expand All @@ -33,11 +32,6 @@ def es_client(env: Settings) -> Elasticsearch:
return env.elasticsearch_client()


@pytest.fixture()
def es_storage_handler(es_client: Elasticsearch, env: Settings) -> ElasticsearchStorageHandler:
return ElasticsearchStorageHandler(es_client=es_client, root_index=env.elastic_root_index)


@pytest.fixture(scope="session")
def es_index(env: Settings) -> str:
return f"{env.elastic_root_index}-chunk"
Expand Down Expand Up @@ -115,23 +109,21 @@ def file_pdf_path() -> Path:


@pytest.fixture()
def file_pdf_object(file_pdf_path: Path, alice: UUID, env: Settings) -> File:
def file_pdf(file_pdf_path: Path, alice: UUID, env: Settings) -> str:
"""The unuploaded File object of Alice's PDF."""
file_name = file_pdf_path.name
return File(key=file_name, bucket=env.bucket_name, creator_user_uuid=alice)
return file_name


@pytest.fixture()
def file_pdf_chunks(file_pdf_object: File) -> list[Document]:
def file_pdf_chunks(file_pdf) -> list[Document]:
"""The Document chunk objects of Alice's PDF."""
normal_chunks = [
Document(
page_content="hello",
metadata=ChunkMetadata(
parent_file_uuid=str(file_pdf_object.uuid),
index=i,
file_name=file_pdf_object.key,
creator_user_uuid=file_pdf_object.creator_user_uuid,
file_name=file_pdf,
page_number=4,
created_datetime=datetime.now(UTC),
token_count=4,
Expand All @@ -145,10 +137,8 @@ def file_pdf_chunks(file_pdf_object: File) -> list[Document]:
Document(
page_content="hello" * 10,
metadata=ChunkMetadata(
parent_file_uuid=str(file_pdf_object.uuid),
index=i,
file_name=file_pdf_object.key,
creator_user_uuid=file_pdf_object.creator_user_uuid,
file_name=file_pdf,
page_number=4,
created_datetime=datetime.now(UTC),
token_count=20,
Expand All @@ -160,44 +150,28 @@ def file_pdf_chunks(file_pdf_object: File) -> list[Document]:
return normal_chunks + large_chunks


@pytest.fixture()
def file_pdf(
es_store: ElasticsearchStore,
es_storage_handler: ElasticsearchStorageHandler,
file_pdf_object: File,
file_pdf_chunks: list[Document],
) -> File:
"""The File object of Alice's PDF, with all objects in the Elasticsearch index."""
es_storage_handler.write_item(file_pdf_object)
es_storage_handler.refresh()
es_store.add_documents(file_pdf_chunks)
return file_pdf_object


@pytest.fixture()
def file_html_path() -> Path:
"""The path of Alice's HTML."""
return Path(__file__).parents[2] / "tests" / "data" / "pdf" / "example.html"


@pytest.fixture()
def file_html_object(file_html_path: Path, alice: UUID, env: Settings) -> File:
def file_html(file_html_path: Path, alice: UUID, env: Settings) -> str:
"""The unuploaded File object of Alice's HTML."""
file_name = file_html_path.name
return File(key=file_name, bucket=env.bucket_name, creator_user_uuid=alice)
return file_name


@pytest.fixture()
def file_html_chunks(file_html_object: File) -> list[Document]:
def file_html_chunks(file_html: str) -> list[Document]:
"""The Document chunk objects of Alice's HTML."""
normal_chunks = [
Document(
page_content="hello",
metadata=ChunkMetadata(
parent_file_uuid=str(file_html_object.uuid),
index=i,
file_name=file_html_object.key,
creator_user_uuid=file_html_object.creator_user_uuid,
file_name=file_html,
page_number=4,
created_datetime=datetime.now(UTC),
token_count=4,
Expand All @@ -211,10 +185,8 @@ def file_html_chunks(file_html_object: File) -> list[Document]:
Document(
page_content="hello" * 10,
metadata=ChunkMetadata(
parent_file_uuid=str(file_html_object.uuid),
index=i,
file_name=file_html_object.key,
creator_user_uuid=file_html_object.creator_user_uuid,
file_name=file_html,
page_number=4,
created_datetime=datetime.now(UTC),
token_count=20,
Expand All @@ -224,17 +196,3 @@ def file_html_chunks(file_html_object: File) -> list[Document]:
for i in range(2)
]
return normal_chunks + large_chunks


@pytest.fixture()
def file_html(
es_store: ElasticsearchStore,
es_storage_handler: ElasticsearchStorageHandler,
file_html_object: File,
file_html_chunks: list[Document],
) -> File:
"""The File object of Alice's HTML, with all objects in the Elasticsearch index."""
es_storage_handler.write_item(file_html_object)
es_storage_handler.refresh()
es_store.add_documents(file_html_chunks)
return file_html_object
Loading

0 comments on commit 42a1c68

Please sign in to comment.