Feature/remove-file-route (#1009)

* remove unused code * remove unused tests * direct file control * check index exists first * removed http mocking * wip * delete unused files * formatting * tests passing * remove static * reinstated mocking unstructured * moved s3_client to conftest * core_file.uuid now random * core_file.uuid now random * remove file route * removed storage * remove unused code * fix tests * remove file status * removed user-uuid from metadata * remove File * remove File from django * update core-api --------- Co-authored-by: George Burton <[email protected]>
i-dot-ai · Sep 3, 2024 · 42a1c68 · 42a1c68
1 parent d78e1d4
commit 42a1c68
Show file tree

Hide file tree

Showing 26 changed files with 66 additions and 1,114 deletions.
diff --git a/core-api/core_api/app.py b/core-api/core_api/app.py
@@ -7,9 +7,10 @@
 from fastapi.responses import RedirectResponse
 
 from core_api.routes.chat import chat_app
-from core_api.routes.file import file_app
 from redbox import __version__ as redbox_version
-from redbox.models import Settings, StatusResponse
+from redbox.models import Settings
+from pydantic import BaseModel
+
 
 # === Logging ===
 
@@ -52,6 +53,12 @@ def root():
     return RedirectResponse(url="/docs")
 
 
+class StatusResponse(BaseModel):
+    status: str
+    uptime_seconds: float
+    version: str
+
+
 @app.get("/health", status_code=HTTPStatus.OK, tags=["health"])
 def health(response: Response) -> StatusResponse:
     """Returns the health of the API
@@ -79,4 +86,3 @@ def health(response: Response) -> StatusResponse:
 
 
 app.mount("/chat", chat_app)
-app.mount("/file", file_app)
diff --git a/core-api/core_api/routes/file.py b/core-api/core_api/routes/file.py
diff --git a/core-api/tests/conftest.py b/core-api/tests/conftest.py
@@ -10,9 +10,8 @@
 from langchain_core.documents.base import Document
 from langchain_core.embeddings.fake import FakeEmbeddings
 from langchain_elasticsearch.vectorstores import ElasticsearchStore
-from redbox.models import File, Settings
+from redbox.models import Settings
 from redbox.models.file import ChunkMetadata, ChunkResolution
-from redbox.storage import ElasticsearchStorageHandler
 
 from core_api import dependencies
 from core_api.app import app as application
@@ -33,11 +32,6 @@ def es_client(env: Settings) -> Elasticsearch:
     return env.elasticsearch_client()
 
 
-@pytest.fixture()
-def es_storage_handler(es_client: Elasticsearch, env: Settings) -> ElasticsearchStorageHandler:
-    return ElasticsearchStorageHandler(es_client=es_client, root_index=env.elastic_root_index)
-
-
 @pytest.fixture(scope="session")
 def es_index(env: Settings) -> str:
     return f"{env.elastic_root_index}-chunk"
@@ -115,23 +109,21 @@ def file_pdf_path() -> Path:
 
 
 @pytest.fixture()
-def file_pdf_object(file_pdf_path: Path, alice: UUID, env: Settings) -> File:
+def file_pdf(file_pdf_path: Path, alice: UUID, env: Settings) -> str:
     """The unuploaded File object of Alice's PDF."""
     file_name = file_pdf_path.name
-    return File(key=file_name, bucket=env.bucket_name, creator_user_uuid=alice)
+    return file_name
 
 
 @pytest.fixture()
-def file_pdf_chunks(file_pdf_object: File) -> list[Document]:
+def file_pdf_chunks(file_pdf) -> list[Document]:
     """The Document chunk objects of Alice's PDF."""
     normal_chunks = [
         Document(
             page_content="hello",
             metadata=ChunkMetadata(
-                parent_file_uuid=str(file_pdf_object.uuid),
                 index=i,
-                file_name=file_pdf_object.key,
-                creator_user_uuid=file_pdf_object.creator_user_uuid,
+                file_name=file_pdf,
                 page_number=4,
                 created_datetime=datetime.now(UTC),
                 token_count=4,
@@ -145,10 +137,8 @@ def file_pdf_chunks(file_pdf_object: File) -> list[Document]:
         Document(
             page_content="hello" * 10,
             metadata=ChunkMetadata(
-                parent_file_uuid=str(file_pdf_object.uuid),
                 index=i,
-                file_name=file_pdf_object.key,
-                creator_user_uuid=file_pdf_object.creator_user_uuid,
+                file_name=file_pdf,
                 page_number=4,
                 created_datetime=datetime.now(UTC),
                 token_count=20,
@@ -160,44 +150,28 @@ def file_pdf_chunks(file_pdf_object: File) -> list[Document]:
     return normal_chunks + large_chunks
 
 
-@pytest.fixture()
-def file_pdf(
-    es_store: ElasticsearchStore,
-    es_storage_handler: ElasticsearchStorageHandler,
-    file_pdf_object: File,
-    file_pdf_chunks: list[Document],
-) -> File:
-    """The File object of Alice's PDF, with all objects in the Elasticsearch index."""
-    es_storage_handler.write_item(file_pdf_object)
-    es_storage_handler.refresh()
-    es_store.add_documents(file_pdf_chunks)
-    return file_pdf_object
-
-
 @pytest.fixture()
 def file_html_path() -> Path:
     """The path of Alice's HTML."""
     return Path(__file__).parents[2] / "tests" / "data" / "pdf" / "example.html"
 
 
 @pytest.fixture()
-def file_html_object(file_html_path: Path, alice: UUID, env: Settings) -> File:
+def file_html(file_html_path: Path, alice: UUID, env: Settings) -> str:
     """The unuploaded File object of Alice's HTML."""
     file_name = file_html_path.name
-    return File(key=file_name, bucket=env.bucket_name, creator_user_uuid=alice)
+    return file_name
 
 
 @pytest.fixture()
-def file_html_chunks(file_html_object: File) -> list[Document]:
+def file_html_chunks(file_html: str) -> list[Document]:
     """The Document chunk objects of Alice's HTML."""
     normal_chunks = [
         Document(
             page_content="hello",
             metadata=ChunkMetadata(
-                parent_file_uuid=str(file_html_object.uuid),
                 index=i,
-                file_name=file_html_object.key,
-                creator_user_uuid=file_html_object.creator_user_uuid,
+                file_name=file_html,
                 page_number=4,
                 created_datetime=datetime.now(UTC),
                 token_count=4,
@@ -211,10 +185,8 @@ def file_html_chunks(file_html_object: File) -> list[Document]:
         Document(
             page_content="hello" * 10,
             metadata=ChunkMetadata(
-                parent_file_uuid=str(file_html_object.uuid),
                 index=i,
-                file_name=file_html_object.key,
-                creator_user_uuid=file_html_object.creator_user_uuid,
+                file_name=file_html,
                 page_number=4,
                 created_datetime=datetime.now(UTC),
                 token_count=20,
@@ -224,17 +196,3 @@ def file_html_chunks(file_html_object: File) -> list[Document]:
         for i in range(2)
     ]
     return normal_chunks + large_chunks
-
-
-@pytest.fixture()
-def file_html(
-    es_store: ElasticsearchStore,
-    es_storage_handler: ElasticsearchStorageHandler,
-    file_html_object: File,
-    file_html_chunks: list[Document],
-) -> File:
-    """The File object of Alice's HTML, with all objects in the Elasticsearch index."""
-    es_storage_handler.write_item(file_html_object)
-    es_storage_handler.refresh()
-    es_store.add_documents(file_html_chunks)
-    return file_html_object