Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assistant Archival Cleanup #1011

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 25 additions & 13 deletions apps/assistants/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from field_audit.models import AuditAction, AuditingManager

from apps.chat.agent.tools import get_assistant_tools
from apps.experiments.models import VersionsMixin, VersionsObjectManagerMixin
from apps.experiments.models import Experiment, VersionsMixin, VersionsObjectManagerMixin
from apps.experiments.versioning import VersionField
from apps.teams.models import BaseTeamModel
from apps.utils.models import BaseModel
Expand Down Expand Up @@ -176,28 +176,40 @@ def get_custom_action_operations(self) -> models.QuerySet:
def archive(self):
from apps.assistants.tasks import delete_openai_assistant_task

# don't archive assistant if it's still referenced by an active experiment or pipeline
if self.get_related_experiments_queryset().exists():
if self.get_related_experiments_queryset().exists() or self.get_related_pipeline_node_queryset().exists():
return False

if self.get_related_pipeline_node_queryset().exists():
return False

super().archive()
if self.is_working_version:
# TODO: should this delete the assistant from OpenAI?
for (
version
) in self.versions.all(): # first perform all checks so assistants are not archived prior to return False
if (
version.get_related_experiments_queryset().exists()
or version.get_related_pipeline_node_queryset().exists()
):
return False
for version in self.versions.all():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rather than re-iterating over the queryset you could accumulate the IDs in the previous loop

delete_openai_assistant_task.delay(version.id)
self.versions.update(is_archived=True, audit_action=AuditAction.AUDIT)
else:
delete_openai_assistant_task.delay(self.id)

super().archive()
delete_openai_assistant_task.delay(self.id)
return True

def get_related_experiments_queryset(self):
def get_related_experiments_queryset(self, query=None):
if query:
return Experiment.objects.filter(assistant_id__in=query, is_archived=False)

return self.experiment_set.filter(is_archived=False)

def get_related_pipeline_node_queryset(self):
def get_related_pipeline_node_queryset(self, query=None):
Copy link
Collaborator

@SmittieC SmittieC Jan 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the query param is a list of ids, I suggest that we name and type it accordingly:

def get_related_pipeline_node_queryset(self, assistant_ids: list):

We could even pass in a queryset that returns the ids. We did something similar here for example. This wouldn't require parsing the query results to a list, which is nice, but up to you on which approach you'd like to take.

Also, looks like this parameter will come in useful to optimize the archive method. So instead of iterating through all assistant versions (in the case where we're archiving the working version), we can simply fetch the version ids and call this method with it. Same with get_related_experiments_queryset.

from apps.pipelines.models import Node

if query:
return Node.objects.filter(type="AssistantNode").filter(
params__assistant_id__in=query,
pipeline__is_archived=False,
)

return Node.objects.filter(type="AssistantNode").filter(
params__assistant_id=str(self.id),
pipeline__is_archived=False,
Expand Down
160 changes: 132 additions & 28 deletions apps/assistants/tests/test_delete.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import uuid
from unittest.mock import Mock
from unittest.mock import Mock, patch

import pytest
from django.db.models import Q

from apps.assistants.models import ToolResources
from apps.assistants.models import OpenAiAssistant, ToolResources
from apps.assistants.sync import _get_files_to_delete, delete_openai_files_for_resource
from apps.utils.factories.assistants import OpenAiAssistantFactory
from apps.utils.factories.experiment import ExperimentFactory
from apps.utils.factories.files import FileFactory
from apps.utils.factories.pipelines import NodeFactory, PipelineFactory


@pytest.fixture()
Expand All @@ -28,36 +31,137 @@ def code_resource(assistant):


@pytest.mark.django_db()
def test_files_to_delete_when_only_referenced_by_one_resource(code_resource):
files_to_delete = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id))
assert len(files_to_delete) == 2
assert {f.id for f in files_to_delete} == {f.id for f in code_resource.files.all()}
class TestAssistantDeletion:
def test_files_to_delete_when_only_referenced_by_one_resource(self, code_resource):
files_to_delete = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id))
assert len(files_to_delete) == 2
assert {f.id for f in files_to_delete} == {f.id for f in code_resource.files.all()}

def test_files_not_to_delete_when_referenced_by_multiple_resources(self, code_resource):
all_files = list(code_resource.files.all())
tool_resource = ToolResources.objects.create(tool_type="file_search", assistant=code_resource.assistant)
tool_resource.files.set([all_files[0]])

@pytest.mark.django_db()
def test_files_not_to_delete_when_referenced_by_multiple_resources(code_resource):
all_files = list(code_resource.files.all())
tool_resource = ToolResources.objects.create(tool_type="file_search", assistant=code_resource.assistant)
tool_resource.files.set([all_files[0]])
# only the second file should be deleted
files_to_delete = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id))
assert len(files_to_delete) == 1
assert files_to_delete[0].id == all_files[1].id

files_to_delete = list(_get_files_to_delete(tool_resource.assistant.team, tool_resource.id))
assert len(files_to_delete) == 0

def test_delete_openai_files_for_resource(self, code_resource):
all_files = list(code_resource.files.all())
assert all(f.external_id for f in all_files)
assert all(f.external_source for f in all_files)
client = Mock()
delete_openai_files_for_resource(client, code_resource.assistant.team, code_resource)

assert client.files.delete.call_count == 2
all_files = list(code_resource.files.all())
assert not any(f.external_id for f in all_files)
assert not any(f.external_source for f in all_files)

# only the second file should be deleted
files_to_delete = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id))
assert len(files_to_delete) == 1
assert files_to_delete[0].id == all_files[1].id

files_to_delete = list(_get_files_to_delete(tool_resource.assistant.team, tool_resource.id))
assert len(files_to_delete) == 0
# assistant.refresh_from_db()


@pytest.mark.django_db()
def test_delete_openai_files_for_resource(code_resource):
all_files = list(code_resource.files.all())
assert all(f.external_id for f in all_files)
assert all(f.external_source for f in all_files)
client = Mock()
delete_openai_files_for_resource(client, code_resource.assistant.team, code_resource)

assert client.files.delete.call_count == 2
all_files = list(code_resource.files.all())
assert not any(f.external_id for f in all_files)
assert not any(f.external_source for f in all_files)
class TestAssistantArchival:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice tests. It would be great if we can add tests for pipelines using an assistant as well.

def test_archive_assistant(self):
assistant = OpenAiAssistantFactory()
assert assistant.is_archived is False
assistant.archive()
assert assistant.is_archived is True

def test_archive_assistant_with_still_exisiting_experiment(self):
experiment = ExperimentFactory()
assistant = OpenAiAssistantFactory()
experiment.assistant = assistant
experiment.save()

assistant.archive()
assert assistant.is_archived is False # archiving failed

experiment.archive()
assistant.archive()
assert assistant.is_archived is True # archiving successful

@patch("apps.assistants.sync.push_assistant_to_openai", Mock())
def test_archive_versioned_assistant_with_still_exisiting_experiment_and_pipeline(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test seems to be the same as the previous one since the presence of the pipeline has no effect because it doesn't reference the assistant

assistant = OpenAiAssistantFactory()
v2_assistant = assistant.create_new_version()
pipeline = PipelineFactory()
experiment = ExperimentFactory(pipeline=pipeline)
experiment.assistant = v2_assistant
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this isn't a valid state - an experiment should either have an assistant or a pipeline but not both

experiment.save()

assistant.archive()
assert assistant.is_archived is False # archiving failed
assert v2_assistant.is_archived is False

experiment.archive()
assistant.archive()
v2_assistant.refresh_from_db()

assert assistant.is_archived is True # archiving successful
assert v2_assistant.is_archived is True

@patch("apps.assistants.sync.push_assistant_to_openai", Mock())
def test_get_related_pipeline_node_queryset_with_versions(self):
assistant = OpenAiAssistantFactory()
v2_assistant = assistant.create_new_version()
pipeline = PipelineFactory()
NodeFactory(type="AssistantNode", pipeline=pipeline, params={"assistant_id": str(assistant.id)})
exp = ExperimentFactory(pipeline=pipeline)
exp.assistant = assistant
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you shouldn't set this property since you're testing the reference through the pipeline

exp.save()
v2_exp = exp.create_new_version()
NodeFactory(type="AssistantNode", pipeline=v2_exp.pipeline, params={"assistant_id": str(v2_assistant.id)})
NodeFactory(type="AssistantNode", pipeline=v2_exp.pipeline, params={"assistant_id": str(v2_assistant.id)})
v2_exp.assistant = v2_assistant
v2_exp.save()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't follow what's happening here. Creating a new version should take care of creating versioned nodes as well.

assistant.refresh_from_db()
v2_assistant.refresh_from_db()

version_query = list(
map(
str,
OpenAiAssistant.objects.filter(Q(id=assistant.id) | Q(working_version__id=assistant.id)).values_list(
"id", flat=True
),
)
)

assert len(assistant.get_related_pipeline_node_queryset()) == 1
assert len(v2_assistant.get_related_pipeline_node_queryset()) == 2
assert len(assistant.get_related_pipeline_node_queryset(query=version_query)) == 3

@patch("apps.assistants.sync.push_assistant_to_openai", Mock())
def test_get_related_experiments_queryset_with_versions(self):
assistant = OpenAiAssistantFactory()
v2_assistant = assistant.create_new_version()
exp = ExperimentFactory()
exp.assistant = assistant
exp.save()
exp2 = ExperimentFactory()
exp2.assistant = assistant
exp2.save()
v2_exp = exp.create_new_version()
v2_exp.assistant = v2_assistant
v2_exp.save()
assistant.refresh_from_db()
v2_assistant.refresh_from_db()

version_query = list(
map(
str,
OpenAiAssistant.objects.filter(Q(id=assistant.id) | Q(working_version__id=assistant.id)).values_list(
"id", flat=True
),
)
)

assert len(assistant.get_related_experiments_queryset()) == 2
assert len(v2_assistant.get_related_experiments_queryset()) == 1
assert len(assistant.get_related_experiments_queryset(query=version_query)) == 3
15 changes: 13 additions & 2 deletions apps/assistants/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.contrib import messages
from django.contrib.auth.mixins import PermissionRequiredMixin
from django.db import transaction
from django.db.models import Q
from django.http import HttpResponse, HttpResponseRedirect
from django.shortcuts import get_object_or_404, render
from django.template.loader import render_to_string
Expand Down Expand Up @@ -212,13 +213,23 @@ def delete(self, request, team_slug: str, pk: int):
messages.success(request, "Assistant Archived")
return HttpResponse()
else:
version_query = None
if assistant.is_working_version:
version_query = list(
map(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might be wrong, but I don't think we have to map the ids to strings?

str,
OpenAiAssistant.objects.filter(
Q(id=assistant.id) | Q(working_version__id=assistant.id)
).values_list("id", flat=True),
)
)
experiments = [
Chip(label=experiment.name, url=experiment.get_absolute_url())
for experiment in assistant.get_related_experiments_queryset()
for experiment in assistant.get_related_experiments_queryset(query=version_query)
]
pipeline_nodes = [
Chip(label=node.pipeline.name, url=node.pipeline.get_absolute_url())
for node in assistant.get_related_pipeline_node_queryset().select_related("pipeline")
for node in assistant.get_related_pipeline_node_queryset(query=version_query).select_related("pipeline")
]
response = render_to_string(
"assistants/partials/referenced_objects.html",
Expand Down
Loading