-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Assistant Archival Cleanup #1011
base: main
Are you sure you want to change the base?
Changes from 5 commits
3753f52
70a78bf
35ce4f0
b5404ef
527e24e
742c032
63b5e3c
ea2fac1
5fb1bad
bff8ccf
03020f8
eccb21e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ | |
from field_audit.models import AuditAction, AuditingManager | ||
|
||
from apps.chat.agent.tools import get_assistant_tools | ||
from apps.experiments.models import VersionsMixin, VersionsObjectManagerMixin | ||
from apps.experiments.models import Experiment, VersionsMixin, VersionsObjectManagerMixin | ||
from apps.experiments.versioning import VersionField | ||
from apps.teams.models import BaseTeamModel | ||
from apps.utils.models import BaseModel | ||
|
@@ -176,28 +176,40 @@ def get_custom_action_operations(self) -> models.QuerySet: | |
def archive(self): | ||
from apps.assistants.tasks import delete_openai_assistant_task | ||
|
||
# don't archive assistant if it's still referenced by an active experiment or pipeline | ||
if self.get_related_experiments_queryset().exists(): | ||
if self.get_related_experiments_queryset().exists() or self.get_related_pipeline_node_queryset().exists(): | ||
return False | ||
|
||
if self.get_related_pipeline_node_queryset().exists(): | ||
return False | ||
|
||
super().archive() | ||
if self.is_working_version: | ||
# TODO: should this delete the assistant from OpenAI? | ||
for ( | ||
version | ||
) in self.versions.all(): # first perform all checks so assistants are not archived prior to return False | ||
if ( | ||
version.get_related_experiments_queryset().exists() | ||
or version.get_related_pipeline_node_queryset().exists() | ||
): | ||
return False | ||
for version in self.versions.all(): | ||
delete_openai_assistant_task.delay(version.id) | ||
self.versions.update(is_archived=True, audit_action=AuditAction.AUDIT) | ||
else: | ||
delete_openai_assistant_task.delay(self.id) | ||
|
||
super().archive() | ||
delete_openai_assistant_task.delay(self.id) | ||
return True | ||
|
||
def get_related_experiments_queryset(self): | ||
def get_related_experiments_queryset(self, query=None): | ||
if query: | ||
return Experiment.objects.filter(assistant_id__in=query, is_archived=False) | ||
|
||
return self.experiment_set.filter(is_archived=False) | ||
|
||
def get_related_pipeline_node_queryset(self): | ||
def get_related_pipeline_node_queryset(self, query=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since the def get_related_pipeline_node_queryset(self, assistant_ids: list): We could even pass in a queryset that returns the ids. We did something similar here for example. This wouldn't require parsing the query results to a list, which is nice, but up to you on which approach you'd like to take. Also, looks like this parameter will come in useful to optimize the |
||
from apps.pipelines.models import Node | ||
|
||
if query: | ||
return Node.objects.filter(type="AssistantNode").filter( | ||
params__assistant_id__in=query, | ||
pipeline__is_archived=False, | ||
) | ||
|
||
return Node.objects.filter(type="AssistantNode").filter( | ||
params__assistant_id=str(self.id), | ||
pipeline__is_archived=False, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,15 @@ | ||
import uuid | ||
from unittest.mock import Mock | ||
from unittest.mock import Mock, patch | ||
|
||
import pytest | ||
from django.db.models import Q | ||
|
||
from apps.assistants.models import ToolResources | ||
from apps.assistants.models import OpenAiAssistant, ToolResources | ||
from apps.assistants.sync import _get_files_to_delete, delete_openai_files_for_resource | ||
from apps.utils.factories.assistants import OpenAiAssistantFactory | ||
from apps.utils.factories.experiment import ExperimentFactory | ||
from apps.utils.factories.files import FileFactory | ||
from apps.utils.factories.pipelines import NodeFactory, PipelineFactory | ||
|
||
|
||
@pytest.fixture() | ||
|
@@ -28,36 +31,137 @@ def code_resource(assistant): | |
|
||
|
||
@pytest.mark.django_db() | ||
def test_files_to_delete_when_only_referenced_by_one_resource(code_resource): | ||
files_to_delete = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id)) | ||
assert len(files_to_delete) == 2 | ||
assert {f.id for f in files_to_delete} == {f.id for f in code_resource.files.all()} | ||
class TestAssistantDeletion: | ||
def test_files_to_delete_when_only_referenced_by_one_resource(self, code_resource): | ||
files_to_delete = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id)) | ||
assert len(files_to_delete) == 2 | ||
assert {f.id for f in files_to_delete} == {f.id for f in code_resource.files.all()} | ||
|
||
def test_files_not_to_delete_when_referenced_by_multiple_resources(self, code_resource): | ||
all_files = list(code_resource.files.all()) | ||
tool_resource = ToolResources.objects.create(tool_type="file_search", assistant=code_resource.assistant) | ||
tool_resource.files.set([all_files[0]]) | ||
|
||
@pytest.mark.django_db() | ||
def test_files_not_to_delete_when_referenced_by_multiple_resources(code_resource): | ||
all_files = list(code_resource.files.all()) | ||
tool_resource = ToolResources.objects.create(tool_type="file_search", assistant=code_resource.assistant) | ||
tool_resource.files.set([all_files[0]]) | ||
# only the second file should be deleted | ||
files_to_delete = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id)) | ||
assert len(files_to_delete) == 1 | ||
assert files_to_delete[0].id == all_files[1].id | ||
|
||
files_to_delete = list(_get_files_to_delete(tool_resource.assistant.team, tool_resource.id)) | ||
assert len(files_to_delete) == 0 | ||
|
||
def test_delete_openai_files_for_resource(self, code_resource): | ||
all_files = list(code_resource.files.all()) | ||
assert all(f.external_id for f in all_files) | ||
assert all(f.external_source for f in all_files) | ||
client = Mock() | ||
delete_openai_files_for_resource(client, code_resource.assistant.team, code_resource) | ||
|
||
assert client.files.delete.call_count == 2 | ||
all_files = list(code_resource.files.all()) | ||
assert not any(f.external_id for f in all_files) | ||
assert not any(f.external_source for f in all_files) | ||
|
||
# only the second file should be deleted | ||
files_to_delete = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id)) | ||
assert len(files_to_delete) == 1 | ||
assert files_to_delete[0].id == all_files[1].id | ||
|
||
files_to_delete = list(_get_files_to_delete(tool_resource.assistant.team, tool_resource.id)) | ||
assert len(files_to_delete) == 0 | ||
# assistant.refresh_from_db() | ||
|
||
|
||
@pytest.mark.django_db() | ||
def test_delete_openai_files_for_resource(code_resource): | ||
all_files = list(code_resource.files.all()) | ||
assert all(f.external_id for f in all_files) | ||
assert all(f.external_source for f in all_files) | ||
client = Mock() | ||
delete_openai_files_for_resource(client, code_resource.assistant.team, code_resource) | ||
|
||
assert client.files.delete.call_count == 2 | ||
all_files = list(code_resource.files.all()) | ||
assert not any(f.external_id for f in all_files) | ||
assert not any(f.external_source for f in all_files) | ||
class TestAssistantArchival: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice tests. It would be great if we can add tests for pipelines using an assistant as well. |
||
def test_archive_assistant(self): | ||
assistant = OpenAiAssistantFactory() | ||
assert assistant.is_archived is False | ||
assistant.archive() | ||
assert assistant.is_archived is True | ||
|
||
def test_archive_assistant_with_still_exisiting_experiment(self): | ||
experiment = ExperimentFactory() | ||
assistant = OpenAiAssistantFactory() | ||
experiment.assistant = assistant | ||
experiment.save() | ||
|
||
assistant.archive() | ||
assert assistant.is_archived is False # archiving failed | ||
|
||
experiment.archive() | ||
assistant.archive() | ||
assert assistant.is_archived is True # archiving successful | ||
|
||
@patch("apps.assistants.sync.push_assistant_to_openai", Mock()) | ||
def test_archive_versioned_assistant_with_still_exisiting_experiment_and_pipeline(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test seems to be the same as the previous one since the presence of the pipeline has no effect because it doesn't reference the assistant |
||
assistant = OpenAiAssistantFactory() | ||
v2_assistant = assistant.create_new_version() | ||
pipeline = PipelineFactory() | ||
experiment = ExperimentFactory(pipeline=pipeline) | ||
experiment.assistant = v2_assistant | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This isn't a valid state — an experiment should either have an assistant or a pipeline but not both |
||
experiment.save() | ||
|
||
assistant.archive() | ||
assert assistant.is_archived is False # archiving failed | ||
assert v2_assistant.is_archived is False | ||
|
||
experiment.archive() | ||
assistant.archive() | ||
v2_assistant.refresh_from_db() | ||
|
||
assert assistant.is_archived is True # archiving successful | ||
assert v2_assistant.is_archived is True | ||
|
||
@patch("apps.assistants.sync.push_assistant_to_openai", Mock()) | ||
def test_get_related_pipeline_node_queryset_with_versions(self): | ||
assistant = OpenAiAssistantFactory() | ||
v2_assistant = assistant.create_new_version() | ||
pipeline = PipelineFactory() | ||
NodeFactory(type="AssistantNode", pipeline=pipeline, params={"assistant_id": str(assistant.id)}) | ||
exp = ExperimentFactory(pipeline=pipeline) | ||
exp.assistant = assistant | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You shouldn't set this property since you're testing the reference through the pipeline |
||
exp.save() | ||
v2_exp = exp.create_new_version() | ||
NodeFactory(type="AssistantNode", pipeline=v2_exp.pipeline, params={"assistant_id": str(v2_assistant.id)}) | ||
NodeFactory(type="AssistantNode", pipeline=v2_exp.pipeline, params={"assistant_id": str(v2_assistant.id)}) | ||
v2_exp.assistant = v2_assistant | ||
v2_exp.save() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't follow what's happening here. Creating a new version should take care of creating versioned nodes as well. |
||
assistant.refresh_from_db() | ||
v2_assistant.refresh_from_db() | ||
|
||
version_query = list( | ||
map( | ||
str, | ||
OpenAiAssistant.objects.filter(Q(id=assistant.id) | Q(working_version__id=assistant.id)).values_list( | ||
"id", flat=True | ||
), | ||
) | ||
) | ||
|
||
assert len(assistant.get_related_pipeline_node_queryset()) == 1 | ||
assert len(v2_assistant.get_related_pipeline_node_queryset()) == 2 | ||
assert len(assistant.get_related_pipeline_node_queryset(query=version_query)) == 3 | ||
|
||
@patch("apps.assistants.sync.push_assistant_to_openai", Mock()) | ||
def test_get_related_experiments_queryset_with_versions(self): | ||
assistant = OpenAiAssistantFactory() | ||
v2_assistant = assistant.create_new_version() | ||
exp = ExperimentFactory() | ||
exp.assistant = assistant | ||
exp.save() | ||
exp2 = ExperimentFactory() | ||
exp2.assistant = assistant | ||
exp2.save() | ||
v2_exp = exp.create_new_version() | ||
v2_exp.assistant = v2_assistant | ||
v2_exp.save() | ||
assistant.refresh_from_db() | ||
v2_assistant.refresh_from_db() | ||
|
||
version_query = list( | ||
map( | ||
str, | ||
OpenAiAssistant.objects.filter(Q(id=assistant.id) | Q(working_version__id=assistant.id)).values_list( | ||
"id", flat=True | ||
), | ||
) | ||
) | ||
|
||
assert len(assistant.get_related_experiments_queryset()) == 2 | ||
assert len(v2_assistant.get_related_experiments_queryset()) == 1 | ||
assert len(assistant.get_related_experiments_queryset(query=version_query)) == 3 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
from django.contrib import messages | ||
from django.contrib.auth.mixins import PermissionRequiredMixin | ||
from django.db import transaction | ||
from django.db.models import Q | ||
from django.http import HttpResponse, HttpResponseRedirect | ||
from django.shortcuts import get_object_or_404, render | ||
from django.template.loader import render_to_string | ||
|
@@ -212,13 +213,23 @@ def delete(self, request, team_slug: str, pk: int): | |
messages.success(request, "Assistant Archived") | ||
return HttpResponse() | ||
else: | ||
version_query = None | ||
if assistant.is_working_version: | ||
version_query = list( | ||
map( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might be wrong, but I don't think we have to map the ids to strings? |
||
str, | ||
OpenAiAssistant.objects.filter( | ||
Q(id=assistant.id) | Q(working_version__id=assistant.id) | ||
).values_list("id", flat=True), | ||
) | ||
) | ||
experiments = [ | ||
Chip(label=experiment.name, url=experiment.get_absolute_url()) | ||
for experiment in assistant.get_related_experiments_queryset() | ||
for experiment in assistant.get_related_experiments_queryset(query=version_query) | ||
] | ||
pipeline_nodes = [ | ||
Chip(label=node.pipeline.name, url=node.pipeline.get_absolute_url()) | ||
for node in assistant.get_related_pipeline_node_queryset().select_related("pipeline") | ||
for node in assistant.get_related_pipeline_node_queryset(query=version_query).select_related("pipeline") | ||
] | ||
response = render_to_string( | ||
"assistants/partials/referenced_objects.html", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
rather than re-iterating over the queryset you could accumulate the IDs in the previous loop