Skip to content

Commit

Permalink
rename DEMDocType to DEMDocument
Browse files Browse the repository at this point in the history
 mn/#3-support-latest-elasticsearch-dsl-6
  • Loading branch information
codekiln committed Jul 12, 2023
1 parent 572d6b1 commit e06a082
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 44 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
Changelog
---------

0.10.0 (2023-07-12)
^^^^^^^^^^^^^^^^^^
* support elasticsearch-dsl 6.4 - closes `#3 support elasticsearch-dsl-py v6.4`
* fix `#90 Convert to GitHub Actions`
* fix `#96 es_list doesn't list all indexes`
* open up django to anything > 1.10
* rename DEMDocType to DEMDocument, following elasticsearch-dsl-py
* DEMDocType will be removed in 1.X

0.9.0 (2018-11-27)
^^^^^^^^^^^^^^^^^^
* added postgres to docker-compose and travis, and started using postgres for all tests instead of sqlite
Expand Down
11 changes: 5 additions & 6 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,11 @@ Django Elastic Migrations adapts these tools into a Django app which also:
* Implements concurrent bulk indexing powered by python ``multiprocessing``
* Gives Django test hooks for Elasticsearch
* Records a history of all actions that change Elasticsearch indexes
* Supports AWS Elasticsearch 6.0, 6.1 (6.2 TBD; see `#3 support elasticsearch-dsl 6.2`_)
* Supports elasticsearch-dsl-py>=6.4 (support for 7.10.2 is in progress)
* Enables having two or more servers share the same Elasticsearch cluster

.. _elasticsearch-py: https://github.com/elastic/elasticsearch-py
.. _elasticsearch-dsl-py: https://github.com/elastic/elasticsearch-dsl-py
.. _#3 support elasticsearch-dsl 6.2: https://github.com/HBS-HBX/django-elastic-migrations/issues/3


Models
Expand Down Expand Up @@ -123,15 +122,15 @@ Installation
#. Create an ``DEMIndex``:
::

from django_elastic_migrations.indexes import DEMIndex, DEMDocType
from django_elastic_migrations.indexes import DEMIndex, DEMDocument
from .models import Movie
from elasticsearch_dsl import Text

MoviesIndex = DEMIndex('movies')


@MoviesIndex.doc_type
class MovieSearchDoc(DEMDocType):
class MovieSearchDoc(DEMDocument):
text = TEXT_COMPLEX_ENGLISH_NGRAM_METAPHONE

@classmethod
Expand Down Expand Up @@ -292,11 +291,11 @@ Tuning Bulk Indexing Parameters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

By default, ``/.manage.py es_update`` will divide the result of
``DEMDocType.get_queryset()`` into batches of size ``DocType.BATCH_SIZE``.
``DEMDocument.get_queryset()`` into batches of size ``DocType.BATCH_SIZE``.
Override this number to change the batch size.

There are many configurable paramters to Elasticsearch's `bulk updater <https://elasticsearch-py.readthedocs.io/en/master/helpers.html?highlight=bulk#elasticsearch.helpers.streaming_bulk>`_.
To provide a custom value, override ``DEMDocType.get_bulk_indexing_kwargs()``
To provide a custom value, override ``DEMDocument.get_bulk_indexing_kwargs()``
and return the kwargs you would like to customize.

Development
Expand Down
12 changes: 6 additions & 6 deletions django_elastic_migrations/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,22 @@ class IndexNamePropertyCannotBeSet(DjangoElasticMigrationsException):
"""


class DEMDocTypeRequiresGetReindexIterator(DjangoElasticMigrationsException):
class DEMDocumentRequiresGetReindexIterator(DjangoElasticMigrationsException):
"""
Raised when ./manage.py es_update tries to call DEMDocType.get_reindex_iterator()
Raised when ./manage.py es_update tries to call DEMDocument.get_reindex_iterator()
on a subclass, but the subclass has not implemented this.
"""
message = ("To run ./manage.py es_update my_index, my_index needs to "
"implement DEMDocType.get_reindex_iterator(self, last_updated_datetime=None)")
"implement DEMDocument.get_reindex_iterator(self, last_updated_datetime=None)")


class DEMDocTypeRequiresGetQueryset(DjangoElasticMigrationsException):
class DEMDocumentRequiresGetQueryset(DjangoElasticMigrationsException):
"""
Raised when ./manage.py es_update tries to call DEMDocType.get_queryset()
Raised when ./manage.py es_update tries to call DEMDocument.get_queryset()
on a subclass, but the subclass has not implemented this.
"""
message = ("To run ./manage.py es_update my_index, my_index needs to "
"implement DEMDocType.get_queryset()")
"implement DEMDocument.get_queryset()")


class DEMIndexVersionCodebaseMismatchError(DjangoElasticMigrationsException):
Expand Down
54 changes: 29 additions & 25 deletions django_elastic_migrations/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from elasticsearch_dsl import Index as ESIndex, Document as ESDocument, Q as ESQ, Search

from django_elastic_migrations import es_client, environment_prefix, es_test_prefix, dem_index_paths, get_logger, codebase_id
from django_elastic_migrations.exceptions import DEMIndexNotFound, DEMDocTypeRequiresGetReindexIterator, \
IllegalDEMIndexState, NoActiveIndexVersion, DEMDocTypeRequiresGetQueryset
from django_elastic_migrations.exceptions import DEMIndexNotFound, DEMDocumentRequiresGetReindexIterator, \
IllegalDEMIndexState, NoActiveIndexVersion, DEMDocumentRequiresGetQueryset
from django_elastic_migrations.utils.es_utils import get_index_hash_and_json
from django_elastic_migrations.utils.loading import import_module_element
from django_elastic_migrations.utils.multiprocessing_utils import DjangoMultiProcess, USE_ALL_WORKERS
Expand Down Expand Up @@ -273,7 +273,7 @@ def reinitialize_esindex_instances(cls):
After our django-elastic-migrations app is ready
to talk to the DB and find out the names of the indexes,
go through and reinitialize each ES Index subclass instance
as well as their associated ES DocType subclasses
as well as their associated ES Document subclasses
with the appropriate index names
"""
for index_base_name, instance in cls.instances.items():
Expand Down Expand Up @@ -442,7 +442,7 @@ def _start_action_for_indexes(cls, action, index_name, exact_mode=False):
raise DEMIndexNotFound()


class _DEMDocTypeIndexHandler(object):
class _DEMDocumentIndexHandler(object):
"""
Internally, Elasticsearch-dsl-py uses a string stored in the
DocType to determine which index to write to. This class is
Expand Down Expand Up @@ -478,12 +478,12 @@ def __getattribute__(self, item):
return None


class DEMDocType(ESDocument):
class DEMDocument(ESDocument):
"""
Django users subclass DEMDocType instead of Elasticsearch's DocType
Django users should subclass DEMDocument instead of elasticsearch-dsl-py's Document
to use Django Elastic Migrations. All documentation from their class
applies here.
https://elasticsearch-dsl.readthedocs.io/en/latest/api.html#elasticsearch_dsl.DocType
https://elasticsearch-dsl.readthedocs.io/en/latest/api.html#elasticsearch_dsl.Document
Change from Elasticsearch: we manage the doc type's index name to
make it the activated version of the index by default.
Expand All @@ -507,29 +507,29 @@ class DEMDocType(ESDocument):
MAX_RETRIES = 5

def __init__(self, *args, **kwargs):
super(DEMDocType, self).__init__(*args, **kwargs)
super(DEMDocument, self).__init__(*args, **kwargs)
# super.__init__ creates the self._doc_type property that we
# modify here
self._doc_type = _DEMDocTypeIndexHandler(
self._doc_type = _DEMDocumentIndexHandler(
getattr(self, '_doc_type', None))

@classmethod
def get_reindex_iterator(cls, queryset):
"""
Django users override this method. It must return an iterator
or generator of instantiated DEMDocType subclasses, ready
or generator of instantiated DEMDocument subclasses, ready
for inserting into Elasticsearch. For example:
class UsersDocType(DEMDocType)
class UsersDocument(DEMDocument)
@classmethod
def get_reindex_iterator(cls, queryset):
return [cls.getDocForUser(u) for user in queryset]
:param queryset: queryset of objects to index; result of get_queryset()
:return: iterator / generator of *DEMDocType instances*
:return: iterator / generator of *DEMDocument instances*
"""
raise DEMDocTypeRequiresGetReindexIterator()
raise DEMDocumentRequiresGetReindexIterator()

@classmethod
def get_queryset(cls, last_updated_datetime=None):
Expand All @@ -539,15 +539,15 @@ def get_queryset(cls, last_updated_datetime=None):
:param last_updated_datetime:
:return:
"""
raise DEMDocTypeRequiresGetQueryset()
raise DEMDocumentRequiresGetQueryset()

@classmethod
def get_db_objects_by_id(cls, ids):
return cls.get_queryset().filter(**{"{}__in".format(cls.PK_ATTRIBUTE): ids})

@classmethod
def get_dem_index(cls):
# TODO: what should happen if DEMDocType instance has no active version?
# TODO: what should happen if DEMDocument instance has no active version?
# currently, if this exact version is not found, it will return None
return DEMIndexManager.get_dem_index(cls._doc_type.index, exact_mode=True)

Expand Down Expand Up @@ -686,7 +686,7 @@ def bulk_index(cls, reindex_iterator):
"""
Execute Elasticsearch's bulk indexing helper, passing in the result of
cls.get_bulk_indexing_kwargs()
:param reindex_iterator: an iterator of DocType instances from cls.get_reindex_iterator()
:param reindex_iterator: an iterator of Document instances from cls.get_reindex_iterator()
:return: (num_success, num_failed)
"""
kwargs = cls.get_bulk_indexing_kwargs()
Expand Down Expand Up @@ -786,10 +786,14 @@ def _get_index(self, index=None, required=True):
if index is None:
index = self.get_dem_index().get_es_index_name()
if index is None:
index = super(DEMDocType, self)._get_index(index, required)
index = super(DEMDocument, self)._get_index(index, required)
return index


# until we can support only elasticsearch 7.x, we need to support both DocType and Document
DEMDocType = DEMDocument


class DEMIndex(ESIndex):
"""
Django users subclass DEMIndex instead of elasticsearch-dsl-py's Index
Expand Down Expand Up @@ -881,25 +885,25 @@ def delete(self, **kwargs):
if index_version:
index_version.delete()

def doc_type(self, doc_type=None) -> DEMDocType:
def doc_type(self, doc_type=None) -> DEMDocument:
"""
Overrides elasticsearch_dsl.Index.doc_type() and called during DEMIndexManager.initialize().
Associates a DEMDocType with this DEMIndex, so that commands
Associates a DEMDocument with this DEMIndex, so that commands
directed to this DEMIndex always go to the active index version by
querying the DEMIndexManager.
Sets the active index version name into the DEMDocType, so that
DEMDocType.search() queries the active index version.
Sets the active index version name into the DEMDocument, so that
DEMDocument.search() queries the active index version.
:returns DEMDocType associated with this DEMIndex (if any)
:returns DEMDocument associated with this DEMIndex (if any)
"""
if doc_type:
self.__doc_type = doc_type
active_version_name = self.get_active_version_index_name()

# set the active index version name into the DEMDocType,
# so DEMDocType.search() is directed to the active index in elasticsearch
# set the active index version name into the DEMDocument,
# so DEMDocument.search() is directed to the active index in elasticsearch
if active_version_name and self.__doc_type._doc_type:
self.__doc_type._doc_type.index = active_version_name

Expand All @@ -918,7 +922,7 @@ def doc_type(self, doc_type=None) -> DEMDocType:
our_tag = codebase_id
msg = (
"DEMIndex.doc_type received a request to use an elasticsearch index whose exact "
"schema / DEMDocType was not accessible in this codebase. "
"schema / DEMDocument was not accessible in this codebase. "
"This may lead to undefined behavior (for example if this codebase searches or indexes "
"a field that has changed in the requested index, it may not return correctly). "
"\n - requested index: {version_name} "
Expand Down
2 changes: 1 addition & 1 deletion django_elastic_migrations/management/commands/es_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def add_arguments(self, parser):
parser.add_argument(
'--resume', action='store_true',
help=("Only update documents that have changed since "
"the last ./manage.py es_update. NOTE: DEMDocType subclass "
"the last ./manage.py es_update. NOTE: DEMDocument subclass "
"needs to implement this.")
)
parser.add_argument(
Expand Down
12 changes: 6 additions & 6 deletions tests/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.conf import settings
from elasticsearch_dsl import Text, Q, analyzer, token_filter, tokenizer

from django_elastic_migrations.indexes import DEMIndex, DEMDocType, DEMIndexManager
from django_elastic_migrations.indexes import DEMIndex, DEMDocument, DEMIndexManager
from tests.models import Movie

basic_analyzer = analyzer(
Expand All @@ -16,7 +16,7 @@
alternate_textfield = Text(analyzer=basic_analyzer, search_analyzer=basic_analyzer)


class GenericDocType(DEMDocType):
class GenericDocument(DEMDocument):
full_text = Text(required=True)
full_text_boosted = Text(required=True)

Expand Down Expand Up @@ -85,7 +85,7 @@ def get_full_text_search_query(cls, search_query):


@MovieSearchIndex.doc_type
class MovieSearchDoc(GenericDocType):
class MovieSearchDoc(GenericDocument):

@classmethod
def get_queryset(cls, last_updated_datetime=None):
Expand All @@ -107,7 +107,7 @@ def get_model_full_text_boosted(cls, model):
return model.title


class DefaultNewSearchDocTypeMixin(GenericDocType):
class DefaultNewSearchDocTypeMixin(GenericDocument):
"""
Used by get_new_search_index() in the case that doc_type_mixin is not supplied
"""
Expand All @@ -133,7 +133,7 @@ def get_new_search_index(name, doc_type_mixin=None, create_and_activate=True, de
:param cls: parameters of GenericDocType to override
:param doc_type_mixin: a class to mix in to the generated doctype
:param create_and_activate: if True, call DEMIndexManager.initialize(True, True) before returning
:return: DEMIndex, DEMDocType
:return: DEMIndex, DEMDocument
"""
if name == "movies":
raise ValueError("Don't use the movies index for testing; it will interfere with the fixture")
Expand All @@ -146,7 +146,7 @@ def get_new_search_index(name, doc_type_mixin=None, create_and_activate=True, de
my_new_index = DEMIndex(name)
my_new_index.settings(**settings.ELASTICSEARCH_INDEX_SETTINGS)

class MyNewSearchDocType(doc_type_mixin, GenericDocType):
class MyNewSearchDocType(doc_type_mixin, GenericDocument):
pass

my_new_index.doc_type(MyNewSearchDocType)
Expand Down

0 comments on commit e06a082

Please sign in to comment.