Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENG-6284] render tsv/csv #834

Merged
merged 28 commits
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 4 additions & 16 deletions share/search/index_strategy/_trovesearch_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,15 @@
)
from trove.util.iris import get_sufficiently_unique_iri, is_worthwhile_iri
from trove.vocab.namespaces import (
DCTERMS,
FOAF,
OSFMAP,
OWL,
RDF,
RDFS,
SKOS,
TROVE,
XSD,
)
from trove.vocab.osfmap import is_date_property
from trove.vocab.osfmap import (
is_date_property,
SKIPPABLE_PROPERTIES,
)


_logger = logging.getLogger(__name__)
Expand All @@ -38,16 +36,6 @@
###
# constants

SKIPPABLE_PROPERTIES = (
OSFMAP.contains, # too much, not helpful
OWL.sameAs, # handled special
)

TITLE_PROPERTIES = (DCTERMS.title,)
NAME_PROPERTIES = (FOAF.name, OSFMAP.fileName)
LABEL_PROPERTIES = (RDFS.label, SKOS.prefLabel, SKOS.altLabel)
NAMELIKE_PROPERTIES = (*TITLE_PROPERTIES, *NAME_PROPERTIES, *LABEL_PROPERTIES)

KEYWORD_LENGTH_MAX = 8191 # skip keyword terms that might exceed lucene's internal limit
# (see https://www.elastic.co/guide/en/elasticsearch/reference/current/ignore-above.html)
KEYWORD_MAPPING = {'type': 'keyword', 'ignore_above': KEYWORD_LENGTH_MAX}
Expand Down
16 changes: 6 additions & 10 deletions share/search/index_strategy/trove_indexcard_flats.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,11 @@
PropertypathUsage,
)
from trove.util.iris import get_sufficiently_unique_iri, is_worthwhile_iri, iri_path_as_keyword
from trove.vocab.osfmap import is_date_property
from trove.vocab import osfmap
from trove.vocab.namespaces import RDF, OWL
from ._trovesearch_util import (
latest_rdf_for_indexcard_pks,
GraphWalk,
TITLE_PROPERTIES,
NAME_PROPERTIES,
LABEL_PROPERTIES,
NAMELIKE_PROPERTIES,
KEYWORD_LENGTH_MAX,
)

Expand Down Expand Up @@ -322,7 +318,7 @@ def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> Cardsear

def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchResponse:
_cursor = OffsetCursor.from_cursor(valuesearch_params.page_cursor)
_is_date_search = is_date_property(valuesearch_params.valuesearch_propertypath[-1])
_is_date_search = osfmap.is_date_property(valuesearch_params.valuesearch_propertypath[-1])
_search_kwargs = dict(
query=self._cardsearch_query(
valuesearch_params.cardsearch_filter_set,
Expand Down Expand Up @@ -833,7 +829,7 @@ def _inner_hits(self, *, highlight_query=None) -> dict:

def _should_skip_card(indexcard_rdf, rdfdoc):
# skip cards without some value for name/title/label
return not any(rdfdoc.q(indexcard_rdf.focus_iri, NAMELIKE_PROPERTIES))
return not any(rdfdoc.q(indexcard_rdf.focus_iri, osfmap.NAMELIKE_PROPERTIES))


def _bucketlist(agg_result: dict) -> list[str]:
Expand Down Expand Up @@ -911,17 +907,17 @@ def for_iri_at_path(cls, path: tuple[str, ...], iri: str, rdfdoc):
# TODO: don't discard language for name/title/label
name_text=frozenset(
_text.unicode_value
for _text in rdfdoc.q(iri, NAME_PROPERTIES)
for _text in rdfdoc.q(iri, osfmap.NAME_PROPERTIES)
if isinstance(_text, primitive_rdf.Literal)
),
title_text=frozenset(
_text.unicode_value
for _text in rdfdoc.q(iri, TITLE_PROPERTIES)
for _text in rdfdoc.q(iri, osfmap.TITLE_PROPERTIES)
if isinstance(_text, primitive_rdf.Literal)
),
label_text=frozenset(
_text.unicode_value
for _text in rdfdoc.q(iri, LABEL_PROPERTIES)
for _text in rdfdoc.q(iri, osfmap.LABEL_PROPERTIES)
if isinstance(_text, primitive_rdf.Literal)
),
)
Expand Down
12 changes: 6 additions & 6 deletions share/search/index_strategy/trovesearch_denorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
ValuesearchResponse,
ValuesearchResult,
)
from trove.vocab.osfmap import is_date_property
from trove.vocab import osfmap
from trove.vocab.namespaces import OWL, RDF
from . import _trovesearch_util as ts

Expand Down Expand Up @@ -230,7 +230,7 @@ def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> Cardsear
def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchResponse:
_path = valuesearch_params.valuesearch_propertypath
_cursor = OffsetCursor.from_cursor(valuesearch_params.page_cursor)
_is_date_search = is_date_property(_path[-1])
_is_date_search = osfmap.is_date_property(_path[-1])
_query = (
_build_date_valuesearch(valuesearch_params)
if _is_date_search
Expand Down Expand Up @@ -275,7 +275,7 @@ def should_skip(self) -> bool:
# skip cards that belong to an obsolete suid with a later duplicate
_suid.has_forecompat_replacement()
# ...or that are without some value for name/title/label
or not any(self.rdfdoc.q(self.focus_iri, ts.NAMELIKE_PROPERTIES))
or not any(self.rdfdoc.q(self.focus_iri, osfmap.NAMELIKE_PROPERTIES))
)

def build_docs(self) -> Iterator[tuple[str, dict]]:
Expand Down Expand Up @@ -319,9 +319,9 @@ def _iri_value_subdoc(self, iri: str) -> dict:
_shortwalk = self._fullwalk.shortwalk_from(iri)
return {
**self._paths_and_values(_shortwalk),
'value_name': list(self._texts_at_properties(_shortwalk, ts.NAME_PROPERTIES)),
'value_title': list(self._texts_at_properties(_shortwalk, ts.TITLE_PROPERTIES)),
'value_label': list(self._texts_at_properties(_shortwalk, ts.LABEL_PROPERTIES)),
'value_name': list(self._texts_at_properties(_shortwalk, osfmap.NAME_PROPERTIES)),
'value_title': list(self._texts_at_properties(_shortwalk, osfmap.TITLE_PROPERTIES)),
'value_label': list(self._texts_at_properties(_shortwalk, osfmap.LABEL_PROPERTIES)),
'at_card_propertypaths': [
ts.propertypath_as_keyword(_path)
for _path in self._fullwalk.paths_by_iri[iri]
Expand Down
56 changes: 56 additions & 0 deletions tests/trove/_input_output_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import abc
from unittest import TestCase
import typing


class BasicInputOutputTestCase(TestCase):
    '''base for tests that have a simple/repetitive input/output pattern

    concrete subclasses define `inputs`, `expected_outputs`, and
    `compute_output` -- a single `test_inputs_match_outputs` test method is
    then added automatically, running one subtest per key in `inputs`.
    '''
    maxDiff = None  # usually want the full diff for these tests, tho can override if you prefer

    # expected on subclasses:
    inputs: typing.ClassVar[
        dict[str, typing.Any]
    ]
    expected_outputs: typing.ClassVar[
        # keys should match `inputs` keys (enforce with types? maybe someday)
        dict[str, typing.Any]
    ]

    # required in subclasses
    @abc.abstractmethod
    def compute_output(self, given_input: typing.Any) -> typing.Any:
        '''compute the actual output for one input value (implement per subclass)'''
        raise NotImplementedError

    # (optional override, for when equality isn't so easy)
    def assert_outputs_equal(self, expected_output: typing.Any, actual_output: typing.Any) -> None:
        self.assertEqual(expected_output, actual_output)

    # (optional override, for when logic is more complicated)
    def run_input_output_test(self, given_input, expected_output):
        _actual_output = self.compute_output(given_input)
        self.assert_outputs_equal(expected_output, _actual_output)

    ###
    # private details

    def __init_subclass__(cls, **kwargs):
        # forward to super so cooperative subclassing keeps working
        super().__init_subclass__(**kwargs)
        # HACK: assign `test_*` method only on concrete subclasses,
        # so the test runner doesn't try instantiating a base class
        if getattr(cls, 'inputs', None) and getattr(cls, 'expected_outputs', None):
            cls.test_inputs_match_outputs = cls._test_inputs_match_outputs  # type: ignore[attr-defined]

    # the only actual test method -- assigned to concrete subclasses in __init_subclass__
    def _test_inputs_match_outputs(self):
        for _name, _input, _expected_output in self._iter_cases():
            with self.subTest(name=_name):
                self.run_input_output_test(_input, _expected_output)

    def _iter_cases(self) -> typing.Iterator[tuple[str, typing.Any, typing.Any]]:
        # yields (name, input, expected_output) tuples
        for _name, _input in self.inputs.items():
            try:
                _expected_output = self.expected_outputs[_name]
            except KeyError:
                # missing expectation: point at exactly what the subclass must add
                raise NotImplementedError(f'{self.__class__.__qualname__}.expected_outputs["{_name}"]')
            yield (_name, _input, _expected_output)
74 changes: 19 additions & 55 deletions tests/trove/derive/_base.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,35 @@
import datetime
from unittest import mock, TestCase
import typing
from unittest import mock

from primitive_metadata import primitive_rdf as rdf

from trove.derive._base import IndexcardDeriver
from tests.trove._input_output_tests import BasicInputOutputTestCase
from ._inputs import DERIVER_TEST_DOCS, DeriverTestDoc


SHOULD_SKIP = object() # for deriver inputs that should be skipped


class BaseIndexcardDeriverTest(TestCase):
maxDiff = None
class BaseIndexcardDeriverTest(BasicInputOutputTestCase):
inputs = DERIVER_TEST_DOCS # (leave this one alone)

#######
# implement these things:
# required on subclasses: `deriver_class` and `expected_outputs`
deriver_class: type[IndexcardDeriver]
# expected_outputs: dict[str, typing.Any]
# ^ (from BasicInputOutputTestCase) must have the same keys as
# `DERIVER_TEST_DOCS` and values that are either `SHOULD_SKIP`
# (when `deriver.should_skip()` should return true) or a value
# that can be compared against `deriver.derive_card_as_text()`

# a subclass of IndexcardDeriver
deriver_class: type
def compute_output(self, given_input):
return self._get_deriver(given_input).derive_card_as_text()

# dictionary with the same keys as `DERIVER_TEST_DOCS` and values that
# are either `SHOULD_SKIP` (above) or strings that will be passed as
# `expected_text` to `derived_texts_equal`
expected_outputs: dict

# (optional override, for when equality isn't so easy)
def assert_derived_texts_equal(self, expected_text: str, actual_text: str) -> None:
self.assertEqual(expected_text, actual_text)

#######
# don't override anything else

test_should_skip: typing.Callable[['BaseIndexcardDeriverTest'], None]
test_derive_card_as_text: typing.Callable[['BaseIndexcardDeriverTest'], None]

def __init_subclass__(cls):
# add test methods on subclasses (but not the base class!)
cls.test_should_skip = _test_should_skip
cls.test_derive_card_as_text = _test_derive_card_as_text

def setUp(self):
_patcher = mock.patch('share.util.IDObfuscator.encode', new=lambda x: x.id)
_patcher.start()
self.addCleanup(_patcher.stop)
def run_input_output_test(self, given_input, expected_output):
if expected_output is SHOULD_SKIP:
self.assertTrue(self._get_deriver(given_input).should_skip())
else:
super().run_input_output_test(given_input, expected_output)

def _get_deriver(self, input_doc: DeriverTestDoc):
_mock_suid = mock.Mock()
Expand All @@ -62,26 +49,3 @@ def _get_deriver(self, input_doc: DeriverTestDoc):
_mock_indexcard_rdf.indexcard.id = '--indexcard-id--'
_mock_indexcard_rdf.indexcard.source_record_suid = _mock_suid
return self.deriver_class(_mock_indexcard_rdf)

def _iter_test_cases(self):
for _input_key, _input_doc in DERIVER_TEST_DOCS.items():
_expected_output = self.expected_outputs.get(_input_key)
if _expected_output is None:
raise NotImplementedError(f'{self.__class__.__qualname__}.expected_outputs["{_input_key}"]')
with self.subTest(input_key=_input_key):
yield (_input_key, self._get_deriver(_input_doc), _expected_output)


def _test_should_skip(self: BaseIndexcardDeriverTest) -> None:
for _input_key, _deriver, _expected_output in self._iter_test_cases():
self.assertEqual(
bool(_expected_output is SHOULD_SKIP),
_deriver.should_skip(),
)


def _test_derive_card_as_text(self: BaseIndexcardDeriverTest) -> None:
for _input_key, _deriver, _expected_output in self._iter_test_cases():
if _expected_output is not SHOULD_SKIP:
_output = _deriver.derive_card_as_text()
self.assert_derived_texts_equal(_expected_output, _output)
2 changes: 1 addition & 1 deletion tests/trove/derive/test_osfmap_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
class TestOsfmapJsonDeriver(BaseIndexcardDeriverTest):
deriver_class = OsfmapJsonDeriver

def assert_derived_texts_equal(self, expected, actual):
def assert_outputs_equal(self, expected, actual):
self.assertEqual(expected, json.loads(actual))

expected_outputs = {
Expand Down
9 changes: 8 additions & 1 deletion tests/trove/derive/test_sharev2_elastic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from unittest import mock

from trove.derive.sharev2_elastic import ShareV2ElasticDeriver

Expand All @@ -8,7 +9,13 @@
class TestShareV2ElasticDeriver(BaseIndexcardDeriverTest):
deriver_class = ShareV2ElasticDeriver

def assert_derived_texts_equal(self, expected, actual):
def setUp(self):
# un-obfuscated ids, please
_patcher = mock.patch('share.util.IDObfuscator.encode', new=lambda x: x.id)
_patcher.start()
self.addCleanup(_patcher.stop)

def assert_outputs_equal(self, expected, actual):
self.assertEqual(expected, json.loads(actual))

expected_outputs = {
Expand Down
Empty file added tests/trove/render/__init__.py
Empty file.
48 changes: 48 additions & 0 deletions tests/trove/render/_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import json

from trove.render._base import BaseRenderer
from trove.render._rendering import ProtoRendering
from tests.trove._input_output_tests import BasicInputOutputTestCase
from ._inputs import UNRENDERED_RDF, UNRENDERED_SEARCH_RDF, RdfCase


class TroveRendererTests(BasicInputOutputTestCase):
    '''base for renderer tests: render each rdf case, compare against expected renderings'''
    inputs = UNRENDERED_RDF

    # subclasses must set `renderer_class`
    renderer_class: type[BaseRenderer]
    # (and `expected_outputs: dict[str, typing.Any]`, from BasicInputOutputTestCase)

    def compute_output(self, given_input: RdfCase):
        '''render the given rdf case with this test's `renderer_class`'''
        return self.renderer_class(
            response_focus_iri=given_input.focus,
            response_tripledict=given_input.tripledict,
        ).render_document()

    def assert_outputs_equal(self, expected_output, actual_output) -> None:
        if expected_output is ...:
            # placeholder expectation: show what was actually rendered,
            # so the real expected value can be filled in
            print(repr(actual_output))
            raise NotImplementedError
        self.assertEqual(expected_output.mediatype, actual_output.mediatype)
        _expected_content = self._get_rendered_output(expected_output)
        _actual_content = self._get_rendered_output(actual_output)
        self.assertEqual(_expected_content, _actual_content)

    def _get_rendered_output(self, rendering: ProtoRendering):
        # for now, they always iter strings (update if/when bytes are in play)
        return ''.join(rendering.iter_content())  # type: ignore[arg-type]


class TrovesearchRendererTests(TroveRendererTests):
    # same renderer tests, but run against search-response rdf inputs
    inputs = UNRENDERED_SEARCH_RDF


class TroveJsonRendererTests(TroveRendererTests):
    '''renderer tests for json mediatypes: compare parsed json, not raw text'''
    def _get_rendered_output(self, rendering: ProtoRendering):
        _rendered_text = super()._get_rendered_output(rendering)
        return json.loads(_rendered_text)


class TrovesearchJsonRendererTests(TroveJsonRendererTests, TrovesearchRendererTests):
    # combines json-parsing comparison with search-response inputs (mixin-only; no new behavior)
    pass
Loading
Loading