From db2081da91d69c529c710d4abf98cb0654e4931e Mon Sep 17 00:00:00 2001 From: Kesara Rathnayake Date: Fri, 16 Aug 2024 11:56:21 +1200 Subject: [PATCH] fix: Avoid making unnecessary datatracker lookups (#457) Fixes #452 --- bibxml/xml2rfc_adapters.py | 152 +++++++++++----------- xml2rfc_compat/fixtures/test_refdata.json | 3 +- xml2rfc_compat/tests/test_adapters.py | 19 +++ 3 files changed, 99 insertions(+), 75 deletions(-) diff --git a/bibxml/xml2rfc_adapters.py b/bibxml/xml2rfc_adapters.py index afac97a3..331669a3 100644 --- a/bibxml/xml2rfc_adapters.py +++ b/bibxml/xml2rfc_adapters.py @@ -231,82 +231,86 @@ def resolve(self) -> BibliographicItem: "lacks I-D version", self.anchor) indexed_version = None - # Check Datatracker’s latest version (slow) - try: - dt_bibitem = get_internet_draft( - f'draft-{self.bare_anchor}', - strict=indexed_bibitem is None, - ).bibitem - - if len(dt_bibitem.version or []) > 0: - dt_version = dt_bibitem.version[0].draft - if not isinstance(dt_version, str): - raise ValueError( - "Malformed I-D version (not a string): " - f"{dt_version}") - try: - parsed_version = int(dt_version) - except (ValueError, TypeError): - raise ValueError( - "Malformed I-D version (doesn’t parse to an integer): " - f"{dt_version}") - else: - if parsed_version < 0: + dt_version: Optional[str] = None + dt_bibitem: Optional[BibliographicItem] = None + + if not (self.requested_version and self.requested_version == indexed_version): + # Check Datatracker’s latest version (slow) + # when request is unversioned or requested version is not indexed + try: + dt_bibitem = get_internet_draft( + f'draft-{self.bare_anchor}', + strict=indexed_bibitem is None, + ).bibitem + + if dt_bibitem and dt_bibitem.version and len(dt_bibitem.version) > 0: + dt_version = dt_bibitem.version[0].draft + if not isinstance(dt_version, str): raise ValueError( - "Malformed I-D version (not a positive integer): " + "Malformed I-D version (not a string): " f"{dt_version}") - else: - raise ValueError("Missing I-D version") - - except Exception: - log.exception( - "Failed to fetch or validate latest draft from Datatracker " - "when resolving xml2rfc bibxml3 path") - else: - # Conditions for falling back to Datatracker’s response. - # We want to prefer indexed items in general, because they tend to - # provide more complete data, but in some cases we have no choice - # but to fall back. - if any([ - # We were not requested a version - not self.requested_version, - # We were requested a version and we got that version - # from Datatracker - self.requested_version == dt_version, - ]) and any([ - # We did not find indexed item matching given ID - # and maybe version: - not indexed_bibitem, - # We were not requested a version, - # and latest version on Datatracker is different - # (assuming newer): - not self.requested_version - and indexed_version != dt_version, - # We were requested a version, - # and somehow indexed version does not match requested version: - self.requested_version - and indexed_version != self.requested_version, - ]): - # Datatracker’s version is newer - # or we don’t have this draft indexed. - # Note this (should be transient until sources are reindexed, - # if not then there’s a problem) - # and return Datatracker’s version - self.log(f"returning Datatracker version {dt_version}") - log.warn( - "Returning Datatracker result for xml2rfc bibxml3 path. " - "If unversioned I-D was requested, " - "then Datatracker may have a newer I-D version " - "than indexed sources. " - "Alternatively, indexed version could not be used " - "for some reason. " - "Requested version %s, " - "indexed sources have version %s, " - "returning Datatracker’s version %s. ", - self.requested_version, - indexed_version, - dt_version) - self.resolved_item = dt_bibitem + try: + parsed_version = int(dt_version) + except (ValueError, TypeError): + raise ValueError( + "Malformed I-D version (doesn’t parse to an integer): " + f"{dt_version}") + else: + if parsed_version < 0: + raise ValueError( + "Malformed I-D version (not a positive integer): " + f"{dt_version}") + else: + raise ValueError("Missing I-D version") + + except Exception: + log.exception( + "Failed to fetch or validate latest draft from Datatracker " + "when resolving xml2rfc bibxml3 path") + # Conditions for falling back to Datatracker’s response. + # We want to prefer indexed items in general, because they tend to + # provide more complete data, but in some cases we have no choice + # but to fall back. + if any([ + # We were not requested a version + not self.requested_version, + # We were requested a version and we got that version + # from Datatracker + self.requested_version == dt_version, + ]) and any([ + # We did not find indexed item matching given ID + # and maybe version: + not indexed_bibitem, + # We were not requested a version, + # and latest version on Datatracker is different + # (assuming newer): + not self.requested_version + and indexed_version != dt_version, + # We were requested a version, + # and somehow indexed version does not match requested version: + self.requested_version + and indexed_version != self.requested_version, + ]): + # Datatracker’s version is newer + # or we don’t have this draft indexed. + # Note this (should be transient until sources are reindexed, + # if not then there’s a problem) + # and return Datatracker’s version + self.log(f"returning Datatracker version {dt_version}") + log.warn( + "Returning Datatracker result for xml2rfc bibxml3 path. " + "If unversioned I-D was requested, " + "then Datatracker may have a newer I-D version " + "than indexed sources. " + "Alternatively, indexed version could not be used " + "for some reason. " + "Requested version %s, " + "indexed sources have version %s, " + "returning Datatracker’s version %s. ", + self.requested_version, + indexed_version, + dt_version) + self.resolved_item = dt_bibitem if not self.resolved_item: if indexed_bibitem and any([ diff --git a/xml2rfc_compat/fixtures/test_refdata.json b/xml2rfc_compat/fixtures/test_refdata.json index cf79d859..0746c00e 100644 --- a/xml2rfc_compat/fixtures/test_refdata.json +++ b/xml2rfc_compat/fixtures/test_refdata.json @@ -632,7 +632,7 @@ "link": [ { "type": "TXT", - "content": "https://www.ietf.org/archive/id/draft-ietf-hip-rfc5201-bis-13.txt" + "content": "https://www.example.org/versioned-13.txt" } ], "type": "standard", @@ -697,6 +697,7 @@ "language": [ "en" ], + "version": "13", "docnumber": "I-D.ietf-hip-rfc5201-bis", "contributor": [ { diff --git a/xml2rfc_compat/tests/test_adapters.py b/xml2rfc_compat/tests/test_adapters.py index 1bb1668e..f8d39028 100644 --- a/xml2rfc_compat/tests/test_adapters.py +++ b/xml2rfc_compat/tests/test_adapters.py @@ -23,6 +23,7 @@ def setUp(self): self.rfcs_ref = "RFC.4037" self.misc_ref = "FIPS.180.1993" self.internet_drafts_ref = "I-D.ietf-hip-rfc5201-bis" + self.internet_drafts_versioned_ref = "I-D.draft-ietf-hip-rfc5201-bis-13" self.w3c_ref = "rec-powder-grouping-20090901" self.threegpp_ref = "SDO-3GPP.25.321:Rel-8/8.3.0" self.ieee_ref = "R.IEEE.P2740/D-6.5.2020-08" @@ -68,6 +69,24 @@ def test_internet_drafts(self): bibitem = adapter.resolve() self._assert_is_instance_of_bibliographicitem(bibitem) self._assert_refs_equal(bibitem, self.internet_drafts_ref) + self.assertTrue( + all( + # provides latest link from the datatracker + link.content.startswith("https://datatracker.ietf.org/") + for link in as_list(bibitem.link or []) + ) + ) + + def test_internet_drafts_versioned(self): + adapter = InternetDraftsAdapter(self.dirname, "bibxml3", self.internet_drafts_versioned_ref) + bibitem = adapter.resolve() + self.assertTrue( + all( + # provides link from the fixtures + link.content == "https://www.example.org/versioned-13.txt" + for link in as_list(bibitem.link or []) + ) + ) def test_internet_drafts_not_found(self): adapter = InternetDraftsAdapter(self.dirname, "bibxml3", self.internet_drafts_ref.replace("b", ""))