Skip to content

Commit

Permalink
Merge branch 'master' into CMR-9501
Browse files Browse the repository at this point in the history
  • Loading branch information
TylerHeald1 committed Nov 14, 2023
2 parents 65421ac + ed36ada commit 314f0d2
Show file tree
Hide file tree
Showing 93 changed files with 13,195 additions and 12,142 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/graph-db.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:

strategy:
matrix:
node-version: [16.15.0]
node-version: [lts/hydrogen]
# See supported Node.js release schedule at https://nodejs.org/en/about/releases/

services:
Expand Down
17 changes: 16 additions & 1 deletion bootstrap-app/src/cmr/bootstrap/api/routes.clj
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
[cmr.bootstrap.api.fingerprint :as fingerprint]
[cmr.bootstrap.api.rebalancing :as rebalancing]
[cmr.bootstrap.api.virtual-products :as virtual-products]
[cmr.bootstrap.data.metadata-retrieval.collection-metadata-cache :as cmc]
[cmr.bootstrap.services.health-service :as hs]
[cmr.common-app.api.health :as common-health]
[cmr.common-app.api.routes :as common-routes]
[cmr.common.api.context :as context]
[cmr.common-app.data.metadata-retrieval.collection-metadata-cache :as mc]
[cmr.common-app.services.kms-fetcher :as kms-fetcher]
[cmr.common.api.errors :as errors]
[cmr.common.jobs :as jobs]
[cmr.common.log :refer [info]]
[cmr.common.generics :as common-generic]
[compojure.core :refer :all]
Expand Down Expand Up @@ -114,6 +116,19 @@
(fingerprint/fingerprint-by-id request-context concept-id)))
;; Add routes for accessing caches
common-routes/cache-api-routes
;; Route for forcing a refresh of a named cache.
;; Responds 200 after refreshing a known cache and 404 for any other cache name.
(context "/caches/refresh/:cache-name" [cache-name]
  (POST "/" {:keys [request-context]}
    (acl/verify-ingest-management-permission request-context :update)
    ;; The cond must be the last form of the POST body so that its result (including the
    ;; not-found response in the :else branch) is what the route returns.
    (cond
      (= (keyword cache-name) mc/cache-key)
      (do
        (cmc/refresh-cache request-context)
        {:status 200})

      (= (keyword cache-name) kms-fetcher/kms-cache-key)
      (do
        (kms-fetcher/refresh-kms-cache request-context)
        {:status 200})

      :else
      (route/not-found "Not Found"))))
;; db migration route
(POST "/db-migrate" {:keys [request-context params]}
(acl/verify-ingest-management-permission request-context :update)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
(ns cmr.bootstrap.data.metadata-retrieval.collection-metadata-cache
"Defines a cache for catalog item metadata. It currently only stores collections.
The metadata cache contains data like the following:
concept-id -> revision-format-map
* concept-id
* revision-id
* native-format - A key or map of format and version identifying the native format
* various format keys each mapped to compressed metadata."
(:require
[clj-time.core :as t]
[clojure.set :as cset]
[cmr.common-app.data.metadata-retrieval.collection-metadata-cache :as cmn-coll-metadata-cache]
[cmr.common-app.data.metadata-retrieval.revision-format-map :as crfm]
[cmr.bootstrap.data.metadata-retrieval.metadata-transformer :as metadata-transformer]
[cmr.common.config :refer [defconfig]]
[cmr.common.hash-cache :as hash-cache]
[cmr.common.jobs :refer [defjob]]
[cmr.common.log :as log :refer [info]]
[cmr.common.util :as c-util]
[cmr.metadata-db.services.concept-service :as metadata-db]
[cmr.umm-spec.versioning :as umm-version]))

;; Configuration hook for opting formats out of the cache. cached-formats subtracts this
;; set from all-formats, so with the default (an empty set) nothing is excluded.
(defconfig non-cached-collection-metadata-formats
"Defines a set of collection metadata formats that will not be cached in memory"
{:default #{}
:type :edn})

(def all-formats
  "The complete set of collection metadata formats that are candidates for caching. An entry is
  either a format keyword or, for UMM JSON, a map holding the format and its version."
  #{:echo10
    :dif
    :dif10
    :iso19115
    ;; When the UMM version is upgraded, the previous UMM version should be cached as well.
    {:format :umm-json
     :version umm-version/current-collection-version}})

(defn cached-formats
  "Returns the set of formats that are cached: everything in all-formats except the formats
  listed by the non-cached-collection-metadata-formats configuration."
  []
  (let [excluded (non-cached-collection-metadata-formats)]
    (cset/difference all-formats excluded)))

(defn- concept-tuples->cache-map
  "Fetches the concepts identified by concept-tuples from metadata db, converts each one to a
  compressed revision format map covering the cached formats, and returns a map of
  concept id -> revision format map."
  [context concept-tuples]
  (let [mdb-context (cmn-coll-metadata-cache/context->metadata-db-context context)
        ;; doall forces the fetch to complete before any further processing starts.
        fetched (doall (metadata-db/get-concepts mdb-context concept-tuples true))
        cleaned (cmn-coll-metadata-cache/concepts-without-xml-processing-inst fetched)
        concept->rfm (fn [concept]
                       (crfm/compress
                        (crfm/concept->revision-format-map
                         context
                         concept
                         (cached-formats)
                         metadata-transformer/transform-to-multiple-formats
                         true)))
        rfms (c-util/fast-map concept->rfm cleaned)]
    ;; Index the revision format maps by their concept id.
    (into {} (map (juxt :concept-id identity)) rfms)))

(defn update-cache
  "Incrementally updates the collection metadata cache: fetches the collections reported as
  updated by elastic search, builds cache entries for them in batches of 1000, then stores the
  new refresh timestamp and the entries in the cache."
  [context]
  (info "Updating collection metadata cache")
  (let [cache-key cmn-coll-metadata-cache/cache-key
        ;; Captured before querying so the recorded date is not later than the query itself.
        started-at (str (t/now))
        tuples (cmn-coll-metadata-cache/fetch-updated-collections-from-elastic context)
        batches (partition-all 1000 tuples)
        entries (into {}
                      (mapcat (fn [batch] (concept-tuples->cache-map context batch)))
                      batches)
        cache (hash-cache/context->cache context cache-key)]
    (hash-cache/set-value cache
                          cache-key
                          cmn-coll-metadata-cache/incremental-since-refresh-date-key
                          started-at)
    (hash-cache/set-values cache cache-key entries)
    (info "Metadata cache update complete. Cache Size:"
          (hash-cache/cache-size cache cache-key))))

(defn refresh-cache
  "Rebuilds the collection metadata cache contents: fetches the collections from elastic search,
  builds cache entries for them in batches of 1000, then stores the new refresh timestamp and
  the entries in the cache."
  [context]
  (info "Refreshing collection metadata cache")
  (let [cache-key cmn-coll-metadata-cache/cache-key
        ;; Captured before querying so the recorded date is not later than the query itself.
        started-at (str (t/now))
        tuples (cmn-coll-metadata-cache/fetch-collections-from-elastic context)
        add-batch (fn [acc batch]
                    (merge acc (concept-tuples->cache-map context batch)))
        entries (reduce add-batch {} (partition-all 1000 tuples))
        cache (hash-cache/context->cache context cache-key)]
    (hash-cache/set-value cache
                          cache-key
                          cmn-coll-metadata-cache/incremental-since-refresh-date-key
                          started-at)
    (hash-cache/set-values cache cache-key entries)
    (info "Metadata cache refresh complete. Cache Size:"
          (hash-cache/cache-size cache cache-key))))

;; Job whose body runs refresh-cache with a context map built from the supplied system.
;; NOTE(review): presumably run by the scheduler using the schedule returned by
;; refresh-collections-metadata-cache-job — confirm against the defjob macro.
(defjob RefreshCollectionsMetadataCache
[ctx system]
(refresh-cache {:system system}))

(defn refresh-collections-metadata-cache-job
  "Returns the job description map for RefreshCollectionsMetadataCache registered under
  job-key, scheduled daily at 06:00 (the time is UTC)."
  [job-key]
  ;; Plain decimals are used for hour and minute; the zero-padded forms 06 and 00 are read by
  ;; Clojure as octal literals, which yield the same values here.
  (let [daily-run-time [6 0]]
    {:job-key job-key
     :job-type RefreshCollectionsMetadataCache
     :daily-at-hour-and-minute daily-run-time}))

;; Job whose body runs update-cache with a context map built from the supplied system.
;; NOTE(review): presumably run by the scheduler using the interval returned by
;; update-collections-metadata-cache-job — confirm against the defjob macro.
(defjob UpdateCollectionsMetadataCache
[ctx system]
(update-cache {:system system}))

(defn update-collections-metadata-cache-job
  "Returns the job description map for UpdateCollectionsMetadataCache registered under job-key.
  The run interval is read from the shared update-collection-metadata-cache-interval config."
  [job-key]
  (let [interval (cmn-coll-metadata-cache/update-collection-metadata-cache-interval)]
    {:job-key job-key
     :job-type UpdateCollectionsMetadataCache
     :interval interval}))

;; REPL scratch forms (a comment form is never evaluated on load): refresh the cache using the
;; bootstrap app's system, then pass the bootstrap cache to prettify-cache.
;; NOTE(review): assumes a dev-time `user/system` var holding the running apps — confirm.
(comment
(refresh-cache {:system (get-in user/system [:apps :bootstrap])})
(cmn-coll-metadata-cache/prettify-cache (get-in user/system [:apps :bootstrap :caches cmn-coll-metadata-cache/cache-key])))
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
(ns cmr.bootstrap.data.metadata-retrieval.metadata-transformer
"Contains functions for converting collection concept metadata into other formats."
(:require
[cheshire.core :as json]
[cmr.common.log :as log :refer (debug info warn error)]
[cmr.common.mime-types :as mt]
[cmr.common.services.errors :as errors]
[cmr.common.util :as c-util]
[cmr.common.xml :as cx]
[cmr.umm-spec.migration.version.core :as vm]
[cmr.umm-spec.umm-json :as umm-json]
[cmr.umm-spec.umm-spec-core :as umm-spec]))

;; Marked ^:dynamic solely so tests can rebind it to exercise failure cases.
(defn ^:dynamic transform-strategy
  "Chooses the strategy for converting concept to target-format. Returns
  :current-format when the concept is already in the target format (no conversion needed),
  :migrate-umm-json when both the concept and the target are UMM JSON, and
  :umm-spec otherwise."
  [concept target-format]
  ;; A :native target is rejected by this precondition; it should be handled elsewhere.
  {:pre [(not= :native target-format)]}
  (let [source-mime-type (:format concept)
        umm-json? (fn [fmt] (= :umm-json (mt/format-key fmt)))]
    (if (= (mt/mime-type->format source-mime-type) target-format)
      ;; Same format and version.
      :current-format
      (if (and (umm-json? source-mime-type)
               (umm-json? target-format))
        :migrate-umm-json
        :umm-spec))))

(defn- transform-with-strategy-umm-spec
  "Parses the concept's metadata into a UMM record once, then generates metadata for each
  of target-formats from that record. Returns a map of target format -> generated metadata.
  The third argument (the strategy) is unused."
  [context concept _ target-formats]
  (let [source-mime-type (:format concept)
        umm-record (umm-spec/parse-metadata context
                                            (:concept-type concept)
                                            source-mime-type
                                            (:metadata concept))
        generate (fn [target-format]
                   [target-format
                    (umm-spec/generate-metadata context umm-record target-format)])]
    (into {} (map generate) target-formats)))

(defn transform-with-strategy-migrate-umm-json-to-target-format
  "Migrates the UMM JSON metadata from source-version to the version named by target-format,
  strips nils and empty maps/seqs, serializes the result back to JSON, and returns
  translated-map with that JSON associated under target-format."
  [context concept-type source-version metadata translated-map target-format]
  (let [target-version (umm-spec/umm-json-version concept-type target-format)
        parsed (json/decode metadata true)
        migrated (vm/migrate-umm context concept-type source-version target-version parsed)]
    (->> migrated
         c-util/remove-nils-empty-maps-seqs
         umm-json/umm->json
         (assoc translated-map target-format))))

(defn- transform-with-strategy-migrate-umm-json
  "Migrates a UMM JSON concept to every format in target-formats, logs how long the whole
  migration took along with details of the concept, and returns a map of
  target format -> migrated metadata. The third argument (the strategy) is unused."
  [context concept _ target-formats]
  (let [concept-mime-type (:format concept)
        metadata (:metadata concept)
        concept-type (:concept-type concept)
        source-version (umm-spec/umm-json-version concept-type concept-mime-type)
        migrate-one (partial transform-with-strategy-migrate-umm-json-to-target-format
                             context
                             concept-type
                             source-version
                             metadata)
        [elapsed translated] (c-util/time-execution
                              (reduce migrate-one {} target-formats))]
    (info "transform-with-strategy migrate-umm-json: "
          "time: " elapsed
          "concept-mime-type: " concept-mime-type
          "concept-type: " concept-type
          "parent request num-concepts: " (:num-concepts concept)
          "target-formats: " target-formats
          "source version: " source-version
          "provider: " (:provider-id concept)
          "metadata length: " (count metadata))
    translated))

(defn transform-with-strategy
  "Dispatches on strategy to the function that translates concept into target-formats and
  returns a map of format -> metadata. For :current-format the concept's own metadata is
  returned (with XML processing instructions removed) keyed by its own format. An
  unrecognized strategy is reported through errors/internal-error!."
  [context concept strategy target-formats]
  (condp = strategy
    :current-format
    (let [own-format (mt/mime-type->format (:format concept))]
      {own-format (cx/remove-xml-processing-instructions (:metadata concept))})

    :migrate-umm-json
    (transform-with-strategy-migrate-umm-json context concept strategy target-formats)

    :umm-spec
    (transform-with-strategy-umm-spec context concept strategy target-formats)

    ;; No clause matched.
    (errors/internal-error!
     (format "Unexpected transform strategy [%s] from concept of type [%s] to [%s]"
             strategy (:format concept) (pr-str target-formats)))))

(defn transform-to-multiple-formats-with-strategy
  "Runs transform-with-strategy for a single [strategy target-formats] pair. When
  ignore-exceptions? is truthy, anything thrown is logged and the value of the log call is
  returned instead of propagating; otherwise exceptions propagate to the caller."
  [context concept ignore-exceptions? [strategy formats]]
  (let [run-transform (fn [] (transform-with-strategy context concept strategy formats))]
    (if-not ignore-exceptions?
      (run-transform)
      (try
        (run-transform)
        (catch Throwable e
          (log/error
           e
           (str "Ignoring exception while trying to transform metadata for concept "
                (:concept-id concept) " with revision " (:revision-id concept) " error: "
                (.getMessage e))))))))

(defn transform-to-multiple-formats
  "Translates concept into each of target-formats and returns a map of target format to
  metadata. Target formats are grouped by transform strategy and each group is translated
  together; groups that yield nil are left out of the result. The concept must not be deleted."
  [context concept target-formats ignore-exceptions?]
  {:pre [(not (:deleted concept))]}
  (let [strategy-for (fn [target-format] (transform-strategy concept target-format))
        formats-by-strategy (group-by strategy-for target-formats)
        translate-group (fn [strategy-and-formats]
                          (transform-to-multiple-formats-with-strategy
                           context concept ignore-exceptions? strategy-and-formats))]
    ;; keep drops the nil groups; cat pours the entries of every per-group map into one map.
    (into {} (comp (keep translate-group) cat) formats-by-strategy)))
15 changes: 10 additions & 5 deletions bootstrap-app/src/cmr/bootstrap/system.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@
represented as a map of components. Design based on
http://stuartsierra.com/2013/09/15/lifecycle-composition and related posts."
(:require
[cmr.access-control.system :as ac-system]
[cmr.acl.core :as acl]
[cmr.bootstrap.api.routes :as routes]
[cmr.bootstrap.config :as bootstrap-config]
[cmr.bootstrap.data.bulk-index :as bi]
[cmr.bootstrap.data.bulk-migration :as bm]
[cmr.bootstrap.data.metadata-retrieval.collection-metadata-cache :as b-coll-metadata-cache]
[cmr.bootstrap.data.virtual-products :as vp]
[cmr.bootstrap.services.dispatch.core :as dispatch]
[cmr.common-app.api.health :as common-health]
[cmr.common-app.data.metadata-retrieval.collection-metadata-cache :as cmn-coll-metadata-cache]
[cmr.common-app.services.jvm-info :as jvm-info]
[cmr.common-app.services.kms-fetcher :as kf]
[cmr.common-app.services.search.elastic-search-index :as search-index]
[cmr.common.api.web-server :as web]
[cmr.common.cache :as cache]
[cmr.common.cache.in-memory-cache :as mem-cache]
Expand All @@ -29,7 +31,6 @@
[cmr.metadata-db.config :as mdb-config]
[cmr.metadata-db.system :as mdb-system]
[cmr.metadata-db.services.util :as mdb-util]
[cmr.oracle.connection :as oracle]
[cmr.transmit.config :as transmit-config]))

(defconfig db-batch-size
Expand All @@ -42,7 +43,7 @@

(def ^:private component-order
"Defines the order to start the components."
[:log :caches :db :queue-broker :scheduler :web :nrepl])
[:log :caches :search-index :db :queue-broker :scheduler :web :nrepl])

(def system-holder
"Required for jobs"
Expand All @@ -66,6 +67,7 @@
sys {:log (log/create-logger-with-log-level (log-level))
:embedded-systems {:metadata-db metadata-db
:indexer indexer}
:search-index (search-index/create-elastic-search-index)
:db-batch-size (db-batch-size)
:core-async-dispatcher (dispatch/create-backend :async)
:synchronous-dispatcher (dispatch/create-backend :sync)
Expand All @@ -77,9 +79,12 @@
:relative-root-url (transmit-config/bootstrap-relative-root-url)
:caches {acl/token-imp-cache-key (acl/create-token-imp-cache)
kf/kms-cache-key (kf/create-kms-cache)
common-health/health-cache-key (common-health/create-health-cache)}
common-health/health-cache-key (common-health/create-health-cache)
cmn-coll-metadata-cache/cache-key (cmn-coll-metadata-cache/create-cache)}
:scheduler (jobs/create-scheduler `system-holder [jvm-info/log-jvm-statistics-job
(kf/refresh-kms-cache-job "bootstrap-kms-cache-refresh")])
(kf/refresh-kms-cache-job "bootstrap-kms-cache-refresh")
(b-coll-metadata-cache/refresh-collections-metadata-cache-job "bootstrap-collections-metadata-cache-refresh")
(b-coll-metadata-cache/update-collections-metadata-cache-job "bootstrap-collections-metadata-cache-update")])
:queue-broker queue-broker}]
(transmit-config/system-with-connections sys [:metadata-db :echo-rest :kms
:indexer :access-control])))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
(ns cmr.bootstrap.test.data.metadata-retrieval.metadata-transformer
(:require
[clojure.java.io :as io]
[clojure.test :refer :all]
[cmr.common.mime-types :as mt]
[cmr.common.util :as util]
[cmr.bootstrap.data.metadata-retrieval.metadata-transformer :as metadata-transformer]))

;; Holds the real transform-strategy function so the test's replacement can delegate to it.
(def original-transform-strategy metadata-transformer/transform-strategy)

;; Test fixture metadata
(def dif10-concept
  "Stand-in collection concept whose metadata is the DIF10 sample collection from the example
  data resources."
  {:concept-type :collection
   :concept-id "C1-PROV1"
   :revision-id 1
   :format mt/dif10
   :metadata (slurp (io/resource "example-data/dif10/sample_collection.xml"))})

;; Verifies that, with ignore-exceptions? enabled, a target format whose transformation fails
;; is dropped from the result while the remaining formats are still produced.
(deftest transform-to-multiple-formats-test
  (testing "Skipping exceptions if failed"
    (let [num-calls (atom 0)
          ;; Delegates to the real strategy function, except on the second call where it
          ;; returns a strategy that transform-with-strategy does not recognize.
          bad-transform-strategy (fn [& args]
                                   (if (= (swap! num-calls inc) 2)
                                     :weird-transform-strategy
                                     (apply original-transform-strategy args)))]
      (with-bindings {#'metadata-transformer/transform-strategy bad-transform-strategy}
        (let [actual (metadata-transformer/transform-to-multiple-formats
                      ;; The second transform (:dif) fails, so dif is excluded from the output.
                      ;; Arguments: context, concept, target-formats, ignore-exceptions?
                      {} dif10-concept [:echo10 :dif :iso19115] true)
              actual-formats (set (keys actual))]
          ;; We only check the generated formats, not the actual metadata generated, for
          ;; simplicity reasons.
          (is (= #{:echo10 :iso19115} actual-formats)))))))
Loading

0 comments on commit 314f0d2

Please sign in to comment.