-
Notifications
You must be signed in to change notification settings - Fork 95
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
93 changed files
with
13,195 additions
and
12,142 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
122 changes: 122 additions & 0 deletions
122
bootstrap-app/src/cmr/bootstrap/data/metadata_retrieval/collection_metadata_cache.clj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
(ns cmr.bootstrap.data.metadata-retrieval.collection-metadata-cache | ||
"Defines a cache for catalog item metadata. It currently only stores collections. | ||
The metadata cache contains data like the following: | ||
concept-id -> revision-format-map | ||
* concept-id | ||
* revision-id | ||
* native-format - A key or map of format and version identifying the native format | ||
* various format keys each mapped to compressed metadata." | ||
(:require | ||
[clj-time.core :as t] | ||
[clojure.set :as cset] | ||
[cmr.common-app.data.metadata-retrieval.collection-metadata-cache :as cmn-coll-metadata-cache] | ||
[cmr.common-app.data.metadata-retrieval.revision-format-map :as crfm] | ||
[cmr.bootstrap.data.metadata-retrieval.metadata-transformer :as metadata-transformer] | ||
[cmr.common.config :refer [defconfig]] | ||
[cmr.common.hash-cache :as hash-cache] | ||
[cmr.common.jobs :refer [defjob]] | ||
[cmr.common.log :as log :refer [info]] | ||
[cmr.common.util :as c-util] | ||
[cmr.metadata-db.services.concept-service :as metadata-db] | ||
[cmr.umm-spec.versioning :as umm-version])) | ||
|
||
;; Config value holding the formats to exclude from caching. It is read as EDN and defaults
;; to the empty set; cached-formats calls it as a zero-argument function to obtain the value.
(defconfig non-cached-collection-metadata-formats
  "Defines a set of collection metadata formats that will not be cached in memory"
  {:default #{}
   :type :edn})
|
||
(def all-formats
  "The complete set of collection metadata formats that are candidates for caching. XML
  formats are plain keywords; UMM JSON is a map carrying the format and its version."
  (conj #{:echo10 :iso19115 :dif :dif10}
        ;; Note that when upgrading umm version we should also cache the previous version of UMM.
        {:format :umm-json
         :version umm-version/current-collection-version}))
|
||
(defn cached-formats
  "Returns the set of formats that are cached: everything in all-formats except the formats
  listed by the non-cached-collection-metadata-formats config."
  []
  (let [excluded (non-cached-collection-metadata-formats)]
    (cset/difference all-formats excluded)))
|
||
(defn- concept-tuples->cache-map
  "Fetches the concepts identified by concept-tuples from metadata db, converts each one to a
  compressed revision format map, and returns a map of concept-id to revision format map."
  [context concept-tuples]
  (let [mdb-context (cmn-coll-metadata-cache/context->metadata-db-context context)
        ;; Realize the fetched concepts eagerly before any further processing.
        fetched (doall (metadata-db/get-concepts mdb-context concept-tuples true))
        cleaned (cmn-coll-metadata-cache/concepts-without-xml-processing-inst fetched)
        concept->compressed-rfm (fn [concept]
                                  (crfm/compress
                                   (crfm/concept->revision-format-map
                                    context
                                    concept
                                    (cached-formats)
                                    metadata-transformer/transform-to-multiple-formats
                                    true)))
        rfms (c-util/fast-map concept->compressed-rfm cleaned)]
    ;; Index every revision format map by its concept id.
    (into {} (map (juxt :concept-id identity)) rfms)))
|
||
(defn- fetch-and-store-collections
  "Shared implementation of update-cache and refresh-cache.

  Captures the current time, calls fetch-concept-tuples with the context to obtain the concept
  tuples to load, converts them to cache entries in batches of 1000, then writes the captured
  time (under the incremental-since-refresh-date key) and the new entries to the hash cache.
  Finally logs completion-message followed by the resulting cache size."
  [context fetch-concept-tuples completion-message]
  (let [;; Taken before fetching so the stored date is not later than the fetch itself.
        incremental-since-refresh-date (str (t/now))
        concepts-tuples (fetch-concept-tuples context)
        new-cache-value (reduce #(merge %1 (concept-tuples->cache-map context %2))
                                {}
                                (partition-all 1000 concepts-tuples))
        cache (hash-cache/context->cache context cmn-coll-metadata-cache/cache-key)]
    (hash-cache/set-value cache
                          cmn-coll-metadata-cache/cache-key
                          cmn-coll-metadata-cache/incremental-since-refresh-date-key
                          incremental-since-refresh-date)
    (hash-cache/set-values cache
                           cmn-coll-metadata-cache/cache-key
                           new-cache-value)
    (info completion-message
          (hash-cache/cache-size cache cmn-coll-metadata-cache/cache-key))))

(defn update-cache
  "Updates the collection metadata cache by querying elastic search for updates since the
  last time the cache was updated."
  [context]
  (info "Updating collection metadata cache")
  (fetch-and-store-collections
   context
   cmn-coll-metadata-cache/fetch-updated-collections-from-elastic
   "Metadata cache update complete. Cache Size:"))

(defn refresh-cache
  "Refreshes the collection metadata cache by loading the collections returned by the
  non-incremental elastic search fetch."
  [context]
  (info "Refreshing collection metadata cache")
  (fetch-and-store-collections
   context
   cmn-coll-metadata-cache/fetch-collections-from-elastic
   "Metadata cache refresh complete. Cache Size:"))
|
||
;; Job whose body calls refresh-cache with a context map wrapping the given system.
(defjob RefreshCollectionsMetadataCache
  [ctx system]
  (refresh-cache {:system system}))
|
||
(defn refresh-collections-metadata-cache-job
  "Returns the job definition map for RefreshCollectionsMetadataCache, registered under
  job-key and configured to run daily at hour 6, minute 0."
  [job-key]
  {:job-type RefreshCollectionsMetadataCache
   :job-key job-key
   ;; The time here is UTC. Written without leading zeros on purpose: the Clojure reader
   ;; treats a leading 0 as an octal prefix, so editing the old form to 08 or 09 would
   ;; fail to read.
   :daily-at-hour-and-minute [6 0]})
|
||
;; Job whose body calls update-cache with a context map wrapping the given system.
(defjob UpdateCollectionsMetadataCache
  [ctx system]
  (update-cache {:system system}))
|
||
(defn update-collections-metadata-cache-job
  "Builds the job definition map for UpdateCollectionsMetadataCache, registered under job-key
  and using the interval from the update-collection-metadata-cache-interval config."
  [job-key]
  (let [interval (cmn-coll-metadata-cache/update-collection-metadata-cache-interval)]
    {:job-type UpdateCollectionsMetadataCache
     :job-key job-key
     :interval interval}))
|
||
;; REPL-only helpers: the `comment` form is never evaluated when the namespace loads.
;; The first form refreshes the cache for the bootstrap app of the running dev system,
;; the second passes the bootstrap app's cache to prettify-cache for inspection.
(comment
  (refresh-cache {:system (get-in user/system [:apps :bootstrap])})
  (cmn-coll-metadata-cache/prettify-cache (get-in user/system [:apps :bootstrap :caches cmn-coll-metadata-cache/cache-key])))
122 changes: 122 additions & 0 deletions
122
bootstrap-app/src/cmr/bootstrap/data/metadata_retrieval/metadata_transformer.clj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
(ns cmr.bootstrap.data.metadata-retrieval.metadata-transformer | ||
"Contains functions for converting collection concept metadata into other formats." | ||
(:require | ||
[cheshire.core :as json] | ||
[cmr.common.log :as log :refer (debug info warn error)] | ||
[cmr.common.mime-types :as mt] | ||
[cmr.common.services.errors :as errors] | ||
[cmr.common.util :as c-util] | ||
[cmr.common.xml :as cx] | ||
[cmr.umm-spec.migration.version.core :as vm] | ||
[cmr.umm-spec.umm-json :as umm-json] | ||
[cmr.umm-spec.umm-spec-core :as umm-spec])) | ||
|
||
;; dynamic is here only for testing purposes to test failure cases.
(defn ^:dynamic transform-strategy
  "Picks the strategy keyword used to convert the concept into target-format:
  :current-format when the concept is already in that format, :migrate-umm-json when both the
  concept and the target are UMM JSON, and :umm-spec for every other combination."
  [concept target-format]
  ;; A :native target must be resolved by the caller before reaching this function.
  {:pre [(not= :native target-format)]}
  (let [source-mime-type (:format concept)
        umm-json? (fn [fmt] (= :umm-json (mt/format-key fmt)))]
    (if (= (mt/mime-type->format source-mime-type) target-format)
      ;; No conversion is required - same format and version.
      :current-format
      (if (and (umm-json? source-mime-type) (umm-json? target-format))
        :migrate-umm-json
        :umm-spec))))
|
||
(defn- transform-with-strategy-umm-spec
  "Parses the concept metadata once and generates metadata for every entry of target-formats.
  Returns a map of target format to generated metadata. The third (strategy) argument is
  ignored."
  [context concept _ target-formats]
  (let [parsed (umm-spec/parse-metadata context
                                        (:concept-type concept)
                                        (:format concept)
                                        (:metadata concept))]
    (into {}
          (map (fn [target-format]
                 [target-format (umm-spec/generate-metadata context parsed target-format)]))
          target-formats)))
|
||
(defn transform-with-strategy-migrate-umm-json-to-target-format
  "Migrates the UMM JSON metadata from source-version to the version named by target-format,
  strips nils and empty maps/seqs, encodes the result as JSON and returns translated-map with
  that JSON associated under target-format."
  [context concept-type source-version metadata translated-map target-format]
  (let [target-version (umm-spec/umm-json-version concept-type target-format)
        source-umm (json/decode metadata true)
        migrated (vm/migrate-umm context concept-type source-version target-version source-umm)]
    (->> migrated
         c-util/remove-nils-empty-maps-seqs
         umm-json/umm->json
         (assoc translated-map target-format))))
|
||
(defn- transform-with-strategy-migrate-umm-json
  "Migrates a UMM JSON concept to every entry of target-formats, logs how long the migration
  took together with details of the concept, and returns a map of target format to migrated
  JSON. The third (strategy) argument is ignored."
  [context concept _ target-formats]
  (let [concept-mime-type (:format concept)
        metadata (:metadata concept)
        concept-type (:concept-type concept)
        source-version (umm-spec/umm-json-version concept-type concept-mime-type)
        migrate-into (fn [translated-map target-format]
                       (transform-with-strategy-migrate-umm-json-to-target-format
                        context
                        concept-type
                        source-version
                        metadata
                        translated-map
                        target-format))
        [t result] (c-util/time-execution (reduce migrate-into {} target-formats))]
    (info "transform-with-strategy migrate-umm-json: "
          "time: " t
          "concept-mime-type: " concept-mime-type
          "concept-type: " concept-type
          "parent request num-concepts: " (:num-concepts concept)
          "target-formats: " target-formats
          "source version: " source-version
          "provider: " (:provider-id concept)
          "metadata length: " (count metadata))
    result))
|
||
(defn transform-with-strategy
  "Dispatches on the transformation strategy and returns a map of format to metadata.
  :current-format returns the concept's own metadata (XML processing instructions removed)
  keyed by its own format; :migrate-umm-json and :umm-spec delegate to their helpers. Any
  other strategy raises an internal error."
  [context concept strategy target-formats]
  (condp = strategy
    :current-format
    {(mt/mime-type->format (:format concept))
     (cx/remove-xml-processing-instructions (:metadata concept))}

    :migrate-umm-json
    (transform-with-strategy-migrate-umm-json context concept strategy target-formats)

    :umm-spec
    (transform-with-strategy-umm-spec context concept strategy target-formats)

    ;; No clause matched: unknown strategy.
    (errors/internal-error!
     (format "Unexpected transform strategy [%s] from concept of type [%s] to [%s]"
             strategy (:format concept) (pr-str target-formats)))))
|
||
(defn transform-to-multiple-formats-with-strategy
  "Transforms a concept using one [strategy target-formats] pair and returns the resulting map
  of format to metadata. When ignore-exceptions? is truthy, any Throwable raised by the
  transformation is logged and the result of the logging call is returned in place of a
  translation map; otherwise the exception propagates."
  [context concept ignore-exceptions? [strategy target-formats]]
  (let [transform #(transform-with-strategy context concept strategy target-formats)]
    (if-not ignore-exceptions?
      (transform)
      (try
        (transform)
        (catch Throwable e
          (log/error
           e
           (str "Ignoring exception while trying to transform metadata for concept "
                (:concept-id concept) " with revision " (:revision-id concept) " error: "
                (.getMessage e))))))))
|
||
(defn transform-to-multiple-formats
  "Converts the concept into each of target-formats and returns a map of target format to
  metadata. Target formats are grouped by transformation strategy so each strategy runs once
  for all of its formats; groups that yield nil are left out of the result. The concept must
  not be deleted."
  [context concept target-formats ignore-exceptions?]
  {:pre [(not (:deleted concept))]}
  (let [formats-by-strategy (group-by (fn [target-format]
                                        (transform-strategy concept target-format))
                                      target-formats)
        translate (fn [strategy-and-formats]
                    (transform-to-multiple-formats-with-strategy
                     context concept ignore-exceptions? strategy-and-formats))]
    (reduce into {} (keep translate formats-by-strategy))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
33 changes: 33 additions & 0 deletions
33
bootstrap-app/test/cmr/bootstrap/test/data/metadata_retrieval/metadata_transformer.clj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
(ns cmr.bootstrap.test.data.metadata-retrieval.metadata-transformer | ||
(:require | ||
[clojure.java.io :as io] | ||
[clojure.test :refer :all] | ||
[cmr.common.mime-types :as mt] | ||
[cmr.common.util :as util] | ||
[cmr.bootstrap.data.metadata-retrieval.metadata-transformer :as metadata-transformer])) | ||
|
||
;; Holds the value of transform-strategy as it was when this namespace loaded, so the test
;; below can delegate to it while the var itself is rebound.
(def original-transform-strategy metadata-transformer/transform-strategy)
|
||
;; Test fixture data
(def dif10-concept
  "A fake collection concept map whose metadata is the sample DIF10 collection XML."
  {:concept-type :collection
   :concept-id "C1-PROV1"
   :revision-id 1
   :format mt/dif10
   :metadata (slurp (io/resource "example-data/dif10/sample_collection.xml"))})
|
||
;; Verifies that, with ignore-exceptions? set, a failing transformation is skipped and the
;; remaining target formats are still produced.
(deftest transform-to-multiple-formats-test
  (testing "Skipping exceptions if failed"
    (let [num-calls (atom 0)
          ;; Delegates to the real strategy function except on the second call, where it
          ;; returns a strategy keyword that transform-with-strategy does not recognize.
          bad-transform-strategy (fn [& args]
                                   (if (= (swap! num-calls inc) 2)
                                     :weird-transform-strategy
                                     (apply original-transform-strategy args)))]
      (with-bindings {#'metadata-transformer/transform-strategy bad-transform-strategy}
        (let [actual (metadata-transformer/transform-to-multiple-formats
                      ;; The second strategy lookup (for :dif) yields the unknown strategy,
                      ;; so :dif is excluded from the output.
                      ;; transform-to-multiple-formats takes exactly four arguments:
                      ;; context, concept, target-formats, ignore-exceptions?
                      {} dif10-concept [:echo10 :dif :iso19115] true)
              actual-formats (set (keys actual))]
          ;; We only check the generated formats, not the actual metadata generated for simplicity reasons
          (is (= #{:echo10 :iso19115} actual-formats)))))))
Oops, something went wrong.