Skip to content

Commit

Permalink
Refactoring configurations and examples.
Browse files Browse the repository at this point in the history
  • Loading branch information
rhysrevans3 committed Nov 11, 2024
1 parent 1a47f9b commit 53f9c5a
Show file tree
Hide file tree
Showing 47 changed files with 1,030 additions and 1,349 deletions.
8 changes: 5 additions & 3 deletions example/conf/collection-generator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ recipes_root: recipes/
# The input plugins to be run for the generator
inputs:
- name: text_file
filepath: input/collections.txt
conf:
filepath: input/collections.txt

# The output plugins to be run for the generator
outputs:
Expand All @@ -18,8 +19,9 @@ outputs:
stac_version: '1.0.0'
stac_extensions: []
- name: json_file
dirpath: output/collections
filename_term: id
conf:
dirpath: output/collections
filename_term: id
mappings:
- name: stac
stac_version: '1.0.0'
Expand Down
14 changes: 5 additions & 9 deletions example/conf/item-generator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ recipes_root: recipes/
# The input plugins to be run for the generator
inputs:
- name: text_file
filepath: input/assets.txt
conf:
filepath: input/assets.txt

# The output plugins to be run for the generator
outputs:
Expand All @@ -17,12 +18,7 @@ outputs:
- name: stac
stac_version: '1.0.0'
stac_extensions: []
- name: json_file
dirpath: output/items
filename_term: id
mappings:
- name: stac
stac_version: '1.0.0'
stac_extensions: []
- name: text_file
filepath: input/collections.txt
conf:
filepath: input/collections.txt

13 changes: 9 additions & 4 deletions example/recipes/collection/CMIP6.CMIP.MOHC.UKESM1-0-LL.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
# The type of STAC record that will be generated
# The type of record that will be generated
type: collection

# These extraction methods will be run after `extraction_methods` and should generate the id of the record
# This section is optional and can be run separately to find the id of a record
id:
- method: default
inputs:
defaults:
collection_id: cmip6
id: dkrz

# The extraction methods are run in series, with the output dictionary passed from one to the next.
# Extraction methods add, update or remove data from the output dictionary
extraction_methods:
- method: default
inputs:
defaults:
id: dkrz

- method: json_file
inputs:
dirpath: output/items/
terms:
properties:
- mip_era
- activity_id
- institution_id
Expand Down
27 changes: 16 additions & 11 deletions example/recipes/item/CMIP6.CMIP.MOHC.UKESM1-0-LL.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,19 @@ paths:
- https://cmip6-zarr-o.s3-ext.jc.rl.ac.uk/CMIP6.CMIP.MOHC.UKESM1-0-LL
- https://cmip6-zarr-o.s3-ext.jc.rl.ac.uk/CMIP6.C4MIP.MOHC.UKESM1-0-LL

# The type of STAC record that will be generated
# The type of record that will be generated
type: item

# These extraction methods will be run after `extraction_methods` and should generate the id of the record
# This section is optional and can be run separately to find the id of a record
id:
- method: default
- method: regex
inputs:
regex: '\/(?P<mip_era>\w*)\.(?P<activity_id>\w*)\.(?P<institution_id>[\w-]*)\.(?P<source_id>[\w-]*)\/(?P<experiment_id>[\w-]*)\.(?P<member_id>\w*)\.(?P<table_id>\w*)\.(?P<var_id>\w*)\.(?P<grid_label>\w*)\.(?P<version>\w*)'

- method: string_template
inputs:
defaults:
item_id: $instance_id
template: '{mip_era}.{activity_id}.{institution_id}.{source_id}.{table_id}.{var_id}.{version}'
output_key: id

# The extraction methods are run in series, with the output dictionary passed from one to the next.
# Extraction methods add, update or remove data from the output dictionary
Expand All @@ -23,13 +27,14 @@ extraction_methods:
- method: string_template
inputs:
template: '{mip_era}.{activity_id}.{institution_id}.{source_id}.{table_id}.{var_id}.{version}'
output_key: instance_id
output_key: id

# Some extraction methods generate assets, which can also include their own list of extraction methods to be run on the assets
- method: intake_assets
- method: assets
inputs:
uri: https://raw.githubusercontent.com/cedadev/cmip6-object-store/master/catalogs/ceda-zarr-cmip6.json
object_path_attr: zarr_path
backend: intake_esm
input_term: https://raw.githubusercontent.com/cedadev/cmip6-object-store/master/catalogs/ceda-zarr-cmip6.json
href_term: zarr_path
search_kwargs:
mip_era: $mip_era
activity_id: $activity_id
Expand All @@ -47,7 +52,7 @@ extraction_methods:
- method: lambda
inputs:
function: 'lambda assets: {f"data{str(en+1).zfill(4)}": assets[key] for en, key in enumerate(sorted(assets))}'
input_args:
args:
- $assets
output_key: assets

Expand All @@ -58,4 +63,4 @@ extraction_methods:

# `member_of` lists the other recipes that define a parent of this record
member_of:
- recipes/collection/CMIP6.CMIP.MOHC.UKESM1-0-LL.yaml
- recipes/collection/CMIP6.CMIP.MOHC.UKESM1-0-LL.yaml
26 changes: 12 additions & 14 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ homepage = "https://github.com/cedadev/stac-generator"
keywords = ['stac', 'metadata', 'plugin', 'framework']

packages = [
{ include = "stac_generator"},
{ include = "stac_generator" }
]

[tool.poetry.scripts]
stac_generator = 'stac_generator.scripts.stac_generator:main'
recipe_keys = 'stac_generator.scripts.recipe_keys:main'

[tool.poetry.dependencies]
python = "^3.10"
Expand Down Expand Up @@ -89,37 +90,34 @@ ignore_missing_imports = true
packages = "stac_generator"

[tool.poetry.plugins."stac_generator.inputs"]
elasticsearch_aggregation = "stac_generator.plugins.inputs.elasticsearch_aggregation:ElasticsearchAggregationInput"
file_system = "stac_generator.plugins.inputs.file_system:FileSystemInput"
object_store = "stac_generator.plugins.inputs.object_store:ObjectStoreInput"
intake_esm = "stac_generator.plugins.inputs.intake_esm:IntakeESMInput"
object_store = "stac_generator.plugins.inputs.object_store:ObjectStoreInput"
rabbitmq = "stac_generator.plugins.inputs.rabbit_mq:RabbitMQInput"
thredds = "stac_generator.plugins.inputs.thredds:ThreddsInput"
text_file = "stac_generator.plugins.inputs.text_file:TextFileInput"
solr = "stac_generator.plugins.inputs.solr:SolrInput"
elasticsearch = "stac_generator.plugins.inputs.elasticsearch:ElasticsearchInput"
text_file = "stac_generator.plugins.inputs.text_file:TextFileInput"
thredds = "stac_generator.plugins.inputs.thredds:ThreddsInput"

[tool.poetry.plugins."stac_generator.outputs"]
standard_out = "stac_generator.plugins.outputs.standard_out:StandardOutOutput"
standard_out_bulk = "stac_generator.plugins.bulk_outputs.standard_out:StandardOutBulkOutput"
elasticsearch = "stac_generator.plugins.outputs.elasticsearch:ElasticsearchOutput"
elasticsearch_bulk = "stac_generator.plugins.bulk_outputs.elasticsearch:ElasticsearchBulkOutput"
stacapi = "stac_generator.plugins.outputs.stacapi_backend:StacApiOutputBackend"
text_file = "stac_generator.plugins.outputs.text_file:TextFileOutput"
intake_esm = "stac_generator.plugins.outputs.intake_esm:IntakeESMOutput"
json_file = "stac_generator.plugins.outputs.json_file:JsonFileOutput"
rabbitmq = "stac_generator.plugins.outputs.rabbit_mq:RabbitMQOutput"
rabbitmq_bulk = "stac_generator.plugins.bulk_outputs.rabbit_mq:RabbitMQBulkOutput"
intake_esm = "stac_generator.plugins.outputs.intake_esm:IntakeESMOutput"
stac_fastapi = "stac_generator.plugins.outputs.stac_fastapi:STACFastAPIOutput"
standard_out = "stac_generator.plugins.outputs.standard_out:StandardOutOutput"
standard_out_bulk = "stac_generator.plugins.bulk_outputs.standard_out:StandardOutBulkOutput"
text_file = "stac_generator.plugins.outputs.text_file:TextFileOutput"

[tool.poetry.plugins."stac_generator.mappings"]
ceda = "stac_generator.plugins.mappings.ceda:CEDAMapping"
stac = "stac_generator.plugins.mappings.stac:STACMapping"
jinja = "stac_generator.plugins.mappings.jinja2:Jinja2Mapping"

[tool.poetry.plugins."stac_generator.generators"]
asset = "stac_generator.plugins.generators.asset:AssetGenerator"
item = "stac_generator.plugins.generators.item:ItemGenerator"
collection = "stac_generator.plugins.generators.collection:CollectionGenerator"
[tool.poetry.plugins."stac_generator.generator"]
generator = "stac_generator.core.generator:Generator"

[build-system]
requires = ["poetry-core"]
Expand Down
27 changes: 18 additions & 9 deletions stac_generator/core/bulk_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,38 @@
__license__ = "BSD - see LICENSE file in top-level package directory"
__contact__ = "[email protected]"

from abc import ABC, abstractmethod
from abc import abstractmethod

from cachetools import Cache
from pydantic import BaseModel, Field

from stac_generator.core.process_config import SetConfig

class BaseBulkOutput(ABC):

class BulkOutputConf(BaseModel):
    """Bulk output config model."""

    # Must be an integer: BulkOutput builds its cache with
    # ``Cache(maxsize=cache_max_size + 1)`` and compares the cache's
    # ``currsize`` against this value, both of which fail for a ``str``.
    # The default of 100 preserves the fallback the pre-refactor
    # ``__init__`` applied when no ``cache_max_size`` was supplied.
    cache_max_size: int = Field(
        default=100,
        description="Max size of cache.",
    )


class BulkOutput(SetConfig):
"""
Base class to define a bulk output
"""

config_class = BulkOutputConf

def __init__(self, **kwargs):
"""
Set the kwargs to generate instance attributes of the same name and create cache
:param kwargs:
"""
for k, v in kwargs.items():
setattr(self, k, v)

if not hasattr(self, "cache_max_size"):
self.cache_max_size = 100
super().__init__(**kwargs)

self.data_cache = Cache(maxsize=self.cache_max_size + 1)
self.data_cache = Cache(maxsize=self.conf.cache_max_size + 1)

def __del__(self):
self.clear_cache()
Expand Down Expand Up @@ -68,7 +77,7 @@ def run(self, data: dict) -> None:
# add to cache
self.data_cache.update(self.data_to_cache(data))

if self.data_cache.currsize >= self.cache_max_size:
if self.data_cache.currsize >= self.conf.cache_max_size:
self.clear_cache()

def clear_cache(self) -> None:
Expand Down
53 changes: 0 additions & 53 deletions stac_generator/core/extraction_method.py

This file was deleted.

Loading

0 comments on commit 53f9c5a

Please sign in to comment.