diff --git a/.readthedocs.yml b/.readthedocs.yml index f0ec3db5..5944fdae 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,15 +1,15 @@ +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + version: 2 + build: - os: ubuntu-20.04 + os: ubuntu-22.04 tools: - python: "3.10" -formats: - - htmlzip -sphinx: - configuration: docs/conf.py + python: "3.11" + +mkdocs: + configuration: mkdocs.yml + python: install: - - requirements: docs/requirements.txt - - method: pip - path: . - system_packages: true \ No newline at end of file + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index b1f8e177..1324b246 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ ### Added - User-defined output path for csv, xml, database [#402](https://github.com/OpenEnergyPlatform/open-MaStR/pull/402) - Add date=existing parameter to Mastr.download [#452](https://github.com/OpenEnergyPlatform/open-MaStR/pull/452) +- Update documentation and release it on mkdocs [#460](https://github.com/OpenEnergyPlatform/open-MaStR/pull/460) - Replace values in ArtDerFlaecheIds with their entries from katalogwerte [#464](https://github.com/OpenEnergyPlatform/open-MaStR/pull/464) - Add a Mastr.translate method for english translation [##471](https://github.com/OpenEnergyPlatform/open-MaStR/pull/471) diff --git a/README.rst b/README.rst index 0b0b51b7..9d2db869 100644 --- a/README.rst +++ b/README.rst @@ -20,11 +20,13 @@ open-mastr * - Tests - |badge_ci| * - Publication - - |badge_pypi| |badge_zenodo| + - |badge_pypi| + * - Data Publication + - |badge_zenodo| * - Development - |badge_issue_open| |badge_issue_closes| |badge_pr_open| |badge_pr_closes| * - Community - - |badge_contributing| |badge_contributors| |badge_repo_counts| |PyPI download month| + - |badge_contributing| |badge_contributors| |badge_repo_counts| |PyPI download month| |Total PyPI downloads| .. contents:: @@ -53,7 +55,7 @@ The MaStR data can be Documentation ============= -| The documentation is in `sphinx `_ reStructuredText format in the ``doc`` sub-folder of the repository. +| The documentation is in `Material for Mkdocs `_ markdown format in the ``doc`` sub-folder of the repository. | Find the `documentation `_ hosted on ReadTheDocs. | The original API documentation can be found on the `Webhilfe des Marktstammdatenregisters `_. @@ -97,7 +99,7 @@ Install the package with .. code-block:: python - python setup.py install + pip install "open_mastr[dev]" Examples of Usage @@ -152,7 +154,7 @@ Data :alt: PyPI .. |badge_zenodo| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.6807426.svg - :target: https://doi.org/10.5281/zenodo.6807426 + :target: https://doi.org/10.5281/zenodo.6807425 :alt: zenodo .. |badge_issue_open| image:: https://img.shields.io/github/issues-raw/OpenEnergyPlatform/open-MaStR @@ -177,4 +179,9 @@ Data :alt: counter .. |PyPI download month| image:: https://img.shields.io/pypi/dm/open-mastr?label=PyPi%20Downloads - :target: https://pypi.org/project/open-mastr/ + :target: https://pypistats.org/packages/open-mastr + +.. |Total PyPI downloads| image:: https://static.pepy.tech/badge/open-mastr + :target: https://pepy.tech/project/open-mastr + + diff --git a/RELEASE_PROCEDURE.md b/RELEASE_PROCEDURE.md index 3ccb2437..79d2d743 100644 --- a/RELEASE_PROCEDURE.md +++ b/RELEASE_PROCEDURE.md @@ -41,14 +41,7 @@ It always has the format `YYYY-MM-DD`, e.g. `2022-05-16`. 
* [Draft a new release](https://github.com/OpenEnergyPlatform/open-MaStR/releases/new)
 * Enter the release version number `v0.12.1` as title
 * Summarize key changes in the description
-    * `## [v0.12.1] Patch - Name - Date`
-    * `### Added`
-    * `### Changed`
-    * `### Removed`
-* Add a link to compare versions
-    * `**Compare versions:** [v0.12.0 - v0.12.1](https://github.com/OpenEnergyPlatform/open-MaStR/compare/v0.12.0...v0.12.1)`
-* Add a link to the `📝CHANGELOG.md`
-    * `Also see [**CHANGELOG.md**](https://github.com/OpenEnergyPlatform/open-MaStR/blob/production/CHANGELOG.md)`
+    * Use the `Generate release notes` button provided by GitHub
 * Save draft

### 4. 🐙 Finish all planned Developments
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
index 0a9e3c5a..d55f0e15 100644
--- a/docs/_static/custom.css
+++ b/docs/_static/custom.css
@@ -1,3 +1,7 @@
 .wy-nav-content {
     max-width: 80% !important;
+}
+
+:not([data-md-state="blur"]) + nav {
+    display: none;
 }
\ No newline at end of file
diff --git a/docs/advanced.md b/docs/advanced.md
new file mode 100644
index 00000000..2e9f55e8
--- /dev/null
+++ b/docs/advanced.md
@@ -0,0 +1,409 @@
+For most users, the functionalities described in [Getting Started](/getting_started) are sufficient. If you want to examine how you can configure the package's behavior for your own needs, check out [Configuration](#configuration). Or you can explore the two main functionalities of the package, namely the [Bulk Download](#bulk-download)
+or the [SOAP API download](#soap-api-download).
+
+## Configuration
+### Database settings
+
+Configure your database with the `engine` parameter of [`Mastr`][open_mastr.Mastr].
+It defines the engine of the database that the MaStR is mirrored to. Default is 'sqlite'.
+
+The possible databases are:
+
+* **sqlite**: By default the database will be stored in `$HOME/.open-MaStR/data/sqlite/open-mastr.db`.
+* **own database**: The Mastr class accepts a sqlalchemy.engine.Engine object as engine, which enables the user to
+  use any other desired database.
+  If you do so, you need to insert the connection parameters into the engine variable. It'll look like this:
+
+```python
+from sqlalchemy import create_engine
+
+engine_postgres = create_engine("postgresql+psycopg2://open-mastr:open-mastr@localhost:55443/open-mastr")
+engine_sqlite = create_engine("sqlite:///path/to/sqlite/database.db")
+db = Mastr(engine=engine_sqlite)
+```
+
+### Project directory
+
+The directory `$HOME/.open-MaStR` is automatically created. It is used to store configuration files and save data.
+Default config files are copied to this directory and can be modified - but with caution.
+The project home directory is structured as follows (files and folders below `data/` are just an example).
+
+```bash
+.open-MaStR/
+├── config
+│   ├── credentials.cfg
+│   ├── filenames.yml
+│   └── logging.yml
+├── data
+│   ├── dataversion-
+│   ├── sqlite
+│   │   └── open-mastr.db
+│   └── xml_download
+│       └── Gesamtdatenexport_.zip
+└── logs
+    └── open_mastr.log
+```
+
+* **config**
+    * `credentials.cfg`<br>
+ Credentials used to access + [Marktstammdatenregister (MaStR)](https://www.marktstammdatenregister.de/MaStR) API + * `filenames.yml`
+ File names are defined here. + * `logging.yml`
+ Logging configuration. For changing the log level to increase or decrease details of log + messages, edit the level of the handlers. +* **data** + * `dataversion-`
+ Contains exported data as csv files from method [`to_csv`][open_mastr.Mastr.to_csv] + * `sqlite`
+ Contains the sqlite database in `open-mastr.db` + * `xml_download`
+ Contains the bulk download in `Gesamtdatenexport_.zip`
+ New bulk download versions overwrite older versions. +* **logs** + * `open_mastr.log`
The file stores the logging information from executing open-mastr.
+
+
+### Logs
+
+For the download via the API, logs are stored in a single file in `$HOME/.open-MaStR/logs/open_mastr.log`.
+New logging messages are appended. It is recommended to delete the log file from time to time because of the disk space it requires.
+
+
+### Data
+
+If the zipped dump of the MaStR is downloaded, it is saved in the folder `$HOME/.open-MaStR/data/xml_download`.
+
+The data can then be written to any SQL database supported by [sqlalchemy](https://docs.sqlalchemy.org/). The type of the SQL database is determined by the parameter `engine` of the [`Mastr`][open_mastr.Mastr] class.
+
+For more information regarding the database see [Database settings](#database-settings).
+
+
+## Bulk download
+
+On the homepage [MaStR/Datendownload](https://www.marktstammdatenregister.de/MaStR/Datendownload) a zipped folder containing the whole
+MaStR is offered. The data is delivered as xml files. The official documentation can be found
+on the same page (in German). This data is updated on a daily basis.
+
+``` mermaid
+flowchart LR
+  id1("📥 Download raw data
+  from marktstammdatenregister.de") --> id2(Write xml files to database)
+  id2 --> id3[("📗 open-mastr database")]
+  id3 --> id4("🔧 Decode and cleanse data")
+  id4 --> id3
+  id3 --> id5("Merge corresponding tables
+  and save as csv")
+  id5 --> id6>"📜 open-mastr csv files"]
+  click id1 "https://www.marktstammdatenregister.de/MaStR/Datendownload" _blank
+  click id2 "https://github.com/OpenEnergyPlatform/open-MaStR/blob/7b155a9ebdd5204de8ae6ba7a96036775a1f4aec/open_mastr/xml_download/utils_write_to_database.py#L17C6-L17C6" _blank
+  click id4 "https://github.com/OpenEnergyPlatform/open-MaStR/blob/7b155a9ebdd5204de8ae6ba7a96036775a1f4aec/open_mastr/xml_download/utils_cleansing_bulk.py#L10" _blank
+  click id5 "https://github.com/OpenEnergyPlatform/open-MaStR/blob/7b155a9ebdd5204de8ae6ba7a96036775a1f4aec/open_mastr/mastr.py#L288" _blank
+  click id6 "https://doi.org/10.5281/zenodo.6807425" _blank
+```
+
+
+The following describes the process that is started when calling the [`Mastr.download`][open_mastr.Mastr.download] function with the parameter `method="bulk"`.
+First, the zipped files are downloaded and saved in `$HOME/.open-MaStR/data/xml_download`. The zipped folder contains many xml files,
+which represent the different tables of the MaStR. Those tables are then parsed to a sqlite database. If only some specific
+tables are of interest, they can be specified with the parameter `data`. Every table that is selected in `data` will be deleted from the local database, if existent, and then filled with data from the xml files.
+
+In the next step, a basic data cleansing is performed. Many entries in the MaStR from the bulk download are replaced by numbers.
+As an example, instead of writing the German states where the unit is registered (Saxony, Brandenburg, Bavaria, ...) the MaStR lists the
+corresponding digits (7, 2, 9, ...). One major step of cleansing is therefore to replace those digits with their original meaning.
+Moreover, the datatypes of different entries are set in the data cleansing process and corrupted files are repaired.
+
+If needed, the tables in the database can be obtained as csv files. Those files are created by first merging corresponding tables (e.g. all tables that contain information about solar) and then dumping those tables to `.csv` files with the [`to_csv`][open_mastr.Mastr.to_csv] method.
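+
+The whole bulk workflow can be scripted in a few lines. Here is a minimal sketch (the table names follow the options of the `data` parameter, and the default paths are the ones described in [Configuration](#configuration)):
+
+```python
+from open_mastr import Mastr
+
+# Download only the solar and wind tables from the daily bulk dump
+# and write them to the default sqlite database.
+db = Mastr()
+db.download(method="bulk", data=["solar", "wind"])
+
+# Merge and export the freshly filled tables as csv files.
+db.to_csv(["solar", "wind"])
+```
+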
+ +=== "Advantages" + * No registration for an API key is needed + * Download of the whole dataset is possible + +=== "Disadvantages" + * No single tables or entries can be downloaded + * Download takes long time + + +## SOAP API download + +### MaStR account and credentials + + +For downloading data from the +[Marktstammdatenregister (MaStR) database](https://www.marktstammdatenregister.de/MaStR) +via its API a [registration](https://www.marktstammdatenregister.de/MaStRHilfe/files/regHilfen/201108_Handbuch%20f%C3%BCr%20Registrierungen%20durch%20Dienstleister.pdf) is mandatory. + +To download data from the MaStR API using the `open-MaStR`, the credentials (MaStR user and token) need to be provided in a certain way. Three options exist: + +1. **Credentials file:** + Both, user and token, are stored in plain text in the credentials file. + For storing the credentials in the credentials file (plus optionally using keyring for the token) simply instantiate + [`MaStRDownload`][open_mastr.soap_api.download.MaStRDownload] once and you get asked for a user name and a token. The + information you insert will be used to create the credentials file. + + It is also possible to create the credentials file by hand, using this format: + + ``` + [MaStR] + user = SOM123456789012 + token = msöiöo8u2o29933n31733m§=§1n33§304n... # optional, 540 characters + ``` + + The `token` should be written in one line, without line breaks. + + The credentials file needs to be stored at: `$HOME/.open-MaStR/config/credentials.cfg` + +2. **Credentials file + keyring:** + The user is stored in the credentials file, while the token is stored encrypted in the [keyring](https://pypi.org/project/keyring/). + + Read in the documentation of the keyring library how to store your token in the + keyring. + +3. **Don't store:** + Just use the password for one query and forget it + + The latter option is only available when using [`MaStRAPI`][open_mastr.soap_api.download.MaStRAPI]. + Instantiate with + + ```python + MaStRAPI(user='USERNAME', key='TOKEN') + ``` + + to provide user and token in a script and use these + credentials in subsequent queries. + +### MaStRAPI + +You can access the MaStR data via API by using the class `MaStRAPI` directly if you have the API credentials +configured correctly. Use the code snippet below for queries. + + +```python +from open_mastr.soap_api.download import MaStRAPI + +if __name__ == "__main__": + + mastr_api = MaStRAPI() + print(mastr_api.GetLokaleUhrzeit()) +``` + +For API calls and their optional parameters refer to [API documentation](https://www.marktstammdatenregister. +de/MaStRHilfe/subpages/webdienst.html). + +???+ example "Example queries and their responses" + + === "mastr_api.GetLokaleUhrzeit()" + + Response: + ```python + { + 'Ergebniscode': 'OK', + 'AufrufVeraltet': False, + 'AufrufLebenszeitEnde': None, + 'AufrufVersion': 1, + 'LokaleUhrzeit': datetime.datetime(2023, 9, 25, 8, 1, 46, 24766, tzinfo=) + } + ``` + + API function name: `GetLokaleUhrzeit`
+ Example query: `mastr_api.GetLokaleUhrzeit()`
Parameter: `None`
+
+
+    === "mastr_api.GetListeAlleEinheiten(limit=1)"
+
+        Response:
+        ```python
+        {
+            "Ergebniscode": "OkWeitereDatenVorhanden",
+            "AufrufVeraltet": False,
+            "AufrufLebenszeitEnde": None,
+            "AufrufVersion": 1,
+            "Einheiten": [
+                {
+                    "EinheitMastrNummer": "SEE984033548619",
+                    "DatumLetzeAktualisierung": datetime.datetime(
+                        2020, 2, 20, 16, 28, 35, 250812
+                    ),
+                    "Name": "Photovoltaikanlage ERWin4",
+                    "Einheitart": "Stromerzeugungseinheit",
+                    "Einheittyp": "Solareinheit",
+                    "Standort": "48147 Münster",
+                    "Bruttoleistung": Decimal("3.960"),
+                    "Erzeugungsleistung": None,
+                    "EinheitSystemstatus": "Aktiv",
+                    "EinheitBetriebsstatus": "InBetrieb",
+                    "Anlagenbetreiber": "ABR949444220202",
+                    "EegMastrNummer": "EEG920083771065",
+                    "KwkMastrNummer": None,
+                    "SpeMastrNummer": None,
+                    "GenMastrNummer": None,
+                    "BestandsanlageMastrNummer": None,
+                    "NichtVorhandenInMigriertenEinheiten": None,
+                }
+            ],
+        }
+        ```
+
+        API function name: `GetListeAlleEinheiten`<br>
+ Example query: `mastr_api.GetListeAlleEinheiten(limit=1)` + + | Parameter | Description | + |------------------------|-----------------------------------------------------------------------------------------------------------| + | marktakteurMastrNummer | The MaStR number of the requested unit | + | startAb | Sets the beginning of the records to be delivered [default value: 1] | + | datumAb | Restrict the amount of data to be retrieved to changed data from the specified date [Default value: NULL] | + | limit | Limit of the maximum data records to be delivered [default/maximum value: maximum of own limit] | + | einheitMastrNummern[] | | + + === "mastr_api.GetEinheitSolar(einheitMastrNummer="SEE984033548619")" + + Response: + ```python + { + "Ergebniscode": "OK", + "AufrufVeraltet": False, + "AufrufLebenszeitEnde": None, + "AufrufVersion": 1, + "EinheitMastrNummer": "SEE984033548619", + "DatumLetzteAktualisierung": datetime.datetime(2020, 2, 20, 16, 28, 35, 250812), + "LokationMastrNummer": "SEL948991715391", + "NetzbetreiberpruefungStatus": "Geprueft", + "Netzbetreiberzuordnungen": [ + { + "NetzbetreiberMastrNummer": "SNB980883363112", + "NetzbetreiberpruefungsDatum": datetime.date(2020, 2, 25), + "NetzbetreiberpruefungsStatus": "Geprueft", + } + ], + "NetzbetreiberpruefungDatum": datetime.date(2020, 2, 25), + "AnlagenbetreiberMastrNummer": "ABR949444220202", + "NetzbetreiberMastrNummer": ["SNB980883363112"], + "Land": "Deutschland", + "Bundesland": "NordrheinWestfalen", + "Landkreis": "Münster", + "Gemeinde": "Münster", + "Gemeindeschluessel": "05515000", + "Postleitzahl": "48147", + "Gemarkung": None, + "FlurFlurstuecknummern": None, + "Strasse": None, + "StrasseNichtGefunden": False, + "Hausnummer": {"Wert": None, "NichtVorhanden": False}, + "HausnummerNichtGefunden": False, + "Adresszusatz": None, + "Ort": "Münster", + "Laengengrad": None, + "Breitengrad": None, + "UtmZonenwert": None, + "UtmEast": None, + "UtmNorth": None, + "GaussKruegerHoch": None, + "GaussKruegerRechts": None, + "Registrierungsdatum": datetime.date(2019, 2, 1), + "GeplantesInbetriebnahmedatum": None, + "Inbetriebnahmedatum": datetime.date(2007, 7, 20), + "DatumEndgueltigeStilllegung": None, + "DatumBeginnVoruebergehendeStilllegung": None, + "DatumWiederaufnahmeBetrieb": None, + "EinheitSystemstatus": "Aktiv", + "EinheitBetriebsstatus": "InBetrieb", + "BestandsanlageMastrNummer": None, + "NichtVorhandenInMigriertenEinheiten": None, + "AltAnlagenbetreiberMastrNummer": None, + "DatumDesBetreiberwechsels": None, + "DatumRegistrierungDesBetreiberwechsels": None, + "NameStromerzeugungseinheit": "Photovoltaikanlage ERWin4", + "Weic": {"Wert": None, "NichtVorhanden": False}, + "WeicDisplayName": None, + "Kraftwerksnummer": {"Wert": None, "NichtVorhanden": False}, + "Energietraeger": "SolareStrahlungsenergie", + "Bruttoleistung": Decimal("3.960"), + "Nettonennleistung": Decimal("3.960"), + "Schwarzstartfaehigkeit": None, + "Inselbetriebsfaehigkeit": None, + "Einsatzverantwortlicher": None, + "FernsteuerbarkeitNb": False, + "FernsteuerbarkeitDv": None, + "FernsteuerbarkeitDr": None, + "Einspeisungsart": "Volleinspeisung", + "PraequalifiziertFuerRegelenergie": None, + "GenMastrNummer": None, + "zugeordneteWirkleistungWechselrichter": Decimal("4.000"), + "GemeinsamerWechselrichterMitSpeicher": "KeinStromspeicherVorhanden", + "AnzahlModule": 22, + "Lage": "BaulicheAnlagen", + "Leistungsbegrenzung": "Nein", + "EinheitlicheAusrichtungUndNeigungswinkel": True, + "Hauptausrichtung": "Sued", + "HauptausrichtungNeigungswinkel": 
"Grad20Bis40", + "Nebenausrichtung": "None", + "NebenausrichtungNeigungswinkel": "None", + "InAnspruchGenommeneFlaeche": None, + "ArtDerFlaeche": [], + "InAnspruchGenommeneAckerflaeche": None, + "Nutzungsbereich": "Haushalt", + "Buergerenergie": None, + "EegMastrNummer": "EEG920083771065", + } + ``` + + API function name: `GetEinheitSolar`
+ Example query: `mastr_api.GetEinheitSolar(einheitMastrNummer="SEE984033548619")`
| Parameter | Description |
+        |--------------------------|-------------------------------------------------------------------|
+        | `apiKey` | The web service key for validation |
+        | `marktakteurMastrNummer` | The MaStR number of the market actor used by the web service user |
+        | `einheitMastrNummer` | The MaStR number of the requested unit |
+
+
+??? note "Why can't I just query all information of all units of a specific power plant type?"
+
+    As the example queries above demonstrate, the API is structured so that units of power plant types (e.g. wind
+    turbines, solar PV systems, gas power plants) have to be queried directly by their unique identifier
+    (`EinheitMastrNummer`) with a distinct API query. To download all unit information of a specific power plant,
+    you need to know its `EinheitMastrNummer`.<br>
First, by querying for all units with `mastr_api.GetListeAlleEinheiten()` you'll get all units, their unique
+    identifier (`EinheitMastrNummer`) and their power plant type (`Einheittyp`). You can then sort them by power
+    plant type and use the type-specific API query to retrieve further information about them.<br>
+ + Cumbersome?
+ Luckily, `open-MaStR` has you covered and provides methods to just query for all units of a power + plant type. + + + + +### MaStRDownload + +The class `MaStRDownload` builds upon methods provided in the class `MaStRAPI`.
+ +It provides methods to download power plant unit types and additional information +for each unit type, such as extended unit data, permit data, chp-specific data, location data +or eeg-specific data.
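+
+As an illustration, a minimal sketch of a direct download with this class (it assumes the API credentials are configured as described above; the parameter names `data` and `limit` and the returned list of unit records are assumptions and may differ between versions):
+
+```python
+from open_mastr.soap_api.download import MaStRDownload
+
+mastr_dl = MaStRDownload()
+
+# Fetch a small number of wind units together with their additional data.
+wind_units = mastr_dl.download_power_plants(data="wind", limit=10)
+print(len(wind_units))
+```
+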
+ +The class handles the querying logic and knows which additional data for each unit type is available +and which SOAP service has to be used to query it. + + +### MaStRMirror + +The class `MaStRMirror` builds upon methods provided in the class `MaStRDownload`.
+ +The aim of the class has been to mirror the Marktstammdatenregister database and keep it up-to-date. +Historically, `open-mastr` has been developed before the owner of the dataset, BNetzA, offered the `bulk` download. +The class can still be used for use-cases where only the most recent changes to a local database are of interest. +For downloading the entire MaStR database we recommend the bulk download functionalities by specifying `donwload(method="bulk")`. + + + diff --git a/docs/advanced.rst b/docs/advanced.rst deleted file mode 100644 index 966afc0c..00000000 --- a/docs/advanced.rst +++ /dev/null @@ -1,446 +0,0 @@ -******************** -Advanced Topics -******************** -In the following sections, a deeper description of the package and its functionalities is given. - -Configuration -================== -Project directory -------------------- - -The directory `$HOME/.open-MaStR` is automatically created. It is used to store configuration files and save data. -Default config files are copied to this directory which can be modified - but with caution. -The project home directory is structured as follows (files and folders below `data/` just an example). - -.. code-block:: bash - - .open-MaStR/ - ├── config - │   ├── credentials.cfg - │   ├── filenames.yml - │   ├── logging.yml - │   └── tables.yml - ├── data - │   ├── rli_v3.0.0 - │      ├── bnetza_mastr_solar_eeg_fail.csv - │      ├── bnetza_mastr_solar_extended_fail.csv - │      └── bnetza_mastr_solar_raw.csv - │   ├── sqlite - │      └── open-mastr.db - └── xml_download - └── Gesamtdatenexport.zip - └── logs - └── open_mastr.log - - -Configuration files -------------------- - -* :code:`credentials.cfg`: Credentials used to access - `Marktstammdatenregister (MaStR) `_ API (read more in - :ref:`MaStR account and credentials `) and token for Zenodo. -* :code:`filenames.yml`: File names are defined here. -* :code:`logging.yml`: Logging configuration. For changing the log level to increase or decrease details of log - messages, edit the `level` of the handlers. -* :code:`tables.yml`: Names of tables where data gets imported to during :ref:`Post-processing (Outdated)` - -Data ----- - -If the zipped dump of the MaStR is downloaded, it is saved in the folder `$HOME/.open-MaStR/data/xml_download`. New versions -of the dump overwrite older versions. - -The data can then be written to an sql database. The type of the sql database is determined -by the parameter `engine` in the Mastr class (see :ref:`mastr module`). - -For more information regarding the database see :ref:`Database settings `. - -Exported data is saved under `$HOME/.open-MaStR/data/`. -Files that are suffixed with `_raw` can contain joined data retrieved during :ref:`downloading `. -The structure of the data is described in :ref:`Data Description`. - - -Environment variables -^^^^^^^^^^^^^^^^^^^^^ - -There are some environment variables to customize open-MaStR: - -.. list-table:: - :widths: 5 5 5 - :header-rows: 1 - - * - variable - - description - - example - * - SQLITE_DATABASE_PATH - - Path to the SQLite file. This allows to use to use multiple instances of the MaStR database. The database instances exist in parallel and are independent of each other. - - `/home/mastr-rabbit/.open-MaStR/data/sqlite/your_custom_instance_name.db` - * - OUTPUT_PATH - - | Path to user-defined output directory for CSV data, XML file and database. 
- | If not specified, output directory defaults to `$HOME/.open-MaStR/` - - | Linux: `/home/mastr-rabbit/open-mastr-user-defined-output-path` - | Windows: `C:\\Users\\open-mastr-user-defined-output-path` - -MaStR account and credentials -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -For downloading data from the -`Marktstammdatenregister (MaStR) database `_ -via its API a registration is mandatory (please `read here `_). - -To download data using `open-MaStR`, the credentials (MaStR user and token) need to be provided in a certain way. -Three options exist - -1. **Credentials file:** Both, user and token, are stored in plain text in the credentials file - - -For storing the credentials in the credentials file (plus optionally using keyring for the token) simply instantiate -:py:class:`open_mastr.soap_api.download.MaStRDownload` once and you get asked for a user name and a token. The -information you insert will be used to create the credentials file. - -It is also possible to create the credentials file by hand, using this format: - -.. code-block:: - - [MaStR] - user = SOM123456789012 - token = msöiöo8u2o29933n31733m§=§1n33§304n... # optional, 540 characters - -The `token` should be written in one line, without line breaks. - -The credentials file needs to be stored at: `$HOME/.open-MaStR/config/credentials.cfg` - -2. **Credentials file + keyring:** The user is stored in the credentials file, while the token is stored encrypted in the `keyring `_. - -Read in the documentation of the `keyring library `_ how to store your token in the -keyring. - -3. **Don't store:** Just use the password for one query and forget it - -The latter option is only available when using :class:`open_mastr.soap_api.download.MaStRAPI`. -Instantiate with - -.. code-block:: - - MaStRAPI(user='USERNAME', key='TOKEN') - -to provide user and token in a script and use these -credentials in subsequent queries. - -Logs ----- - -For the download via the API, logs are stored in a single file in `/$HOME/cwm/.open-MaStR/logs/open_mastr.log`. -New logging messages are appended. It is recommended to delete the log file from time to time because of its required disk space. - - -Zenodo token ------------------- - -Uploading data to `Zenodo `_ requires authentication. When logged in with your account you can -`create tokens `_ for API requests. - -The section in `credentials.cfg` looks like: - -.. code-block:: - - [Zenodo] - token = voh6Zo2ohbohReith4ec2iezeiJ9Miefohso0DohK9ohtha6mahfame7hohc - - -Download -============================= - -Get data via the bulk download -------------------------------- -On the homepage `MaStR/Datendownload `_ a zipped folder containing the whole -MaStR is offered. The data is delivered as xml-files. The official documentation can be found -`here [in german] `_. -This data is updated on a daily base. - -.. figure:: images/MaStR_bulk_download.svg - :width: 50% - :align: center - - Overview of the open_mastr bulk download functionality. - -In the following, the process is described that is started when calling the download function with the parameter method="bulk". -First, the zipped files are downloaded and saved in `$HOME/.open-MaStR/data/xml_download`. The zipped folder contains many xml files, -which represent the different tables from the MaStR. Those tables are then parsed to a sqlite database. If only some specific -data are of interest, they can be specified with the parameter `data`. Every table that is selected in `data` will be deleted, if existent, -and then filled with data from the xml files. 
- -In the last step, a basic data cleansing is performed. Many entries in the MaStR from the bulk download are replaced by numbers. -As an example, instead of writing the german states where the unit is registered (Saxony, Brandenburg, Bavaria, ...) the MaStR states -corresponding digits. One major step of cleansing is therefore to replace those digits with their original meaning. -Moreover, the datatypes of different entries are set in the data cleansing process. - -Advantages of the bulk download: - * No registration for an API key is needed - -Disadvantages of the bulk download: - * No single tables or entries can be downloaded - -Get data via the MaStR-API ---------------------------- - -Downloading data with the MaStR-API is more complex than using the :ref:`Bulk download `. - -Follow this checklist for configuration: - -#. Set up a MaStR account and configure your :ref:`credentials `. -#. Configure your :ref:`database `. -#. Configure your :ref:`API download ` settings. - -Database settings -^^^^^^^^^^^^^^^^^^^^^^^^ - -Configure your database with the `engine` parameter of :class:`Mastr`. -It defines the engine of the database where the MaStR is mirrored to. Default is 'sqlite'. - -Choose from: {'sqlite', sqlalchemy.engine.Engine} - -The possible databases are: - -* **sqlite**: By default the database will be stored in `$HOME/.open-MaStR/data/sqlite/open-mastr.db` (can be customized via the - :ref:`environment variable ` `$SQLITE_DATABASE_PATH`). -* **own database**: The Mastr class accepts a sqlalchemy.engine.Engine object as engine which enables the user to - use any other desired database. - If you're planning to use docker with a postgresql database, you need to set up the docker container including a postegresql database yourself. - To do so, you can build upon the 'docker-compose.yml' in the scripts folder. - If you do so, you need to insert the connection parameter into the engine variable. It'll look like this: - -.. code-block:: - - from sqlalchemy import create_engine - - engine = create_engine("postgresql+psycopg2://open-mastr:open-mastr@localhost:55443/open-mastr") - db = Mastr(engine=engine) - - -.. tabs:: - - .. code-tab:: py SQLite - - from sqlalchemy import create_engine - - engine = create_engine("sqlite:///path/to/sqlite/database.db") - db = Mastr(engine=engine) - - .. code-tab:: py PostgreSQL - - from sqlalchemy import create_engine - - engine = create_engine("postgresql://myusername:mypassword@localhost/mydatabase") - db = Mastr(engine=engine) -.. - - - -API download settings -^^^^^^^^^^^^^^^^^^^^^^^^ - -Prior to starting the download of data from MaStR-API, you might want to adjust parameters in the config file. -Please read in :ref:`Configuration`. -For downloading data from Marktstammdatenregister (MaStR) registering an account is required. -Find more information :ref:`here `. - -By using the API (e.g. by setting `method` ="API" in the `Mastr.download()` method) -additional parameters can be set to define in detail which data should be obtained. - -.. list-table:: API-related download arguments and explanation - :widths: 5 5 5 - :header-rows: 1 - - * - argument - - options for specification - - explanation - * - data - - ["wind","biomass","combustion","gsgk","hydro","nuclear","storage","solar"] - - Select data to download. - * - date - - None or :class:`datetime.datetime` or str - - Specify backfill date from which on data is retrieved. Only data with time stamp greater than `date` will be retrieved. Defaults to `None` which means that todays date is chosen. 
- * - api_data_types - - ["unit_data","eeg_data","kwk_data","permit_data"] - - Select the type of data to download. - * - api_location_types - - ["location_elec_generation","location_elec_consumption","location_gas_generation","location_gas_consumption"] - - Select location_types to download. - * - api_processes - - Number of type int, e.g.: 5 - - Select the number of parallel download processes. Possible number depends on the capabilities of your machine. Defaults to `None`. - * - api_limit - - Number of type int, e.g.: 1500 - - Select the number of entries to download. Defaults to 50. - * - api_chunksize - - int or None, e.g.: 1000 - - Data is downloaded and inserted into the database in chunks of `api_chunksize`. Defaults to 1000. - -.. warning:: - The implementation of parallel processes is currently under construction. Please let the argument `api_processes` at the default value `None`. - - - - - -Three different levels of access to data are offered where the code builds on top of each other. - -.. figure:: images/MaStR_downloading.svg - :width: 70% - :align: center - - Overview of open-mastr API download functionality. - -The most fundamental data access is provided by :class:`open_mastr.soap_api.download.MaStRAPI` that simply -wraps SOAP webservice functions with python methods. -Using the methods of the aforementioned, :class:`open_mastr.soap_api.download.MaStRDownload` provides -methods for bulk data download and association of data from different sources. -If one seeks for an option to store the entire data in a local database, -:class:`open_mastr.soap_api.mirror.MaStRMirror` is the right choice. It offers complete data download -and updating latest data changes. - -Mirror MaStR database -^^^^^^^^^^^^^^^^^^^^^^^^ - -.. autoclass:: open_mastr.soap_api.mirror.MaStRMirror - :members: - - -Download large number of units -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Downloading thousands of units is simplified using -:meth:`open_mastr.soap_api.download.MaStRDownload.download_power_plants`. - -.. autoclass:: open_mastr.soap_api.download.MaStRDownload - :members: - - -MaStR API wrapper -^^^^^^^^^^^^^^^^^^^^ - -For simplifying queries against the MaStR API, :class:`open_mastr.soap_api.download.MaStRAPI` wraps around exposed -SOAP queries. -This is the low-level MaStR API access. - -.. autoclass:: open_mastr.soap_api.download.MaStRAPI - :members: - - -.. include:: _data/raw_data.rst - - -Post-processing (Outdated) -=========================== - -The following chapter on postprocessing is outdated. It refers to jupyter notebooks you can find -on our `github page `_. It is planned to add those post-processing -features in later versions to the open_mastr package. - -Pre-requisites (Outdated) -------------------------- - -Major parts of data cleansing, correction and enrichment are written in SQL. We recommend to run these scripts in a -dockered PostgreSQL database. If you want to use a native installation of PostgreSQL, help yourself and continue with -:ref:`Run postprocessing (Outdated)`. - -Make sure you have `docker-compose `_ installed. Run - -.. code-block:: bash - - docker-compose up -d - -to start a PostgreSQL database. - -You can connect to the database with the following credentials (if not changed in `docker-compose.yml`). - -======== ========== -Field Value -======== ========== -host localhost -port 55443 -database open-mastr -User open-mastr -Password open-mastr -======== ========== - - -Once you're finished with working in/with the database, shut it down with - -.. 
code-block:: bash - - docker-compose down - - -Run postprocessing (Outdated) ------------------------------ - -During post-processing downloaded :ref:`Download ` gets cleaned, imported to a PostgreSQL database, -and enriched. -To run the postprocessing, use the following code snippets. - -.. code-block:: python - - from open_mastr.postprocessing.cleaning import cleaned_data - from postprocessing.postprocessing import postprocess - - cleaned = cleaned_data() - postprocess(cleaned) - -As a result, cleaned data gets saved as CSV files and tables named like `bnetza_mastr__cleaned` -appear in the schema `model_draft`. -Use - -.. code-block:: python - - from postprocessing.postprocessing import to_csv - - to_csv() - -to export processed data in CSV format. - -.. note:: - - It is assumed raw data resides in `~/.open-MaStR/data//` as explained in :ref:`Configuration`. - -.. warning:: - - Raw data downloaded with :class:`open_mastr.soap_api.download.MaStRDownload` is - currently not supported. - Please use raw data from a CSV export(:meth:`open_mastr.soap_api.mirror.MaStRMirror.to_csv`) - of :class:`open_mastr.soap_api.mirror.MaStRMirror` data. - - -Database import (Outdated) --------------------------- - -Where available, geo location data, given in lat/lon (*Breitengrad*, *Längengrad*), is converted into a PostGIS geometry -data type during database import. This allows spatial data operations in PostgreSQL/PostGIS. - - -Data cleansing (Outdated) -------------------------- - -Units inside Germany and inside German offshore regions are selected and get distinguished from units that are (falsely) -located outside of Germany. -Data is stored in separate tables. - - -Data enrichment (Outdated) --------------------------- - -For units without geo location data, a location is estimated based on the zip code. The centroid of the zip code region -polygon is used as proxy for the exact location. -To determine the zip code area, zip code data of OSM is used which is stored in -`boundaries.osm_postcode `_. -If a unit originally had correct geo data and the origin of estimated geom data is documented in the column `comment`. - - -Zenodo upload -============= - -Use the function :func:`~.open_mastr.data_io.zenodo_upload` for uploading data to Zenodo. - -.. autofunction:: open_mastr.data_io.zenodo_upload \ No newline at end of file diff --git a/docs/changelog.rst b/docs/changelog.rst deleted file mode 100644 index 5ec4a0ec..00000000 --- a/docs/changelog.rst +++ /dev/null @@ -1 +0,0 @@ -.. mdinclude:: ../CHANGELOG.md \ No newline at end of file diff --git a/docs/dataset.md b/docs/dataset.md new file mode 100644 index 00000000..30e0290f --- /dev/null +++ b/docs/dataset.md @@ -0,0 +1,100 @@ +## Background + +The Marktstammdatenregister (MaStR) has been operated by the German Federal Network Agency (Bundesnetzagentur, abbreviated as BNetzA) since January 31, 2019, as a central online database for data related to the German energy system. Owners of electricity or gas generating plants are obligated to report master data about themselves and their plants. Additionally, industrial facilities consuming large amounts of electricity must register if they are connected to at least a high-voltage electricity grid. + +Most unit information is openly accessible and is published under an open data license, the Data licence Germany – attribution – version 2.0 (DL-DE-BY-2.0). This data can be downloaded, used, and republished with no restrictions, provided that proper attribution to the Bundesnetzagentur is given. 
For units with a net capacity of less than 30 kW, some location information is restricted from publication. This includes street names, house numbers, parcel designations, and exact coordinates of units. The most detailed location information accessible for all units is the postal code or the municipality.
+
+In our paper titled [Analyzing Data Reliability in Germany's Energy System: A Validation of Unit Locations of the Marktstammdatenregister](https://arxiv.org/abs/2304.10581), we provide further insights into the content and quality of the dataset.
+
+## Content
+
+The German Federal Network Agency regularly updates the dataset and adds new tables and attributes. Hence, the primary source of information about the dataset should be the original website:
+
+* Get information about the `bulk` data [here](https://www.marktstammdatenregister.de/MaStR/Datendownload) (in German)
+* Get information about the `API` data [here](https://www.marktstammdatenregister.de/MaStRHilfe/subpages/webdienst.html) (in German)
+
+## Difference between `bulk` and `API` dataset
+
+As you may have noticed, we distinguish between `bulk` and `API` datasets. The `bulk` dataset refers to the data obtained from the zipped XML files downloaded from [here](https://www.marktstammdatenregister.de/MaStR/Datendownload) using the [`Mastr.download`][open_mastr.Mastr.download] function. The `API` data is obtained by requesting information via the SOAP-API and the [`soap_api.download.MaStRDownload`][open_mastr.soap_api.download.MaStRDownload] module.
+
+??? question "Why is the table structure in the open-mastr database as it is?"
+
+    The structure of the database is historically determined by the data retrieved via the API (open-mastr existed before the XML dump was provided).
See [MaStR data model](#Mastr-data-model) + + +## Tables in the database + +!!! question "Confused by all the tables?" + :sparkles: We regularly run the whole download and cleansing pipeline and upload the dataset as csv files at [zenodo](https://doi.org/10.5281/zenodo.6807425)! + +After downloading the MaStR, you will find a database with a large number of tables. Here we give a brief overview of what you can find in those tables: + +### Tables in the local database + + +=== "_extended tables" + The main information about the different technologies lies in the `_extended` tables. You can find the capacity, location, and other technology-specific attributes here. + + | Table name | Comments | + |------|------| + | biomass_extended | | + | combustion_extended | *Conventional powerplants: Gas, Oil, Coal, ...* | + | gsgk_extended | *gsgk is short for: Geothermal, Mine gas, and Pressure relaxation* | + | hydro_extended | | + | nuclear_extended | | + | solar_extended | | + | storage_extended | | + | wind_extended | | + +=== "_eeg tables" + In germany, renewable energies were subsidized by the state - according to a law called 'EEG'. Relevant information like the 'EEG ID' are in the `_eeg` tables. + + | Table name | Comments | + |------|------| + | biomass_eeg | | + | gsgk_eeg | *gsgk is short for: Geothermal, Mine gas, and Pressure relaxation* | + | hydro_eeg | | + | solar_eeg | | + | storage_eeg | | + | wind_eeg | | + +=== "Other tables" + Other tables contain information about the grid, the energy market, or gas consumers and producers: + + | Table name | Comments | + |------|------| + | balancing_area | *Related to the energy market* | + | electricity_consumer | *Only large consumers* | + | gas_consumer | *Only large consumers* | + | gas_producer | | + | gas_storage | | + | gas_storage_extended | | + | grid_connections | *Does not contain geoinformation* | + | grids | *Does not contain geoinformation* | + | locations_extended | *Connects units with grids - to get coordinates of units use the _extended tables*| + | market_actors | | + | market_roles | | + | permit | | + | storage_units | | + | kwk | *short for: Combined heat and power (CHP)* | + +### MaStR data model +A useful overview of the MaStR data model can be found [here (in german)](https://www.marktstammdatenregister.de/MaStRHilfe/files/webdienst/Objektmodell%20-%20Fachliche%20Ansicht%20V1.2.0.pdf). A translated version using the names from the tables you can find in your local database is presented here: + +=== "translated image (english)" + ![Data model of the MaStR](images/DetailAnlagen_english.PNG) + +=== "original image (german)" + ![Data model of the MaStR](images/DetailAnlagenModellMaStR.png) + + +## Tables as CSV + +Tables from the database can be exported to csv files. By default, all available power plant unit data will be exported +to csv files. + +For exported csv's additional available data is joined on basic unit data. For example: For biomass power plants one csv +is exported consisting of the join of four database tables (unit data, chp data, permit data, eeg data). We regularly run the whole download and cleansing pipeline and upload the dataset as csv files at [zenodo](https://doi.org/10.5281/zenodo.6807425). diff --git a/docs/development.rst b/docs/development.rst deleted file mode 100644 index 720bf29e..00000000 --- a/docs/development.rst +++ /dev/null @@ -1,103 +0,0 @@ -*********** -Development -*********** - -For now, please read in the -`CONTRIBUTING `_. 
- -Build docs -========== - -For checking that the documentation builds successfully, run sphinx locally. -Make sure you've installed docs requirements - -.. code-block:: bash - - pip install -r docs/requirements.txt - -The you can build the docs with - -.. code-block:: bash - - sphinx-build -E -a docs docs/_build/ - -If structure of functions (new added/deleted or names changed) changes, code reference needs -to be updated. - -.. code-block:: bash - - sphinx-apidoc --no-toc -o docs/reference open_mastr open_mastr/postprocessing - -If the SOAP API of MaStR changes, data table metadata documentation needs to be updated as well. From within -`open-MaStR/docs` execute - -.. code-block:: bash - - python -c "from open_mastr.utils.docs import generate_data_docs; generate_data_docs()" - - -to recreate the files. - - -Code Formatting -================= - -We use `Black `_ as Python formatter. In order to simplify and guarantee the proper usage, -Black is added as a pre-commit hook. When you try to commit some changes, the changed files will eventually be formatted -and your commit fails. After adding the new changes to your commit, you should have a clean formatted code and -a successful commit. - -The first time after installing the package in development mode, you have to install the pre-commit hooks. - -.. code-block:: bash - - pre-commit install - - -Testing -======= - -Make sure you have test requirements installed (in addition to the package itself). - -.. code-block:: bash - - pip install -e .[dev] - -Some tests query data from `MaStR `_. Therefore, user name and the token must -be provided by credentials file `credentials.cfg` (with token optionally stored in keyring). -Further explanation :ref:`here `. - -`PyTest `_ is used to run the test suite. - -.. code-block:: bash - - pytest -vv - -Updating open-mastr ORM due to wsdl API change -================================= - -The Markstammdatenregister API is yearly updated on 1 April and 1 October. The updates often entail changes, additions or deletions of columns in the data base. -This changes have to be reflected in the code by adapting: - -1. orm.py - ensuring a faultless download with `method='API'` -2. mastr_datapackage.json - updating the metadata for correct data description -3. docs\_data\_raw\ - updating tables for correct online documentation - -You need to set up a new database with the adapted ORM structure locally to apply the changes after implementation. Your existing database won't reflect the changes. - -The issue `#351 `_ in the open-mastr GitHub repo can serve as a process blueprint for future updates. - - -Validating metadata documentation -================================= - -From with a directory of data (default case: `~/.open-MaStR/data/` execute - -.. code-block:: bash - - frictionless validate datapackage.json --basepath . - -for validating datapackage metadata with -`Frictionless data specifications -`_. -At the moment, there complaints about the format. 
diff --git a/docs/development/changelog_mirror.md b/docs/development/changelog_mirror.md
new file mode 100644
index 00000000..a1df8959
--- /dev/null
+++ b/docs/development/changelog_mirror.md
@@ -0,0 +1,7 @@
+---
+hide:
+  - toc
+---
+{%
+   include-markdown "../../CHANGELOG.md"
+%}
\ No newline at end of file
diff --git a/docs/development/contributing_mirror.md b/docs/development/contributing_mirror.md
new file mode 100644
index 00000000..66369d0c
--- /dev/null
+++ b/docs/development/contributing_mirror.md
@@ -0,0 +1,8 @@
+---
+hide:
+  - toc
+---
+
+{%
+   include-markdown "../../CONTRIBUTING.md"
+%}
\ No newline at end of file
diff --git a/docs/development/release_procedure_mirror.md b/docs/development/release_procedure_mirror.md
new file mode 100644
index 00000000..ef87ece1
--- /dev/null
+++ b/docs/development/release_procedure_mirror.md
@@ -0,0 +1,8 @@
+---
+hide:
+  - toc
+---
+
+{%
+   include-markdown "../../RELEASE_PROCEDURE.md"
+%}
\ No newline at end of file
diff --git a/docs/getting_started.md b/docs/getting_started.md
new file mode 100644
index 00000000..1cfd6e37
--- /dev/null
+++ b/docs/getting_started.md
@@ -0,0 +1,71 @@
+# Getting Started
+
+The intention of open-MaStR is to provide tools for receiving a list of power plant units that is as complete and
+accurate as possible, based on the public registry Marktstammdatenregister (short: [MaStR](https://www.marktstammdatenregister.de)).
+
+## Downloading the MaStR data
+
+The MaStR dataset is updated on a daily basis. To download today's MaStR and save it in a sqlite database, you will use the [`Mastr`][open_mastr.Mastr] class and its [`download`][open_mastr.Mastr.download] method. The [`download`][open_mastr.Mastr.download] method offers two different ways to get the data by changing the `method` parameter (if not specified, `method` defaults to "bulk"):
+
+1. `method` = "bulk": Get data via the bulk download from [MaStR/Datendownload](https://www.marktstammdatenregister.de/MaStR/Datendownload). Use this if you want to download the whole dataset (a few gigabytes) or if you want to download all units of a given technology (e.g. all wind turbines in Germany).
+2. `method` = "API": Get data via the MaStR SOAP-API. Use this if you want specific information about single units and if you have registered to get an API token.
+
+
+### Bulk download
+
+The following code block shows the basic download commands:
+
+```python
+from open_mastr import Mastr
+
+db = Mastr()
+db.download()
+```
+
+When a `Mastr` object is initialized, a sqlite database is created in `$HOME/.open-MaStR/data/sqlite`. With the function `Mastr.download()`, the **whole MaStR is downloaded** in the zipped xml file format. It is then read into the sqlite database and simple data cleansing functions are started.
+
+More detailed information can be found in the section [bulk download](advanced/#bulk-download).
+
+### API download
+
+When using `download(method="API")`, the data is retrieved from the MaStR API. For using the MaStR API, credentials are needed (see [SOAP API download](advanced/#soap-api-download)).
+
+```python
+from open_mastr import Mastr
+
+db = Mastr()
+db.download(method='API')
+```
+
+The default settings will save retrieved data into the sqlite database. The function can be used to mirror the open-MaStR database using the SOAP API. Note that the data provided by the bulk download and the SOAP API is similar, but not exactly the same.
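+
+If you only need parts of the dataset, the API download can be narrowed down with additional arguments. A short sketch (the argument names `data`, `api_data_types` and `api_limit` follow the API-related download arguments of [`Mastr.download`][open_mastr.Mastr.download]; the values are just examples):
+
+```python
+from open_mastr import Mastr
+
+db = Mastr()
+# Restrict the API download to wind units, request only selected data types
+# and limit the number of downloaded entries.
+db.download(
+    method="API",
+    data=["wind"],
+    api_data_types=["unit_data", "eeg_data", "permit_data"],
+    api_limit=100,
+)
+```
+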
+ +## Accessing the database + +For accessing and working with the MaStR database after you have downloaded it, you can use sqlite browsers +such as [DB Browser for SQLite](https://sqlitebrowser.org/) or any python module +which can process sqlite data. Pandas, for example, comes with the function +[read_sql](https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html). + + +=== "pandas" + ```python + import pandas as pd + + table="wind_extended" + df = pd.read_sql(sql=table, con=db.engine) + ``` + +=== "DB Browser for sqlite" + ![Image caption](images/DBBrowser.PNG){ loading=lazy } + +## Exporting data + +The tables in the database can be exported as csv files. While technology-related data is joined for each unit, +additional tables are mirrored from database to csv as they are. To export the data you can use to method `to_csv`. + +```python + + tables=["wind", "grid"] + db.to_csv(tables) +``` \ No newline at end of file diff --git a/docs/getting_started.rst b/docs/getting_started.rst deleted file mode 100644 index 5ae76edf..00000000 --- a/docs/getting_started.rst +++ /dev/null @@ -1,121 +0,0 @@ -******************** -Getting Started -******************** - -The intention of open-MaStR is to provide tools for receiving a complete as possible and accurate as possible list of -power plant units based on the public registry Marktstammdatenregister (short: `MaStR `_). - -Downloading the MaStR data -============================= - -For downloading the MaStR and saving -it in a sqlite database, you will use the :class:`Mastr` class and its `download` method (For documentation of those methods see -:ref:`mastr module`) - -The :meth:`download` function offers two different ways to get the data by changing the `method` parameter (if not specified, `method` defaults to "bulk"): - #. `method` = "bulk": Get data via the bulk download from `MaStR/Datendownload `_ - #. `method` = "API": Get data via the MaStR-API - -Keep in mind: While the data from both methods is quiet similar, it is not exactly the same! - -Bulk download ------------------------------------ -The following code block shows the basic download commands: - - .. code-block:: python - - from open_mastr import Mastr - - db = Mastr() - db.download() - -When a :class:`Mastr` object is initialized, a sqlite database is created -in `$HOME/.open-MaStR/data/sqlite`. With the function `Mastr.download()`, the **whole MaStR is downloaded** in the zipped xml file -format. It is then read into the sqlite database and simple data cleansing functions are started. - -More detailed information can be found in :ref:`Get data via the bulk download `. - -API download ------------------------------------ -When using `download(method="API")`, the data is retrieved from the MaStR API. For using the MaStR API, -credentials are needed (see :ref:`Get data via the MaStR-API`). - - .. code-block:: python - - from open_mastr import Mastr - - db = Mastr() - db.download(method='API') - -The default settings will save retrieved data into the sqlite database. The function can be used to mirror the open-MaStR database regularly -without needing to download the `provided dumps `_ daily. - -More detailed information can be found in :ref:`Get data via the MaStR-API `. - -Accessing the database -=================================== - - -For accessing and working with the MaStR database after you have downloaded it, you can use sqlite browsers -(such as `DB Browser for SQLite `_) or any python module -which can process sqlite data. 
Pandas, for example, comes with the function -`read_sql `_. - - .. code-block:: python - - import pandas as pd - - table="wind_extended" - df = pd.read_sql(sql=table, con=db.engine) - - -The tables that exist in the database are listed below. Their relations can be found in the chapter :ref:`Data Description ` - -.. list-table:: Tables in the sqlite database - :widths: 5 - :header-rows: 1 - - * - table - * - balancing_area - * - basic_units - * - biomass_eeg - * - biomass_extended - * - combustion_extended - * - electricity_consumer - * - gas_consumer - * - gas_producer - * - gas_storage - * - gas_storage_extended - * - grid_connections - * - grids - * - gsgk_eeg - * - gsgk_extended - * - hydro_eeg - * - hydro_extended - * - kwk - * - locations_basic - * - locations_extended - * - market_actors - * - market_roles - * - nuclear_extended - * - permit - * - solar_eeg - * - solar_extended - * - storage_eeg - * - storage_extended - * - storage_units - * - wind_eeg - * - wind_extended - -Exporting data -=================================== - - -The tables in the database can be exported as csv files. While technology-related data is joined for each unit, -additional tables are mirrored from database to csv as they are. To export the data you can use to method :meth:`to_csv` - - .. code-block:: python - - tables=["wind", "grid"] - db.to_csv(tables) - diff --git a/docs/images/DBBrowser.PNG b/docs/images/DBBrowser.PNG new file mode 100644 index 00000000..e6070baf Binary files /dev/null and b/docs/images/DBBrowser.PNG differ diff --git a/docs/images/DetailAnlagen_english.PNG b/docs/images/DetailAnlagen_english.PNG new file mode 100644 index 00000000..e40f4ec8 Binary files /dev/null and b/docs/images/DetailAnlagen_english.PNG differ diff --git a/docs/images/MaStR_bulk_download.svg b/docs/images/MaStR_bulk_download.svg deleted file mode 100644 index ac9aa36c..00000000 --- a/docs/images/MaStR_bulk_download.svg +++ /dev/null @@ -1,493 +0,0 @@ - - - - - - - - - - - - - - Download zippedxml - - files - - - Read rawdatatosqlite - - database - - - - Do basicdatacleansing - - - The zippedfolderoftheMaStRisdownloadedtothedata/xml_downloadfolder.Thexmlfilesareparsedtoasqlitedatabaseinthefolderdata/sqlite. Thestructureofthetablesareslightlychangedtomatch theAPIformat.Basicdatacleansingisperformed. The datacanstillbeassumedtoberawdata. 
- - - diff --git a/docs/images/OpenEnergyFamily_Logo_OpenMaStR_no_name_grey.png b/docs/images/OpenEnergyFamily_Logo_OpenMaStR_no_name_grey.png new file mode 100644 index 00000000..0d8d4cf3 Binary files /dev/null and b/docs/images/OpenEnergyFamily_Logo_OpenMaStR_no_name_grey.png differ diff --git a/docs/images/OpenEnergyFamily_Logo_OpenMaStR_no_name_white.png b/docs/images/OpenEnergyFamily_Logo_OpenMaStR_no_name_white.png new file mode 100644 index 00000000..bc5ebae0 Binary files /dev/null and b/docs/images/OpenEnergyFamily_Logo_OpenMaStR_no_name_white.png differ diff --git a/docs/images/OpenEnergyFamily_Logo_OpenMaStR_white.png b/docs/images/OpenEnergyFamily_Logo_OpenMaStR_white.png new file mode 100644 index 00000000..ccb77800 Binary files /dev/null and b/docs/images/OpenEnergyFamily_Logo_OpenMaStR_white.png differ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..ede972b1 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,29 @@ +# Introduction + + +The [Marktstammdatenregister (MaStR)](https://www.marktstammdatenregister.de/MaStR) is a German register +provided by the German Federal Network Agency (Bundesnetzagentur / BNetza) that keeps track of all power and gas units located in Germany. + +Generally, the MaStR data can be accessed via various options: + + 1. browse, filter and download [online](https://www.marktstammdatenregister.de/MaStR) + 1. download [daily provided dumps](https://www.marktstammdatenregister.de/MaStR/Datendownload) + 1. access via the [web service](https://www.marktstammdatenregister.de/MaStRHilfe/subpages/webdienst.html) + +The python package `open-mastr` provides an interface for accessing the data and contributes to improving the +usability of the access options above. This repository is intended for people who wish to simply work with the +MaStR data and do not want to deal with the individual obstacles to data access of the three options above.

+It facilitates access to the daily provided MaStR dumps with download methods (bulk) and by +parsing the XML files to a relational database. Furthermore, the software provides a Python wrapper to access the MaStR +SOAP web service (API). + +Moreover, this repository aims to develop methods to analyze, validate and enrich the data.
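As a minimal illustration of these two access paths, the following sketch uses the `Mastr` class and its `download` method as documented further below; the keyword values shown are taken from that documentation and only serve as an example:

```python
from open_mastr import Mastr

db = Mastr()  # mirrors the MaStR into a local SQLite database by default

# Bulk path: download the daily dump and parse the XML files into the database
db.download(method="bulk")

# API path: query the SOAP web service instead (requires MaStR credentials)
# db.download(method="API")
```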
+We want to collect and compile post-processing scripts to improve the data quality. + +!!! question "Just here for the data?" + :sparkles: We regularly run the whole download and cleansing pipeline and upload the dataset as csv files at [zenodo](https://doi.org/10.5281/zenodo.6807425)! + + +## License +The original dataset is licensed under the **Datenlizenz Deutschland – Namensnennung – Version 2.0** (DL-DE-BY-2.0) +[Marktstammdatenregister](https://www.marktstammdatenregister.de/MaStR) - © Bundesnetzagentur für Elektrizität, Gas, Telekommunikation, Post und Eisenbahnen | [DL-DE-BY-2.0](https://www.govdata.de/dl-de/by-2-0) \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 3fcc9abe..00000000 --- a/docs/index.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. open-MaStR documentation master file, created by - sphinx-quickstart on Mon Oct 5 13:21:00 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -###################################### -Welcome to open-MaStR's documentation! -###################################### - -.. toctree:: - :maxdepth: 2 - :caption: Documentation - - readme - getting_started - advanced - development - changelog - Reference - - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/readme.rst b/docs/readme.rst deleted file mode 100644 index e2618b81..00000000 --- a/docs/readme.rst +++ /dev/null @@ -1 +0,0 @@ -.. mdinclude:: ../README.md \ No newline at end of file diff --git a/docs/reference/advanced.md b/docs/reference/advanced.md new file mode 100644 index 00000000..c06958a0 --- /dev/null +++ b/docs/reference/advanced.md @@ -0,0 +1,5 @@ +# Advanced functions to use the MaStR SOAP-API + +::: open_mastr.soap_api.download.MaStRAPI +::: open_mastr.soap_api.download.MaStRDownload +::: open_mastr.soap_api.mirror.MaStRMirror diff --git a/docs/reference/basic.md b/docs/reference/basic.md new file mode 100644 index 00000000..bf47274f --- /dev/null +++ b/docs/reference/basic.md @@ -0,0 +1,4 @@ +# Basic functions +::: open_mastr.Mastr + + diff --git a/docs/reference/open_mastr.rst b/docs/reference/open_mastr.rst deleted file mode 100644 index 0a79a498..00000000 --- a/docs/reference/open_mastr.rst +++ /dev/null @@ -1,46 +0,0 @@ -open\_mastr package -=================== - -mastr module -------------- - -.. automodule:: open_mastr.mastr - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------- - -.. toctree:: - :maxdepth: 4 - - open_mastr.soap_api - open_mastr.utils - -Submodules ----------- - -open\_mastr.data\_io module ---------------------------- - -.. automodule:: open_mastr.data_io - :members: - :undoc-members: - :show-inheritance: - -open\_mastr.orm module --------------------------------- - -.. automodule:: open_mastr.orm - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: open_mastr - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/open_mastr.soap_api.metadata.rst b/docs/reference/open_mastr.soap_api.metadata.rst deleted file mode 100644 index d917da70..00000000 --- a/docs/reference/open_mastr.soap_api.metadata.rst +++ /dev/null @@ -1,29 +0,0 @@ -open\_mastr.soap\_api.metadata package -====================================== - -Submodules ----------- - -open\_mastr.soap\_api.metadata.create module --------------------------------------------- - -.. 
automodule:: open_mastr.soap_api.metadata.create - :members: - :undoc-members: - :show-inheritance: - -open\_mastr.soap\_api.metadata.description module -------------------------------------------------- - -.. automodule:: open_mastr.soap_api.metadata.description - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: open_mastr.soap_api.metadata - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/open_mastr.soap_api.rst b/docs/reference/open_mastr.soap_api.rst deleted file mode 100644 index 1a16e159..00000000 --- a/docs/reference/open_mastr.soap_api.rst +++ /dev/null @@ -1,62 +0,0 @@ -open\_mastr.soap\_api package -============================= - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - open_mastr.soap_api.metadata - -Submodules ----------- - -open\_mastr.soap\_api.config module ------------------------------------ - -.. automodule:: open_mastr.soap_api.config - :members: - :undoc-members: - :show-inheritance: - -open\_mastr.soap\_api.download module -------------------------------------- - -.. automodule:: open_mastr.soap_api.download - :members: - :undoc-members: - :show-inheritance: - -open\_mastr.soap\_api.mirror module ------------------------------------ - -.. automodule:: open_mastr.soap_api.mirror - :members: - :undoc-members: - :show-inheritance: - -open\_mastr.soap\_api.parallel module -------------------------------------- - -.. automodule:: open_mastr.soap_api.parallel - :members: - :undoc-members: - :show-inheritance: - - -open\_mastr.soap\_api.utils module ----------------------------------- - -.. automodule:: open_mastr.soap_api.utils - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: open_mastr.soap_api - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/reference/open_mastr.utils.rst b/docs/reference/open_mastr.utils.rst deleted file mode 100644 index fbd3a9ae..00000000 --- a/docs/reference/open_mastr.utils.rst +++ /dev/null @@ -1,37 +0,0 @@ -open\_mastr.utils package -========================= - -Submodules ----------- - -open\_mastr.utils.credentials module ------------------------------------- - -.. automodule:: open_mastr.utils.credentials - :members: - :undoc-members: - :show-inheritance: - -open\_mastr.utils.docs module ------------------------------ - -.. automodule:: open_mastr.utils.docs - :members: - :undoc-members: - :show-inheritance: - -open\_mastr.utils.helpers module --------------------------------- - -.. automodule:: open_mastr.utils.helpers - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. 
automodule:: open_mastr.utils - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/requirements.txt b/docs/requirements.txt index 2ce224c3..47cb3193 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -sphinx<7 -sphinx-rtd-theme -sphinx-tabs -m2r2 \ No newline at end of file +mkdocstrings[python] +mkdocs-material +mkdocs-include-markdown-plugin + diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..850ba28b --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,96 @@ +site_name: open-mastr +theme: + name: material + features: + - navigation.tabs + - navigation.tabs.sticky + - navigation.top + - search.suggest + - search.highlight + - content.tabs.link + - content.code.annotation + - content.code.copy + - toc.integrate + language: en + palette: + # Palette toggle for dark mode + - scheme: slate + toggle: + icon: material/brightness-4 + name: Switch to light mode + # Palette toggle for light mode + - scheme: default + toggle: + icon: material/brightness-7 + name: Switch to dark mode + icon: + repo: fontawesome/brands/github + logo: images/OpenEnergyFamily_Logo_OpenMaStR_no_name_white.png + favicon: images/OpenEnergyFamily_Logo_OpenMaStR_no_name_grey.png + +plugins: + - search + - autorefs + - mkdocstrings: + default_handler: python + handlers: + python: + options: + show_root_heading: true + show_source: false + docstring_style: numpy + docstring_section_style: spacy + - include-markdown + + +watch: + - open_mastr + +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets + - admonition + - pymdownx.arithmatex: + generic: true + - footnotes + - pymdownx.details + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.mark + - attr_list + - pymdownx.emoji: + emoji_index: !!python/name:materialx.emoji.twemoji + emoji_generator: !!python/name:materialx.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + +# Customization +extra: + version: + provider: mike + +nav: + - Home: index.md + - Getting Started: getting_started.md + - Dataset: dataset.md + - Advanced: advanced.md + - Reference: + - Basic Usage: reference/basic.md + - Advanced Usage of the MaStR SOAP-API: reference/advanced.md + - Development: + - Contributing: development/contributing_mirror.md + - Release Procedure: development/release_procedure_mirror.md + - Changelog: development/changelog_mirror.md + +repo_url: https://github.com/OpenEnergyPlatform/open-MaStR +site_description: A package that provides an interface for downloading and processing the data of the Marktstammdatenregister (MaStR) +site_dir: _build + + +copyright: | + © 2023 RLI and fortiss GmbH diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 88ef5ea3..64723d8a 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -49,26 +49,27 @@ class Mastr: """ - :class:`.Mastr` is used to download the MaStR database and keep it up-to-date. + `Mastr` is used to download the MaStR database and keep it up-to-date. - A sql database is used to mirror the MaStR database. It can be filled with + A SQL database is used to mirror the MaStR database. It can be filled with data either from the MaStR-bulk download or from the MaStR-API. - .. 
code-block:: python - - from open_mastr import Mastr - - db = Mastr() - db.download() - Parameters - ------------ - engine: {'sqlite', sqlalchemy.engine.Engine}, optional - Defines the engine of the database where the MaStR is mirrored to. Default is 'sqlite'. - - connect_to_translated_db: boolean, optional + ---------- + engine : {'sqlite', sqlalchemy.engine.Engine}, optional + Defines the engine of the database where the MaStR is mirrored to. + Default is 'sqlite'. + connect_to_translated_db: boolean, optional Allows connection to an existing translated database. Default is 'False'. Only for 'sqlite'-type engines. + + !!! example + ```python + from open_mastr import Mastr + + db = Mastr() + db.download() + ``` """ def __init__(self, engine="sqlite", connect_to_translated_db=False) -> None: @@ -112,102 +113,81 @@ def download( ) -> None: """ Download the MaStR either via the bulk download or via the MaStR API and write it to a - sqlite database. + SQLite database. Parameters - ------------ - method: {'API', 'bulk'}, optional + ---------- + method : 'API' or 'bulk', optional Either "API" or "bulk". Determines whether the data is downloaded via the zipped bulk download or via the MaStR API. The latter requires an account from marktstammdatenregister.de, (see :ref:`Configuration `). Default to 'bulk'. - data: str or list or None, optional + data : str or list or None, optional Determines which types of data are written to the database. If None, all data is - used. If it is a list, possible entries are listed at the table - below with respect to the download method. Missing categories are - being developed. If only one data is of interest, this can be - given as a string. Default to None, where all data is included. - - .. csv-table:: Values for data parameter - :header-rows: 1 - :widths: 5 5 5 - - "Data", "Bulk", "API" - "wind", "Yes", "Yes" - "solar", "Yes", "Yes" - "biomass", "Yes", "Yes" - "hydro", "Yes", "Yes" - "gsgk", "Yes", "Yes" - "combustion", "Yes", "Yes" - "nuclear", "Yes", "Yes" - "gas", "Yes", "Yes" - "storage", "Yes", "Yes" - "electricity_consumer", "Yes", "No" - "location", "Yes", "Yes" - "market", "Yes", "No" - "grid", "Yes", "No" - "balancing_area", "Yes", "No" - "permit", "Yes", "Yes" - "deleted_units", "Yes", "No" - "retrofit_units", "Yes", "No" - - date: None or :class:`datetime.datetime` or str, optional - For bulk method: - - Either "today" or None if the newest data dump should be downloaded - rom the MaStR website. If an already downloaded dump should be used, - state the date of the download in the format - "yyyymmdd" or use the string "existing". Defaults to None. - - For API method: - - Specify backfill date from which on data is retrieved - - Only data with modification time stamp greater than `date` is retrieved. - - * `datetime.datetime(2020, 11, 27)`: Retrieve data which is newer than this - time stamp - * 'latest': Retrieve data which is newer than the newest data already in the table. - - .. warning:: - - Don't use "latest" in combination with "api_limit". This might lead to - unexpected results. - - * `None`: Complete backfill - - Defaults to `None`. - bulk_cleansing: bool, optional + used. If it is a list, possible entries are listed below with respect to the download method. Missing categories are + being developed. If only one data is of interest, this can be given as a string. Default to None, where all data is included. 
+ + | Data | Bulk | API | + |-----------------------|------|------| + | "wind" | Yes | Yes | + | "solar" | Yes | Yes | + | "biomass" | Yes | Yes | + | "hydro" | Yes | Yes | + | "gsgk" | Yes | Yes | + | "combustion" | Yes | Yes | + | "nuclear" | Yes | Yes | + | "gas" | Yes | Yes | + | "storage" | Yes | Yes | + | "electricity_consumer"| Yes | No | + | "location" | Yes | Yes | + | "market" | Yes | No | + | "grid" | Yes | No | + | "balancing_area" | Yes | No | + | "permit" | Yes | Yes | + | "deleted_units" | Yes | No | + | "retrofit_units" | Yes | No | + date : None or `datetime.datetime` or str, optional + + | date | Bulk | API | + |-----------------------|------|------| + | "today" | latest files are downloaded from marktstammdatenregister.de | - | + | "20230101" | If a file from this date exists locally, it is used. Otherwise an error is raised (you can only receive today's data from the server) | - | + | "existing" | Use the latest downloaded zipped xml files; throws an error if the bulk download folder is empty | - | + | "latest" | - | Retrieve data that is newer than the newest data already in the table | + | datetime.datetime(2020, 11, 27) | - | Retrieve data that is newer than this time stamp | + | None | set date="today" | set date="latest" | + + Defaults to `None`. + bulk_cleansing : bool, optional If True, data cleansing is applied after the download (which is recommended). Default to True. - api_processes: int or None or "max", optional + api_processes : int or None or "max", optional Number of parallel processes used to download additional data. Defaults to `None`. If set to "max", the maximum number of possible processes is used. - .. warning:: + !!! warning The implementation of parallel processes is currently under construction. Please let the argument `api_processes` at the default value `None`. - - api_limit: int or None, optional + api_limit : int or None, optional Limit the number of units that data is downloaded for. Defaults to `None` which refers to query data for existing data requests, for example created by - :meth:`~.create_additional_data_requests`. Note: There is a limited number of + [`create_additional_data_requests`][open_mastr.soap_api.mirror.MaStRMirror.create_additional_data_requests]. Note: There is a limited number of requests you are allowed to have per day, so setting api_limit to a value is recommended. - api_chunksize: int or None, optional + api_chunksize : int or None, optional Data is downloaded and inserted into the database in chunks of `chunksize`. Defaults to 1000. - api_data_types: list or None, optional - Select type of additional data that should be retrieved. Choose from - "unit_data", "eeg_data", "kwk_data", "permit_data". Defaults to all. - api_location_types: list or None, optional - Select type of location that should be retrieved. Choose from + api_data_types : list or None, optional + Select the type of additional data that should be retrieved. Choose from + "unit_data", "eeg_data", "kwk_data", "permit_data". Defaults to all. + api_location_types : list or None, optional + Select the type of location that should be retrieved. Choose from "location_elec_generation", "location_elec_consumption", "location_gas_generation", "location_gas_consumption". Defaults to all. 
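To make the parameter combinations documented above concrete, here is an illustrative call sketch; it only uses keywords and values listed in this docstring, and the chosen tables and limits are arbitrary examples:

```python
from open_mastr import Mastr

db = Mastr()

# Bulk download: restrict to two data types and reuse an already downloaded dump
db.download(method="bulk", data=["wind", "solar"], date="existing")

# API download: fetch only a few units to stay well below the daily request limit
db.download(
    method="API",
    data="wind",
    date="latest",
    api_limit=100,
    api_chunksize=1000,
    api_data_types=["unit_data", "eeg_data"],
)
```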
""" - + if self.is_translated: raise TypeError( "you are currently connected to a translated database\n" @@ -317,7 +297,7 @@ def to_csv( ------------ tables: None or list For exporting selected tables choose from: - ["wind","solar","biomass","hydro","gsgk","combustion","nuclear","storage", + ["wind", "solar", "biomass", "hydro", "gsgk", "combustion", "nuclear", "storage", "balancing_area", "electricity_consumer", "gas_consumer", "gas_producer", "gas_storage", "gas_storage_extended", "grid_connections", "grids", "market_actors", "market_roles", diff --git a/open_mastr/soap_api/download.py b/open_mastr/soap_api/download.py index 17879d12..e7a4c4c0 100644 --- a/open_mastr/soap_api/download.py +++ b/open_mastr/soap_api/download.py @@ -29,39 +29,42 @@ class MaStRAPI(object): """ Access the Marktstammdatenregister (MaStR) SOAP API via a Python wrapper - :ref:`Read about ` + Read about [MaStR account and credentials](/advanced/#mastr-account-and-credentials) how to create a user account and a role including a token to access the MaStR SOAP API. - Create an :class:`.MaStRAPI()` instance with your role credentials + Create an `MaStRAPI` instance with your role credentials - .. code-block:: python + ```python mastr_api = MaStRAPI( user="SOM123456789012", key=""koo5eixeiQuoi'w8deighai8ahsh1Ha3eib3coqu7ceeg%ies..." ) + ``` Alternatively, leave `user` and `key` empty if user and token are accessible via `credentials.cfg`. How to configure this is described - :ref:`here `. + [here](/advanced/#mastr-account-and-credentials). - .. code-block:: python + ```python mastr_api = MaStRAPI() + ``` - Now, you can use the MaStR API instance to call `pre-defined SOAP API - queries - `_ + Now, you can use the MaStR API instance to call [pre-defined SOAP API + queries](https://www.marktstammdatenregister.de/MaStRHilfe/files/webdienst/Funktionen_MaStR_Webdienste_V1.2.39.html) via the class' methods. For example, get a list of units limited to two entries. - .. code-block:: python + ```python mastr_api.GetListeAlleEinheiten(limit=2) + ``` - Note, as the example shows, you don't have to pass credentials for calling - wrapped SOAP queries. This is handled internally. + !!! Note + As the example shows, you don't have to pass credentials for calling + wrapped SOAP queries. This is handled internally. """ def __init__(self, user=None, key=None): @@ -413,12 +416,12 @@ def _missed_units_to_file(data, data_type, missed_units): class MaStRDownload: """Use the higher level interface for bulk download - :class:`.MaStRDownload` builds on top of :class:`.MaStRAPI()` and provides + `MaStRDownload` builds on top of [`MaStRAPI`][open_mastr.soap_api.download.MaStRAPI] and provides an interface for easier downloading. Use methods documented below to retrieve specific data. On the example of data for nuclear power plants, this looks like - .. code-block:: python + ```python from open_mastr.soap_api.download import MaStRDownload @@ -427,10 +430,11 @@ class MaStRDownload: for tech in ["nuclear", "hydro", "wind", "solar", "biomass", "combustion", "gsgk"]: power_plants = mastr_dl.download_power_plants(tech, limit=10) print(power_plants.head()) + ``` - .. warning:: + !!! warning - Be careful with increasing `limit`. Typically, your account allows only for 10.000 API + Be careful with increasing `limit`. Typically, your account allows only for 10,000 API request per day. """ @@ -588,14 +592,13 @@ def download_power_plants(self, data, limit=None): * 'combustion' * 'gsgk' * 'storage' - limit : int - Maximum number of units to be downloaded. 
Defaults to :code:`None`. + Maximum number of units to be downloaded. Returns ------- pd.DataFrame - Joined data tables + Joined data tables. """ # Create data version directory create_data_dir() @@ -693,7 +696,7 @@ def download_power_plants(self, data, limit=None): # Remove duplicates joined_data.drop_duplicates(inplace=True) - to_csv(joined_data, data) #FIXME: reference to helpers im import + to_csv(joined_data, data) # FIXME: reference to helpers im import return joined_data @@ -717,18 +720,16 @@ def basic_unit_data(self, data=None, limit=2000, date_from=None, max_retries=3): data : str, optional Technology data is requested for. See :meth:`MaStRDownload.download_power_plants` for options. - Data is retrieved using :meth:`MaStRAPI.GetGefilterteListeStromErzeuger`. + Data is retrieved using `MaStRAPI.GetGefilterteListeStromErzeuger`. If not given, it defaults to `None`. This implies data for all available technologies is retrieved using the web service function - :meth:`MaStRAPI.GetListeAlleEinheiten`. + `MaStRAPI.GetListeAlleEinheiten`. limit : int, optional Maximum number of units to download. If not provided, data for all units is downloaded. - .. warning: - - Mind the daily request limit for your MaStR account. - + !!! warning + Mind the daily request limit for your MaStR account. date_from: `datetime.datetime()`, optional If specified, only units with latest change date newer than this are queried. Defaults to `None`. @@ -810,21 +811,19 @@ def additional_data(self, data, unit_ids, data_fcn, timeout=10): * "kwk_unit_data" (:meth:`~.kwk_unit_data`): Unit information from KWK unit registry. * "permit_unit_data" (:meth:`~.permit_unit_data`): Information about the permit process of a unit. - timeout: int, optional - Timeout limit for data retrieval for each unit when using multiprocessing + Timeout limit for data retrieval for each unit when using multiprocessing. Defaults to 10. Returns ------- tuple of list of dict and tuple - Returns additional data in dictionaries that are packed into a list. Format - - .. code-block:: python - - return = ( + Returns additional data in dictionaries that are packed into a list. + ```python + return = ( [additional_unit_data_dict1, additional_unit_data_dict2, ...], [tuple("SME930865355925", "Reason for failing dowload"), ...] ) + ``` """ # Prepare a list of unit IDs packed as tuple associated with data prepared_args = list(product(unit_ids, [data])) @@ -884,7 +883,6 @@ def _retrieve_data_in_parallel_process( with multiprocessing.Pool( processes=self.parallel_processes, maxtasksperchild=1 ) as pool: - with tqdm( total=len(prepared_args), desc=f"Downloading {data_fcn} ({data})", @@ -926,9 +924,10 @@ def extended_unit_data(self, unit_specs): unit_specs : tuple *EinheitMastrNummer* and data type as tuple that for example looks like - .. code-block:: python + ```python tuple("SME930865355925", "hydro") + ``` Returns ------- @@ -938,9 +937,10 @@ def extended_unit_data(self, unit_specs): tuple *EinheitMastrNummer* and message the explains why a download failed. Format - .. code-block:: python + ```python tuple("SME930865355925", "Reason for failing dowload") + ``` """ mastr_id, data = unit_specs @@ -987,9 +987,10 @@ def eeg_unit_data(self, unit_specs): tuple *EegMastrNummer* and message the explains why a download failed. Format - .. 
code-block:: python + ```python tuple("EEG961554380393", "Reason for failing dowload") + ``` """ eeg_id, data = unit_specs try: @@ -1013,7 +1014,8 @@ def eeg_unit_data(self, unit_specs): def kwk_unit_data(self, unit_specs): """ - Download KWK (Kraft-Wärme-Kopplung) data for a unit. + Download KWK (german: Kraft-Wärme-Kopplung, english: Combined Heat and Power, CHP) + data for a unit. Additional data collected during a subsidy program for supporting combined heat power plants. @@ -1023,9 +1025,10 @@ def kwk_unit_data(self, unit_specs): unit_specs : tuple *KwkMastrnummer* and data type as tuple that for example looks like - .. code-block:: python + ```python tuple("KWK910493229164", "biomass") + ``` Returns @@ -1036,9 +1039,10 @@ def kwk_unit_data(self, unit_specs): tuple *KwkMastrNummer* and message the explains why a download failed. Format - .. code-block:: python + ```python tuple("KWK910493229164", "Reason for failing dowload") + ``` """ kwk_id, data = unit_specs try: @@ -1069,9 +1073,10 @@ def permit_unit_data(self, unit_specs): unit_specs : tuple *GenMastrnummer* and data type as tuple that for example looks like - .. code-block:: python + ```python tuple("SGE952474728808", "biomass") + ``` Returns @@ -1082,9 +1087,10 @@ def permit_unit_data(self, unit_specs): tuple *GenMastrNummer* and message the explains why a download failed. Format - .. code-block:: python + ```python tuple("GEN952474728808", "Reason for failing dowload") + ``` """ permit_id, data = unit_specs try: @@ -1118,9 +1124,10 @@ def location_data(self, specs): specs : tuple Location *Mastrnummer* and data_name as tuple that for example looks like - .. code-block:: python + ```python tuple("SEL927688371072", "location_elec_generation") + ``` Returns @@ -1131,9 +1138,10 @@ def location_data(self, specs): tuple Location *MastrNummer* and message the explains why a download failed. Format - .. code-block:: python + ```python tuple("SEL927688371072", "Reason for failing dowload") + ``` """ # Unpack tuple argument to two separate variables @@ -1204,7 +1212,7 @@ def basic_location_data(self, limit=2000, date_from=None, max_retries=3): Retrieve basic location data in chunks Retrieves data for all types of locations at once using - :meth:`MaStRAPI.GetListeAlleLokationen`. + `MaStRAPI.GetListeAlleLokationen`. Locations include * Electricity generation location (SEL - Stromerzeugungslokation) @@ -1216,15 +1224,11 @@ def basic_location_data(self, limit=2000, date_from=None, max_retries=3): ---------- limit: int, optional Maximum number of locations to download. - Defaults to 2000. - - .. warning: - Mind the daily request limit for your MaStR account. - - date_from: :obj:`datetime.datetime`, optional + !!! warning + Mind the daily request limit for your MaStR account, usually 10,000 per day. + date_from: `datetime.datetime`, optional If specified, only locations with latest change date newer than this are queried. - Defaults to :obj:`None`. max_retries: int, optional Maximum number of retries for each chunk in case of errors with the connection to the server. @@ -1235,7 +1239,7 @@ def basic_location_data(self, limit=2000, date_from=None, max_retries=3): For each chunk a separate generator is returned all wrapped into another generator. Access with - .. 
code-block:: python + ```python chunks = mastr_dl.basic_location_data( date_from=datetime.datetime(2020, 11, 7, 0, 0, 0), limit=2010 @@ -1244,6 +1248,7 @@ def basic_location_data(self, limit=2000, date_from=None, max_retries=3): for chunk in chunks: for location in chunk: print(location) # prints out one dict per location one after another + ``` """ # Prepare indices for chunked data retrieval chunksize = 2000 diff --git a/open_mastr/soap_api/mirror.py b/open_mastr/soap_api/mirror.py index 70755621..379a1f83 100644 --- a/open_mastr/soap_api/mirror.py +++ b/open_mastr/soap_api/mirror.py @@ -21,51 +21,55 @@ class MaStRMirror: """ - Mirror the Marktstammdatenregister database and keep it up-to-date + Mirror the Marktstammdatenregister database and keep it up-to-date. A PostgreSQL database is used to mirror the MaStR database. It builds on functionality for bulk data download - provided by :class:`open_mastr.soap_api.download.MaStRDownload`. + provided by [`MaStRDownload`][open_mastr.soap_api.download.MaStRDownload]. A rough overview is given by the following schema on the example of wind power units. + ![Schema on the example of downloading wind power units using the API](../images/MaStR_Mirror.svg){ loading=lazy width=70% align=center} - .. figure:: /images/MaStR_Mirror.svg - :width: 70% - :align: center - Initially, basic unit data gets backfilled with :meth:`~.backfill_basic` + + Initially, basic unit data gets backfilled with `~.backfill_basic` (downloads basic unit data for 2,000 units of type 'solar'). - .. code-block:: python + ```python from open_mastr.soap_api.prototype_mastr_reflected import MaStRMirror mastr_mirror = MaStRMirror() mastr_mirror.backfill_basic("solar", limit=2000) - + ``` Based on this, requests for additional data are created. This happens during backfilling basic data. But it is also possible to (re-)create - requests for remaining additional data using :meth:`~.create_additional_data_requests`. + requests for remaining additional data using + [`MaStRMirror.create_additional_data_requests`][open_mastr.soap_api.mirror.MaStRMirror.create_additional_data_requests]. - .. code-block:: python + ```python mastr_mirror.create_additional_data_requests("solar") + ``` Additional unit data, in the case of wind power this is extended data, EEG data and permit data, can be - retrieved subsequently by :meth:`~.retrieve_additional_data`. - - .. code-block:: python - - mastr_mirror.retrieve_additional_data("solar", ["unit_data"]) + retrieved subsequently by `~.retrieve_additional_data`. + ```python + mastr_mirror.retrieve_additional_data("solar", ["unit_data"]) + ``` The data can be joined to one table for each data type and exported to - CSV files using :meth:`~.to_csv`. + CSV files using [`Mastr.to_csv`][open_mastr.mastr.Mastr.to_csv]. + + Also consider to use `~.dump` and `~.restore` for specific purposes. - Also consider to use :meth:`~.dump` and :meth:`~.restore` for specific purposes. + !!! Note + This feature was built before the bulk download was offered at marktstammdatenregister.de. + It can still be used to compare the two datasets received from the API and the bulk download. """ @@ -106,14 +110,14 @@ def __init__( self.unit_type_map = UNIT_TYPE_MAP self.unit_type_map_reversed = reverse_unit_type_map() - def backfill_basic(self, data=None, date=None, limit=10 ** 8) -> None: + def backfill_basic(self, data=None, date=None, limit=10**8) -> None: """Backfill basic unit data. Fill database table 'basic_units' with data. 
It allows specification of which data should be retrieved via the described parameter options. - Under the hood, :meth:`open_mastr.soap_api.download.MaStRDownload.basic_unit_data` is used. + Under the hood, [`MaStRDownload.basic_unit_data`][open_mastr.soap_api.download.MaStRDownload.basic_unit_data] is used. Parameters ---------- @@ -122,7 +126,6 @@ def backfill_basic(self, data=None, date=None, limit=10 ** 8) -> None: * ['solar']: Backfill data for a single data type. * ['solar', 'wind'] (`list`): Backfill data for multiple technologies given in a list. - date: None, :class:`datetime.datetime`, str Specify backfill date from which on data is retrieved @@ -136,13 +139,11 @@ def backfill_basic(self, data=None, date=None, limit=10 ** 8) -> None: Hence, it works in combination with `data=None` and `data=["wind", "solar"]` for example. - .. warning:: + !!! warning Don't use 'latest' in combination with `limit`. This might lead to unexpected results. * `None`: Complete backfill - - Defaults to `None`. limit: int Maximum number of units. Defaults to the large number of 10**8 which means @@ -155,7 +156,7 @@ def backfill_basic(self, data=None, date=None, limit=10 ** 8) -> None: self._write_basic_data_for_one_data_type_to_db(data_type, date, limit) def backfill_locations_basic( - self, limit=10 ** 7, date=None, delete_additional_data_requests=True + self, limit=10**7, date=None, delete_additional_data_requests=True ): """ Backfill basic location data. @@ -164,12 +165,12 @@ def backfill_locations_basic( of which data should be retrieved via the described parameter options. - Under the hood, :meth:`open_mastr.soap_api.download.MaStRDownload.basic_location_data` + Under the hood, [`MaStRDownload.basic_location_data`][open_mastr.soap_api.download.MaStRDownload.basic_location_data] is used. Parameters ---------- - date: None, :class:`datetime.datetime`, str + date: None, `datetime.datetime`, str Specify backfill date from which on data is retrieved Only data with modification time stamp greater that `date` is retrieved. @@ -177,13 +178,11 @@ def backfill_locations_basic( * `datetime.datetime(2020, 11, 27)`: Retrieve data which is newer than this time stamp * 'latest': Retrieve data which is newer than the newest data already in the table. - .. warning:: + !!! warning Don't use 'latest' in combination with `limit`. This might lead to unexpected results. * `None`: Complete backfill - - Defaults to `None`. limit: int Maximum number of locations to download. Defaults to `None` which means no limit is set and all available data is queried. @@ -198,7 +197,6 @@ def backfill_locations_basic( locations_basic = self.mastr_dl.basic_location_data(limit, date_from=date) for locations_chunk in locations_basic: - # Remove duplicates returned from API locations_chunk_unique = [ location @@ -211,7 +209,6 @@ def backfill_locations_basic( ] with session_scope(engine=self._engine) as session: - # Find units that are already in the DB common_ids = [ _.LokationMastrNummer @@ -257,27 +254,27 @@ def backfill_locations_basic( orm.AdditionalLocationsRequested, new_requests ) - def retrieve_additional_data(self, data, data_type, limit=10 ** 8, chunksize=1000): + def retrieve_additional_data(self, data, data_type, limit=10**8, chunksize=1000): """ Retrieve additional unit data Execute additional data requests stored in - :class:`open_mastr.soap_api.orm.AdditionalDataRequested`. - See also docs of :meth:`open_mastr.soap_api.download.py.MaStRDownload.additional_data` + `open_mastr.soap_api.orm.AdditionalDataRequested`. 
+ See also docs of [`MaStRDownload.additional_data`][open_mastr.soap_api.download.MaStRDownload.additional_data] for more information on how data is downloaded. Parameters ---------- data: `str` See list of available technologies in - :meth:`open_mastr.soap_api.download.py.MaStRDownload.download_power_plants`. + `open_mastr.soap_api.download.py.MaStRDownload.download_power_plants`. data_type: `str` Select type of additional data that is to be retrieved. Choose from "unit_data", "eeg_data", "kwk_data", "permit_data". limit: int Limit number of units that data is download for. Defaults to the very large number 10**8 which refers to query data for existing data requests, for example created by - :meth:`~.create_additional_data_requests`. + `~.create_additional_data_requests`. chunksize: int Data is downloaded and inserted into the database in chunks of `chunksize`. Defaults to 1000. @@ -296,7 +293,6 @@ def retrieve_additional_data(self, data, data_type, limit=10 ** 8, chunksize=100 number_units_queried = 0 while number_units_queried < limit: - with session_scope(engine=self._engine) as session: ( requested_chunk, @@ -348,14 +344,14 @@ def retrieve_additional_data(self, data, data_type, limit=10 ** 8, chunksize=100 break def retrieve_additional_location_data( - self, location_type, limit=10 ** 8, chunksize=1000 + self, location_type, limit=10**8, chunksize=1000 ): """ Retrieve extended location data Execute additional data requests stored in - :class:`open_mastr.soap_api.orm.AdditionalLocationsRequested`. - See also docs of :meth:`open_mastr.soap_api.download.py.MaStRDownload.additional_data` + `open_mastr.soap_api.orm.AdditionalLocationsRequested`. + See also docs of `open_mastr.soap_api.download.py.MaStRDownload.additional_data` for more information on how data is downloaded. Parameters @@ -378,7 +374,6 @@ def retrieve_additional_location_data( locations_queried = 0 while locations_queried < limit: - with session_scope(engine=self._engine) as session: # Get a chunk ( @@ -453,89 +448,88 @@ def retrieve_additional_location_data( log.info("No further data is requested") break - # def create_additional_data_requests( - # self, - # technology, - # data_types=["unit_data", "eeg_data", "kwk_data", "permit_data"], - # delete_existing=True, - # ): - # """ - # Create new requests for additional unit data - # - # For units that exist in basic_units but not in the table for additional - # data of `data_type`, a new data request - # is submitted. - # - # Parameters - # ---------- - # technology: str - # Specify technology additional data should be requested for. - # data_types: list - # Select type of additional data that is to be requested. - # Defaults to all data that is available for a - # technology. - # delete_existing: bool - # Toggle deletion of already existing requests for additional data. - # Defaults to True. 
- # """ - # - # data_requests = [] - # - # with session_scope(engine=self._engine) as session: - # # Check which additional data is missing - # for data_type in data_types: - # if data_type_available := self.orm_map[technology].get(data_type, None): - # log.info( - # f"Create requests for additional data of type {data_type} for {technology}" - # ) - # - # # Get ORM for additional data by technology and data_type - # additional_data_orm = getattr(orm, data_type_available) - # - # # Delete prior additional data requests for this technology and data_type - # if delete_existing: - # session.query(orm.AdditionalDataRequested).filter( - # orm.AdditionalDataRequested.technology == technology, - # orm.AdditionalDataRequested.data_type == data_type, - # ).delete() - # session.commit() - # - # # Query database for missing additional data - # units_for_request = self._get_units_for_request( - # data_type, session, additional_data_orm, technology - # ) - # - # # Prepare data for additional data request - # for basic_unit in units_for_request: - # data_request = { - # "EinheitMastrNummer": basic_unit.EinheitMastrNummer, - # "technology": self.unit_type_map[basic_unit.Einheittyp], - # "data_type": data_type, - # "request_date": datetime.datetime.now( - # tz=datetime.timezone.utc - # ), - # } - # if data_type == "unit_data": - # data_request[ - # "additional_data_id" - # ] = basic_unit.EinheitMastrNummer - # elif data_type == "eeg_data": - # data_request[ - # "additional_data_id" - # ] = basic_unit.EegMastrNummer - # elif data_type == "kwk_data": - # data_request[ - # "additional_data_id" - # ] = basic_unit.KwkMastrNummer - # elif data_type == "permit_data": - # data_request[ - # "additional_data_id" - # ] = basic_unit.GenMastrNummer - # data_requests.append(data_request) - # - # # Insert new requests for additional data into database - # session.bulk_insert_mappings(orm.AdditionalDataRequested, data_requests) + def create_additional_data_requests( + self, + technology, + data_types=["unit_data", "eeg_data", "kwk_data", "permit_data"], + delete_existing=True, + ): + """ + Create new requests for additional unit data + + For units that exist in basic_units but not in the table for additional + data of `data_type`, a new data request + is submitted. + Parameters + ---------- + technology: str + Specify technology additional data should be requested for. + data_types: list + Select type of additional data that is to be requested. + Defaults to all data that is available for a + technology. + delete_existing: bool + Toggle deletion of already existing requests for additional data. + Defaults to True. 
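For orientation, a short usage sketch of this method based on the signature documented above; the import path is inferred from the module location (`open_mastr/soap_api/mirror.py`), the no-argument instantiation follows the `MaStRMirror` class docstring, and the argument values are arbitrary examples:

```python
from open_mastr.soap_api.mirror import MaStRMirror

mastr_mirror = MaStRMirror()

# (Re-)create requests for missing extended and EEG data of solar units,
# deleting any previously created requests first
mastr_mirror.create_additional_data_requests(
    "solar", data_types=["unit_data", "eeg_data"], delete_existing=True
)
```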
+ """ + + data_requests = [] + + with session_scope(engine=self._engine) as session: + # Check which additional data is missing + for data_type in data_types: + if data_type_available := self.orm_map[technology].get(data_type, None): + log.info( + f"Create requests for additional data of type {data_type} for {technology}" + ) + + # Get ORM for additional data by technology and data_type + additional_data_orm = getattr(orm, data_type_available) + + # Delete prior additional data requests for this technology and data_type + if delete_existing: + session.query(orm.AdditionalDataRequested).filter( + orm.AdditionalDataRequested.technology == technology, + orm.AdditionalDataRequested.data_type == data_type, + ).delete() + session.commit() + + # Query database for missing additional data + units_for_request = self._get_units_for_request( + data_type, session, additional_data_orm, technology + ) + + # Prepare data for additional data request + for basic_unit in units_for_request: + data_request = { + "EinheitMastrNummer": basic_unit.EinheitMastrNummer, + "technology": self.unit_type_map[basic_unit.Einheittyp], + "data_type": data_type, + "request_date": datetime.datetime.now( + tz=datetime.timezone.utc + ), + } + if data_type == "unit_data": + data_request[ + "additional_data_id" + ] = basic_unit.EinheitMastrNummer + elif data_type == "eeg_data": + data_request[ + "additional_data_id" + ] = basic_unit.EegMastrNummer + elif data_type == "kwk_data": + data_request[ + "additional_data_id" + ] = basic_unit.KwkMastrNummer + elif data_type == "permit_data": + data_request[ + "additional_data_id" + ] = basic_unit.GenMastrNummer + data_requests.append(data_request) + + # Insert new requests for additional data into database + session.bulk_insert_mappings(orm.AdditionalDataRequested, data_requests) def _add_data_source_and_download_date(self, entry: dict) -> dict: """Adds DatenQuelle = 'APT' and DatumDownload = date.today""" @@ -843,13 +837,10 @@ def _preprocess_additional_data_entry(self, unit_dat, technology, data_type): # non-compatible with sqlite or postgresql # This replaces the list with the first element in the list - data_as_list = ["NetzbetreiberMastrNummer","Netzbetreiberzuordnungen"] + data_as_list = ["NetzbetreiberMastrNummer", "Netzbetreiberzuordnungen"] for dat in data_as_list: - if ( - dat in unit_dat - and type(unit_dat[dat]) == list - ): + if dat in unit_dat and type(unit_dat[dat]) == list: if len(unit_dat[dat]) > 0: unit_dat[dat] = f"{unit_dat[dat][0]}" else: @@ -987,9 +978,8 @@ def restore(self, dumpfile): dump is restored from CWD. - Warnings - -------- - If tables that are restored from the dump contain data, restore doesn't work! + !!! warning + If tables that are restored from the dump contain data, restore doesn't work! 
""" # Interpret file name and path @@ -1015,7 +1005,7 @@ def restore(self, dumpfile): proc.wait() -def list_of_dicts_to_columns(row) -> pd.Series: #FIXME: Function not used +def list_of_dicts_to_columns(row) -> pd.Series: # FIXME: Function not used """ Expand data stored in dict to spearate columns @@ -1041,5 +1031,3 @@ def list_of_dicts_to_columns(row) -> pd.Series: #FIXME: Function not used columns[k].append(v) return pd.Series(columns) - - diff --git a/setup.py b/setup.py index 90c56813..ee8f092f 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ -from setuptools import setup, find_packages -from os import path import os +from os import path + +from setuptools import setup here = path.abspath(path.dirname(__file__)) @@ -64,6 +65,10 @@ "xmltodict", "pre-commit", "bump2version", + "mkdocstrings[python]", + "mkdocs-material", + "mkdocs-include-markdown-plugin", + "mike" ] }, package_data={