Skip to content

Commit

Permalink
Merge branch 'develop' into feature-434-documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianK13 authored Aug 21, 2023
2 parents ef69002 + 366b444 commit 6180b0e
Show file tree
Hide file tree
Showing 40 changed files with 2,255 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.13.1
current_version = 0.13.2
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)((?P<release>(a|na))+(?P<build>\d+))?
serialize =
{major}.{minor}.{patch}{release}{build}
Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/ci-develop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ name: CI

on:
workflow_dispatch:
push:
branches:
- develop
pull_request:
branches:
- develop
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/ci-production.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ name: CI

on:
workflow_dispatch:
push:
branches:
- production
pull_request:
branches:
- production
Expand Down Expand Up @@ -34,7 +31,7 @@ jobs:
- name: create package
run: python setup.py sdist
- name: import open-mastr
run: python -m pip install ./dist/open_mastr-0.13.1.tar.gz
run: python -m pip install ./dist/open_mastr-0.13.2.tar.gz
- name: Create credentials file
env:
MASTR_TOKEN: ${{ secrets.MASTR_TOKEN }}
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ ENV/

# PyCharm
.idea
data/
config.ini
postprocessing/plots/

Expand Down
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,22 @@ For each version important additions, changes and removals are listed here.
The format is inspired by [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [v0.13.1] Hotfix - 2023-04-11
## [v0.1X.X] current - 2023-XX-XX
### Added
- User-defined output path for csv, xml, database [#402](https://github.com/OpenEnergyPlatform/open-MaStR/pull/402)
### Changed
- Using sphinx version <7 to build documentation [#454](https://github.com/OpenEnergyPlatform/open-MaStR/pull/454)
### Removed
- Delete `on push` for github workflow [#445](https://github.com/OpenEnergyPlatform/open-MaStR/pull/445)

## [v0.13.2] Hotfix - 2023-08-07
### Changed
- Changed the name of the bulk tables for technology=gsgk [#456](https://github.com/OpenEnergyPlatform/open-MaStR/pull/456)

## [v0.13.1] Hotfix - 2023-04-11
### Added
- Add new table and new columns to the data model [#440](https://github.com/OpenEnergyPlatform/open-MaStR/pull/440)

## [v0.13.0] Maintenance release - 2023-02-16
### Added
- Add a `workflow_dispatch` to run CI pipelines from a button click [#389](https://github.com/OpenEnergyPlatform/open-MaStR/pull/389)
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ authors:
title: "open-MaStR"
type: software
license: AGPL-3.0
version: 0.13.1
version: 0.13.2
doi:
date-released: 2023-04-11
date-released: 2023-08-07
url: "https://github.com/OpenEnergyPlatform/open-MaStR/"
2 changes: 1 addition & 1 deletion docs/_data/raw_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ solar

.. csv-table::
:file: raw/bnetza_mastr_solar_raw.csv
:widths: 20, 35, 15, 15
:widths: 20, 35, 15, 30
:header-rows: 1


Expand Down
5 changes: 5 additions & 0 deletions docs/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ There are some environment variables to customize open-MaStR:
* - SQLITE_DATABASE_PATH
- Path to the SQLite file. This allows the use of multiple instances of the MaStR database. The database instances exist in parallel and are independent of each other.
- `/home/mastr-rabbit/.open-MaStR/data/sqlite/your_custom_instance_name.db`
* - OUTPUT_PATH
- | Path to user-defined output directory for CSV data, XML file and database.
| If not specified, output directory defaults to `$HOME/.open-MaStR/`
- | Linux: `/home/mastr-rabbit/open-mastr-user-defined-output-path`
| Windows: `C:\\Users\\open-mastr-user-defined-output-path`
MaStR account and credentials
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
3 changes: 2 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mkdocstrings[python]
mkdocs-material
mkdocs-include-markdown-plugin
mkdocs-include-markdown-plugin

8 changes: 8 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@
"""

from open_mastr import Mastr
import os

## specify download parameter

# set custom output path for: csv-export, database, xml-export.
# see documentation: https://open-mastr.readthedocs.io/en/latest/advanced.html#environment-variables
# os.environ['OUTPUT_PATH'] = "/your/custom/output_path"

# bulk download
bulk_date = "today"
bulk_cleansing = True
Expand Down Expand Up @@ -67,6 +72,9 @@
db = Mastr()

if __name__ == "__main__":



## download Marktstammdatenregister
# bulk download
db.download(method="bulk", data=data_bulk, date=bulk_date, bulk_cleansing=True)
Expand Down
11 changes: 6 additions & 5 deletions open_mastr/mastr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
create_data_dir,
get_data_version_dir,
get_project_home_dir,
setup_logger,
get_output_dir,
setup_logger
)

from open_mastr.utils.constants import ADDITIONAL_TABLES, TECHNOLOGIES
Expand Down Expand Up @@ -63,12 +64,12 @@ class Mastr:

def __init__(self, engine="sqlite") -> None:
validate_parameter_format_for_mastr_init(engine)

self.output_dir = get_output_dir()
self.home_directory = get_project_home_dir()
self._sqlite_folder_path = os.path.join(self.home_directory, "data", "sqlite")
self._sqlite_folder_path = os.path.join(self.output_dir, "data", "sqlite")
os.makedirs(self._sqlite_folder_path, exist_ok=True)

self.engine = create_database_engine(engine, self.home_directory)
self.engine = create_database_engine(engine, self.output_dir)

print(
f"Data will be written to the following database: {self.engine.url}\n"
Expand Down Expand Up @@ -194,7 +195,7 @@ def download(
if method == "bulk":
# Find the name of the zipped xml folder
bulk_download_date = parse_date_string(date)
xml_folder_path = os.path.join(self.home_directory, "data", "xml_download")
xml_folder_path = os.path.join(self.output_dir, "data", "xml_download")
os.makedirs(xml_folder_path, exist_ok=True)
zipped_xml_file_path = os.path.join(
xml_folder_path,
Expand Down
21 changes: 20 additions & 1 deletion open_mastr/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
def get_project_home_dir():
"""Get root dir of project data
On linux this path equals `$HOME/open-MaStR/`, respectively `~/open-MaStR/`
On linux this path equals `$HOME/.open-MaStR/`, respectively `~/.open-MaStR/`
which is also called `PROJECTHOME`.
Returns
Expand All @@ -47,6 +47,21 @@ def get_project_home_dir():
return os.path.join(os.path.expanduser("~"), ".open-MaStR")


def get_output_dir():
    """Get the output directory for csv data, xml file and database.

    The directory is read from the ``OUTPUT_PATH`` environment variable.
    When the variable is unset or empty, the result of
    ``get_project_home_dir()`` is used instead.

    Returns
    -------
    path-like object
        The value of ``OUTPUT_PATH`` exactly as given (it is not expanded or
        made absolute here), or the project home directory as a fallback.
    """
    # One lookup instead of an ``in`` check followed by ``get``. The
    # truthiness test also treats an empty value as "not specified", so an
    # empty OUTPUT_PATH falls back to the default instead of silently
    # redirecting all output to a path relative to the working directory.
    output_path = os.environ.get("OUTPUT_PATH")
    if output_path:
        return output_path

    return get_project_home_dir()


def get_data_version_dir():
"""
Subdirectory of data/ in PROJECTHOME
Expand All @@ -59,6 +74,10 @@ def get_data_version_dir():
Absolute path to `PROJECTHOME/data/<data-version>/`
"""
data_version = get_data_config()

if "OUTPUT_PATH" in os.environ:
return os.path.join(os.environ.get('OUTPUT_PATH'), "data", data_version)

return os.path.join(get_project_home_dir(), "data", data_version)


Expand Down
4 changes: 2 additions & 2 deletions open_mastr/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@
"biomass": ["anlageneegbiomasse", "einheitenbiomasse"],
"hydro": ["anlageneegwasser", "einheitenwasser"],
"gsgk": [
"anlageneeggeosolarthermiegrubenklaerschlammdruckentspannung",
"einheitengeosolarthermiegrubenklaerschlammdruckentspannung",
"anlageneeggeothermiegrubengasdruckentspannung",
"einheitengeothermiegrubengasdruckentspannung",
],
"combustion": ["anlagenkwk", "einheitenverbrennung"],
"nuclear": ["einheitenkernkraft"],
Expand Down
4 changes: 2 additions & 2 deletions open_mastr/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def chunks(lst, n):
yield lst[i : i + n]


def create_database_engine(engine, home_directory) -> sqlalchemy.engine.Engine:
def create_database_engine(engine, output_dir) -> sqlalchemy.engine.Engine:
if engine == "sqlite":
sqlite_database_path = os.environ.get(
"SQLITE_DATABASE_PATH",
os.path.join(home_directory, "data", "sqlite", "open-mastr.db"),
os.path.join(output_dir, "data", "sqlite", "open-mastr.db"),
)
db_url = f"sqlite:///{sqlite_database_path}"
return create_engine(db_url)
Expand Down
17 changes: 9 additions & 8 deletions open_mastr/xml_download/utils_download_bulk.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import requests
from tqdm import tqdm
import time
from bs4 import BeautifulSoup
import numpy as np
import os
import shutil
from zipfile import ZipFile, BadZipfile
import time
from zipfile import BadZipfile, ZipFile

import numpy as np
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# setup logger
from open_mastr.utils.config import setup_logger
Expand Down Expand Up @@ -71,9 +72,9 @@ def download_xml_Mastr(
url = get_url_from_Mastr_website()
time_a = time.perf_counter()
r = requests.get(url, stream=True)
total_length = int(10000 * 1024 * 1024)
total_length = int(18000 * 1024 * 1024)
with open(save_path, "wb") as zfile, tqdm(
desc=save_path, total=(total_length / 1024 / 1024), unit="MB"
desc=save_path, total=(total_length / 1024 / 1024), unit=""
) as bar:
for chunk in r.iter_content(chunk_size=1024 * 1024):
# chunk size of 1024 * 1024 needs 9 min 11 s = 551 s
Expand Down
Loading

0 comments on commit 6180b0e

Please sign in to comment.