diff --git a/dfschema/core/core.py b/dfschema/core/core.py index 72ab9cb..5e3dfda 100644 --- a/dfschema/core/core.py +++ b/dfschema/core/core.py @@ -224,7 +224,7 @@ def to_file(self, path: Union[str, Path]) -> None: import yaml with path.open("w") as f: - yaml.dump(schema_dict, f) + yaml.dump(schema_dict, f, default_flow_style=False) except ImportError: raise ImportError("PyYaml is required to load yaml files") else: diff --git a/dfschema/core/generate.py b/dfschema/core/generate.py index 3948c38..8d9ade2 100644 --- a/dfschema/core/generate.py +++ b/dfschema/core/generate.py @@ -15,7 +15,9 @@ def generate_schema_dict_from_df(df: pd.DataFrame) -> dict: cd["dtype"] = ( "string" if pd.api.types.is_string_dtype(df[col]) else str(df[col].dtype) ) - cd["na_pct_below"] = max(0.01, (df[col].isnull().mean() + 0.1)) # +10% + cd["na_pct_below"] = min( + max(0.01, float(df[col].isnull().mean() + 0.1)), 1.0 + ) # +10% if pd.api.types.is_numeric_dtype(df[col]): add_range = 0.05 * df[col].std() diff --git a/tests/test_cli.py b/tests/test_cli.py index 8802a68..82d4879 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -53,18 +53,20 @@ def test_cli_validate_error(): def test_cli_update(): + from tempfile import TemporaryDirectory from dfschema.cli import app from dfschema.core.config import CURRENT_PROTOCOL_VERSION - output_path = "active_sales_v2.json" - result = runner.invoke( - app, - [ - "update", - "tests/test_schemas/v1/good/active_sales.json", - output_path, - ], - ) + with TemporaryDirectory() as tmpdirname: + output_path = f"{tmpdirname}/active_sales_v2.json" + result = runner.invoke( + app, + [ + "update", + "tests/test_schemas/v1/good/active_sales.json", + output_path, + ], + ) assert result.exit_code == 0, result.stdout diff --git a/tests/test_read_write.py b/tests/test_read_write.py index 93f310b..b0d797c 100644 --- a/tests/test_read_write.py +++ b/tests/test_read_write.py @@ -31,7 +31,7 @@ def test_write_schema_file(format, sample_df): from dfschema import DfSchema from tempfile import TemporaryDirectory - schema = DfSchema.from_df(sample_df) + schema: DfSchema = DfSchema.from_df(sample_df) # type: ignore # create a temporary directory using the context manager with TemporaryDirectory() as tmpdirname: @@ -44,7 +44,10 @@ def test_write_schema_file(format, sample_df): if format == "yml": import yaml - schema_structure = yaml.safe_load(txt) + try: + schema_structure = yaml.safe_load(txt) + except Exception as e: + raise Exception(txt) from e elif format == "json": import json