Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

64 rename na limit #72

Merged
merged 16 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dfschema/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def to_file(self, path: Union[str, Path]) -> None:
import yaml

with path.open("w") as f:
yaml.dump(schema_dict, f)
yaml.dump(schema_dict, f, default_flow_style=False)
except ImportError:
raise ImportError("PyYaml is required to load yaml files")
else:
Expand Down
4 changes: 3 additions & 1 deletion dfschema/core/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ def generate_schema_dict_from_df(df: pd.DataFrame) -> dict:
cd["dtype"] = (
"string" if pd.api.types.is_string_dtype(df[col]) else str(df[col].dtype)
)
cd["na_pct_below"] = max(0.01, (df[col].isnull().mean() + 0.1)) # +10%
cd["na_pct_below"] = min(
max(0.01, float(df[col].isnull().mean() + 0.1)), 1.0
) # +10%

if pd.api.types.is_numeric_dtype(df[col]):
add_range = 0.05 * df[col].std()
Expand Down
20 changes: 11 additions & 9 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,20 @@ def test_cli_validate_error():


def test_cli_update():
from tempfile import TemporaryDirectory
from dfschema.cli import app
from dfschema.core.config import CURRENT_PROTOCOL_VERSION

output_path = "active_sales_v2.json"
result = runner.invoke(
app,
[
"update",
"tests/test_schemas/v1/good/active_sales.json",
output_path,
],
)
with TemporaryDirectory() as tmpdirname:
output_path = f"{tmpdirname}/active_sales_v2.json"
result = runner.invoke(
app,
[
"update",
"tests/test_schemas/v1/good/active_sales.json",
output_path,
],
)

assert result.exit_code == 0, result.stdout

Expand Down
7 changes: 5 additions & 2 deletions tests/test_read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_write_schema_file(format, sample_df):
from dfschema import DfSchema
from tempfile import TemporaryDirectory

schema = DfSchema.from_df(sample_df)
schema: DfSchema = DfSchema.from_df(sample_df) # type: ignore

# create a temporary directory using the context manager
with TemporaryDirectory() as tmpdirname:
Expand All @@ -44,7 +44,10 @@ def test_write_schema_file(format, sample_df):
if format == "yml":
import yaml

schema_structure = yaml.safe_load(txt)
try:
schema_structure = yaml.safe_load(txt)
except Exception as e:
raise Exception(txt) from e
elif format == "json":
import json

Expand Down
Loading