From 856d66d6075a6cccf81ab1f547d84e6986fe2d61 Mon Sep 17 00:00:00 2001 From: Philipp Kats Date: Mon, 12 Aug 2024 22:19:17 -0400 Subject: [PATCH] fixed: issue#64 - correct na_pct_below --- dfschema/core/generate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dfschema/core/generate.py b/dfschema/core/generate.py index 134bd03..3948c38 100644 --- a/dfschema/core/generate.py +++ b/dfschema/core/generate.py @@ -10,12 +10,12 @@ def generate_schema_dict_from_df(df: pd.DataFrame) -> dict: columns = [] for col in df.columns: - cd = {"name": col} + cd: dict = {"name": col} cd["dtype"] = ( "string" if pd.api.types.is_string_dtype(df[col]) else str(df[col].dtype) ) - cd["na_limit"] = max(0.9999, (df[col].isnull().mean() + 0.1)) # +10% + cd["na_pct_below"] = max(0.01, (df[col].isnull().mean() + 0.1)) # +10% if pd.api.types.is_numeric_dtype(df[col]): add_range = 0.05 * df[col].std()