Skip to content

Commit

Permalink
fixed: issue#64 - correct na_pct_below
Browse files Browse the repository at this point in the history
  • Loading branch information
Casyfill committed Aug 13, 2024
1 parent 960dd19 commit 856d66d
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dfschema/core/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ def generate_schema_dict_from_df(df: pd.DataFrame) -> dict:
columns = []

for col in df.columns:
cd = {"name": col}
cd: dict = {"name": col}

cd["dtype"] = (
"string" if pd.api.types.is_string_dtype(df[col]) else str(df[col].dtype)
)
cd["na_limit"] = max(0.9999, (df[col].isnull().mean() + 0.1)) # +10%
cd["na_pct_below"] = max(0.01, (df[col].isnull().mean() + 0.1)) # +10%

if pd.api.types.is_numeric_dtype(df[col]):
add_range = 0.05 * df[col].std()
Expand Down

0 comments on commit 856d66d

Please sign in to comment.