-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SNOW-1818205: Add support for pd.json_normalize (#2657)
<!--- Please answer these questions before creating your pull request. Thanks! --->

1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR.

   <!--- In this section, please add a Snowflake Jira issue number. Note that if a corresponding GitHub issue exists, you should still include the Snowflake Jira issue number. For example, for GitHub issue #1400, you should add "SNOW-1335071" here. --->

   Fixes SNOW-1818205

2. Fill out the following pre-review checklist:

   - [ ] I am adding a new automated test(s) to verify correctness of my new code
     - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
   - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes.
   - [ ] I acknowledge that I have ensured my changes to be thread-safe. Follow the link for more information: [Thread-safe Developer Guidelines](https://docs.google.com/document/d/162d_i4zZ2AfcGRXojj0jByt8EUq-DrSHPPnTa4QvwbA/edit#bookmark=id.e82u4nekq80k)

3. Please describe how your code solves the related issue.

   Add support for pd.json_normalize.
- Loading branch information
1 parent
c8161c4
commit bbd7a62
Showing
6 changed files
with
235 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# | ||
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. | ||
# | ||
import modin.pandas as pd | ||
import pandas as native_pd | ||
import pytest | ||
|
||
from tests.integ.modin.utils import assert_frame_equal | ||
from tests.integ.utils.sql_counter import SqlCounter | ||
|
||
|
||
def test_json_normalize_basic():
    """Compare ``pd.json_normalize`` against native pandas on mixed records.

    The input mixes records whose ``name`` is a nested dict with one whose
    ``name`` is a plain string, and one record has no ``id`` key. Both calls
    run inside a ``SqlCounter`` configured with ``query_count=1``; dtypes are
    not compared.
    """
    records = [
        {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
        {"name": {"given": "Mark", "family": "Regner"}},
        {"id": 2, "name": "Faye Raker"},
    ]

    with SqlCounter(query_count=1):
        modin_result = pd.json_normalize(records)
        native_result = native_pd.json_normalize(records)
        assert_frame_equal(modin_result, native_result, check_dtype=False)
|
||
|
||
@pytest.mark.parametrize("max_level", [0, 1])
def test_json_normalize_max_level(max_level):
    """Compare ``pd.json_normalize`` against native pandas for ``max_level``.

    Each record carries a nested ``fitness`` dict, and the middle record has
    no ``id`` key. The same keyword arguments are forwarded to both
    implementations inside a ``SqlCounter`` configured with ``query_count=1``;
    dtypes are not compared.
    """
    records = [
        {
            "id": 1,
            "name": "Cole Volk",
            "fitness": {"height": 130, "weight": 60},
        },
        {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
        {
            "id": 2,
            "name": "Faye Raker",
            "fitness": {"height": 130, "weight": 60},
        },
    ]
    # One kwargs dict keeps the two calls below in lockstep.
    normalize_kwargs = {"data": records, "max_level": max_level}

    with SqlCounter(query_count=1):
        modin_result = pd.json_normalize(**normalize_kwargs)
        native_result = native_pd.json_normalize(**normalize_kwargs)
        assert_frame_equal(modin_result, native_result, check_dtype=False)
|
||
|
||
def test_json_normalize_record_path_meta():
    """Compare ``pd.json_normalize`` against native pandas with record_path/meta.

    Records are taken from each state's ``counties`` list, with ``state``,
    ``shortname`` and the nested ``info -> governor`` value requested as
    metadata. Both calls run inside a ``SqlCounter`` configured with
    ``query_count=1``; dtypes are not compared.
    """
    states = [
        {
            "state": "Florida",
            "shortname": "FL",
            "info": {"governor": "Rick Scott"},
            "counties": [
                {"name": "Dade", "population": 12345},
                {"name": "Broward", "population": 40000},
                {"name": "Palm Beach", "population": 60000},
            ],
        },
        {
            "state": "Ohio",
            "shortname": "OH",
            "info": {"governor": "John Kasich"},
            "counties": [
                {"name": "Summit", "population": 1234},
                {"name": "Cuyahoga", "population": 1337},
            ],
        },
    ]
    # One kwargs dict keeps the two calls below in lockstep.
    normalize_kwargs = {
        "data": states,
        "record_path": "counties",
        "meta": ["state", "shortname", ["info", "governor"]],
    }

    with SqlCounter(query_count=1):
        modin_result = pd.json_normalize(**normalize_kwargs)
        native_result = native_pd.json_normalize(**normalize_kwargs)
        assert_frame_equal(modin_result, native_result, check_dtype=False)
|
||
|
||
def test_json_normalize_record_prefix():
    """Compare ``pd.json_normalize`` against native pandas for ``record_prefix``.

    ``record_path="A"`` is passed alongside ``record_prefix`` so that the
    prefix is actually applied: per the pandas documentation, the prefix is
    attached to the columns built from the records found at ``record_path``
    (the documented example for this input yields a ``Prefix.0`` column).
    Without ``record_path`` the prefix has nothing to act on and the test
    would not exercise it.

    Both calls run inside a ``SqlCounter`` configured with ``query_count=1``;
    dtypes are not compared.
    """
    data = {"A": [1, 2]}
    # One kwargs dict keeps the two calls below in lockstep.
    normalize_kwargs = {
        "data": data,
        "record_path": "A",
        "record_prefix": "Prefix.",
    }

    with SqlCounter(query_count=1):
        assert_frame_equal(
            pd.json_normalize(**normalize_kwargs),
            native_pd.json_normalize(**normalize_kwargs),
            check_dtype=False,
        )