Skip to content

Commit

Permalink
try zip import
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-joshi committed Oct 9, 2024
1 parent 2a85e96 commit ed226f5
Showing 1 changed file with 40 additions and 18 deletions.
58 changes: 40 additions & 18 deletions tests/integ/modin/test_modin_stored_procedures.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
#

import os

import modin.pandas as pd

from snowflake.snowpark import Session
Expand All @@ -21,10 +23,30 @@
"numpy",
]

# Snowpark pandas strictly pins the modin dependency version, so while testing a dependency upgrade,
# we need to upload snowflake-snowpark-python as a zip file. Otherwise, the conda package solver
# will resolve snowflake-snowpark-python==1.16.0, the newest version which does not pin a modin
# version.
# We still specify snowflake-snowpark-python in the package list to prevent the sproc registration
# code from failing in the solver step; the import here will override whatever version is chosen.
def _ancestor_dir(path, levels):
    """Return the directory reached by applying ``os.path.dirname`` ``levels`` times to ``path``."""
    for _ in range(levels):
        path = os.path.dirname(path)
    return path


# This file lives at `tests/integ/modin/test_modin_stored_procedures.py`, so the repository
# root is four `dirname` hops up: one to drop the file name, then one each for `modin`,
# `integ` and `tests`. The local Snowpark sources sit under `src/snowflake/snowpark/` there.
_HOPS_TO_REPO_ROOT = 4

# Each entry is a `(local path, import path)` pair handed to `@sproc(imports=...)`, so the
# checked-out sources are uploaded and imported as `snowflake.snowpark` inside the sproc.
IMPORT_LIST = [
    (
        os.path.join(
            _ancestor_dir(__file__, _HOPS_TO_REPO_ROOT),
            "src/snowflake/snowpark",
        ),
        "snowflake.snowpark",
    ),
]


@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_head(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
df = pd.DataFrame(
[["a", 2.1, 1], ["b", 4.2, 2], ["c", 6.3, None]],
Expand All @@ -41,7 +63,7 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_dropna(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> int:
default_index_snowpark_pandas_df = pd.DataFrame(
[["a", 2.1, 1], ["b", None, 2], ["c", 6.3, None]],
Expand All @@ -56,7 +78,7 @@ def run(session_: Session) -> int:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_idx(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
df_result = df["a"]
Expand All @@ -67,7 +89,7 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_loc(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
df_result = df.loc[df["a"] > 2]
Expand All @@ -78,7 +100,7 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_iloc(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
df_result = df.iloc[0, 1]
Expand All @@ -89,7 +111,7 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_missing_val(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> int:
import numpy as np

Expand All @@ -110,7 +132,7 @@ def run(session_: Session) -> int:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_type_conv(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
df = pd.DataFrame({"int": [1, 2, 3], "str": ["4", "5", "6"]})
df_result = df.astype(float)["int"].iloc[0]
Expand All @@ -121,14 +143,14 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=8, sproc_count=2)
def test_sproc_binary_ops(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def add(session_: Session) -> str:
df_1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
df_2 = pd.DataFrame([[6, 7, 8]])
df_result = df_1.add(df_2)
return str(df_result)

@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def plus(session_: Session) -> str:
s1 = pd.Series([1, 2, 3])
s2 = pd.Series([2, 2, 2])
Expand All @@ -141,7 +163,7 @@ def plus(session_: Session) -> str:

@sql_count_checker(query_count=8, sproc_count=2)
def test_sproc_agg(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run_agg(session_: Session) -> str:
import numpy as np

Expand All @@ -152,7 +174,7 @@ def run_agg(session_: Session) -> str:
df_result = df.agg(["sum", "min"])
return str(df_result)

@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run_median(session_: Session) -> str:
import numpy as np

Expand All @@ -172,7 +194,7 @@ def run_median(session_: Session) -> str:

@sql_count_checker(query_count=8, sproc_count=2)
def test_sproc_merge(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run_merge(session_: Session) -> str:
df1 = pd.DataFrame(
{"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]}
Expand All @@ -183,7 +205,7 @@ def run_merge(session_: Session) -> str:
df_result = df1.merge(df2, left_on="lkey", right_on="rkey")
return str(df_result["value_x"])

@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run_join(session_: Session) -> str:
df = pd.DataFrame(
{
Expand All @@ -207,7 +229,7 @@ def run_join(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_groupby(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
df = pd.DataFrame(
{
Expand All @@ -226,7 +248,7 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_pivot(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
df = pd.DataFrame(
{
Expand Down Expand Up @@ -260,7 +282,7 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_apply(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
import numpy as np

Expand All @@ -273,7 +295,7 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_applymap(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> str:
df = pd.DataFrame([[1, 2.12], [3.356, 4.567]])
df_result = df.applymap(lambda x: len(str(x)))
Expand All @@ -284,7 +306,7 @@ def run(session_: Session) -> str:

@sql_count_checker(query_count=4, sproc_count=1)
def test_sproc_devguide_example(session):
@sproc(packages=PACKAGE_LIST)
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
def run(session_: Session) -> int:
# Create a Snowpark Pandas DataFrame with sample data.
df = pd.DataFrame(
Expand Down

0 comments on commit ed226f5

Please sign in to comment.