From ed226f5101fd02eb9342dd758ed8b5ea5105b383 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Wed, 9 Oct 2024 15:13:34 -0700 Subject: [PATCH] try zip import --- .../modin/test_modin_stored_procedures.py | 58 +++++++++++++------ 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/tests/integ/modin/test_modin_stored_procedures.py b/tests/integ/modin/test_modin_stored_procedures.py index 045639fb3f3..f397bb7ded8 100644 --- a/tests/integ/modin/test_modin_stored_procedures.py +++ b/tests/integ/modin/test_modin_stored_procedures.py @@ -3,6 +3,8 @@ # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. # +import os + import modin.pandas as pd from snowflake.snowpark import Session @@ -21,10 +23,30 @@ "numpy", ] +# Snowpark pandas strictly pins the modin dependency version, so while testing a dependency upgrade, +# we need to upload snowflake-snowpark-python as a zip file. Otherwise, the conda package solver +# will resolve snowflake-snowpark-python==1.16.0, the newest version which does not pin a modin +# version. +# We still specify snowflake-snowpark-python in the package list to prevent the sproc registration +# code from failing in the solver step; the import here will override whatever version is chosen. +IMPORT_LIST = [ + # The current path of this file is `tests/integ/modin/test_modin_stored_procedures.py`, so we need + # to go back to the repository root to reach `src/snowflake/snowpark/`. 
+ ( + os.path.join( + os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + ), + "src/snowflake/snowpark", + ), + "snowflake.snowpark", + ), +] + @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_head(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: df = pd.DataFrame( [["a", 2.1, 1], ["b", 4.2, 2], ["c", 6.3, None]], @@ -41,7 +63,7 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_dropna(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> int: default_index_snowpark_pandas_df = pd.DataFrame( [["a", 2.1, 1], ["b", None, 2], ["c", 6.3, None]], @@ -56,7 +78,7 @@ def run(session_: Session) -> int: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_idx(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}) df_result = df["a"] @@ -67,7 +89,7 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_loc(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}) df_result = df.loc[df["a"] > 2] @@ -78,7 +100,7 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_iloc(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}) df_result = df.iloc[0, 1] @@ -89,7 +111,7 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_missing_val(session): - @sproc(packages=PACKAGE_LIST) + 
@sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> int: import numpy as np @@ -110,7 +132,7 @@ def run(session_: Session) -> int: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_type_conv(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: df = pd.DataFrame({"int": [1, 2, 3], "str": ["4", "5", "6"]}) df_result = df.astype(float)["int"].iloc[0] @@ -121,14 +143,14 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=8, sproc_count=2) def test_sproc_binary_ops(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def add(session_: Session) -> str: df_1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) df_2 = pd.DataFrame([[6, 7, 8]]) df_result = df_1.add(df_2) return str(df_result) - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def plus(session_: Session) -> str: s1 = pd.Series([1, 2, 3]) s2 = pd.Series([2, 2, 2]) @@ -141,7 +163,7 @@ def plus(session_: Session) -> str: @sql_count_checker(query_count=8, sproc_count=2) def test_sproc_agg(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run_agg(session_: Session) -> str: import numpy as np @@ -152,7 +174,7 @@ def run_agg(session_: Session) -> str: df_result = df.agg(["sum", "min"]) return str(df_result) - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run_median(session_: Session) -> str: import numpy as np @@ -172,7 +194,7 @@ def run_median(session_: Session) -> str: @sql_count_checker(query_count=8, sproc_count=2) def test_sproc_merge(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run_merge(session_: Session) -> str: df1 = pd.DataFrame( {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]} @@ -183,7 +205,7 @@ def run_merge(session_: Session) -> str: df_result = 
df1.merge(df2, left_on="lkey", right_on="rkey") return str(df_result["value_x"]) - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run_join(session_: Session) -> str: df = pd.DataFrame( { @@ -207,7 +229,7 @@ def run_join(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_groupby(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: df = pd.DataFrame( { @@ -226,7 +248,7 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_pivot(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: df = pd.DataFrame( { @@ -260,7 +282,7 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_apply(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: import numpy as np @@ -273,7 +295,7 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_applymap(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> str: df = pd.DataFrame([[1, 2.12], [3.356, 4.567]]) df_result = df.applymap(lambda x: len(str(x))) @@ -284,7 +306,7 @@ def run(session_: Session) -> str: @sql_count_checker(query_count=4, sproc_count=1) def test_sproc_devguide_example(session): - @sproc(packages=PACKAGE_LIST) + @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST) def run(session_: Session) -> int: # Create a Snowpark Pandas DataFrame with sample data. df = pd.DataFrame(