try zip import

snowflakedb · Oct 9, 2024 · ed226f5 · ed226f5
1 parent 2a85e96
commit ed226f5
Showing 1 changed file with 40 additions and 18 deletions.
diff --git a/tests/integ/modin/test_modin_stored_procedures.py b/tests/integ/modin/test_modin_stored_procedures.py
@@ -3,6 +3,8 @@
 # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
 #
 
+import os
+
 import modin.pandas as pd
 
 from snowflake.snowpark import Session
@@ -21,10 +23,30 @@
     "numpy",
 ]
 
+# Snowpark pandas strictly pins the modin dependency version, so while testing a dependency upgrade,
+# we need to upload snowflake-snowpark-python as a zip file. Otherwise, the conda package solver
+# will resolve snowflake-snowpark-python==1.16.0, the newest version which does not pin a modin
+# version.
+# We still specify snowflake-snowpark-python in the package list to prevent the sproc registration
+# code from failing in the solver step; the import here will override whatever version is chosen.
+IMPORT_LIST = [
+    # The current path of this file is `tests/integ/modin/test_modin_stored_procedures.py`, so we need
+    # to go up four directory levels to the repository root to reach `src/snowflake/snowpark/`.
+    (
+        os.path.join(
+            os.path.dirname(
+                os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+            ),
+            "src/snowflake/snowpark",
+        ),
+        "snowflake.snowpark",
+    ),
+]
+
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_head(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         df = pd.DataFrame(
             [["a", 2.1, 1], ["b", 4.2, 2], ["c", 6.3, None]],
@@ -41,7 +63,7 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_dropna(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> int:
         default_index_snowpark_pandas_df = pd.DataFrame(
             [["a", 2.1, 1], ["b", None, 2], ["c", 6.3, None]],
@@ -56,7 +78,7 @@ def run(session_: Session) -> int:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_idx(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
         df_result = df["a"]
@@ -67,7 +89,7 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_loc(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
         df_result = df.loc[df["a"] > 2]
@@ -78,7 +100,7 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_iloc(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
         df_result = df.iloc[0, 1]
@@ -89,7 +111,7 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_missing_val(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> int:
         import numpy as np
 
@@ -110,7 +132,7 @@ def run(session_: Session) -> int:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_type_conv(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         df = pd.DataFrame({"int": [1, 2, 3], "str": ["4", "5", "6"]})
         df_result = df.astype(float)["int"].iloc[0]
@@ -121,14 +143,14 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=8, sproc_count=2)
 def test_sproc_binary_ops(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def add(session_: Session) -> str:
         df_1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
         df_2 = pd.DataFrame([[6, 7, 8]])
         df_result = df_1.add(df_2)
         return str(df_result)
 
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def plus(session_: Session) -> str:
         s1 = pd.Series([1, 2, 3])
         s2 = pd.Series([2, 2, 2])
@@ -141,7 +163,7 @@ def plus(session_: Session) -> str:
 
 @sql_count_checker(query_count=8, sproc_count=2)
 def test_sproc_agg(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run_agg(session_: Session) -> str:
         import numpy as np
 
@@ -152,7 +174,7 @@ def run_agg(session_: Session) -> str:
         df_result = df.agg(["sum", "min"])
         return str(df_result)
 
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run_median(session_: Session) -> str:
         import numpy as np
 
@@ -172,7 +194,7 @@ def run_median(session_: Session) -> str:
 
 @sql_count_checker(query_count=8, sproc_count=2)
 def test_sproc_merge(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run_merge(session_: Session) -> str:
         df1 = pd.DataFrame(
             {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]}
@@ -183,7 +205,7 @@ def run_merge(session_: Session) -> str:
         df_result = df1.merge(df2, left_on="lkey", right_on="rkey")
         return str(df_result["value_x"])
 
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run_join(session_: Session) -> str:
         df = pd.DataFrame(
             {
@@ -207,7 +229,7 @@ def run_join(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_groupby(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         df = pd.DataFrame(
             {
@@ -226,7 +248,7 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_pivot(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         df = pd.DataFrame(
             {
@@ -260,7 +282,7 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_apply(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         import numpy as np
 
@@ -273,7 +295,7 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_applymap(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> str:
         df = pd.DataFrame([[1, 2.12], [3.356, 4.567]])
         df_result = df.applymap(lambda x: len(str(x)))
@@ -284,7 +306,7 @@ def run(session_: Session) -> str:
 
 @sql_count_checker(query_count=4, sproc_count=1)
 def test_sproc_devguide_example(session):
-    @sproc(packages=PACKAGE_LIST)
+    @sproc(packages=PACKAGE_LIST, imports=IMPORT_LIST)
     def run(session_: Session) -> int:
         # Create a Snowpark Pandas DataFrame with sample data.
         df = pd.DataFrame(