snowflakedb · sfc-gh-azhan · Oct 28, 2024 · Oct 25, 2024 · Oct 25, 2024
@@ -246,6 +246,63 @@ jobs:
             .tox/.coverage
             .tox/coverage.xml
 
+  # Runs the tox "py<version>-ast" environment on every OS in the matrix after
+  # downloading the wheel artifact published by the `build` job.
+  test-ast-encoding:
+    name: Test AST Encoding py-${{ matrix.os }}-${{ matrix.python-version }}
+    needs: build
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest, windows-latest, ubuntu-latest]
+        # TODO SNOW-1763185 use py39 by default
+        # Protobuf encoding differs per python version, use 3.8 by default.
+        python-version: ["3.8"]
+        cloud-provider: [aws]
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Display Python version
+        run: python -c "import sys; print(sys.version)"
+      - name: Decrypt parameters.py
+        shell: bash
+        run: .github/scripts/decrypt_parameters.sh
+        env:
+          PARAMETER_PASSWORD: ${{ secrets.PARAMETER_PASSWORD }}
+          CLOUD_PROVIDER: ${{ matrix.cloud-provider }}
+      - name: Download wheel(s)
+        uses: actions/download-artifact@v4
+        with:
+          name: wheel
+          path: dist
+      - name: Show wheels downloaded
+        run: ls -lh dist
+        shell: bash
+      - name: Upgrade setuptools, pip and wheel
+        run: python -m pip install -U setuptools pip wheel
+      - name: Install tox
+        run: python -m pip install tox
+      - name: Set up windows environment
+        if: runner.os == 'Windows'
+        # windows timezone management is broken, that's why we need a special fix here
+        # TZ is appended to GITHUB_ENV exactly once, with an explicit UTF-8 encoding,
+        # so the result does not depend on the default encoding of PowerShell's `>>`.
+        run: |
+          echo "TZ=EST" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+      - name: Run tests
+        # TODO: SNOW-1763186 use pytest marker to test multiple TZs
+        # "${PYTHON_VERSION/\./}" drops the dot, e.g. "3.8" -> tox env "py38-ast".
+        run: TZ=EST python -m tox -e "py${PYTHON_VERSION/\./}-ast"
+        env:
+          PYTHON_VERSION: ${{ matrix.python-version }}
+          cloud_provider: ${{ matrix.cloud-provider }}
+          PYTEST_ADDOPTS: --color=yes --tb=short
+          TOX_PARALLEL_NO_SPINNER: 1
+          SNOWFLAKE_IS_PYTHON_RUNTIME_TEST: 1
+          TZ: EST
+        shell: bash
+
   test-snowpark-pandas:
     name: Test modin-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
     needs: build

@@ -0,0 +1,40 @@
+[comment]: <> (TODO SNOW-1764181 update readme to match with current test practice)
+
+## AST Tests
+
+This driver enables testing of the AST generation that will be used in the server-side Snowpark implementation, starting with Phase 0.
+
+All generated AST should be tested using this mechanism. To add a test, create a new file under `tests/ast/data`. Files look like the following example. The test driver sets up the session and looks at the accumulated lazy values in the resulting environment.
+
+N.B. No eager evaluation is permitted, as any intermediate batches will not be observed. This can easily be changed if necessary, however.
+
+```python
+## TEST CASE
+
+df = session.table(tables.table1)
+df = df.filter("STR LIKE '%e%'")
+
+## EXPECTED ENCODED AST
+
+[...]
+
+## EXPECTED UNPARSER OUTPUT
+
+res1 = session.table('table1')
+
+res2 = res1.filter('STR LIKE '%e%'')
+```
+
+To generate the expected output the first time the test is run, or when the AST generation changes, run:
+```bash
+pytest --update-expectations tests/ast
+```
+
+For these tests to work, the Unparser must be built in the monorepo:
+```bash
+cd my-monorepo-path
+cd Snowflake/unparser
+sbt assembly
+```
+
+The location of the Unparser can be set either via the environment variable `SNOWPARK_UNPARSER_JAR` or via the _pytest_ command-line argument `--unparser-jar=<path>`.
@@ -0,0 +1,160 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+import logging
+import os
+from functools import cached_property
+
+import pytest
+
+from snowflake.snowpark import Session
+
+
+def default_unparser_path():
+    explicit = os.getenv("SNOWPARK_UNPARSER_JAR")
+    default_default = f"{os.getenv('HOME')}/Snowflake/trunk/Snowpark/unparser/target/scala-2.13/unparser-assembly-0.1.jar"
+    return explicit or default_default
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--unparser-jar",
+        action="store",
+        default=default_unparser_path(),
+        type=str,
+        help="Path to the Unparser JAR built in the monorepo. To build it, run `sbt assembly` from the unparser directory.",
+    )
+    parser.addoption(
+        "--update-expectations",
+        action="store_true",
+        default=False,
+        help="If set, overwrite test files with the actual output as the expected output.",
+    )
+
+
+def pytest_configure(config):
+    pytest.unparser_jar = config.getoption("--unparser-jar")
+    if not os.path.exists(pytest.unparser_jar):
+        pytest.unparser_jar = None
+    pytest.update_expectations = config.getoption("--update-expectations")
+
+    if pytest.unparser_jar is None and pytest.update_expectations:
+        raise RuntimeError(
+            f"Unparser JAR not found at {pytest.unparser_jar}. "
+            f"Please set the correct path with --unparser-jar or SNOWPARK_UNPARSER_JAR."
+        )
+
+
+class TestTables:
+    def __init__(self, session) -> None:
+        self._session = session
+
+    @cached_property
+    def table1(self) -> str:
+        table_name: str = "table1"
+        return self._save_table(
+            table_name,
+            [
+                [1, "one"],
+                [2, "two"],
+                [3, "three"],
+            ],
+            schema=["num", "str"],
+        )
+
+    @cached_property
+    def table2(self) -> str:
+        table_name: str = "table2"
+        return self._save_table(
+            table_name,
+            [
+                [1, [1, 2, 3], {"Ashi Garami": "Single Leg X"}, "Kimura"],
+                [2, [11, 22], {"Sankaku": "Triangle"}, "Coffee"],
+                [3, [], {}, "Tea"],
+            ],
+            schema=["idx", "lists", "maps", "strs"],
+        )
+
+    @cached_property
+    def df1_table(self) -> str:
+        table_name: str = "df1"
+        return self._save_table(
+            table_name,
+            [
+                [1, 2],
+                [3, 4],
+            ],
+            schema=["a", "b"],
+        )
+
+    @cached_property
+    def df2_table(self) -> str:
+        table_name: str = "df2"
+        return self._save_table(
+            table_name,
+            [
+                [0, 1],
+                [3, 4],
+            ],
+            schema=["c", "d"],
+        )
+
+    @cached_property
+    def df3_table(self) -> str:
+        table_name: str = "df3"
+        return self._save_table(
+            table_name,
+            [
+                [1, 2],
+            ],
+            schema=["a", "b"],
+        )
+
+    @cached_property
+    def df4_table(self) -> str:
+        table_name: str = "df4"
+        return self._save_table(
+            table_name,
+            [
+                [2, 1],
+            ],
+            schema=["b", "a"],
+        )
+
+    @cached_property
+    def double_quoted_table(self) -> str:
+        table_name: str = '"the#qui.ck#bro.wn#""Fox""won\'t#jump!"'
+        return self._save_table(
+            table_name,
+            [
+                [1, "one"],
+                [2, "two"],
+                [3, "three"],
+            ],
+            schema=["num", 'Owner\'s""opinion.s'],
+        )
+
+    def _save_table(self, name: str, *args, **kwargs):
+        kwargs.pop("_emit_ast", None)
+        kwargs.pop("_ast_stmt", None)
+        kwargs.pop("_ast", None)
+        df = self._session.create_dataframe(*args, _emit_ast=False, **kwargs)
+        logging.debug("Creating table %s", name)
+        df.write.save_as_table(name, _emit_ast=False)
+        return name
+
+
+# For test performance (especially integration tests), it would be very valuable to create the Snowpark session and the
+# temporary tables only once per test session. Unfortunately, the local testing features don't work well with any scope
+# setting above "function" (e.g. "module" or "session").
+# TODO: SNOW-1748311 use scope="module"
+@pytest.fixture(scope="function")
+def session(local_testing_mode):
+    with Session.builder.config("local_testing", local_testing_mode).create() as s:
+        s.ast_enabled = True
+        yield s
+
+
+@pytest.fixture(scope="function")
+def tables(session):
+    return TestTables(session)
@@ -0,0 +1,10 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+
+from snowflake.snowpark.mock._connection import MockServerConnection
+
+
+def test_ensure_valid_test_setup(session):
+    assert session.ast_enabled
+    assert isinstance(session._conn, MockServerConnection)
@@ -57,6 +57,7 @@ setenv =
     unit: SNOWFLAKE_TEST_TYPE = unit
     integ: SNOWFLAKE_TEST_TYPE = integ
     doctest: SNOWFLAKE_TEST_TYPE = doctest
+    ast: SNOWFLAKE_TEST_TYPE = ast
     # Add common parts into pytest command
     SNOWFLAKE_PYTEST_COV_LOCATION = {env:JUNIT_REPORT_DIR:{toxworkdir}}/junit.{envname}-{env:cloud_provider:dev}.xml
     SNOWFLAKE_PYTEST_COV_CMD = --cov snowflake.snowpark --junitxml {env:SNOWFLAKE_PYTEST_COV_LOCATION} --cov-report=
@@ -86,6 +87,9 @@ passenv =
     PYTEST_ADDOPTS
     SNOWFLAKE_IS_PYTHON_RUNTIME_TEST
     snowflake_path
+    ; Below only used in AST tests
+    TZ
+    GITHUB_ENV
 commands =
     notudf: {env:SNOWFLAKE_PYTEST_CMD} -m "{env:SNOWFLAKE_TEST_TYPE} and not udf" {posargs:} src/snowflake/snowpark tests
     udf: {env:SNOWFLAKE_PYTEST_CMD} -m "{env:SNOWFLAKE_TEST_TYPE} or udf" {posargs:} src/snowflake/snowpark tests
@@ -105,6 +109,8 @@ commands =
     # This one is only called by jenkins job and the only difference from `snowparkpandasnotdoctest` is that it uses
     # MODIN_PYTEST_NO_COV_CMD instead of MODIN_PYTEST_CMD
     snowparkpandasjenkins: {env:MODIN_PYTEST_NO_COV_CMD} --durations=20 -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} {env:SNOW_1314507_WORKAROUND_RERUN_FLAGS} tests/unit/modin tests/integ/modin
+    # Snowpark IR commands:
+    ast: {env:SNOWFLAKE_PYTEST_DAILY_CMD} --local_testing_mode -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} tests
 
 [testenv:nopandas]
 allowlist_externals = bash