Skip to content

Commit

Permalink
SNOW-1491199 add ast-encoding test in precommit
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-azhan committed Oct 25, 2024
1 parent 8383698 commit 95fd33e
Show file tree
Hide file tree
Showing 5 changed files with 268 additions and 0 deletions.
56 changes: 56 additions & 0 deletions .github/workflows/precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,62 @@ jobs:
.tox/.coverage
.tox/coverage.xml
test-ast-encoding:
  # Runs the `py<version>-ast` tox environment on every OS in the matrix.
  # TZ is pinned to EST for the test run (see the TODO on the Windows step).
  name: Test AST Encoding py-${{ matrix.os }}-${{ matrix.python-version }}
  needs: build
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      os: [macos-latest, windows-latest, ubuntu-latest]
      # TODO SNOW-1763185 use py39 by default
      # Protobuf encoding differs per python version, use 3.8 by default.
      python-version: ["3.8"]
      cloud-provider: [aws]
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: Display Python version
      run: python -c "import sys; print(sys.version)"
    - name: Decrypt parameters.py
      shell: bash
      run: .github/scripts/decrypt_parameters.sh
      env:
        PARAMETER_PASSWORD: ${{ secrets.PARAMETER_PASSWORD }}
        CLOUD_PROVIDER: ${{ matrix.cloud-provider }}
    # Fetches the artifact named `wheel` into ./dist
    # (presumably uploaded by the `build` job this job depends on - confirm).
    - name: Download wheel(s)
      uses: actions/download-artifact@v4
      with:
        name: wheel
        path: dist
    - name: Show wheels downloaded
      run: ls -lh dist
      shell: bash
    - name: Upgrade setuptools, pip and wheel
      run: python -m pip install -U setuptools pip wheel
    - name: Install tox
      run: python -m pip install tox
    - name: Set up windows environment
      if: runner.os == 'Windows'
      # TODO: SNOW-1763186 use pytest marker to test multiple TZs
      # Persist TZ for the following steps. A single append is enough; the
      # explicit utf8 encoding keeps the env file readable regardless of the
      # PowerShell edition that runs this step.
      run: |
        echo "TZ=EST" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
    - name: Run tests
      # ${PYTHON_VERSION/\./} drops the dot, e.g. "3.8" -> "38", giving the
      # tox environment name "py38-ast".
      run: TZ=EST python -m tox -e "py${PYTHON_VERSION/\./}-ast"
      env:
        PYTHON_VERSION: ${{ matrix.python-version }}
        cloud_provider: ${{ matrix.cloud-provider }}
        PYTEST_ADDOPTS: --color=yes --tb=short
        TOX_PARALLEL_NO_SPINNER: 1
        SNOWFLAKE_IS_PYTHON_RUNTIME_TEST: 1
        TZ: EST
      shell: bash

test-snowpark-pandas:
name: Test modin-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
needs: build
Expand Down
38 changes: 38 additions & 0 deletions tests/ast/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
## AST Tests

This driver enables testing of the AST generation that will be used in the server-side Snowpark implementation, starting with Phase 0.

All generated AST should be tested using this mechanism. To add a test, create a new file under `tests/ast/data`. Files look like the following example. The test driver sets up the session and looks at the accumulated lazy values in the resulting environment.

N.B. No eager evaluation is permitted, as any intermediate batches will not be observed. This can easily be changed if necessary, however.

```python
## TEST CASE

df = session.table(tables.table1)
df = df.filter("STR LIKE '%e%'")

## EXPECTED ENCODED AST

[...]

## EXPECTED UNPARSER OUTPUT

res1 = session.table('table1')

res2 = res1.filter('STR LIKE '%e%'')
```

To generate the expected output the first time the test is run, or when the AST generation changes, run:
```bash
pytest --update-expectations tests/ast
```

For these tests to work, the Unparser must be built in the monorepo:
```bash
cd my-monorepo-path
cd Snowflake/unparser
sbt assembly
```

The location of the Unparser JAR can be set either via the environment variable `SNOWPARK_UNPARSER_JAR` or via the _pytest_ command-line argument `--unparser-jar=<path>`. If both are given, the command-line argument takes precedence.
158 changes: 158 additions & 0 deletions tests/ast/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
#
import logging
import os
from functools import cached_property

import pytest

from snowflake.snowpark import Session


def default_unparser_path():
    """Return the Unparser JAR path to use when ``--unparser-jar`` is not given.

    A non-empty ``SNOWPARK_UNPARSER_JAR`` environment variable takes priority.
    Otherwise the assembly JAR location inside a monorepo checkout under the
    current user's home directory is returned.

    Returns:
        str: The candidate JAR path. Its existence is not checked here.
    """
    explicit = os.getenv("SNOWPARK_UNPARSER_JAR")
    if explicit:
        return explicit
    # os.getenv("HOME") is None when HOME is unset (typical on Windows), which
    # would yield a path starting with the literal "None/". expanduser() still
    # honours HOME on POSIX but falls back to the platform's home lookup.
    home = os.path.expanduser("~")
    return f"{home}/Snowflake/trunk/Snowpark/unparser/target/scala-2.13/unparser-assembly-0.1.jar"


def pytest_addoption(parser):
    """Register the command-line options used by the AST tests.

    Args:
        parser: The pytest option parser passed to this hook.
    """
    # Each entry is (flag, keyword arguments forwarded to parser.addoption).
    option_specs = (
        (
            "--unparser-jar",
            dict(
                action="store",
                default=default_unparser_path(),
                type=str,
                help="Path to the Unparser JAR built in the monorepo. To build it, run `sbt assembly` from the unparser directory.",
            ),
        ),
        (
            "--update-expectations",
            dict(
                action="store_true",
                default=False,
                help="If set, overwrite test files with the actual output as the expected output.",
            ),
        ),
    )
    for flag, settings in option_specs:
        parser.addoption(flag, **settings)


def pytest_configure(config):
    """Publish the AST test options as attributes on the ``pytest`` module.

    Sets ``pytest.unparser_jar`` to the configured JAR path, or to ``None``
    (after logging an error) when no file exists at that path, and sets
    ``pytest.update_expectations`` from the ``--update-expectations`` flag.

    Args:
        config: The pytest config object passed to this hook.
    """
    jar_path = config.getoption("--unparser-jar")
    if os.path.exists(jar_path):
        pytest.unparser_jar = jar_path
    else:
        pytest.unparser_jar = None
        # Report the path that was actually probed; formatting the message
        # from pytest.unparser_jar after resetting it would always print None.
        logging.error(
            f"Unparser JAR not found at {jar_path}. "
            f"Please set the correct path with --unparser-jar or SNOWPARK_UNPARSER_JAR."
        )
    pytest.update_expectations = config.getoption("--update-expectations")


class TestTables:
    """Named test tables that are written through the session on first use.

    Every property saves its rows as a table the first time it is read and
    returns the table name; ``cached_property`` keeps that name on the
    instance, so later reads do not save the table again.
    """

    def __init__(self, session) -> None:
        # Session used to create the dataframes and save the tables.
        self._session = session

    @cached_property
    def table1(self) -> str:
        """Save ``table1`` (columns ``num``, ``str``) and return its name."""
        rows = [
            [1, "one"],
            [2, "two"],
            [3, "three"],
        ]
        return self._save_table("table1", rows, schema=["num", "str"])

    @cached_property
    def table2(self) -> str:
        """Save ``table2`` (index, list, map and string columns) and return its name."""
        rows = [
            [1, [1, 2, 3], {"Ashi Garami": "Single Leg X"}, "Kimura"],
            [2, [11, 22], {"Sankaku": "Triangle"}, "Coffee"],
            [3, [], {}, "Tea"],
        ]
        columns = ["idx", "lists", "maps", "strs"]
        return self._save_table("table2", rows, schema=columns)

    @cached_property
    def df1_table(self) -> str:
        """Save ``df1`` (columns ``a``, ``b``) and return its name."""
        rows = [
            [1, 2],
            [3, 4],
        ]
        return self._save_table("df1", rows, schema=["a", "b"])

    @cached_property
    def df2_table(self) -> str:
        """Save ``df2`` (columns ``c``, ``d``) and return its name."""
        rows = [
            [0, 1],
            [3, 4],
        ]
        return self._save_table("df2", rows, schema=["c", "d"])

    @cached_property
    def df3_table(self) -> str:
        """Save the single-row ``df3`` (columns ``a``, ``b``) and return its name."""
        rows = [[1, 2]]
        return self._save_table("df3", rows, schema=["a", "b"])

    @cached_property
    def df4_table(self) -> str:
        """Save the single-row ``df4`` (columns ``b``, ``a``) and return its name."""
        rows = [[2, 1]]
        return self._save_table("df4", rows, schema=["b", "a"])

    @cached_property
    def double_quoted_table(self) -> str:
        """Save a table whose name and second column contain quote characters."""
        quoted_name = '"the#qui.ck#bro.wn#""Fox""won\'t#jump!"'
        columns = ["num", 'Owner\'s""opinion.s']
        rows = [
            [1, "one"],
            [2, "two"],
            [3, "three"],
        ]
        return self._save_table(quoted_name, rows, schema=columns)

    def _save_table(self, name: str, *args, **kwargs):
        """Create a dataframe from the arguments, save it as ``name``, return ``name``.

        Any AST-related keyword arguments supplied by the caller are dropped,
        and both the dataframe creation and the save are invoked with
        ``_emit_ast=False``.
        """
        for ast_key in ("_emit_ast", "_ast_stmt", "_ast"):
            kwargs.pop(ast_key, None)
        frame = self._session.create_dataframe(*args, _emit_ast=False, **kwargs)
        logging.debug("Creating table %s", name)
        frame.write.save_as_table(name, _emit_ast=False)
        return name


# For test performance (especially integration tests), it would be very valuable to create the Snowpark session and the
# temporary tables only once per test session. Unfortunately, the local testing features don't work well with any scope
# setting above "function" (e.g. "module" or "session").
# TODO: SNOW-1763053 use scope="module"
@pytest.fixture(scope="function")
def session():
    """Yield a Snowpark session created with ``local_testing`` enabled.

    ``ast_enabled`` is set to True on the session before it is handed to the
    test; the ``with`` block exits once the test has finished with it.
    """
    local_builder = Session.builder.config("local_testing", True)
    with local_builder.create() as ast_session:
        ast_session.ast_enabled = True
        yield ast_session


@pytest.fixture(scope="function")
def tables(session):
    """Return a ``TestTables`` helper bound to the test's ``session`` fixture."""
    table_helper = TestTables(session)
    return table_helper
10 changes: 10 additions & 0 deletions tests/ast/test_ast_setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
#

from snowflake.snowpark.mock._connection import MockServerConnection


def test_session(session):
    """Check the ``session`` fixture: AST is enabled and the connection is a mock."""
    assert session.ast_enabled
    connection = session._conn
    assert isinstance(connection, MockServerConnection)
6 changes: 6 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ setenv =
unit: SNOWFLAKE_TEST_TYPE = unit
integ: SNOWFLAKE_TEST_TYPE = integ
doctest: SNOWFLAKE_TEST_TYPE = doctest
ast: SNOWFLAKE_TEST_TYPE = ast
# Add common parts into pytest command
SNOWFLAKE_PYTEST_COV_LOCATION = {env:JUNIT_REPORT_DIR:{toxworkdir}}/junit.{envname}-{env:cloud_provider:dev}.xml
SNOWFLAKE_PYTEST_COV_CMD = --cov snowflake.snowpark --junitxml {env:SNOWFLAKE_PYTEST_COV_LOCATION} --cov-report=
Expand Down Expand Up @@ -86,6 +87,9 @@ passenv =
PYTEST_ADDOPTS
SNOWFLAKE_IS_PYTHON_RUNTIME_TEST
snowflake_path
; Below only used in AST tests
TZ
GITHUB_ENV
commands =
notudf: {env:SNOWFLAKE_PYTEST_CMD} -m "{env:SNOWFLAKE_TEST_TYPE} and not udf" {posargs:} src/snowflake/snowpark tests
udf: {env:SNOWFLAKE_PYTEST_CMD} -m "{env:SNOWFLAKE_TEST_TYPE} or udf" {posargs:} src/snowflake/snowpark tests
Expand All @@ -105,6 +109,8 @@ commands =
# This one is only called by jenkins job and the only difference from `snowparkpandasnotdoctest` is that it uses
# MODIN_PYTEST_NO_COV_CMD instead of MODIN_PYTEST_CMD
snowparkpandasjenkins: {env:MODIN_PYTEST_NO_COV_CMD} --durations=20 -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} {env:SNOW_1314507_WORKAROUND_RERUN_FLAGS} tests/unit/modin tests/integ/modin
# Snowpark IR commands:
ast: {env:SNOWFLAKE_PYTEST_DAILY_CMD} -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} tests

[testenv:nopandas]
allowlist_externals = bash
Expand Down

0 comments on commit 95fd33e

Please sign in to comment.