From 99d62f366c055fa39fd2c1ef9921a7878b51b77b Mon Sep 17 00:00:00 2001 From: Alejandro Do Nascimento Mora Date: Tue, 19 Nov 2024 21:21:58 +0100 Subject: [PATCH] feat: add windows support --- .github/workflows/ci.yml | 70 +++++++++++ projects/extension/build.py | 115 +++++++++++------- projects/extension/justfile | 2 +- projects/extension/tests/vectorizer/db.py | 2 + .../tests/vectorizer/test_chunking.py | 6 +- .../tests/vectorizer/test_embedding.py | 5 +- .../tests/vectorizer/test_formatting.py | 6 +- .../extension/tests/vectorizer/test_grants.py | 6 +- .../tests/vectorizer/test_indexing.py | 7 +- .../tests/vectorizer/test_processing.py | 6 +- .../tests/vectorizer/test_scheduling.py | 6 +- .../tests/vectorizer/test_vectorizer.py | 6 +- 12 files changed, 157 insertions(+), 80 deletions(-) create mode 100644 projects/extension/tests/vectorizer/db.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 94b540900..b2684a1f4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,76 @@ jobs: just ext docker-stop just ext docker-rm + build-and-test-extension-windows-pg14: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v4 + - uses: taiki-e/install-action@just + + - name: Install PostgreSQL + uses: ikalnytskyi/action-setup-postgres@v7 + with: + postgres-version: 16 + + - name: Configure Passwordless Authentication + shell: bash + run: | + # This is the PGDATA dir used in the previous step. + PGDATA="$RUNNER_TEMP/pgdata" + # Add trust authentication to pg_hba.conf; the extension tests + # require this, otherwise they will fail. 
+ echo "host all all 127.0.0.1/32 trust" > "$PGDATA/pg_hba.conf" + echo "host all all ::1/128 trust" >> "$PGDATA/pg_hba.conf" + + # Restart PostgreSQL to apply changes + pg_ctl -D "$PGDATA" restart + + - name: Install pgvector + shell: cmd + env: + PGROOT: "C:/Program Files/PostgreSQL/16" + run: | + git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git + cd pgvector + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && ^ + nmake /NOLOGO /F Makefile.win && ^ + nmake /NOLOGO /F Makefile.win install + + - name: Create pip cache directory + run: | + New-Item -Path "C:\Users\runneradmin\AppData\Local\pip\cache" -ItemType Directory -Force + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" # caching pip dependencies + + - name: Install test dependencies + shell: cmd + run: | + pip3 install -r projects/extension/requirements-test.txt + + - name: Build and install extension + shell: cmd + env: + PGAI_EXT_SKIP_BUILD_CHECKS: "true" + run: | + just ext build-install + + - name: Run test server + shell: cmd + env: + PYTHONUTF8: 1 + run: | + start /b python3 -m fastapi dev projects/extension/tests/vectorizer/server.py + + - name: Run test + shell: cmd + run: | + just ext test + build-and-test-pgai: runs-on: ubuntu-latest diff --git a/projects/extension/build.py b/projects/extension/build.py index 6812a5d47..a577f71ad 100755 --- a/projects/extension/build.py +++ b/projects/extension/build.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 import hashlib -import re import os +import platform +import re import shutil import subprocess import sys import tempfile from pathlib import Path - HELP = """Available targets: - help displays this message and exits - build-install runs build followed by install @@ -325,9 +325,10 @@ def build_feature_flags() -> str: def build_sql() -> None: - check_versions() - check_incremental_sql_files(incremental_sql_files()) - 
check_idempotent_sql_files(idempotent_sql_files()) + if not os.environ.get("PGAI_EXT_SKIP_BUILD_CHECKS", "").lower() == "true": + check_versions() + check_incremental_sql_files(incremental_sql_files()) + check_idempotent_sql_files(idempotent_sql_files()) build_control_file() hr = "".rjust(80, "-") # "horizontal rule" osf = output_sql_file() @@ -375,34 +376,51 @@ def clean_sql() -> None: def postgres_bin_dir() -> Path: - bin_dir = os.getenv("PG_BIN") - if bin_dir: - return Path(bin_dir).resolve() - else: - bin_dir = Path(f"/usr/lib/postgresql/{pg_major()}/bin") + bin_dir_env = os.getenv("PG_BIN") + if bin_dir_env: + bin_dir = Path(bin_dir_env) if bin_dir.is_dir(): - return bin_dir.resolve() - else: - p = shutil.which("pg_config") - if not p: - fatal("pg_config not found") - return Path(p).parent.resolve() + dir = bin_dir.resolve() + print(f"pg bin dir: using PG_BIN environment variable {dir}") + return dir + + bin_dir = Path(f"/usr/lib/postgresql/{pg_major()}/bin") + if platform.system() == "Windows": + program_files = os.environ.get("ProgramFiles", r"C:\Program Files") + bin_dir = Path(f"{program_files}\\PostgreSQL\\{pg_major()}\\bin") + if bin_dir.is_dir(): + dir = bin_dir.resolve() + print(f"pg bin dir: using default postgres bin directory {dir}") + return dir + else: + p = shutil.which("pg_config") + if p is None: + fatal("pg_config not found") + dir = Path(p).parent.resolve() + print(f"pg bin dir: parent directory of pg_config {dir}") + return dir def pg_config() -> Path: - return postgres_bin_dir() / "pg_config" + cmd = "pg_config" + if platform.system() == "Windows": + cmd = "pg_config.exe" + return postgres_bin_dir() / cmd def extension_install_dir() -> Path: - proc = subprocess.run( - f"{pg_config()} --sharedir", - check=True, - shell=True, - env=os.environ, - text=True, - capture_output=True, - ) - return Path(str(proc.stdout).strip()).resolve() / "extension" + try: + proc = subprocess.run( + [pg_config(), "--sharedir"], + check=True, + env=os.environ, + 
text=True, + capture_output=True, + ) + return Path(str(proc.stdout).strip()).resolve() / "extension" + except subprocess.CalledProcessError as e: + print(f"Error running pg_config: {e.stderr}", file=sys.stderr) + raise e def install_sql() -> None: @@ -437,9 +455,12 @@ def python_install_dir() -> Path: # don't do it. i'm warning you # seriously. # you'll wreck old versions. look at build_idempotent_sql_file() - return Path( - "/usr/local/lib/pgai" - ).resolve() # CONTROLS WHERE THE PYTHON LIB AND DEPS ARE INSTALLED + if platform.system() == "Windows": + program_files = Path(os.environ.get("ProgramFiles", r"C:\Program Files")) + return (program_files / "pgai" / "lib").resolve() + else: + # Use the existing Unix-like directory structure + return Path("/usr/local/lib/pgai").resolve() def install_old_py_deps() -> None: @@ -468,25 +489,25 @@ def install_prior_py() -> None: fatal(f"'{os.sep}' in version {version}. this is not supported") version_target_dir = python_install_dir().joinpath(version) if version_target_dir.exists(): + print(f"Version {version} already installed in {version_target_dir}") continue - tmp_dir = Path(tempfile.gettempdir()).joinpath("pgai", version) - tmp_dir.mkdir(parents=True, exist_ok=True) - branch = git_tag(version) - subprocess.run( - f"git clone https://github.com/timescale/pgai.git --branch {branch} {tmp_dir}", - shell=True, - check=True, - env=os.environ, - ) - tmp_src_dir = tmp_dir.joinpath("projects", "extension").resolve() - subprocess.run( - f'pip3 install -v --compile -t "{version_target_dir}" "{tmp_src_dir}"', - check=True, - shell=True, - env=os.environ, - cwd=str(tmp_src_dir), - ) - shutil.rmtree(tmp_dir) + + with tempfile.TemporaryDirectory() as tmp_dir: + branch = git_tag(version) + subprocess.run( + f"git clone https://github.com/timescale/pgai.git --branch {branch} {tmp_dir}", + shell=True, + check=True, + env=os.environ, + ) + tmp_src_dir = Path(tmp_dir).joinpath("projects", "extension").resolve() + subprocess.run( + f'pip3 
install -v --compile -t "{version_target_dir}" "{tmp_src_dir}"', + check=True, + shell=True, + env=os.environ, + cwd=str(tmp_src_dir), + ) def build_init_py() -> None: diff --git a/projects/extension/justfile b/projects/extension/justfile index a4cf73ad3..ae0b6c850 100644 --- a/projects/extension/justfile +++ b/projects/extension/justfile @@ -3,7 +3,7 @@ PG_BIN := "/usr/lib/postgresql/" + PG_MAJOR + "/bin" # Show list of recipes default: - @just --list + @just --list ci: docker-build docker-run #!/usr/bin/env bash diff --git a/projects/extension/tests/vectorizer/db.py b/projects/extension/tests/vectorizer/db.py new file mode 100644 index 000000000..00173869c --- /dev/null +++ b/projects/extension/tests/vectorizer/db.py @@ -0,0 +1,2 @@ +def db_url(user: str) -> str: + return f"postgres://{user}@127.0.0.1:5432/test" diff --git a/projects/extension/tests/vectorizer/test_chunking.py b/projects/extension/tests/vectorizer/test_chunking.py index de78f4f74..dfe7da74b 100644 --- a/projects/extension/tests/vectorizer/test_chunking.py +++ b/projects/extension/tests/vectorizer/test_chunking.py @@ -3,16 +3,14 @@ import psycopg import pytest +from .db import db_url + # skip tests in this module if disabled enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS") if enable_vectorizer_tests == "0": pytest.skip(allow_module_level=True) -def db_url(user: str) -> str: - return f"postgres://{user}@127.0.0.1:5432/test" - - def test_chunking_character_text_splitter(): tests = [ ( diff --git a/projects/extension/tests/vectorizer/test_embedding.py b/projects/extension/tests/vectorizer/test_embedding.py index 030f570cf..f392a09dd 100644 --- a/projects/extension/tests/vectorizer/test_embedding.py +++ b/projects/extension/tests/vectorizer/test_embedding.py @@ -2,6 +2,7 @@ import psycopg import pytest +from .db import db_url # skip tests in this module if disabled enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS") @@ -9,10 +10,6 @@ pytest.skip(allow_module_level=True) -def 
db_url(user: str) -> str: - return f"postgres://{user}@127.0.0.1:5432/test" - - def test_embedding_openai(): tests = [ ( diff --git a/projects/extension/tests/vectorizer/test_formatting.py b/projects/extension/tests/vectorizer/test_formatting.py index 1c36f01c0..dfac48b41 100644 --- a/projects/extension/tests/vectorizer/test_formatting.py +++ b/projects/extension/tests/vectorizer/test_formatting.py @@ -3,16 +3,14 @@ import psycopg import pytest +from .db import db_url + # skip tests in this module if disabled enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS") if enable_vectorizer_tests == "0": pytest.skip(allow_module_level=True) -def db_url(user: str) -> str: - return f"postgres://{user}@127.0.0.1:5432/test" - - def test_formatting_python_template(): tests = [ ( diff --git a/projects/extension/tests/vectorizer/test_grants.py b/projects/extension/tests/vectorizer/test_grants.py index 951a1ed0e..55959560b 100644 --- a/projects/extension/tests/vectorizer/test_grants.py +++ b/projects/extension/tests/vectorizer/test_grants.py @@ -3,16 +3,14 @@ import psycopg import pytest +from .db import db_url + # skip tests in this module if disabled enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS") if enable_vectorizer_tests == "0": pytest.skip(allow_module_level=True) -def db_url(user: str) -> str: - return f"postgres://{user}@127.0.0.1:5432/test" - - def test_grant_to(): tests = [ ( diff --git a/projects/extension/tests/vectorizer/test_indexing.py b/projects/extension/tests/vectorizer/test_indexing.py index 5ec08f6dd..a72f24a65 100644 --- a/projects/extension/tests/vectorizer/test_indexing.py +++ b/projects/extension/tests/vectorizer/test_indexing.py @@ -3,16 +3,15 @@ import psycopg import pytest + +from .db import db_url + # skip tests in this module if disabled enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS") if enable_vectorizer_tests == "0": pytest.skip(allow_module_level=True) -def db_url(user: str) -> str: - return 
f"postgres://{user}@127.0.0.1:5432/test" - - def test_indexing_none(): tests = [ ( diff --git a/projects/extension/tests/vectorizer/test_processing.py b/projects/extension/tests/vectorizer/test_processing.py index 056cc47d4..3be61eca3 100644 --- a/projects/extension/tests/vectorizer/test_processing.py +++ b/projects/extension/tests/vectorizer/test_processing.py @@ -3,16 +3,14 @@ import psycopg import pytest +from .db import db_url + # skip tests in this module if disabled enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS") if enable_vectorizer_tests == "0": pytest.skip(allow_module_level=True) -def db_url(user: str) -> str: - return f"postgres://{user}@127.0.0.1:5432/test" - - def test_processing_default(): tests = [ ( diff --git a/projects/extension/tests/vectorizer/test_scheduling.py b/projects/extension/tests/vectorizer/test_scheduling.py index 50453e8fc..001be5f93 100644 --- a/projects/extension/tests/vectorizer/test_scheduling.py +++ b/projects/extension/tests/vectorizer/test_scheduling.py @@ -3,16 +3,14 @@ import psycopg import pytest +from .db import db_url + # skip tests in this module if disabled enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS") if enable_vectorizer_tests == "0": pytest.skip(allow_module_level=True) -def db_url(user: str) -> str: - return f"postgres://{user}@127.0.0.1:5432/test" - - def test_scheduling_none(): tests = [ ( diff --git a/projects/extension/tests/vectorizer/test_vectorizer.py b/projects/extension/tests/vectorizer/test_vectorizer.py index 040ae1c82..f07d1f1f8 100644 --- a/projects/extension/tests/vectorizer/test_vectorizer.py +++ b/projects/extension/tests/vectorizer/test_vectorizer.py @@ -6,6 +6,8 @@ import pytest from psycopg.rows import namedtuple_row +from .db import db_url + # skip tests in this module if disabled enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS") if enable_vectorizer_tests == "0": @@ -166,10 +168,6 @@ """.strip() -def db_url(user: str) -> str: - return 
f"postgres://{user}@127.0.0.1:5432/test" - - def psql_cmd(cmd: str) -> str: cmd = f'''psql -X -d "{db_url('test')}" -c "{cmd}"''' proc = subprocess.run(cmd, shell=True, check=True, text=True, capture_output=True)