Skip to content

Feature/3 return tika metadata #147

Feature/3 return tika metadata

Feature/3 return tika metadata #147

Workflow file for this run

# This file was autogenerated by maturin v1.6.0 using:
# maturin generate-ci github -o ../../.github/workflows/release_pyton_pytest.yml --pytest
#
# Then adapted to the project
#
name: CI

# Triggers: pushes to the main branch, pushes of any tag, every pull request,
# and manual runs started from the Actions UI.
on:
  push:
    branches:
      - main
    tags:
      - '*'
  pull_request:
  workflow_dispatch:

# Runs are grouped by workflow name, git ref and event name; starting a new
# run in a group cancels the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
  cancel-in-progress: true

# Default token permissions for every job that does not declare its own.
permissions:
  contents: read
jobs:
  # Builds the manylinux_2_28 wheel inside a container, uploads it as an
  # artifact, then installs the freshly built wheel and runs the test suite.
  linux:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: ubuntu-latest
            target: x86_64
          # - runner: self-hosted-arm64
          #   target: aarch64
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      # On Linux we don't use the graalvm/setup-graalvm action to install
      # GraalVM because it would install it on the runner machine, while the
      # Linux build happens inside a manylinux docker container.
      # Instead, a script installs GraalVM inside the container; the script is
      # launched through the before-script-linux option of the maturin action.
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          command: build
          # working-directory: "bindings/extractous-python" # don't use working dir with docker, it fails to reapply permission on target dir
          args: --manifest-path bindings/extractous-python/Cargo.toml --release --out bindings/extractous-python/dist -i python3.8 --compatibility manylinux_2_28
          sccache: 'false'
          target: ${{ matrix.platform.target }}
          container: quay.io/pypa/manylinux_2_28_${{ matrix.platform.target }}:latest
          # before-script-linux: .github/workflows/install-openssl.sh
          before-script-linux: .github/workflows/install-graalvm-sdkman.sh 22.0.1-graalce
          # JAVA_HOME/GRAALVM_HOME point at the SDKMAN candidate directory for
          # the same 22.0.1-graalce version passed to the install script above.
          docker-options: "-e JAVA_HOME=/root/.sdkman/candidates/java/22.0.1-graalce -e GRAALVM_HOME=/root/.sdkman/candidates/java/22.0.1-graalce"
          # docker-options: "--mount type=bind,source=/opt/hostedtoolcache,target=/opt/hostedtoolcache -e JAVA_HOME -e GRAALVM_HOME"
      # On Linux we don't need to patch the wheel as the RPATH is set by the rustc compiler
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-linux-${{ matrix.platform.target }}
          path: bindings/extractous-python/dist
      # x86_64 targets are tested directly on the runner.
      - name: pytest
        if: ${{ startsWith(matrix.platform.target, 'x86_64') }}
        shell: bash
        run: |
          set -e
          # Refresh the package index and install non-interactively so the
          # step neither hits stale mirrors nor waits on a confirmation prompt.
          sudo apt-get update
          sudo apt-get install -y tesseract-ocr tesseract-ocr-deu tesseract-ocr-ara
          python3 -m venv .venv
          source .venv/bin/activate
          # --no-index + --find-links: install the wheel built above, not one from PyPI.
          pip install extractous --find-links bindings/extractous-python/dist --no-index --force-reinstall
          pip install pytest scikit-learn
          cd bindings/extractous-python
          pytest -s
      # Non-x86 targets (except ppc64) are tested through run-on-arch-action.
      - name: pytest
        if: ${{ !startsWith(matrix.platform.target, 'x86') && matrix.platform.target != 'ppc64' }}
        uses: uraimo/run-on-arch-action@v2
        with:
          arch: ${{ matrix.platform.target }}
          distro: ubuntu22.04
          githubToken: ${{ github.token }}
          install: |
            apt-get update
            apt-get install -y tesseract-ocr tesseract-ocr-deu tesseract-ocr-ara
            apt-get install -y --no-install-recommends python3 python3-pip
            pip3 install -U pip pytest scikit-learn
          run: |
            set -e
            pip3 install extractous --find-links bindings/extractous-python/dist --no-index --force-reinstall
            cd bindings/extractous-python
            pytest -s

  # Builds the Windows wheel on the runner (GraalVM installed via action),
  # uploads it, then installs the built wheel and runs the test suite.
  windows:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: windows-latest
            target: x64
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.8'
          architecture: ${{ matrix.platform.target }}
      # NOTE(review): the action reference was mangled in the copy of this file
      # under review; restored to the v1 major tag - confirm the intended pin.
      - uses: graalvm/setup-graalvm@v1
        with:
          java-version: '22'
          distribution: 'graalvm-community'
          set-java-home: 'true'
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          working-directory: "bindings/extractous-python"
          args: --release --out dist --find-interpreter
          sccache: 'true'
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-windows-${{ matrix.platform.target }}
          path: bindings/extractous-python/dist
      - name: pytest
        if: ${{ !startsWith(matrix.platform.target, 'aarch64') }}
        shell: cmd
        run: |
          python -m venv .venv
          rem "call" is required: running a .bat from a batch script without it
          rem never returns, so the install and pytest lines below would be skipped.
          call .venv\Scripts\activate.bat
          pip install extractous --find-links bindings/extractous-python/dist --no-index --force-reinstall
          pip install pytest scikit-learn
          cd bindings\extractous-python
          pytest -s

  # Builds the macOS wheels (x86_64 on macos-13, aarch64 on macos-14), patches
  # them with the repository's patch script, uploads them, then runs the tests.
  macos:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: macos-13
            target: x86_64
          - runner: macos-14
            target: aarch64
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      # NOTE(review): the action reference was mangled in the copy of this file
      # under review; restored to the v1 major tag - confirm the intended pin.
      - uses: graalvm/setup-graalvm@v1
        with:
          java-version: '22'
          distribution: 'liberica'
          set-java-home: 'true'
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          working-directory: "bindings/extractous-python"
          args: --release --out dist --find-interpreter
          sccache: 'true'
      # Runs before the upload so the uploaded artifact is the patched wheel.
      - name: Patch wheel lib
        run: |
          set -e
          python3 -m venv .venv
          source .venv/bin/activate
          pip install wheel
          bash .github/workflows/patch-wheel-lib-macos.sh bindings/extractous-python/dist
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-macos-${{ matrix.platform.target }}
          path: bindings/extractous-python/dist
      - name: pytest
        run: |
          set -e
          brew install tesseract tesseract-lang
          python3 -m venv .venv
          source .venv/bin/activate
          pip install extractous --find-links bindings/extractous-python/dist --no-index --force-reinstall
          pip install pytest scikit-learn
          cd bindings/extractous-python
          pytest -s

  # Builds the source distribution and uploads it under the same "wheels-"
  # artifact prefix so the publish jobs pick it up with the wheels.
  sdist:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build sdist
        uses: PyO3/maturin-action@v1
        with:
          working-directory: "bindings/extractous-python"
          command: sdist
          args: --out dist
      - name: Upload sdist
        uses: actions/upload-artifact@v4
        with:
          name: wheels-sdist
          path: bindings/extractous-python/dist

  # Follows the guide on https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
  # We use 2 jobs: one publishes to PyPI on tag pushes, the other publishes to
  # TestPyPI.
  publish-to-testpypi:
    name: Publish to TestPyPI
    # There is no `if:` here, so this job runs for every event that triggers
    # the workflow once all build jobs have succeeded.
    # NOTE(review): that includes pull_request and workflow_dispatch runs -
    # confirm this is intended rather than "pushes only".
    needs: [linux, macos, windows, sdist]
    runs-on: ubuntu-latest
    environment:
      name: testpypi
      url: https://test.pypi.org/p/extractous
    permissions:
      contents: read
      id-token: write  # IMPORTANT: mandatory for trusted publishing
    steps:
      # Collect every wheels-* artifact (wheels and sdist) into one directory.
      - uses: actions/download-artifact@v4
        with:
          pattern: wheels-*
          merge-multiple: true
          path: bindings/extractous-python/dist
      - name: Publish to TestPyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          skip-existing: true
          packages-dir: bindings/extractous-python/dist

  publish-to-pypi:
    name: Publish to PyPI
    # only publish to PyPI on tag pushes
    if: "startsWith(github.ref, 'refs/tags/')"
    needs: [linux, macos, windows, sdist]
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/extractous
    permissions:
      id-token: write  # IMPORTANT: mandatory for trusted publishing
    steps:
      # Collect every wheels-* artifact (wheels and sdist) into one directory.
      - uses: actions/download-artifact@v4
        with:
          pattern: wheels-*
          merge-multiple: true
          path: bindings/extractous-python/dist
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          skip-existing: true
          packages-dir: bindings/extractous-python/dist