From 5aef43910abe72c1150afb750cb956aa9684adca Mon Sep 17 00:00:00 2001 From: Derek A Dombek <50093944+derekadombek@users.noreply.github.com> Date: Wed, 7 Aug 2024 08:40:30 -0600 Subject: [PATCH] OCR CI build artifact (#153) * init for [IDWA-OCR-72] Install OCR into an executable * edit readme with the build command * add refs to form_filled to use this without args * lint * lint * lint * build/upload artifact * pip install pyinstaller * pip install pyinstaller * rm dev * put build in dependencies * add requirements.txt and use pyinstaller cli * rm working directory * point to main and not dir main * try dist/ and --onefile * try -windowed * dist/main * upgrade upload action * macos-latest * try using assets from tests * try using assets from tests * revert back to dup assets * CLI for better handling of arguments * lint * lint * rm args with pyinstaller because of new cli * use ^ woth version * install docopt with gh action job * rm unused assets in the ocr dir * docs * docs * upload bin for each os * matrix exp * matrix exp * matrix exp * zip * zip * check path * check path * check path * whoops * try gzexe * && * wip * try building release * fix the needs: * try building release * try building release * try building release * try building release * add checkout * change from action * add another checkout * add paths * try using workflow_call * try using workflow_call * wip * using download action * using download action * try that * token * try for loop * dont use matrix * token ref * try with workspace * try with workspace * try with workspace * whoops * working dir * working dir * add --repo * think i got it * try encoding with jq * try encoding with jq * github.repository * full url * matrix again * see what dir we're in * path to artifactas * put everything in first job with create * put everything in first job with create * upload all in dir * write all * try dif action * try dif action * try with content * upgrade action * new output * upgrade upload and download versions * just path for download * ls * ls * add to uplaod * try full workflow * forgot to switch needs job * again * fix file names * fix file names * change release title name * missed diffs * clean-up * try changing ref * try changing ref * try changing ref * try changing ref * try changing ref * try changing ref * try changing ref * create and see if upload asset chooses it * create and see if upload asset chooses it * create and see if upload asset chooses it * create and see if upload asset chooses it * create and see if upload asset chooses it * create and see if upload asset chooses it * create and see if upload asset chooses it * use ref at checkout * ls * use ref at checkout * try new upload action * fix script * create tag again * full workflow * full workflow * full workflow * cleaned up and made as workflow_dispatch --------- Co-authored-by: Derek Dombek --- .github/workflows/build-ocr.yml | 53 +++++++++++++++++++++++++++++++ .github/workflows/release-ocr.yml | 38 ++++++++-------------- OCR/README.md | 4 +++ OCR/ocr/pyinstaller.py | 23 ++++++++++++++ OCR/poetry.lock | 7 ++-- OCR/pyproject.toml | 1 + requirements.txt | 7 ++++ 7 files changed, 106 insertions(+), 27 deletions(-) create mode 100644 .github/workflows/build-ocr.yml create mode 100644 OCR/ocr/pyinstaller.py create mode 100644 requirements.txt diff --git a/.github/workflows/build-ocr.yml b/.github/workflows/build-ocr.yml new file mode 100644 index 00000000..ce30992a --- /dev/null +++ b/.github/workflows/build-ocr.yml @@ -0,0 +1,53 @@ +name: Build & Upload OCR Binaries +on: + workflow_call: + outputs: + output-file: + description: "The first output string" + value: ${{ jobs.build.outputs.output_artifacts }} + workflow_dispatch: + +jobs: + build: + strategy: + matrix: + include: + - os: macos-latest + name: macos + cmd: > + pyinstaller -F -w -n main-macos ./OCR/ocr/main.py && + cd dist/ && + zip -r9 main-macos main-macos + out_file: main-macos.zip + - os: windows-latest + name: windows + cmd: pyinstaller -F -w -n main-windows ./OCR/ocr/main.py + out_file: main-windows.exe + - os: ubuntu-latest + name: ubuntu + cmd: > + pyinstaller -F -w -n main-ubuntu ./OCR/ocr/main.py && + cd dist/ && + zip -r9 main-ubuntu main-ubuntu + out_file: main-ubuntu.zip + runs-on: ${{ matrix.os }} + outputs: + output_artifacts: ${{ steps.artifacts.outputs.matrix.out_file }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt pyinstaller + pip install docopt + - name: Build binaries for all OS's + run: ${{ matrix.cmd }} + - name: Upload Artifacts To Workflow + uses: actions/upload-artifact@v4 + id: artifacts + with: + name: main-${{ matrix.name }} + path: ./dist/${{ matrix.out_file}} diff --git a/.github/workflows/release-ocr.yml b/.github/workflows/release-ocr.yml index 420857fc..f25b4419 100644 --- a/.github/workflows/release-ocr.yml +++ b/.github/workflows/release-ocr.yml @@ -1,47 +1,38 @@ name: Release MDE-OCR artifacts +run-name: Release MDE-OCR artifacts - by @${{ github.actor }} on: - # workflow_dispatch: - # inputs: - # tag: - # description: 'target environment' - # required: true - push: - branches: - - idwa-ocr-ci-for-executable - paths: - - .github/workflows/release-ocr.yml - - .github/workflows/build-ocr.yml - - OCR/** - # tags: - # - 'v*' + workflow_dispatch: + inputs: + tag: + description: 'Version tag for new release' + required: true jobs: create-release: name: Create Release - runs-on: [ubuntu-latest] permissions: contents: write steps: - uses: actions/checkout@v4 - name: Create tag - uses: actions/github-script@v5 + uses: actions/github-script@v7 with: script: | github.rest.git.createRef({ owner: context.repo.owner, repo: context.repo.repo, - ref: 'refs/tags/1.0.0', + ref: 'refs/tags/${{ github.event.inputs.tag }}', sha: context.sha }) - name: Create release id: create_release env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - tag: ${{ github.ref_name }} + tag: ${{ github.event.inputs.tag }} run: | gh release create "$tag" \ --repo="$GITHUB_REPOSITORY" \ - --title="MDE-OCR ${tag#v}" \ + --title="MDE-OCR ${tag}" \ --generate-notes - name: Output Release URL File run: echo "${{ steps.create_release.outputs.upload_url }}" > release_url.txt @@ -62,9 +53,8 @@ jobs: with: path: artifacts merge-multiple: true - - name: Upload release binaries - uses: alexellis/upload-assets@0.4.1 - env: - GITHUB_TOKEN: ${{ github.token }} + - name: Release Upload Assets + uses: jaywcjlove/github-action-upload-assets@main with: - asset_paths: '["./artifacts/*"]' \ No newline at end of file + tag: ${{ github.event.inputs.tag }} + asset-path: '["./artifacts/*"]' diff --git a/OCR/README.md b/OCR/README.md index acbb0537..f7277d12 100644 --- a/OCR/README.md +++ b/OCR/README.md @@ -29,6 +29,10 @@ Run main, hoping to convert this to a cli at some point poetry run main ``` +To build the OCR service into an executable artifact +```shell +poetry run build +``` Adding new dependencies ```shell diff --git a/OCR/ocr/pyinstaller.py b/OCR/ocr/pyinstaller.py new file mode 100644 index 00000000..64bab811 --- /dev/null +++ b/OCR/ocr/pyinstaller.py @@ -0,0 +1,23 @@ +import PyInstaller.__main__ +from pathlib import Path + +HERE = Path(__file__).parent.absolute() +path_to_main = str(HERE / "main.py") + + +# This function installs/packages the main OCR function as an executable. +# You could also use the commandline. Using `pyinstaller ./OCR/ocr/main.py -F -w` works the same is the function below. +# If you need to add asset paths, follow the example below. +def install(): + PyInstaller.__main__.run( + [ + path_to_main, + "--onefile", + "--windowed", + # SOURCE:DESTINATION + # "--add-data=ocr/assets/form_filled.png:assets/", + # "--add-data=ocr/assets/form_segmention_template.png:assets/", + # "--add-data=ocr/assets/labels.json:assets/", + # other pyinstaller options... + ] + ) diff --git a/OCR/poetry.lock b/OCR/poetry.lock index 8c0f6a43..39059386 100644 --- a/OCR/poetry.lock +++ b/OCR/poetry.lock @@ -246,13 +246,13 @@ tqdm = ["tqdm"] [[package]] name = "huggingface-hub" -version = "0.23.4" +version = "0.23.5" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"}, - {file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"}, + {file = "huggingface_hub-0.23.5-py3-none-any.whl", hash = "sha256:d7a7d337615e11a45cc14a0ce5a605db6b038dc24af42866f731684825226e90"}, + {file = "huggingface_hub-0.23.5.tar.gz", hash = "sha256:67a9caba79b71235be3752852ca27da86bd54311d2424ca8afdb8dda056edf98"}, ] [package.dependencies] @@ -1158,6 +1158,7 @@ python-versions = ">=3.8" files = [ {file = "PyMuPDFb-1.24.6-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:21e3ed890f736def68b9a031122ae1fb854d5cb9a53aa144b6e2ca3092416a6b"}, {file = "PyMuPDFb-1.24.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8704d2dfadc9448ce184597d8b0f9c30143e379ac948a517f9c4db7c0c71ed51"}, + {file = "PyMuPDFb-1.24.6-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:01662584d5cfa7a91f77585f13fc23a12291cfd76a57e0a28dd5a56bf521cb2c"}, {file = "PyMuPDFb-1.24.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1f7657353529ae3f88575c83ee49eac9adea311a034b9c97248a65cee7df0e5"}, {file = "PyMuPDFb-1.24.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cebc2cedb870d1e1168e2f502eb06f05938f6df69103b0853a2b329611ec19a7"}, {file = "PyMuPDFb-1.24.6-py3-none-win32.whl", hash = "sha256:ac4b865cd1e239db04674f85e02844a0e405f8255ee7a74dfee0d86aad0d3576"}, diff --git a/OCR/pyproject.toml b/OCR/pyproject.toml index 939325ef..7c374173 100644 --- a/OCR/pyproject.toml +++ b/OCR/pyproject.toml @@ -34,6 +34,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] main = "ocr.main:main" +build = "ocr.pyinstaller:install" [tool.ruff] line-length = 118 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..2fba91a4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +numpy==1.26.4 +opencv-python==4.9.0.80 +python-dotenv==1.0.1 +Pillow>=10.3.0 +torch==1.13.1 +docopt==0.6.2 +git+https://github.com/huggingface/transformers.git \ No newline at end of file