diff --git a/.build/ci-versions.yml b/.build/ci-versions.yml
new file mode 100644
index 0000000..c36821f
--- /dev/null
+++ b/.build/ci-versions.yml
@@ -0,0 +1,51 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+########### ~~~ CI TEST MATRIX VERSIONS ~~~ ######################################################################################
+########### DEFINE THE CI REFERENCE COMBINATIONS TO TEST, TO AVOID TESTING EVERY COMBINATION, WHICH TAKES A LOT OF TIME ##########
+#### PUT THE SPARK VERSIONS TO TEST IN CORRESPONDENCE WITH 'reference-versions.yml' FILE #########################################
+#### !!! ANY DECLARED TEST VERSION WHICH IS NOT PRESENT IN 'reference-versions.yml' FILE IS SKIPPED DURING BUILD !!! #############
+#### REMOVE, UPDATE OR ADD VERSIONS TO TEST ######################################################################################
+versions:
+  # Maximum python version supported by spark-3.2.x: 3.9
+  # Java support: 8/11
+  - python_version: 3.9
+    spark_version: [3.2.4]
+    java_version: [11]
+    scala_version: [2.12]
+    hadoop_version: 3.2
+  # Maximum python version supported by spark-3.3.x: 3.10
+  # Java support: 8/11/17
+  - python_version: '3.10'  # quoted on purpose: a bare 3.10 is parsed as the float 3.1
+    spark_version: [3.3.4]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+  # Maximum python version supported by spark-3.4.x: 3.11
+  # Java support: 8/11/17
+  - python_version: 3.11
+    spark_version: [3.4.2]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+  # https://spark.apache.org/releases/spark-release-3-5-0.html
+  # Minimum supported java version: 17/21
+  - python_version: 3.11
+    spark_version: [3.5.1]
+    java_version: [17]
+    scala_version: [2.13]
+    hadoop_version: 3
+
diff --git a/.build/images.yml b/.build/images.yml
new file mode 100644
index 0000000..da019f2
--- /dev/null
+++ b/.build/images.yml
@@ -0,0 +1,60 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+images:
+  - name: docker.io/eclipse-temurin
+    tags:
+      - ${java_version}-jre-jammy
+  - name: spark-base
+    dependsOn: docker.io/eclipse-temurin
+    tags:
+      - spark-${spark_version}-scala-${scala_version}-java-${java_version}
+      - spark-${spark_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')
+      - spark-${spark_version}-scala-${scala_version}-java-${java_version}-${git_release_version}
+      - spark-${spark_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')-${git_release_version}
+      #- spark-${spark_version}-scala-${scala_version}-java-${java_version}-${git_commit_short_sha}
+  - name: spark
+    dependsOn: spark-base
+    tags:
+      - spark-${spark_version}-scala-${scala_version}-java-${java_version}
+      - spark-${spark_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')
+      - spark-${spark_version}-scala-${scala_version}-java-${java_version}-${git_release_version}
+      - spark-${spark_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')-${git_release_version}
+      #- spark-${spark_version}-scala-${scala_version}-java-${java_version}-${git_commit_short_sha}
+  - name: spark-py
+    dependsOn: spark
+    tags:
+      - spark-${spark_version}-python-${python_version}-scala-${scala_version}-java-${java_version}
+      - spark-${spark_version}-python-${python_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')
+      - spark-${spark_version}-python-${python_version}-scala-${scala_version}-java-${java_version}-${git_release_version}
+      - spark-${spark_version}-python-${python_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')-${git_release_version}
+      #- spark-${spark_version}-python-${python_version}-scala-${scala_version}-java-${java_version}-${git_commit_short_sha}
+  - name: spark-r
+    dependsOn: spark
+    tags:
+      - spark-${spark_version}-r-${r_version}-scala-${scala_version}-java-${java_version}
+      - spark-${spark_version}-r-${r_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')
+      - spark-${spark_version}-r-${r_version}-scala-${scala_version}-java-${java_version}-${git_release_version}
+      - spark-${spark_version}-r-${r_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')-${git_release_version}
+      #- spark-${spark_version}-r-${r_version}-scala-${scala_version}-java-${java_version}-${git_commit_short_sha}
+  - name: spark-py-r
+    dependsOn: spark-py
+    tags:
+      - spark-${spark_version}-python-${python_version}-r-${r_version}-scala-${scala_version}-java-${java_version}
+      - spark-${spark_version}-python-${python_version}-r-${r_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')
+      - spark-${spark_version}-python-${python_version}-r-${r_version}-scala-${scala_version}-java-${java_version}-${git_release_version}
+      - spark-${spark_version}-python-${python_version}-r-${r_version}-scala-${scala_version}-java-${java_version}-$(date '+%Y-%m-%d')-${git_release_version}
+      #- spark-${spark_version}-python-${python_version}-r-${r_version}-scala-${scala_version}-java-${java_version}-${git_commit_short_sha}
diff --git a/.build/reference-versions.yml b/.build/reference-versions.yml
new file mode 100644
index 0000000..450ca54
--- /dev/null
+++ b/.build/reference-versions.yml
@@ -0,0 +1,50 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+### REFERENCE MATRIX VERSIONS ##############################
+#### !!! DO NOT DELETE ANY ELEMENT !!! #####################
+######## APPEND ONLY WHEN NEW SPARK VERSION IS RELEASED ####
+############ USED AS REFERENCE DURING BUILD ################
+versions:
+  # Maximum python version supported by spark-3.2.x: 3.9
+  # Java support: 8/11
+  - python_version: 3.9
+    spark_version: [3.2.1, 3.2.2, 3.2.3, 3.2.4]
+    java_version: [11]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3.2
+  # Maximum python version supported by spark-3.3.x: 3.10
+  # Java support: 8/11/17
+  - python_version: '3.10'  # quoted on purpose: a bare 3.10 is parsed as the float 3.1
+    spark_version: [3.3.1, 3.3.2, 3.3.3, 3.3.4]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+  # Maximum python version supported by spark-3.4.x: 3.11
+  # Java support: 8/11/17
+  - python_version: 3.11
+    spark_version: [3.4.1, 3.4.2]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+  # https://spark.apache.org/releases/spark-release-3-5-0.html
+  # Minimum supported java version: 17/21
+  - python_version: 3.11
+    spark_version: [3.5.1]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+
diff --git a/.build/release-versions.yml b/.build/release-versions.yml
new file mode 100644
index 0000000..fe814fe
--- /dev/null
+++ b/.build/release-versions.yml
@@ -0,0 +1,50 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+########### CURRENT MATRIX VERSIONS ################################################################################
+#### PUT THE SPARK VERSIONS TO BUILD IN CORRESPONDENCE WITH 'reference-versions.yml' FILE ##########################
+#### !!! ANY DECLARED VERSION WHICH IS NOT PRESENT IN 'reference-versions.yml' FILE IS SKIPPED DURING BUILD !!! ####
+#### REMOVE, UPDATE OR ADD VERSIONS ################################################################################
+versions:
+  # Maximum python version supported by spark-3.2.x: 3.9
+  # Java support: 8/11
+  - python_version: 3.9
+    spark_version: [3.2.1, 3.2.2, 3.2.3, 3.2.4]
+    java_version: [11]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3.2
+  # Maximum python version supported by spark-3.3.x: 3.10
+  # Java support: 8/11/17
+  - python_version: '3.10'  # quoted on purpose: a bare 3.10 is parsed as the float 3.1
+    spark_version: [3.3.1, 3.3.2, 3.3.3, 3.3.4]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+  # Maximum python version supported by spark-3.4.x: 3.11
+  # Java support: 8/11/17
+  - python_version: 3.11
+    spark_version: [3.4.1, 3.4.2]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+  # https://spark.apache.org/releases/spark-release-3-5-0.html
+  # Minimum supported java version: 17/21
+  - python_version: 3.11
+    spark_version: [3.5.1]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+
diff --git a/.github/actions/free-disk-space/action.yml b/.github/actions/free-disk-space/action.yml
new file mode 100644
index 0000000..4f661da
--- /dev/null
+++ b/.github/actions/free-disk-space/action.yml
@@ -0,0 +1,40 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Free disk space
+description: Free GitHub runner disk space
+
+runs:
+  using: composite
+  steps:
+    - name: Free Disk Space (Ubuntu)
+      uses: jlumbroso/free-disk-space@v1.3.1  # pinned release; '@main' is a mutable branch (unreviewed changes, non-reproducible runs)
+      with:
+        # this might remove tools that are actually needed,
+        # if set to "true" but frees about 6 GB
+        tool-cache: false
+
+        # all of these default to true, but feel free to set to
+        # "false" if necessary for your workflow
+        android: true
+        dotnet: true
+        haskell: true
+        large-packages: true
+        docker-images: true
+        swap-storage: true
+
+
+
diff --git a/.github/actions/setup-buildx/action.yaml b/.github/actions/setup-buildx/action.yaml
new file mode 100644
index 0000000..6de6665
--- /dev/null
+++ b/.github/actions/setup-buildx/action.yaml
@@ -0,0 +1,29 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Set up QEMU and Docker Buildx
+description: Set up QEMU and Docker Buildx
+
+runs:
+  using: composite
+  steps:
+    - name: Set up QEMU 📦
+      uses: docker/setup-qemu-action@v3  # emulators needed to build images for non-native platforms
+
+    - name: Set up Docker Buildx 📦
+      uses: docker/setup-buildx-action@v3
+      with:
+        driver-opts: network=host  # the BuildKit builder container uses the host network
diff --git a/.github/actions/setup-kind/action.yaml b/.github/actions/setup-kind/action.yaml
new file mode 100644
index 0000000..e16035f
--- /dev/null
+++ b/.github/actions/setup-kind/action.yaml
@@ -0,0 +1,37 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Setup kind
+description: Deploy kind cluster
+
+runs:
+  using: composite
+  steps:
+    - name: Create k8s Kind Cluster
+      uses: helm/kind-action@v1
+      with:
+        # https://github.com/helm/kind-action?tab=readme-ov-file#inputs
+        verbosity: 10
+        cluster_name: "kind-ci-${{ github.job }}"
+        ignore_failed_clean: true # Ignore the post delete cluster action failing
+        wait: "180s" # Max timeout to wait Kind becomes ready
+
+    - name: Print Kind cluster state
+      run: |
+        kubectl cluster-info
+        kubectl get pods -A
+        kubectl describe node
+      shell: bash
diff --git a/.github/actions/spark-image-tag/action.yaml b/.github/actions/spark-image-tag/action.yaml
new file mode 100644
index 0000000..0b2b0fe
--- /dev/null
+++ b/.github/actions/spark-image-tag/action.yaml
@@ -0,0 +1,137 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Generate spark image tags
+description: Generate spark image tags
+
+inputs:
+  image:
+    description: Image name
+    required: true
+  spark_version:
+    description: Spark version
+    required: true
+  scala_version:
+    description: Scala version
+    required: true
+  java_version:
+    description: Java version
+    required: true
+  python_version:
+    description: Python version
+    required: true
+  ci_repo:
+    description: The CI registry repo
+    required: false
+  git_tag_name:
+    description: The Git remote latest tag name
+    required: false
+  publish_repo:
+    description: The official registry repo
+    required: false
+  publish_to_registry:
+    description: Whether to push or not to the official registry repo
+    required: true
+
+outputs:
+  parent_image:
+    description: "Parent (base) image reference, name:tag (ex.: docker.io/eclipse-temurin:17-jre-jammy)"
+    value: ${{ steps.tags.outputs.parent_image }}
+  latest_tag:
+    description: "CI image tags (ex.: spark-3.3.4....)"
+    value: ${{ steps.tags.outputs.latest_tag }}
+  publish_tags:
+    description: "Image tags to push into registry (ex.: quay.io/spark-r:spark-3.3.4...)"
+    value: ${{ steps.tags.outputs.publish_tags }}
+
+runs:
+  using: composite
+  steps:
+    - name: Install yq
+      run: |
+        sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.42.1/yq_linux_amd64
+        sudo chmod a+x /usr/local/bin/yq
+      shell: bash
+
+    - name: Expose git commit sha as env variable
+      uses: rlespinasse/git-commit-data-action@v1.5.0
+
+    - name: Get current branch 📦
+      id: git-branch
+      uses: tj-actions/branch-names@v8
+
+    - name: Generate spark image tags 📦
+      id: tags
+      run: |
+        ### Inputs
+        ### Variables substitution used in '.build/images.yml' file
+        spark_version=${{ inputs.spark_version }}
+        scala_version=${{ inputs.scala_version }}
+        java_version=${{ inputs.java_version }}
+        python_version=${{ inputs.python_version }}
+        git_tag_name=${{ inputs.git_tag_name }}
+        git_release_version="${git_tag_name#v}"  # strip only the leading 'v' (v1.2.3 -> 1.2.3); 'tr -d v' removed every 'v'
+
+        git_commit_sha=${{ env.GIT_COMMIT_SHA }}
+        git_commit_short_sha=${{ env.GIT_COMMIT_SHORT_SHA }}
+        # NOTE(review): 'r_version' is used by the spark-r / spark-py-r tags in '.build/images.yml' but is not set here - confirm its source
+
+
+        ### Outputs - Parse: .build/images.yml
+        PARENT_IMAGE_NAME=$(yq '(.images[] | select(.name == "${{ inputs.image }}").dependsOn)' .build/images.yml)
+        PARENT_IMAGE_NAME=$(eval echo ${PARENT_IMAGE_NAME})
+
+        PARENT_IMAGE_TAG=$(yq -oc "(.images[] | select(.name == \"${PARENT_IMAGE_NAME}\").tags[0])" .build/images.yml)
+        PARENT_IMAGE_TAG=$(eval echo ${PARENT_IMAGE_TAG})
+        PARENT_IMAGE_NAME="${PARENT_IMAGE_NAME}:${PARENT_IMAGE_TAG}"
+
+        LATEST_TAG=$(yq -oc '(.images[] | select(.name == "${{ inputs.image }}").tags[0])' .build/images.yml)
+        LATEST_TAG=$(eval echo ${LATEST_TAG})
+
+        PUBLISH_TAGS=$(yq -oc '[.images[] | select(.name == "${{ inputs.image }}").tags | .[] |"${{ inputs.publish_repo }}/${{ inputs.image }}:" + .]' .build/images.yml)
+        PUBLISH_TAGS=$(eval echo ${PUBLISH_TAGS})
+
+        ### For pull request branchs merge, suffix the CI tag with the branch name
+        #### The tag is pushed in the CI registry only
+        CI_GIT_BRANCH_SUFFIX="${{ steps.git-branch.outputs.current_branch }}"
+        CI_GIT_BRANCH_SUFFIX=${CI_GIT_BRANCH_SUFFIX//\//-}
+
+        if [[ "${{ inputs.publish_to_registry }}" == "false" ]]
+        then
+          LATEST_TAG="${LATEST_TAG}-${CI_GIT_BRANCH_SUFFIX}"
+        fi
+
+        # The image can inherit from a community image like docker.io/eclipse-temurin, ...
+        if [[ "${PARENT_IMAGE_NAME}" != *"/"* ]]
+        then
+          if [[ "${{ inputs.publish_to_registry }}" == "true" ]]
+          then
+            PARENT_IMAGE_NAME="${{ inputs.publish_repo }}/${PARENT_IMAGE_NAME}"
+          else
+            PARENT_IMAGE_NAME="${{ inputs.ci_repo }}/${PARENT_IMAGE_NAME}-${CI_GIT_BRANCH_SUFFIX}"
+          fi
+        fi
+
+        # Print the computed values so they are visible in the job log
+        echo "parent_image=${PARENT_IMAGE_NAME}"
+        echo "latest_tag=${LATEST_TAG}"
+        echo "publish_tags=${PUBLISH_TAGS}"
+        # Publish the same values as step outputs by appending key=value lines to $GITHUB_OUTPUT
+        echo "parent_image=${PARENT_IMAGE_NAME}" >> $GITHUB_OUTPUT
+        echo "latest_tag=${LATEST_TAG}" >> $GITHUB_OUTPUT
+        echo "publish_tags=${PUBLISH_TAGS}" >> $GITHUB_OUTPUT
+
+      shell: bash
diff --git a/.github/actions/spark-tests-prepare/action.yml b/.github/actions/spark-tests-prepare/action.yml
new file mode 100644
index 0000000..96242af
--- /dev/null
+++ b/.github/actions/spark-tests-prepare/action.yml
@@ -0,0 +1,63 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Prepare integration tests
+description: Prepare integration tests
+
+inputs:
+  spark_version:
+    description: Spark version
+    required: true
+  scala_version:
+    description: Scala version
+    required: true
+  java_version:
+    description: Java version
+    required: true
+
+outputs:
+  git_tag_checkout_dir:
+    description: "Git checkout tag local source directory"
+    value: ${{ steps.git-checkout-tag.outputs.git_tag_checkout_dir }}  # NOTE(review): no step with id 'git-checkout-tag' is defined in this action - this output looks always empty; confirm
+
+runs:
+  using: composite
+  # https://github.com/apache/spark/blob/master/.github/workflows/build_and_test.yml
+  steps:
+    - name: Set up Java ${{ inputs.java_version }}
+      uses: actions/setup-java@v4
+      with:
+        distribution: 'zulu'
+        java-version: ${{ inputs.java_version }}
+
+    - name: Cache Scala, SBT and Maven
+      uses: actions/cache@v4
+      with:
+        path: |
+          build/apache-maven-*
+          build/scala-*
+          build/*.jar
+          ~/.sbt
+        key: build-${{ inputs.spark_version }}-scala${{ inputs.scala_version }}-java${{ inputs.java_version }}
+
+    - name: Cache Coursier local repository
+      uses: actions/cache@v4
+      with:
+        path: ~/.cache/coursier
+        key: build-${{ inputs.spark_version }}-scala${{ inputs.scala_version }}-java${{ inputs.java_version }}-coursier
+
+
+
diff --git a/.github/actions/spark-tests-run/action.yml b/.github/actions/spark-tests-run/action.yml
new file mode 100644
index 0000000..9063b9c
--- /dev/null
+++ b/.github/actions/spark-tests-run/action.yml
@@ -0,0 +1,125 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Run integration tests
+description: Run integration tests
+
+inputs:
+  ci-repo:
+    description: The CI registry repo URL
+    required: true
+  image:
+    description: Spark image name to test (ex. spark)
+    required: true
+  image-tag:
+    description: Spark image tag to test (ex. latest)
+    required: true
+  scala_version:
+    description: Scala version
+    required: true
+  git_checkout_tag_dir:
+    description: Git checkout tag directory
+    required: true
+
+runs:
+  using: composite
+  # https://github.com/apache/spark/tree/master/resource-managers/kubernetes/integration-tests
+  # https://github.com/apache/spark/blob/master/.github/workflows/build_and_test.yml
+  # https://github.com/apache/spark/pull/35830
+  steps:
+    - name: Load image ${{ inputs.image }} into Kind and setup Spark RBACs
+      run: |
+        kubectl create clusterrolebinding serviceaccounts-cluster-admin \
+          --clusterrole=cluster-admin \
+          --group=system:serviceaccounts || true
+        # Pull and Load the image into all kind nodes (current setup mono node) for fast executors startup
+        docker pull "${{ inputs.ci-repo }}/${{ inputs.image }}:${{ inputs.image-tag }}"
+        kind load docker-image "${{ inputs.ci-repo }}/${{ inputs.image }}:${{ inputs.image-tag }}" --name "kind-ci-${{ github.job }}"
+      shell: bash
+
+    - name: Change Scala version to ${{ inputs.scala_version }}
+      run: |
+        ./dev/change-scala-version.sh ${{ inputs.scala_version }}
+        echo "SCALA_PROFILE=scala-${{ inputs.scala_version }}" >> "$GITHUB_ENV"
+
+      working-directory: ${{ inputs.git_checkout_tag_dir }}
+      shell: bash
+
+    - name: Run base integration tests (${{ inputs.image }})
+      if: inputs.image == 'spark-base' || inputs.image == 'spark'
+      run: |
+        build/sbt -P${{ env.SCALA_PROFILE }} -Pkubernetes -Pkubernetes-integration-tests \
+          -Dspark.kubernetes.test.driverRequestCores=0.5 \
+          -Dspark.kubernetes.test.executorRequestCores=0.2 \
+          -Dspark.kubernetes.test.deployMode=cloud \
+          -Dspark.kubernetes.test.imageRepo=${{ inputs.ci-repo }} -Dspark.kubernetes.test.imageTag=${{ inputs.image-tag }} \
+          -Dspark.kubernetes.test.jvmImage=${{ inputs.image }} \
+          -Dspark.kubernetes.test.pythonImage=${{ inputs.image }} \
+          -Dspark.kubernetes.test.rImage=${{ inputs.image }} \
+          'kubernetes-integration-tests/testOnly -- -z "Run SparkPi"'
+
+      working-directory: ${{ inputs.git_checkout_tag_dir }}
+      shell: bash
+
+    - name: Run spark-py integration tests (${{ inputs.image }})
+      if: inputs.image == 'spark-py'
+      run: |
+        build/sbt -P${{ env.SCALA_PROFILE }} -Pkubernetes -Pkubernetes-integration-tests \
+          -Dspark.kubernetes.test.driverRequestCores=0.5 \
+          -Dspark.kubernetes.test.executorRequestCores=0.2 \
+          -Dspark.kubernetes.test.deployMode=cloud \
+          -Dspark.kubernetes.test.imageRepo=${{ inputs.ci-repo }} -Dspark.kubernetes.test.imageTag=${{ inputs.image-tag }} \
+          -Dspark.kubernetes.test.jvmImage=${{ inputs.image }} \
+          -Dspark.kubernetes.test.pythonImage=${{ inputs.image }} \
+          -Dspark.kubernetes.test.rImage=${{ inputs.image }} \
+          'kubernetes-integration-tests/testOnly -- -z "Run PySpark"'
+
+      working-directory: ${{ inputs.git_checkout_tag_dir }}
+      shell: bash
+
+    - name: Run spark-r integration tests (${{ inputs.image }})
+      if: inputs.image == 'spark-r'
+      run: |
+        build/sbt -P${{ env.SCALA_PROFILE }} -Pkubernetes -Pkubernetes-integration-tests \
+          -Dspark.kubernetes.test.driverRequestCores=0.5 \
+          -Dspark.kubernetes.test.executorRequestCores=0.2 \
+          -Dspark.kubernetes.test.deployMode=cloud \
+          -Dspark.kubernetes.test.imageRepo=${{ inputs.ci-repo }} -Dspark.kubernetes.test.imageTag=${{ inputs.image-tag }} \
+          -Dspark.kubernetes.test.jvmImage=${{ inputs.image }} \
+          -Dspark.kubernetes.test.pythonImage=${{ inputs.image }} \
+          -Dspark.kubernetes.test.rImage=${{ inputs.image }} \
+          -Psparkr -Dtest.include.tags=r \
+          'kubernetes-integration-tests/testOnly'
+
+      working-directory: ${{ inputs.git_checkout_tag_dir }}
+      shell: bash
+
+    # - name: Run All integration tests (${{ inputs.image }})
+    #   if: inputs.image == 'spark-py-r'
+    #   run: |
+    #     build/sbt -P${{ env.SCALA_PROFILE }} -Pkubernetes -Pkubernetes-integration-tests \
+    #       -Dspark.kubernetes.test.driverRequestCores=0.5 \
+    #       -Dspark.kubernetes.test.executorRequestCores=0.2 \
+    #       -Dspark.kubernetes.test.deployMode=cloud \
+    #       -Dspark.kubernetes.test.imageRepo=${{ inputs.ci-repo }} -Dspark.kubernetes.test.imageTag=${{ inputs.image-tag }} \
+    #       -Dspark.kubernetes.test.jvmImage=${{ inputs.image }} \
+    #       -Dspark.kubernetes.test.pythonImage=${{ inputs.image }} \
+    #       -Dspark.kubernetes.test.rImage=${{ inputs.image }} \
+    #       'kubernetes-integration-tests/testOnly'
+
+    #   working-directory: ${{ inputs.git_checkout_tag_dir }}
+    #   shell: bash
+
diff --git a/.github/actions/spark-version-matrix/action.yml b/.github/actions/spark-version-matrix/action.yml
new file mode 100644
index 0000000..782b21c
--- /dev/null
+++ b/.github/actions/spark-version-matrix/action.yml
@@ -0,0 +1,59 @@
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Build Spark versions matrix
+description: Build the Spark versions matrix as the intersection of the 'use_matrix' file and '.build/reference-versions.yml'
+
+inputs:
+  use_matrix:
+    description: The matrix version file to use
+    required: true
+
+outputs:
+  matrix:
+    description: "Spark versions matrix"
+    value: ${{ steps.generate-matrix.outputs.matrix }}
+
+runs:
+  using: composite
+  steps:
+    - name: Generate Matrix
+      id: generate-matrix
+      run: |
+
+        INPUT_MATRIX=$(yq -oj "${{ inputs.use_matrix }}" | jq '.versions | .[] |
+          {python_version: .python_version,
+          hadoop_version: .hadoop_version} +
+          (.spark_version[] | {spark_version: .}) +
+          (.scala_version[] | {scala_version: .}) +
+          (.java_version[] | {java_version: .})' | jq -c --slurp '.')
+        REF_MATRIX=$(yq -oj .build/reference-versions.yml | jq '.versions | .[] |
+          {python_version: .python_version,
+          hadoop_version: .hadoop_version} +
+          (.spark_version[] | {spark_version: .}) +
+          (.scala_version[] | {scala_version: .}) +
+          (.java_version[] | {java_version: .})' | jq -c --slurp '.')
+
+        ### Intersection between the versions matrix and the reference versions matrix ($IN - ($IN - $REF); JSON is quoted to avoid word-splitting/globbing)
+        ### When the intersection is empty, the jobs are skipped!
+        MATRIX=$(jq --argjson IN "${INPUT_MATRIX}" --argjson REF "${REF_MATRIX}" -cn '$IN - ($IN - $REF)')
+
+        LENGTH=$(jq 'length' <<< "${MATRIX}")
+        echo "${MATRIX}"
+        echo "Found ${LENGTH} compatible version combinations"
+        echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
+
+      shell: bash
diff --git a/.github/workflows/build-image-template.yml b/.github/workflows/build-image-template.yml
new file mode 100644
index 0000000..3615a1f
--- /dev/null
+++ b/.github/workflows/build-image-template.yml
@@ -0,0 +1,219 @@
+
+#
+# Copyright 2024 tosit.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Spark build single image template
+
+on:
+  workflow_call:
+    inputs:
+      image:
+        description: The spark image name (ex. spark-base, spark, spark-py, spark-r, etc)
+        required: true
+        type: string
+      spark_version:
+        description: Spark version
+        required: true
+        type: string
+      scala_version:
+        description: Scala version
+        required: true
+        type: string
+      java_version:
+        description: Java version
+        required: true
+        type: string
+      hadoop_version:
+        description: Hadoop version
+        required: true
+        type: string
+      python_version:
+        description: Python version
+        required: true
+        type: string
+      publish_to_registry:
+        description: Whether to push to the registry
+        required: false
+        type: string
+        default: "false"
+      registry:
+        description: The container registry
+        required: false
+        type: string
+      ci_registry:
+        description: "The registry used to push ci images"
+        required: false
+        type: string
+        default: "ghcr.io"
+      git_latest_release_tag:
+        description: The latest remote release tag
+        required: false
+        type: string
+        default: ""
+      runs-on:
+        description: GitHub Actions Runner image
+        required: true
+        type: string
+
+jobs:
+
+  build-test-push:
+    name: ${{ inputs.image }} (scala-${{ inputs.scala_version }}, java-${{ inputs.java_version }}, python-${{ inputs.python_version }}, hadoop-${{ inputs.hadoop_version }})
+    runs-on: ${{ inputs.runs-on }}
+    steps:
+
+      ### The publish and periodic rebuilds are based on the latest stable github release tag
+      - name: Checkout latest Github Release tag (${{ inputs.git_latest_release_tag }}) ⚡️
+        if: inputs.publish_to_registry == 'true'
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.git_latest_release_tag }}
+
+      ### The CI is based on the main branch
+      - name: Checkout Repo ⚡️
+        if: inputs.publish_to_registry == 'false'
+        uses: actions/checkout@v4
+
+      ### Common steps between CI and Publish
+      - name: Free up disk space 📦
+        uses: ./.github/actions/free-disk-space
+
+      - name: Set up QEMU and Docker Buildx 📦
+        uses: ./.github/actions/setup-buildx
+
+      - name: Set up CI and official registries 📦
+        id: registry-repos
+        run: |
+          echo "repo_owner=${GITHUB_REPOSITORY_OWNER@L}" >> "$GITHUB_OUTPUT"
+          echo "ci_repo=${{ inputs.ci_registry }}/${GITHUB_REPOSITORY_OWNER@L}" >> "$GITHUB_OUTPUT"
+          echo "publish_repo=${{ inputs.registry }}/${GITHUB_REPOSITORY_OWNER@L}" >> "$GITHUB_OUTPUT"
+        shell: bash
+
+      - name: Generate image tags 📦
+        id: image-tags
+        uses: ./.github/actions/spark-image-tag
+        with:
+          image: ${{ inputs.image }}
+          spark_version: ${{ inputs.spark_version}}
+          scala_version: ${{ inputs.scala_version }}
+          java_version: ${{ inputs.java_version }}
+          python_version: ${{ inputs.python_version}}
+          ci_repo: ${{ steps.registry-repos.outputs.ci_repo }}
+          publish_repo: ${{ steps.registry-repos.outputs.publish_repo }}
+          publish_to_registry: ${{ inputs.publish_to_registry }}
+          git_tag_name: ${{ inputs.git_latest_release_tag }}
+
+      - name: Login to the CI registry 🔐
+        if: inputs.publish_to_registry == 'false'
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ inputs.ci_registry }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push to ci registry
+        if: inputs.publish_to_registry == 'false'
+        uses: docker/build-push-action@v5
+        with:
+          context: ${{ inputs.image }}
+          platforms: linux/amd64,linux/arm64
+          push: true
+          build-args: |
+            SPARK_VERSION=${{ inputs.spark_version}}
+            SCALA_VERSION=${{ inputs.scala_version }}
+            JAVA_VERSION=${{ inputs.java_version }}
+            PYTHON_VERSION=${{ inputs.python_version }}
+            HADOOP_VERSION=${{ inputs.hadoop_version }}
+            BASE_IMAGE=${{ steps.image-tags.outputs.parent_image }}
+          tags: |
+            ${{ steps.registry-repos.outputs.ci_repo }}/${{ inputs.image }}:${{ steps.image-tags.outputs.latest_tag }}
+          labels: |
+            org.opencontainers.image.title="${{ inputs.image }}"
+            org.opencontainers.image.version="${{ inputs.spark_version}}"
+            org.opencontainers.image.description="Spark image"
+            org.opencontainers.image.base.name="${{ steps.image-tags.outputs.parent_image }}"
+            org.opencontainers.image.source="https://github.com/${{ github.repository }}"
+            org.opencontainers.image.licenses="Apache-2.0"
+
+      ### CI Steps
+      # https://github.com/nektos/act/issues/678
+      # https://github.com/apache/spark/pull/35830
+      - name: Checkout integration tests tag v${{ inputs.spark_version }} (${{ inputs.spark_version}} > 3.3.0) ⚡️
+        if: inputs.publish_to_registry == 'false' && !(startsWith(inputs.spark_version, '3.1') || startsWith(inputs.spark_version, '3.2') || startsWith(inputs.spark_version, '3.3.0'))
+        id: git-checkout-tag
+        run: |
+          CHECKOUT_TAG_DIR="$(mktemp -d)/spark"
+          # Shallow-clone only the release tag: the full Spark history is not needed to run the tests
+          git clone --depth 1 --branch "v${{ inputs.spark_version }}" \
+            https://github.com/apache/spark.git "${CHECKOUT_TAG_DIR}"
+          echo "checkout_directory=${CHECKOUT_TAG_DIR}" >> "$GITHUB_OUTPUT"
+        shell: bash
+
+      - name: Prepare integration tests env (${{ inputs.spark_version}} > 3.3.0) 📦
+        if: inputs.publish_to_registry == 'false' && !(startsWith(inputs.spark_version, '3.1') || startsWith(inputs.spark_version, '3.2') || startsWith(inputs.spark_version, '3.3.0'))
+        uses: ./.github/actions/spark-tests-prepare
+        with:
+          spark_version: ${{ inputs.spark_version}}
+          scala_version: ${{ inputs.scala_version }}
+          java_version: ${{ inputs.java_version }}
+
+      - name: Set up Kind integration tests cluster (${{ inputs.spark_version}} > 3.3.0) 📦
+        if: inputs.publish_to_registry == 'false' && !(startsWith(inputs.spark_version, '3.1') || startsWith(inputs.spark_version, '3.2') || startsWith(inputs.spark_version, '3.3.0'))
+        uses: ./.github/actions/setup-kind
+
+      - name: Run integration tests (${{ inputs.spark_version}} > 3.3.0) ✅
+        if: inputs.publish_to_registry == 'false' && !(startsWith(inputs.spark_version, '3.1') || startsWith(inputs.spark_version, '3.2') || startsWith(inputs.spark_version, '3.3.0'))
+        uses: ./.github/actions/spark-tests-run
+        with:
+          ci-repo: ${{ steps.registry-repos.outputs.ci_repo }}
+          image: ${{ inputs.image }}
+          image-tag: ${{ steps.image-tags.outputs.latest_tag }}
+          scala_version: ${{ inputs.scala_version }}
+          git_checkout_tag_dir: ${{ steps.git-checkout-tag.outputs.checkout_directory }}
+
+      ### Publish steps
+      ### The publish and periodic rebuilds are based on the latest stable github release tag
+      - name: Login into official registry 🔐
+        if: inputs.publish_to_registry == 'true'
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ inputs.registry }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_ROBOT_TOKEN }}
+
+      - name: Build and push to official registry 📤
+        if: inputs.publish_to_registry == 'true'
+        uses: docker/build-push-action@v5
+        with:
+          context: ${{ inputs.image }}
+          platforms: linux/amd64,linux/arm64
+          push: true
+          build-args: |
+            SPARK_VERSION=${{ inputs.spark_version}}
+            SCALA_VERSION=${{ inputs.scala_version }}
+            JAVA_VERSION=${{ inputs.java_version }}
+            PYTHON_VERSION=${{ inputs.python_version }}
+            HADOOP_VERSION=${{ inputs.hadoop_version }}
+            BASE_IMAGE=${{ steps.image-tags.outputs.parent_image }}
+          tags: ${{ steps.image-tags.outputs.publish_tags }}
+          labels: |
+            org.opencontainers.image.title="${{ inputs.image }}"
+            org.opencontainers.image.version="${{ inputs.spark_version}}"
+            org.opencontainers.image.description="Spark image"
+            org.opencontainers.image.base.name="${{ steps.image-tags.outputs.parent_image }}"
+            org.opencontainers.image.source="https://github.com/${{ github.repository }}"
+            org.opencontainers.image.licenses="Apache-2.0"
+
diff --git a/.github/workflows/build-images-template.yml
b/.github/workflows/build-images-template.yml new file mode 100644 index 0000000..5e5593a --- /dev/null +++ b/.github/workflows/build-images-template.yml @@ -0,0 +1,125 @@ +# +# Copyright 2024 tosit.io +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: spark build multiple images template + +on: + workflow_call: + inputs: + spark_version: + description: Spark version + required: true + type: string + scala_version: + description: Scala version + required: true + type: string + java_version: + description: Java version + required: true + type: string + hadoop_version: + description: Hadoop version + required: true + type: string + python_version: + description: Python version + required: true + type: string + registry: + description: The container registry + required: false + type: string + publish_to_registry: + description: Whether to push to the registry + required: false + type: string + default: "false" + git_latest_release_tag: + description: The latest remote release tag + required: false + type: string + default: "" + runs-on: + description: GitHub Actions Runner image + required: false + type: string + default: "ubuntu-latest" + +jobs: + + spark-base: + uses: ./.github/workflows/build-image-template.yml + with: + image: spark-base + python_version: ${{ inputs.python_version }} + spark_version: ${{ inputs.spark_version }} + java_version: ${{ inputs.java_version }} + scala_version: ${{ inputs.scala_version }} + hadoop_version: ${{ inputs.hadoop_version }} 
+ registry: ${{ inputs.registry }} + publish_to_registry: ${{ inputs.publish_to_registry }} + git_latest_release_tag: ${{ inputs.git_latest_release_tag }} + runs-on: ${{ inputs.runs-on }} + secrets: inherit + + spark: + uses: ./.github/workflows/build-image-template.yml + needs: [spark-base] + with: + image: spark + python_version: ${{ inputs.python_version }} + spark_version: ${{ inputs.spark_version }} + java_version: ${{ inputs.java_version }} + scala_version: ${{ inputs.scala_version }} + hadoop_version: ${{ inputs.hadoop_version }} + registry: ${{ inputs.registry }} + publish_to_registry: ${{ inputs.publish_to_registry }} + git_latest_release_tag: ${{ inputs.git_latest_release_tag }} + runs-on: ${{ inputs.runs-on }} + secrets: inherit + + spark-py: + uses: ./.github/workflows/build-image-template.yml + needs: [spark] + with: + image: spark-py + python_version: ${{ inputs.python_version }} + spark_version: ${{ inputs.spark_version }} + java_version: ${{ inputs.java_version }} + scala_version: ${{ inputs.scala_version }} + hadoop_version: ${{ inputs.hadoop_version }} + registry: ${{ inputs.registry }} + publish_to_registry: ${{ inputs.publish_to_registry }} + git_latest_release_tag: ${{ inputs.git_latest_release_tag }} + runs-on: ${{ inputs.runs-on }} + secrets: inherit + + spark-r: + uses: ./.github/workflows/build-image-template.yml + needs: [spark] + with: + image: spark-r + python_version: ${{ inputs.python_version }} + spark_version: ${{ inputs.spark_version }} + java_version: ${{ inputs.java_version }} + scala_version: ${{ inputs.scala_version }} + hadoop_version: ${{ inputs.hadoop_version }} + registry: ${{ inputs.registry }} + publish_to_registry: ${{ inputs.publish_to_registry }} + git_latest_release_tag: ${{ inputs.git_latest_release_tag }} + runs-on: ${{ inputs.runs-on }} + secrets: inherit diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0da6ca0 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 
+1,87 @@ +# +# Copyright 2024 tosit.io +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: ci + +on: + pull_request: + branches: + - main + paths: + - ".github/workflows/**" + - ".github/actions/**" + - ".build/**" + + - "spark/**" + - "spark-*/**" + + - "!README.md" + + push: + branches: + - main + paths: + - ".github/workflows/**" + - ".github/actions/**" + - ".build/**" + + - "spark/**" + - "spark-*/**" + + - "!README.md" + + workflow_dispatch: + +# https://docs.github.com/en/actions/using-jobs/using-concurrency +concurrency: + # Only cancel in-progress jobs or runs for the current workflow - matches against branch & tags + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + packages: write + +jobs: + + get-ci-versions: + runs-on: "ubuntu-latest" + outputs: + matrix: ${{ steps.ci-versions.outputs.matrix }} + steps: + - name: Checkout Repo ⚡️ + uses: actions/checkout@v4 + + - name: Get CI versions matrix 📥 + id: ci-versions + uses: ./.github/actions/spark-version-matrix + with: + use_matrix: ".build/ci-versions.yml" + + spark-ci: + name: spark-ci (spark-${{ matrix.version.spark_version }}) + needs: [get-ci-versions] + strategy: + fail-fast: false + matrix: + version: ${{ fromJson(needs.get-ci-versions.outputs.matrix) }} + uses: ./.github/workflows/build-images-template.yml + with: + python_version: ${{ matrix.version.python_version }} + spark_version: ${{ matrix.version.spark_version }} + java_version: ${{ 
matrix.version.java_version }} + scala_version: ${{ matrix.version.scala_version }} + hadoop_version: ${{ matrix.version.hadoop_version }} + publish_to_registry: "false" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..4d8f7d4 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,106 @@ +# +# Copyright 2024 tosit.io +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: publish + +on: + ### Periodically rebuild all the images to fix os security vulnerabilities + schedule: + # At 05:00 AM, only on Tuesday + #- cron: "0 5 * * 2" + # https://crontab.cronhub.io/ + # At 05:00 AM, only on Tuesday + - cron: "0 5 * * 2" + # The release should be created manually (or with user token=pr approval/merge) in order to trigger the event + ### https://github.com/orgs/community/discussions/25281 + ### Instead of using the event, we call the workflow from release-please workflow (more secure) + #release: + # types: [published] + + workflow_dispatch: + +# https://docs.github.com/en/actions/using-jobs/using-concurrency +concurrency: + # Only cancel in-progress jobs or runs for the current workflow - matches against branch & tags + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + packages: write + +jobs: + + latest-github-release: + if: github.repository_owner == 'OKDP' + runs-on: "ubuntu-latest" + outputs: + tag_name: ${{ steps.git-release-tag.outputs.tag_name }} + steps: 
+ - name: Checkout Repo ⚡️ + uses: actions/checkout@v4 + + - name: Get latest GitHub Release tag name 📥 + id: git-release-tag + uses: InsonusK/get-latest-release@v1.0.1 + with: + myToken: ${{ github.token }} + exclude_types: "draft" + view_top: 1 + + - name: Info - Found latest release tag + run: | + echo "id: ${{ steps.git-release-tag.outputs.id }}" + echo "name: ${{ steps.git-release-tag.outputs.name }}" + echo "tag_name: ${{ steps.git-release-tag.outputs.tag_name }}" + echo "created_at: ${{ steps.git-release-tag.outputs.created_at }}" + echo "draft: ${{ steps.git-release-tag.outputs.draft }}" + echo "prerelease: ${{ steps.git-release-tag.outputs.prerelease }}" + shell: bash + + get-release-versions: + if: github.repository_owner == 'OKDP' + runs-on: "ubuntu-latest" + outputs: + matrix: ${{ steps.release-versions.outputs.matrix }} + steps: + - name: Checkout Repo ⚡️ + uses: actions/checkout@v4 + + - name: Get release versions matrix 📥 + id: release-versions + uses: ./.github/actions/spark-version-matrix + with: + use_matrix: ".build/release-versions.yml" + + spark-publish: + if: github.repository_owner == 'OKDP' && needs.latest-github-release.outputs.tag_name != '' + name: spark-publish (${{ needs.latest-github-release.outputs.tag_name }}/spark-${{ matrix.version.spark_version }}) + needs: [latest-github-release, get-release-versions] + strategy: + fail-fast: false + matrix: + version: ${{ fromJson(needs.get-release-versions.outputs.matrix) }} + uses: ./.github/workflows/build-images-template.yml + with: + python_version: ${{ matrix.version.python_version }} + spark_version: ${{ matrix.version.spark_version }} + java_version: ${{ matrix.version.java_version }} + scala_version: ${{ matrix.version.scala_version }} + hadoop_version: ${{ matrix.version.hadoop_version }} + registry: ${{ vars.REGISTRY || 'quay.io' }} + publish_to_registry: "true" + git_latest_release_tag: ${{ needs.latest-github-release.outputs.tag_name }} + secrets: inherit diff --git 
a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml new file mode 100644 index 0000000..f73f3c2 --- /dev/null +++ b/.github/workflows/release-please.yml @@ -0,0 +1,70 @@ +# +# Copyright 2024 tosit.io +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: release-please + +on: + pull_request: + types: + - closed + branches: + - main + +permissions: + contents: write + pull-requests: write + +# https://docs.github.com/en/actions/using-jobs/using-concurrency +concurrency: + # Only cancel in-progress jobs or runs for the current workflow - matches against branch & tags + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +jobs: + release-please: + runs-on: ubuntu-latest + outputs: + release_created: ${{ steps.release-please.outputs.release_created }} + tag_name: ${{ steps.release-please.outputs.tag_name }} + # Skip the release process in the fork + # The pull request should come from the same repo (github_token from the fork does not have write permissions) + if: github.repository_owner == 'OKDP' && github.event.pull_request.merged == true && github.event.pull_request.head.repo.full_name == github.repository + steps: + - uses: google-github-actions/release-please-action@v4 + id: release-please + + publish: + runs-on: ubuntu-latest + needs: [release-please] + if: needs.release-please.outputs.release_created == 'true' + permissions: + contents: write + actions: write + packages: write 
+ steps: + - name: "Publish images to official registry" + env: + GH_REPO: ${{ github.repository }} + GH_TOKEN: ${{ github.token }} + GH_DEBUG: api + run: | + gh workflow run publish.yml + shell: bash + \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..94d918d --- /dev/null +++ b/.gitignore @@ -0,0 +1,32 @@ +### IntelliJ IDEA ### +.idea +*.iml + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache +bin/ +!**/src/main/**/bin/ +!**/src/test/**/bin/ + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ + + +### Mac OS ### +.DS_Store + +### vscode ### +.vscode/ + +# Other +tmp/ \ No newline at end of file diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1 @@ +{} diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c16bed3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2024 tosit.io + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index b9d63e1..8b01d7e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,63 @@ -# spark-images -Collection of Spark docker images for OKDP +[![ci](https://github.com/okdp/spark-images/actions/workflows/ci.yml/badge.svg)](https://github.com/okdp/spark-images/actions/workflows/ci.yml) +[![Release](https://img.shields.io/github/v/release/okdp/spark-images)](https://github.com/okdp/spark-images/releases/latest) +[![License Apache2](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0) + + +Collection of [Apache Spark](https://spark.apache.org/) docker images for [OKDP Platform](https://okdp.io/). + +Currently, the images are built from the [Apache Spark project distribution](https://archive.apache.org/dist/spark) and the requirement may evolve to produce them from the [source code](https://github.com/apache/spark). + +The image relationship is described by the following diagram: + +
+ +
+ + + + +| Image | Description | +|:---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `JRE` | The JRE LTS base image supported by Apache Spark depending on the version. This includes Java 11/17/21. Please, check the [reference versions](.build/reference-versions.yml) or [Apache Spark website](https://spark.apache.org/docs/latest/) for more information. | +| `spark-base` | The Apache Spark base image with official spark binaries (scala/java) and without OKDP extensions. | +| `spark` | The Apache Spark image with official spark binaries (scala/java) and OKDP extensions. | +| `spark-py` | The Apache Spark image with official spark binaries (scala/java), OKDP extensions and python support. | +| `spark-r` | The Apache Spark image with official spark binaries (scala/java), OKDP extensions and R support. | + +# Tagging + +The project builds the images with a long format tags. Each tag combines multiple compatible versions combinations. + +There are multiple tags levels and the format to use depends on your convenience in term of stability and reproducibility. + +The images are pushed to [quay.io/okdp](https://quay.io/organization/okdp) repository with the following [tags](.build/images.yml): + +| Images | Tags | +|:--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| spark-base, spark | spark-