Merge pull request #28 from lelouvincx/dev
Merge from dev to main for first release
lelouvincx authored Oct 7, 2024
2 parents 261f2f2 + 844fd6b commit 1a60f49
Showing 282 changed files with 1,770 additions and 6,310 deletions.
Kafka Connect worker configuration (file path not shown):
@@ -20,7 +20,7 @@
# the `bootstrap.servers` and those specifying replication factors.

# A list of host/port pairs to use for establishing the initial connection to the Kafka cluster.
-bootstrap.servers=localhost:9092
+bootstrap.servers=kafka-0:9092

# unique name for the cluster, used in forming the Connect cluster group. Note that this must not conflict with consumer group IDs
group.id=connect-cluster
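Note that the worker now reaches the broker by its Docker service name rather than localhost. A minimal reachability check, assuming the Connect worker and broker run as Compose services named connect and kafka-0 (the service names are assumptions, not taken from this diff):

# Can the worker container open a TCP connection to the broker?
docker-compose exec connect bash -c 'timeout 3 bash -c "</dev/tcp/kafka-0/9092" && echo "kafka-0:9092 reachable"'
# Once the worker is up, its REST API (default port 8083) should answer; curl is assumed to be present in the image
docker-compose exec connect curl -sf http://localhost:8083/connector-plugins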
6 changes: 6 additions & 0 deletions .docker/images/app/.dockerignore
@@ -0,0 +1,6 @@
.pytest_cache
.ruff_cache
__pycache__
logs/
.coverage
experiment.ipynb
24 changes: 24 additions & 0 deletions .docker/images/app/Dockerfile
@@ -0,0 +1,24 @@
FROM python:3.11-slim

# Label for github packages
LABEL org.opencontainers.image.source=https://github.com/lelouvincx/Chinh-Dinh-training
LABEL org.opencontainers.image.description="Data generator (called upstream-app), generates data to source_db."

WORKDIR /app

# Activate python virtual environment
RUN python3 -m venv .venv
RUN . .venv/bin/activate

RUN pip install --no-cache-dir --upgrade pip

# Install requirements
COPY .docker/images/app/requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Install curl
RUN apt-get update && apt-get install -y --no-install-recommends curl && apt-get autoremove -y

COPY app .

CMD [ "streamlit", "run", "app/streamlit_app.py", "--server.address=0.0.0.0" ]
12 changes: 12 additions & 0 deletions .docker/images/app/requirements.txt
@@ -0,0 +1,12 @@
psycopg2-binary==2.9.7
Faker==19.6.0
streamlit==1.26.0
confluent-kafka==2.2.0
sqlalchemy==2.0.20
python-dotenv==1.0.0
ruff==0.0.287
black==23.9.1
pytest==7.4.2
pytest-dependency==0.5.1
pytest-ordering==0.6
pytest-cov==4.1.0
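For working on the generator outside Docker, the same pinned requirements can be installed into a local virtual environment (a minimal sketch; a Python 3.11 interpreter is assumed, matching the base image above):

python3 -m venv .venv
. .venv/bin/activate
pip install -r .docker/images/app/requirements.txt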
13 changes: 13 additions & 0 deletions .docker/images/kafka-connect/Dockerfile
@@ -0,0 +1,13 @@
FROM confluentinc/cp-server-connect:7.1.1

# Label for github packages
LABEL org.opencontainers.image.source=https://github.com/lelouvincx/Chinh-Dinh-training

# Install debezium-connector-postgresql and kafka-connect-jdbc
RUN echo "INFO: Installing Connectors"
RUN confluent-hub install --no-prompt debezium/debezium-connector-postgresql:2.2.1
RUN confluent-hub install --no-prompt confluentinc/kafka-connect-jdbc:10.7.4

RUN echo "INFO: Launching Kafka Connect workers"

CMD [ "/etc/confluent/docker/run" ]
PostgreSQL init script (file path not shown):
@@ -4,11 +4,15 @@ set -e
PGPASSWORD=${POSTGRES_PASSWORD} psql -v ON_ERROR_STOP=1 --username ${POSTGRES_USER} --dbname ${POSTGRES_DB} <<-EOSQL
CREATE USER azure_pg_admin;
GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO azure_pg_admin;
+CREATE USER azure_superuser;
+ALTER USER azure_superuser WITH SUPERUSER;
+GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO azure_superuser;
CREATE USER greglow;
GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO greglow;
-CREATE USER data_engineer;
+CREATE USER data_engineer WITH PASSWORD '${POSTGRES_DE_PASSWORD}';
+ALTER USER data_engineer WITH REPLICATION;
GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO data_engineer;
EOSQL
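Debezium's Postgres connector streams changes over logical replication, which is why data_engineer now gets a password and the REPLICATION attribute. A quick post-init check (a sketch: the source_db service name is an assumption, and admin / wideworldimporters mirror values used in the CI workflow in this PR):

# Confirm the role can log in and has replication rights
docker-compose exec source_db psql -U admin -d wideworldimporters \
  -c "SELECT rolname, rolcanlogin, rolreplication FROM pg_roles WHERE rolname = 'data_engineer';"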
File renamed without changes.
File renamed without changes.
194 changes: 194 additions & 0 deletions .github/workflows/continuous-integration.yml
@@ -0,0 +1,194 @@
name: General Continuous Integration
run-name: ${{ github.actor }} is testing Github Actions


on: [push]


jobs:
explore-github-actions:
runs-on: ubuntu-22.04
steps:
- run: echo "The job was automatically triggered by a ${{ github.event_name }} event."

- run: echo "This job is now running on a ${{ runner.os }} server hosted by Github."

- run: echo "The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."

- name: Checkout repository code
uses: actions/checkout@v4

- run: echo "The ${{ github.repository }} repository has been cloned to the runner."

- run: echo "The workflow is now ready to test your code on the runner."

- name: View environment variables
run: printenv

- name: List files in the repository
run: |
ls -lah ${{ github.workspace }}
- run: echo "This job's status is ${{ job.status }}."

check-changes:
runs-on: ubuntu-22.04

outputs:
upstream-app: ${{ steps.changes.outputs.upstream-app }}
kafka-connect: ${{ steps.changes.outputs.kafka-connect }}

steps:
- name: Checkout repository code
uses: actions/checkout@v4

- name: Check changes
uses: dorny/paths-filter@v2
id: changes
with:
base: ${{ github.ref }}
ref: ${{ github.ref }}
filters: |
upstream-app:
- ".docker/images/app/**"
kafka-connect:
- ".docker/images/kafka-connect/**"
build-push-upstream-app:
needs: check-changes
if: ${{ needs.check-changes.outputs.upstream-app == 'true' }}
runs-on: ubuntu-22.04

env:
REGISTRY: ghcr.io
UPSTREAM_APP_IMAGE_NAME: upstream-app

permissions:
contents: read
packages: write

steps:
- name: Checkout repository code
uses: actions/checkout@v4

- name: Setup QEMU
uses: docker/setup-qemu-action@v3

- name: Login to the container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) for docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ github.actor }}/${{ env.UPSTREAM_APP_IMAGE_NAME }}

- name: Build and push image
uses: docker/build-push-action@v5
with:
context: .
file: .docker/images/app/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

build-push-kafka-connect:
needs: check-changes
if: ${{ needs.check-changes.outputs.kafka-connect == 'true' }}
runs-on: ubuntu-22.04

env:
REGISTRY: ghcr.io
KAFKA_CONNECT_IMAGE_NAME: kafka-connect

permissions:
contents: read
packages: write

steps:
- name: Checkout repository code
uses: actions/checkout@v4

- name: Setup QEMU
uses: docker/setup-qemu-action@v3

- name: Login to the container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) for docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ github.actor }}/${{ env.KAFKA_CONNECT_IMAGE_NAME }}

- name: Build and push image
uses: docker/build-push-action@v5
with:
context: .
file: .docker/images/kafka-connect/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

unit-test-upstream-app:
needs: build-push-upstream-app
if: | # Always run after build-push-upstream-app
always() &&
(needs.build-push-upstream-app.result == 'success' || needs.build-push-upstream-app.result == 'skipped')
runs-on: ubuntu-22.04

env:
POSTGRES_USER: admin
POSTGRES_PASSWORD: admin123
POSTGRES_DB: wideworldimporters
POSTGRES_PORT: 5432
REGISTRY: ghcr.io
UPSTREAM_APP_IMAGE_NAME: upstream-app

steps:
- name: Checkout repository code
uses: actions/checkout@v4

- name: Setup QEMU
uses: docker/setup-qemu-action@v3

- name: Login to the container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) from existing docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ github.actor }}/${{ env.UPSTREAM_APP_IMAGE_NAME }}

- name: Setup docker-compose
uses: KengoTODA/actions-setup-docker-compose@main
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: View current working dir
run: pwd && ls -a && ls -lah app

- name: Compose up services
run: docker-compose version && docker-compose -f app/tests/docker-compose.yml --project-directory . up -d

- name: View running services
run: docker-compose -f app/tests/docker-compose.yml --project-directory . ps -a && sleep 15

- name: Unit tests
run: docker-compose -f app/tests/docker-compose.yml --project-directory . exec upstream-app python -m pytest --log-cli-level info -p no:warnings -v /app/tests

- name: Compose down services
run: docker-compose -f app/tests/docker-compose.yml --project-directory . down
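The same test run can be reproduced locally with the commands the job uses, assuming the POSTGRES_* variables above are exported and the images referenced by app/tests/docker-compose.yml are available (a sketch of the workflow steps, not an addition to them):

docker-compose -f app/tests/docker-compose.yml --project-directory . up -d
docker-compose -f app/tests/docker-compose.yml --project-directory . exec upstream-app \
  python -m pytest --log-cli-level info -p no:warnings -v /app/tests
docker-compose -f app/tests/docker-compose.yml --project-directory . down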
15 changes: 15 additions & 0 deletions .github/workflows/naming-policy.yml
@@ -0,0 +1,15 @@
name: Check naming policy
run-name: Check naming policy for ${{ github.ref }}

on: [pull_request]

jobs:
branch-naming-rules:
runs-on: ubuntu-22.04
steps:
- uses: deepakputhraya/action-branch-name@master
with:
regex: '([a-z])+\/(\d+)-([a-z])+' # Regex the branch should match. This example enforces grouping
allowed_prefixes: 'feat,fix,refactor,docs' # All branches should start with the given prefix
ignore: main,dev # Ignore exactly matching branch names from convention
max_length: 100 # Max length of the branch name
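In practice this means a branch such as feat/28-upstream-app (a hypothetical name) passes, while fix/kafka or feature_branch would be rejected. A local pre-push check could approximate the combined prefix and regex rules like this (a sketch; [0-9] stands in for the \d used in the workflow's regex):

branch="$(git rev-parse --abbrev-ref HEAD)"
if echo "${branch}" | grep -Eq '^(feat|fix|refactor|docs)/[0-9]+-[a-z]+' || [ "${branch}" = "main" ] || [ "${branch}" = "dev" ]; then
  echo "branch name OK: ${branch}"
else
  echo "branch name violates the naming policy: ${branch}"
fi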
26 changes: 20 additions & 6 deletions .gitignore
@@ -85,25 +85,25 @@ ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
-# .python-version
+.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
-#Pipfile.lock
+Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
+poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
+pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
@@ -130,7 +130,6 @@ venv.bak/
*.pyc
**/*.pyc


# Spyder project settings
.spyderproject
.spyproject
@@ -160,7 +159,22 @@
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/

# Ruff
.ruff_cache*

# Docker stuff
.docker/data/*
.docker/backups/*
.docker/log/*

# Misc
tmp/
learning/
learn-kafka/
learn-sqlserver/
database-replication/

database-replication.code-workspace
restore.sql
(Diff for the remaining changed files omitted.)
