Merge pull request #28 from lelouvincx/dev
Merge from dev to main for first release
lelouvincx authored Oct 7, 2024
2 parents 261f2f2 + 844fd6b commit 1a60f49
Showing 282 changed files with 1,770 additions and 6,310 deletions.
Kafka Connect worker configuration (file path not shown):
@@ -20,7 +20,7 @@
# the `bootstrap.servers` and those specifying replication factors.

# A list of host/port pairs to use for establishing the initial connection to the Kafka cluster.
-bootstrap.servers=localhost:9092
+bootstrap.servers=kafka-0:9092

# unique name for the cluster, used in forming the Connect cluster group. Note that this must not conflict with consumer group IDs
group.id=connect-cluster
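Note that the worker now reaches the broker by its Docker service name rather than localhost. A minimal reachability check, assuming the Connect worker and broker run as Compose services named connect and kafka-0 (the service names are assumptions, not taken from this diff):

# Can the worker container open a TCP connection to the broker?
docker-compose exec connect bash -c 'timeout 3 bash -c "</dev/tcp/kafka-0/9092" && echo "kafka-0:9092 reachable"'
# Once the worker is up, its REST API (default port 8083) should answer; curl is assumed to be present in the image
docker-compose exec connect curl -sf http://localhost:8083/connector-plugins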
6 changes: 6 additions & 0 deletions .docker/images/app/.dockerignore
@@ -0,0 +1,6 @@
.pytest_cache
.ruff_cache
__pycache__
logs/
.coverage
experiment.ipynb
24 changes: 24 additions & 0 deletions .docker/images/app/Dockerfile
@@ -0,0 +1,24 @@
FROM python:3.11-slim

# Label for github packages
LABEL org.opencontainers.image.source=https://github.com/lelouvincx/Chinh-Dinh-training
LABEL org.opencontainers.image.description="Data generator (called upstream-app), generates data to source_db."

WORKDIR /app

# Activate python virtual environment
RUN python3 -m venv .venv
RUN . .venv/bin/activate

RUN pip install --no-cache-dir --upgrade pip

# Install requirements
COPY .docker/images/app/requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Install curl
RUN apt-get update && apt-get install -y --no-install-recommends curl && apt-get autoremove -y

COPY app .

CMD [ "streamlit", "run", "app/streamlit_app.py", "--server.address=0.0.0.0" ]
12 changes: 12 additions & 0 deletions .docker/images/app/requirements.txt
@@ -0,0 +1,12 @@
psycopg2-binary==2.9.7
Faker==19.6.0
streamlit==1.26.0
confluent-kafka==2.2.0
sqlalchemy==2.0.20
python-dotenv==1.0.0
ruff==0.0.287
black==23.9.1
pytest==7.4.2
pytest-dependency==0.5.1
pytest-ordering==0.6
pytest-cov==4.1.0
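For working on the generator outside Docker, the same pinned requirements can be installed into a local virtual environment (a minimal sketch; a Python 3.11 interpreter is assumed, matching the base image above):

python3 -m venv .venv
. .venv/bin/activate
pip install -r .docker/images/app/requirements.txt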
13 changes: 13 additions & 0 deletions .docker/images/kafka-connect/Dockerfile
@@ -0,0 +1,13 @@
FROM confluentinc/cp-server-connect:7.1.1

# Label for github packages
LABEL org.opencontainers.image.source=https://github.com/lelouvincx/Chinh-Dinh-training

# Install debezium-connector-postgresql and kafka-connect-jdbc
RUN echo "INFO: Installing Connectors"
RUN confluent-hub install --no-prompt debezium/debezium-connector-postgresql:2.2.1
RUN confluent-hub install --no-prompt confluentinc/kafka-connect-jdbc:10.7.4

RUN echo "INFO: Launching Kafka Connect workers"

CMD [ "/etc/confluent/docker/run" ]
PostgreSQL init script (file path not shown):
@@ -4,11 +4,15 @@ set -e
PGPASSWORD=${POSTGRES_PASSWORD} psql -v ON_ERROR_STOP=1 --username ${POSTGRES_USER} --dbname ${POSTGRES_DB} <<-EOSQL
CREATE USER azure_pg_admin;
GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO azure_pg_admin;
+CREATE USER azure_superuser;
+ALTER USER azure_superuser WITH SUPERUSER;
+GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO azure_superuser;
CREATE USER greglow;
GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO greglow;
-CREATE USER data_engineer;
+CREATE USER data_engineer WITH PASSWORD '${POSTGRES_DE_PASSWORD}';
+ALTER USER data_engineer WITH REPLICATION;
GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO data_engineer;
EOSQL
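Debezium's Postgres connector streams changes over logical replication, which is why data_engineer now gets a password and the REPLICATION attribute. A quick post-init check (a sketch: the source_db service name is an assumption, and admin / wideworldimporters mirror values used in the CI workflow in this PR):

# Confirm the role can log in and has replication rights
docker-compose exec source_db psql -U admin -d wideworldimporters \
  -c "SELECT rolname, rolcanlogin, rolreplication FROM pg_roles WHERE rolname = 'data_engineer';"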
File renamed without changes.
File renamed without changes.
194 changes: 194 additions & 0 deletions .github/workflows/continuous-integration.yml
@@ -0,0 +1,194 @@
name: General Continuous Integration
run-name: ${{ github.actor }} is testing Github Actions


on: [push]


jobs:
explore-github-actions:
runs-on: ubuntu-22.04
steps:
- run: echo "The job was automatically triggered by a ${{ github.event_name }} event."

- run: echo "This job is now running on a ${{ runner.os }} server hosted by Github."

- run: echo "The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."

- name: Checkout repository code
uses: actions/checkout@v4

- run: echo "The ${{ github.repository }} repository has been cloned to the runner."

- run: echo "The workflow is now ready to test your code on the runner."

- name: View environment variables
run: printenv

- name: List files in the repository
run: |
ls -lah ${{ github.workspace }}
- run: echo "This job's status is ${{ job.status }}."

check-changes:
runs-on: ubuntu-22.04

outputs:
upstream-app: ${{ steps.changes.outputs.upstream-app }}
kafka-connect: ${{ steps.changes.outputs.kafka-connect }}

steps:
- name: Checkout repository code
uses: actions/checkout@v4

- name: Check changes
uses: dorny/paths-filter@v2
id: changes
with:
base: ${{ github.ref }}
ref: ${{ github.ref }}
filters: |
upstream-app:
- ".docker/images/app/**"
kafka-connect:
- ".docker/images/kafka-connect/**"
build-push-upstream-app:
needs: check-changes
if: ${{ needs.check-changes.outputs.upstream-app == 'true' }}
runs-on: ubuntu-22.04

env:
REGISTRY: ghcr.io
UPSTREAM_APP_IMAGE_NAME: upstream-app

permissions:
contents: read
packages: write

steps:
- name: Checkout repository code
uses: actions/checkout@v4

- name: Setup QEMU
uses: docker/setup-qemu-action@v3

- name: Login to the container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) for docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ github.actor }}/${{ env.UPSTREAM_APP_IMAGE_NAME }}

- name: Build and push image
uses: docker/build-push-action@v5
with:
context: .
file: .docker/images/app/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

build-push-kafka-connect:
needs: check-changes
if: ${{ needs.check-changes.outputs.kafka-connect == 'true' }}
runs-on: ubuntu-22.04

env:
REGISTRY: ghcr.io
KAFKA_CONNECT_IMAGE_NAME: kafka-connect

permissions:
contents: read
packages: write

steps:
- name: Checkout repository code
uses: actions/checkout@v4

- name: Setup QEMU
uses: docker/setup-qemu-action@v3

- name: Login to the container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) for docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ github.actor }}/${{ env.KAFKA_CONNECT_IMAGE_NAME }}

- name: Build and push image
uses: docker/build-push-action@v5
with:
context: .
file: .docker/images/kafka-connect/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

unit-test-upstream-app:
needs: build-push-upstream-app
if: | # Always run after build-push-upstream-app
always() &&
(needs.build-push-upstream-app.result == 'success' || needs.build-push-upstream-app.result == 'skipped')
runs-on: ubuntu-22.04

env:
POSTGRES_USER: admin
POSTGRES_PASSWORD: admin123
POSTGRES_DB: wideworldimporters
POSTGRES_PORT: 5432
REGISTRY: ghcr.io
UPSTREAM_APP_IMAGE_NAME: upstream-app

steps:
- name: Checkout repository code
uses: actions/checkout@v4

- name: Setup QEMU
uses: docker/setup-qemu-action@v3

- name: Login to the container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) from existing docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ github.actor }}/${{ env.UPSTREAM_APP_IMAGE_NAME }}

- name: Setup docker-compose
uses: KengoTODA/actions-setup-docker-compose@main
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: View current working dir
run: pwd && ls -a && ls -lah app

- name: Compose up services
run: docker-compose version && docker-compose -f app/tests/docker-compose.yml --project-directory . up -d

- name: View running services
run: docker-compose -f app/tests/docker-compose.yml --project-directory . ps -a && sleep 15

- name: Unit tests
run: docker-compose -f app/tests/docker-compose.yml --project-directory . exec upstream-app python -m pytest --log-cli-level info -p no:warnings -v /app/tests

- name: Compose down services
run: docker-compose -f app/tests/docker-compose.yml --project-directory . down
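The same test run can be reproduced locally with the commands the job uses, assuming the POSTGRES_* variables above are exported and the images referenced by app/tests/docker-compose.yml are available (a sketch of the workflow steps, not an addition to them):

docker-compose -f app/tests/docker-compose.yml --project-directory . up -d
docker-compose -f app/tests/docker-compose.yml --project-directory . exec upstream-app \
  python -m pytest --log-cli-level info -p no:warnings -v /app/tests
docker-compose -f app/tests/docker-compose.yml --project-directory . down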
15 changes: 15 additions & 0 deletions .github/workflows/naming-policy.yml
@@ -0,0 +1,15 @@
name: Check naming policy
run-name: Check naming policy for ${{ github.ref }}

on: [pull_request]

jobs:
branch-naming-rules:
runs-on: ubuntu-22.04
steps:
- uses: deepakputhraya/action-branch-name@master
with:
regex: '([a-z])+\/(\d+)-([a-z])+' # Regex the branch should match. This example enforces grouping
allowed_prefixes: 'feat,fix,refactor,docs' # All branches should start with the given prefix
ignore: main,dev # Ignore exactly matching branch names from convention
max_length: 100 # Max length of the branch name
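In practice this means a branch such as feat/28-upstream-app (a hypothetical name) passes, while fix/kafka or feature_branch would be rejected. A local pre-push check could approximate the combined prefix and regex rules like this (a sketch; [0-9] stands in for the \d used in the workflow's regex):

branch="$(git rev-parse --abbrev-ref HEAD)"
if echo "${branch}" | grep -Eq '^(feat|fix|refactor|docs)/[0-9]+-[a-z]+' || [ "${branch}" = "main" ] || [ "${branch}" = "dev" ]; then
  echo "branch name OK: ${branch}"
else
  echo "branch name violates the naming policy: ${branch}"
fi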
26 changes: 20 additions & 6 deletions .gitignore
@@ -85,25 +85,25 @@ ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
-# .python-version
+.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
-#Pipfile.lock
+Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
+poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
+pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
@@ -130,7 +130,6 @@ venv.bak/
*.pyc
**/*.pyc


# Spyder project settings
.spyderproject
.spyproject
@@ -160,7 +159,22 @@
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/

# Ruff
.ruff_cache*

# Docker stuff
.docker/data/*
.docker/backups/*
.docker/log/*

# Misc
tmp/
learning/
learn-kafka/
learn-sqlserver/
database-replication/

database-replication.code-workspace
restore.sql
(Diff for the remaining changed files omitted.)
