diff --git a/Dockerfile b/Dockerfile index a91e52c..1fce265 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,15 @@ -FROM apache/airflow:2.10.0-python3.10 +FROM apache/airflow:2.7.1-python3.9 -USER root - -ARG AIRFLOW_HOME=/opt/airflow +USER root -ADD dags /opt/airflow/dags +ARG AIRFLOW_HOME=/opt/airflow ADD airflow.cfg /opt/airflow/airflow.cfg USER airflow -RUN pip install --upgrade pip +RUN pip install --upgrade pip USER root @@ -33,7 +31,7 @@ RUN chown -R "airflow:root" /opt/airflow/ ADD ssh /home/airflow/.ssh/ RUN chown -R airflow:root /home/airflow/.ssh -USER airflow +USER airflow RUN pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org boto3 @@ -47,4 +45,3 @@ RUN pip install -r /requirements.txt RUN git config --global user.email "your email" RUN git config --global user.name "your username" - diff --git a/README.md b/README.md index 555abe6..6098530 100755 --- a/README.md +++ b/README.md @@ -7,15 +7,15 @@ L'infrastructure actuelle est basée sur du LocalExecutor (le scheduler, le webs ## Installation ``` -git clone git@github.com:etalab/data-engineering-stack.git +git clone git@github.com:datagouv/data-engineering-stack.git cd data-engineering-stack # Create directories necessary for Airflow to work ./1_prepareDirs.sh -# Prepare .env file +# Prepare .env file ./2_prepare_env.sh -nano .env +nano .env # Edit POSTGRES_USER ; POSTGRES_PASSWORD ; POSTGRES_DB ; AIRFLOW_ADMIN_MAIL ; AIRFLOW_ADMIN_FIRSTNAME ; AIRFLOW_ADMIN_NAME ; AIRFLOW_ADMIN_PASSWORD # Launch services @@ -31,6 +31,6 @@ docker-compose up --build -d ./refreshBagDags.sh ``` -## Connections +## Variables and connections Connections can be created manually or with python scripts `createConn.py` (using Airflow API) inside each projects. You need also to add your ssh key inside `ssh` folder of repository for the container to be able to see it in `/home/airflow/.ssh/` folder of container. diff --git a/docker-compose.yml b/docker-compose.yml index 425b5a5..79d255c 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,7 @@ -version: "3" services: postgres: image: postgres:12 - user: "${AIRFLOW_UID}:${AIRFLOW_GID}" + user: root volumes: - ./pg-airflow:/var/lib/postgresql/data env_file: @@ -22,7 +21,8 @@ services: env_file: - .env volumes: - - ./dags:/opt/airflow/dags + - ${LOCAL_AIRFLOW_DAG_PATH}:/opt/airflow/dags/datagouvfr_data_pipelines + - ${LOCAL_TMP_PATH}:/tmp - ./scripts:/opt/airflow/scripts - ./logs:/opt/airflow/logs - ./plugins:/opt/airflow/plugins