Samagra-Development / samagra-devops

Devops repo for all Samagra Deployments
https://samagra-development.github.io/Samagra-DevOps-Guide/
MIT License
5 stars 8 forks source link

Upgrade Airflow version for custom Image #2

Open nileshbhadana opened 2 years ago

nileshbhadana commented 2 years ago

Problem statement: Upgrade the Airflow version in the custom image to the latest release.

Dockerfile

# VERSION 2.4.0 (default of the AIRFLOW_VERSION build argument below)
# AUTHOR: Matthieu "Puckel_" Roisil
# DESCRIPTION: Basic Airflow container
# BUILD: docker build --rm -t puckel/docker-airflow .
#        Override the Airflow release with --build-arg AIRFLOW_VERSION=<version>
# SOURCE: https://github.com/puckel/docker-airflow

FROM python:3.9-slim-buster
LABEL maintainer="Puckel_"

# Never prompt the user for choices on installation/configuration of packages
ENV DEBIAN_FRONTEND=noninteractive \
    TERM=linux

# Airflow build arguments:
#   AIRFLOW_VERSION    - apache-airflow release that gets pip-installed
#   AIRFLOW_USER_HOME  - home directory of the airflow user; also AIRFLOW_HOME
#   AIRFLOW_DEPS       - extra Airflow extras appended to the install spec
#   PYTHON_DEPS        - presumably extra pip packages (empty by default)
ARG AIRFLOW_VERSION=2.4.0
ARG AIRFLOW_USER_HOME=/usr/local/airflow
ARG AIRFLOW_DEPS=""
ARG PYTHON_DEPS=""
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}

# Define en_US.
ENV LANGUAGE=en_US.UTF-8 \
    LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
    LC_CTYPE=en_US.UTF-8 \
    LC_MESSAGES=en_US.UTF-8

# Project-specific Python requirements, installed in a later build step.
COPY requirements.txt /requirements.txt

# Disable noisy "Handling signal" log messages:
# ENV GUNICORN_CMD_ARGS --log-level WARNING

# PostgreSQL client library and headers plus a C compiler, installed ahead of
# the main layer (gcc is removed again in a later step of this Dockerfile).
RUN apt-get update && apt-get install -y libpq5 libpq-dev gcc

# Single layer that:
#   1. installs OS packages (those listed in buildDeps are purged at the end),
#   2. generates the en_US.UTF-8 locale,
#   3. creates the airflow user with AIRFLOW_USER_HOME as its home directory,
#   4. installs the Python stack, including apache-airflow==AIRFLOW_VERSION
#      with the listed extras plus any given through AIRFLOW_DEPS,
#   5. installs the optional extra packages from the PYTHON_DEPS build arg,
#   6. removes build-only packages, apt lists, temp files and man/doc trees.
#
# NOTE(review): SQLAlchemy is pinned before Airflow is installed; pip may
#   replace that pin to satisfy Airflow's own requirement - confirm the
#   resulting version in the built image.
# NOTE(review): psycopg2 and psycopg2-binary both provide the psycopg2
#   module - confirm both are really needed.
# NOTE(review): redis==3.2 is installed after the celery extra and may clash
#   with the version that extra pulled in - confirm against `pip check`.
RUN set -ex \
    && buildDeps=' \
    freetds-dev \
    libkrb5-dev \
    libsasl2-dev \
    libssl-dev \
    libffi-dev \
    libpq-dev \
    git \
    ' \
    && apt-get update -yqq \
    && apt-get upgrade -yqq \
    && apt-get install -yqq --no-install-recommends \
    $buildDeps \
    freetds-bin \
    build-essential \
    default-libmysqlclient-dev \
    apt-utils \
    curl \
    rsync \
    netcat \
    locales \
    && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \
    && locale-gen \
    && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \
    && useradd -ms /bin/bash -d ${AIRFLOW_USER_HOME} airflow \
    && pip install -U pip setuptools wheel \
    && pip install pytz \
    && pip install pyOpenSSL \
    && pip install ndg-httpsclient \
    && pip install pyasn1 \
    && pip install singer-target-postgres \
    && pip install psycopg2==2.8.4 \
    && pip install psycopg2-binary==2.8.4 \
    && pip install flask_bcrypt \
    && pip install SQLAlchemy==1.3.23 \
    && pip install Flask-SQLAlchemy==2.4.4 \
    && pip install hvac \
    && pip install "apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION}" \
    && pip install 'redis==3.2' \
    && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \
    && apt-get purge --auto-remove -yqq $buildDeps \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf \
    /var/lib/apt/lists/* \
    /tmp/* \
    /var/tmp/* \
    /usr/share/man \
    /usr/share/doc \
    /usr/share/doc-base
# Install the project requirements that were copied to /requirements.txt.
# The absolute path keeps this step independent of the build-time working
# directory.
RUN pip install -r /requirements.txt
# Container entrypoint script and the Airflow configuration file.
COPY script/entrypoint.sh /entrypoint.sh
COPY config/airflow.cfg ${AIRFLOW_USER_HOME}/airflow.cfg
# The compiler is no longer needed once all Python packages are installed.
RUN apt-get autoremove -y gcc
# Hand the Airflow home directory (including the copied cfg) to the airflow user.
RUN chown -R airflow: ${AIRFLOW_USER_HOME}

# install Java
USER root
# OpenJDK 8 is pulled from the Debian stretch security repository, which is
# appended to the apt sources of this buster-based image.
# NOTE(review): stretch is end-of-life - confirm this repository URL still
#   resolves, or switch to an archive mirror / a different JDK source.
# /usr/share/man/man1 is recreated because an earlier layer deleted
# /usr/share/man; presumably the JDK package setup expects it to exist.
# update + install share one RUN so the install never runs against a stale
# cached package index, and the apt lists are dropped from the layer.
RUN echo "deb http://security.debian.org/debian-security stretch/updates main" >> /etc/apt/sources.list \
    && mkdir -p /usr/share/man/man1 \
    && apt-get update -y \
    && apt-get install -y openjdk-8-jdk unzip \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

# Ports: 8080 (webserver), 5555 (flower), 8793 (presumably the worker log
# server - confirm).
EXPOSE 8080 5555 8793

# Run as the unprivileged airflow user from its home directory.
USER airflow
WORKDIR ${AIRFLOW_USER_HOME}
# The entrypoint script receives the CMD (default: webserver) as its argument.
ENTRYPOINT ["/entrypoint.sh"]
CMD ["webserver"]

Docker-compose.yaml

version: "2.2"
services:
  # Redis server protected with the password that the Airflow services
  # receive as REDIS_PASSWORD (presumably used as the Celery broker).
  redis:
    image: redis:5.0.5
    command: ["redis-server", "--requirepass", "redispass"]
    # Started after the vault-dev service.
    depends_on:
      - vault-dev

  # PostgreSQL database; user, password and database name match the
  # POSTGRES_* values handed to the Airflow services.
  postgres:
    image: "postgres:9.6"
    # Data directory is persisted on the host under ./pgdata.
    volumes:
      - ./pgdata:/var/lib/postgresql/data/pgdata
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
      PGDATA: /var/lib/postgresql/data/pgdata

  # Airflow webserver, built from the local Dockerfile and published on
  # host port 8181.
  webserver:
    build: .
    image: docker.io/nileshbhadana/airflow
    # Valid restart policies are "no", always, on-failure and unless-stopped;
    # quoted so YAML does not read it as a boolean.
    restart: "no"
    depends_on:
      - postgres
      - redis
    environment:
      - LOAD_EX=n
      # NOTE(review): secrets are committed in plain text here - consider
      # supplying them from the environment or an env_file.
      - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
      - EXECUTOR=Celery
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
      - REDIS_PASSWORD=redispass
      # Taken from the shell / .env file that runs docker-compose.
      - AIRFLOW__SECRETS__BACKEND_KWARGS=${AIRFLOW__SECRETS__BACKEND_KWARGS}
    volumes:
      - ./dags:/usr/local/airflow/dags
      - ./plugins:/usr/local/airflow/plugins
      - ./requirements.txt:/requirements.txt
      - ./config/airflow.cfg:/usr/local/airflow/airflow.cfg
    ports:
      - "8181:8080"
    command: webserver
    # Healthy as long as the webserver pid file exists.
    healthcheck:
      test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"]
      interval: 30s
      timeout: 30s
      retries: 3

  # Flower monitoring UI, published on host port 5555.
  # NOTE(review): Airflow 2.x exposes this as `airflow celery flower`; confirm
  # that entrypoint.sh translates the bare `flower` argument accordingly.
  flower:
    build: .
    image: docker.io/nileshbhadana/airflow
    # Valid restart policies are "no", always, on-failure and unless-stopped;
    # quoted so YAML does not read it as a boolean.
    restart: "no"
    depends_on:
      - redis
    environment:
      - EXECUTOR=Celery
      - REDIS_PASSWORD=redispass
    volumes:
      - ./config/airflow.cfg:/usr/local/airflow/airflow.cfg
    ports:
      - "5555:5555"
    command: flower

  # Airflow scheduler; shares DAGs, plugins, requirements and airflow.cfg
  # with the other Airflow services through bind mounts.
  scheduler:
    build: .
    image: docker.io/nileshbhadana/airflow
    # Valid restart policies are "no", always, on-failure and unless-stopped;
    # quoted so YAML does not read it as a boolean.
    restart: "no"
    depends_on:
      - webserver
    volumes:
      - ./dags:/usr/local/airflow/dags
      - ./plugins:/usr/local/airflow/plugins
      - ./requirements.txt:/requirements.txt
      - ./config/airflow.cfg:/usr/local/airflow/airflow.cfg
    environment:
      - LOAD_EX=n
      # NOTE(review): secrets are committed in plain text here - consider
      # supplying them from the environment or an env_file.
      - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
      - EXECUTOR=Celery
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
      - REDIS_PASSWORD=redispass
      # Taken from the shell / .env file that runs docker-compose.
      - AIRFLOW__SECRETS__BACKEND_KWARGS=${AIRFLOW__SECRETS__BACKEND_KWARGS}
    command: scheduler

  # Celery worker that executes the tasks.
  # NOTE(review): Airflow 2.x exposes this as `airflow celery worker`; confirm
  # that entrypoint.sh translates the bare `worker` argument accordingly.
  worker:
    build: .
    image: docker.io/nileshbhadana/airflow
    # Valid restart policies are "no", always, on-failure and unless-stopped;
    # quoted so YAML does not read it as a boolean.
    restart: "no"
    depends_on:
      - scheduler
    volumes:
      - ./dags:/usr/local/airflow/dags
      # Custom plugins directory
      - ./plugins:/usr/local/airflow/plugins
      - ./requirements.txt:/requirements.txt
      - ./config/airflow.cfg:/usr/local/airflow/airflow.cfg
    environment:
      # NOTE(review): secrets are committed in plain text here - consider
      # supplying them from the environment or an env_file.
      - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
      - EXECUTOR=Celery
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
      - REDIS_PASSWORD=redispass
      # Taken from the shell / .env file that runs docker-compose.
      - AIRFLOW__SECRETS__BACKEND_KWARGS=${AIRFLOW__SECRETS__BACKEND_KWARGS}
    command: worker

  # HashiCorp Vault server with file storage, published on host port 8200.
  # NOTE(review): the image entrypoint is replaced by the bind-mounted
  # ./docker-entrypoint.sh, which is not shown here - confirm how it uses
  # the VAULT_* variables and the /vault/unseal directory.
  vault-dev:
    container_name: vault-dev
    image: vault
    command:
      - vault
      - server
      - '-config=/vault/config'
    cap_add:
      - IPC_LOCK
    ports:
      - "8200:8200"
    volumes:
      - ./vault:/vault/file
      - ./unseal:/vault/unseal
      - ./docker-entrypoint.sh:/usr/local/bin/docker-entrypoint.sh
    environment:
      VAULT_DEV_ROOT_TOKEN_ID: "myroot"
      VAULT_LOCAL_CONFIG: '{"backend": {"file": {"path": "/vault/file"}}, "default_lease_ttl": "168h", "max_lease_ttl": "720h"}'
      VAULT_SUPPLEMENTAL_CONFIG: '{"ui":true, "listener": {"tcp":{"address": "0.0.0.0:8200", "tls_disable": 1}}}'
      VAULT_ADDR: "http://127.0.0.1:8200"

Added the following block to airflow.cfg to enable the HashiCorp Vault secrets backend:

[secrets]
# Resolve Airflow connections and variables from HashiCorp Vault.
# The url uses the compose service name: this file is mounted into the
# Airflow containers, where 127.0.0.1 would point at the container itself
# rather than at the Vault container.
# NOTE(review): the compose file also passes AIRFLOW__SECRETS__BACKEND_KWARGS;
#   confirm which of the two values is meant to win.
# NOTE(review): this backend lives in the apache-airflow-providers-hashicorp
#   package - confirm it is listed in requirements.txt.
backend = airflow.providers.hashicorp.secrets.vault.VaultBackend
backend_kwargs = {"connections_path": "connections", "variables_path": "variables", "mount_point": "airflow", "url": "http://vault-dev:8200", "auth_type": "token", "token": "myroot"}
aryasoni98 commented 1 year ago

I would like to work on this.