alberttwong / onehouse-demos

Demos for Onehouse.ai, a company specializing in data lakehouse solutions.
http://onehouse.ai

Kafka doesn't work well in containers due to networking #1

Closed (alberttwong closed this 1 month ago)

alberttwong commented 1 month ago

I tried to use this (a Confluent Kafka container with ngrok) as a template for the Debezium images, but it didn't work: https://rmoff.net/2023/11/01/using-apache-kafka-with-ngrok/

alberttwong commented 1 month ago

Figured it out. The trick is a custom entrypoint on the kafka service: it waits for the ngrok tunnel to come up, reads the public TCP address from the ngrok API, appends it to KAFKA_ADVERTISED_LISTENERS, and only then starts the broker.

albert@Alberts-MBP postgressql-debezium % cat docker-compose.yml
version: "3"

services:

  ngrok:
    image: ngrok/ngrok:latest
    container_name: ngrok
    command: tcp kafka:9092 --log stdout --authtoken $NGROK_AUTH_TOKEN
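    # NGROK_AUTH_TOKEN must be exported in the shell that runs docker compose; it is substituted here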
    ports:
      - 4040:4040 # Web dashboard for ngrok
  zookeeper:
    image: quay.io/debezium/zookeeper:2.7.0.Final
    ports:
     - 2181:2181
     - 2888:2888
     - 3888:3888
  kafka:
    image: quay.io/debezium/kafka:2.7.0.Final
    ports:
     - 29092:29092
    links:
     - zookeeper
    environment:
     - ZOOKEEPER_CONNECT=zookeeper:2181
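     # two listeners: DOCKER for clients inside the compose network,
     # NGROK for connections arriving through the tunnel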
     - KAFKA_LISTENERS=DOCKER://kafka:29092, NGROK://kafka:9092
     - KAFKA_ADVERTISED_LISTENERS=DOCKER://kafka:29092
     - KAFKA_INTER_BROKER_LISTENER_NAME=DOCKER
     - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=DOCKER:PLAINTEXT,NGROK:PLAINTEXT
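    # advertise only the DOCKER listener at startup; the entrypoint below appends
    # the ngrok public address once the tunnel is up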
    entrypoint:
      - /bin/sh
      - -c
      - |
        echo "Waiting for ngrok tunnel to be created"
        while : ; do
          curl_status=$$(curl -s -o /dev/null -w %{http_code} http://ngrok:4040/api/tunnels/command_line)
          echo -e $$(date) "\tTunnels API HTTP state: " $$curl_status " (waiting for 200)"
          if [ $$curl_status -eq 200 ] ; then
            break
          fi
          sleep 5
        done
        echo "ngrok tunnel is up"
        NGROK_LISTENER=$$(curl -s http://ngrok:4040/api/tunnels/command_line | grep -Po '"public_url":.*?[^\\]",' | cut -d':' -f2- | tr -d ',"' | sed 's/tcp:\/\//NGROK:\/\//g')
        echo $$NGROK_LISTENER
        export KAFKA_ADVERTISED_LISTENERS="$$KAFKA_ADVERTISED_LISTENERS, $$NGROK_LISTENER"
        echo "KAFKA_ADVERTISED_LISTENERS is set to " $$KAFKA_ADVERTISED_LISTENERS
        /docker-entrypoint.sh start
  postgres:
    image: quay.io/debezium/example-postgres:2.7.0.Final
    ports:
     - 5432:5432
    environment:
     - POSTGRES_USER=postgres
     - POSTGRES_PASSWORD=postgres
  connect:
    image: quay.io/debezium/connect:2.7.0.Final
    ports:
     - 8083:8083
    links:
     - kafka
     - postgres
    environment:
     - BOOTSTRAP_SERVERS=kafka:29092
     - GROUP_ID=1
     - CONFIG_STORAGE_TOPIC=my_connect_configs
     - OFFSET_STORAGE_TOPIC=my_connect_offsets
     - STATUS_STORAGE_TOPIC=my_connect_statuses
    volumes:
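     # extra connector plugins; the Debezium Connect image picks up anything under /kafka/connect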
     - ./kafka/connect/thirdparty:/kafka/connect/thirdparty

  metastore_db:
    image: postgres:11
    hostname: metastore_db
    environment:
      POSTGRES_USER: hive
      POSTGRES_PASSWORD: hive
      POSTGRES_DB: metastore

  hive-metastore:
    hostname: hive-metastore
    image: 'starburstdata/hive:3.1.2-e.18'
    ports:
      - '9083:9083' # Metastore Thrift
    environment:
      HIVE_METASTORE_DRIVER: org.postgresql.Driver
      HIVE_METASTORE_JDBC_URL: jdbc:postgresql://metastore_db:5432/metastore
      HIVE_METASTORE_USER: hive
      HIVE_METASTORE_PASSWORD: hive
      HIVE_METASTORE_WAREHOUSE_DIR: s3a://warehouse/
      S3_ENDPOINT: http://minio:9000
      S3_ACCESS_KEY: admin
      S3_SECRET_KEY: password
      S3_PATH_STYLE_ACCESS: "true"
      REGION: ""
      GOOGLE_CLOUD_KEY_FILE_PATH: ""
      AZURE_ADL_CLIENT_ID: ""
      AZURE_ADL_CREDENTIAL: ""
      AZURE_ADL_REFRESH_URL: ""
      AZURE_ABFS_STORAGE_ACCOUNT: ""
      AZURE_ABFS_ACCESS_KEY: ""
      AZURE_WASB_STORAGE_ACCOUNT: ""
      AZURE_ABFS_OAUTH: ""
      AZURE_ABFS_OAUTH_TOKEN_PROVIDER: ""
      AZURE_ABFS_OAUTH_CLIENT_ID: ""
      AZURE_ABFS_OAUTH_SECRET: ""
      AZURE_ABFS_OAUTH_ENDPOINT: ""
      AZURE_WASB_ACCESS_KEY: ""
      HIVE_METASTORE_USERS_IN_ADMIN_ROLE: "admin"
    depends_on:
      - metastore_db
    healthcheck:
      test: bash -c "exec 6<> /dev/tcp/localhost/9083"
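      # passes once the metastore's Thrift port (9083) accepts a TCP connection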

  minio:
    image: minio/minio
    environment:
      - MINIO_ROOT_USER=admin
      - MINIO_ROOT_PASSWORD=password
      - MINIO_DOMAIN=minio
    networks:
      default:
        aliases:
          - warehouse.minio
    ports:
      - 9001:9001
      - 9000:9000
    command: ["server", "/data", "--console-address", ":9001"]
  mc:
    depends_on:
      - minio
    image: minio/mc
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc rm -r --force minio/warehouse;
      /usr/bin/mc mb minio/warehouse;
      /usr/bin/mc policy set public minio/warehouse;
      tail -f /dev/null
      "

networks:
  default:
    name: datalakehouse
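
For anyone landing here: a minimal smoke test once the stack is up. This is a sketch, not part of the repo: the connector name, topic.prefix, and the ngrok host:port are placeholders, and it assumes kcat is installed on the host.

```sh
# 1. Confirm the tunnel is up and grab the public address
#    (the host:port ngrok assigns will differ from the placeholder below)
curl -s http://localhost:4040/api/tunnels | grep -o '"public_url":"[^"]*"'

# 2. Register a Debezium Postgres source connector with Kafka Connect.
#    Connector name and topic.prefix are illustrative; the database
#    credentials match the example-postgres image above.
curl -s -X POST http://localhost:8083/connectors \
  -H 'Content-Type: application/json' \
  -d '{
    "name": "inventory-connector",
    "config": {
      "connector.class": "io.debezium.connector.postgresql.PostgresConnector",
      "database.hostname": "postgres",
      "database.port": "5432",
      "database.user": "postgres",
      "database.password": "postgres",
      "database.dbname": "postgres",
      "topic.prefix": "dbserver1"
    }
  }'

# 3. Consume a change topic through the tunnel from outside Docker,
#    using the public_url from step 1 without the tcp:// prefix
#    (0.tcp.ngrok.io:12345 is a placeholder)
kcat -b 0.tcp.ngrok.io:12345 -t dbserver1.inventory.customers -C
```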