risingwavelabs / risingwave

Best-in-class stream processing, analytics, and management. Perform continuous analytics, or build event-driven applications, real-time ETL pipelines, and feature stores in minutes. Unified streaming and batch. PostgreSQL compatible.
https://go.risingwave.com/slack
Apache License 2.0
6.78k stars 561 forks source link

batch select from s3 source hangs #17984

Closed xxchan closed 1 month ago

xxchan commented 1 month ago

Describe the bug

Reported in the community Slack: https://risingwave-community.slack.com/archives/C03BW71523T/p1723121143187519

Error message/log

No response

To Reproduce

Docker compose:

services:
  # MinIO object store. Other parts of this file point at it on port 9000:
  # the RisingWave state store uses "hummock001" and the s3_v2 source reads
  # the "monitoring" bucket.
  minio:
    image: minio/minio:latest
    container_name: minio
    environment:
      MINIO_ROOT_USER: mlops
      MINIO_ROOT_PASSWORD: password
    # List-form entrypoint with a literal block scalar so that every shell
    # statement stays on its own line. (In a double-quoted multi-line scalar,
    # adjacent lines are folded into one, which merged the two mkdir commands
    # into a single `mkdir -p ... mkdir -p ...` invocation.)
    # `$$` is Compose's escape for a literal `$`; the script therefore sees
    # "$0" "$@", i.e. the words of `command` below.
    entrypoint:
      - /bin/sh
      - "-c"
      - |
        set -e
        mkdir -p "/data/hummock001"
        mkdir -p "/data/monitoring"
        /usr/bin/docker-entrypoint.sh "$$0" "$$@"
    command: server /data --console-address ":9001"
    ports:
      - "9000:9000"
      - "9001:9001"
      - "38223:38223"
    volumes:
      - minio_data:/data
    healthcheck:
      # NOTE(review): relies on the `mc` client being present in the image
      # — confirm for the image tag in use.
      test: ["CMD", "mc", "ready", "local"]
      interval: 5s
      timeout: 5s
      retries: 5

  # PostgreSQL instance holding the "metadata" database that the RisingWave
  # meta node connects to via --sql-endpoint.
  postgres:
    image: postgres:15-alpine
    restart: always
    environment:
      # Trust auth: no password is required for connections.
      - 'POSTGRES_HOST_AUTH_METHOD=trust'
      - 'POSTGRES_USER=postgres'
      - 'POSTGRES_DB=metadata'
      - 'POSTGRES_INITDB_ARGS=--encoding=UTF-8 --lc-collate=C --lc-ctype=C'
    expose:
      - '5432'
    ports:
      # Host port 8432 -> container port 5432.
      - '8432:5432'
    volumes:
      - postgres:/var/lib/postgresql/data
    healthcheck:
      test:
        - CMD-SHELL
        - pg_isready -U postgres
      interval: 2s
      timeout: 5s
      retries: 5

  # RisingWave in standalone mode: meta, compute, frontend and compactor are
  # all started from one container, each configured through its own
  # --<component>-opts string.
  risingwave-standalone:
    image: risingwavelabs/risingwave:latest
    container_name: risingwave-standalone
    # The command is ONE double-quoted YAML scalar; a trailing backslash
    # escapes the line break so the pieces are joined into a single line.
    # '#' does NOT start a comment inside a quoted scalar, so options that
    # are switched off must be kept out here instead of inside the string.
    # Currently disabled compute options:
    #   --parallelism 4
    #   --total-memory-bytes 8589934592
    command: "standalone --meta-opts=\" \
                    --listen-addr 0.0.0.0:5690 \
                    --advertise-addr 0.0.0.0:5690 \
                    --dashboard-host 0.0.0.0:5691 \
                    --prometheus-host 0.0.0.0:1250 \
                    --backend sql \
                    --sql-endpoint postgres://postgres:@postgres:5432/metadata \
                    --state-store hummock+minio://mlops:password@minio:9000/hummock001 \
                    --data-directory hummock_001 \
                    --config-path /risingwave.toml\" \
                 --compute-opts=\" \
                    --config-path /risingwave.toml \
                    --listen-addr 0.0.0.0:5688 \
                    --prometheus-listener-addr 0.0.0.0:1250 \
                    --advertise-addr 0.0.0.0:5688 \
                    --async-stack-trace verbose \
                    --role both \
                    --meta-address http://0.0.0.0:5690/\" \
                 --frontend-opts=\" \
                   --config-path /risingwave.toml \
                   --listen-addr 0.0.0.0:4566 \
                   --advertise-addr 0.0.0.0:4566 \
                   --prometheus-listener-addr 0.0.0.0:1250 \
                   --health-check-listener-addr 0.0.0.0:6786 \
                   --meta-addr http://0.0.0.0:5690/\" \
                 --compactor-opts=\" \
                   --listen-addr 0.0.0.0:6660 \
                   --prometheus-listener-addr 0.0.0.0:1250 \
                   --advertise-addr 0.0.0.0:6660 \
                   --meta-address http://0.0.0.0:5690/\""
    expose:
      - "6660"
      - "4566"
      - "5688"
      - "5690"
      - "1250"
      - "5691"
    ports:
      - "4566:4566"
      - "5690:5690"
      - "5691:5691"
      - "1250:1250"
    depends_on:
      - postgres
      - minio
    volumes:
      - "./risingwave.toml:/risingwave.toml"
    environment:
      RUST_BACKTRACE: "1"
      # If ENABLE_TELEMETRY is not set, telemetry will start by default
      ENABLE_TELEMETRY: ${ENABLE_TELEMETRY:-true}
      RW_TELEMETRY_TYPE: ${RW_TELEMETRY_TYPE:-"docker-compose"}
      # NOTE(review): the fallback key looks like a demo placeholder —
      # override it outside of local testing.
      RW_SECRET_STORE_PRIVATE_KEY_HEX: ${RW_SECRET_STORE_PRIVATE_KEY_HEX:-0123456789abcdef}
    healthcheck:
      # With CMD-SHELL only the first string is run as the shell script; any
      # further list items become positional parameters and are never
      # executed. The four port probes (compactor 6660, compute 5688,
      # frontend 4566, meta 5690) are therefore chained into one command so
      # that all of them must succeed. `$$?` is Compose's escape for `$?`.
      test:
        - CMD-SHELL
        - >-
          bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/6660; exit $$?;' &&
          bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/5688; exit $$?;' &&
          bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/4566; exit $$?;' &&
          bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/5690; exit $$?;'
      interval: 1s
      timeout: 5s
    restart: always
    deploy:
      resources:
        limits:
          memory: 28G
        reservations:
          memory: 28G

  # Superset, built from the local ./superset directory; its UI is published
  # on host port 19088.
  superset:
    container_name: superset
    build:
      context: "./superset"
      dockerfile: "dockerfile"
    environment:
      # Admin account values handed to the image as environment variables.
      - "ADMIN_USERNAME=admin"
      - "ADMIN_EMAIL=admin@superset.com"
      - "ADMIN_PASSWORD=password"
    volumes:
      - "superset_home:/app/superset_home"
    ports:
      # Host port 19088 -> container port 8088.
      - "19088:8088"

# Named volumes used by the services above. `external: false` means Compose
# creates and manages them itself rather than expecting them to exist.
volumes:
  minio_data: {external: false}
  postgres: {external: false}
  superset_home: {external: false}

example.json:

{"id":1,"name":"Adair","amount":75.48,"operation_time":"2024-05-04T01:16:02Z"}
-- Source over the JSON objects stored in the "monitoring" bucket on MinIO,
-- read through the s3_v2 connector.
-- (The trailing comma after the last column is kept exactly as reported.)
CREATE SOURCE minio_expanses (
    id INT,
    name TEXT,
    amount NUMERIC,
    operation_time TIMESTAMP WITH TIME ZONE,
)
WITH (
    connector = 's3_v2',
    s3.region_name = 'us-east-1',
    s3.bucket_name = 'monitoring',
    s3.credentials.access = '<ACCESS_KEY>',
    s3.credentials.secret = '<SECRET>',
    s3.endpoint_url = 'http://minio:9000/'
)
FORMAT PLAIN ENCODE JSON;

-- Batch query against the source; this is the statement reported to hang.
SELECT * FROM minio_expanses;

Expected behavior

No response

How did you deploy RisingWave?

No response

The version of RisingWave

No response

Additional context

No response

xxchan commented 1 month ago

@wcy-fdu PTAL, thanks!