ubccr / hpc-toolset-tutorial

Tutorial for installing Open XDMoD, OnDemand, & ColdFront
GNU General Public License v3.0
121 stars 72 forks source link

Ondemand not working on an EC2 instance #147

Closed SajidK25 closed 2 years ago

SajidK25 commented 2 years ago

I am trying to deploy "hpc-toolset-tutorial" into an EC2 instance. I can access all the application accept "OnDeman". Here is my docker-compose.yml file so far:-

version: "3.9"

services:
  ldap:
    image: ubccr/hpcts:ldap-${HPCTS_VERSION}
    build:
      context: ./ldap
    hostname: ldap
    container_name: ldap
    environment:
      - CONTAINER_LOG_LEVEL=debug
      - LDAP_RFC2307BIS_SCHEMA=true
      - LDAP_REMOVE_CONFIG_AFTER_SETUP=false
      - LDAP_TLS_VERIFY_CLIENT=never
    networks:
      - compute

  base:
    image: ubccr/hpcts:base-${HPCTS_VERSION}
    build:
      context: ./base
    networks:
      - compute
    depends_on:
      - ldap

  mongodb:
    image: mongo:${MONGODB_VERSION}
    hostname: mongodb
    container_name: mongodb
    environment:
      - MONGO_INITDB_ROOT_USERNAME=admin
      - MONGO_INITDB_ROOT_PASSWORD=hBbeOfpFLfFT5ZO
    networks:
      - compute
    volumes:
      - ./mongodb:/docker-entrypoint-initdb.d 
      - data_db:/data/db
    expose:
      - "27017"
  mysql:
    image: mariadb:${MARIADB_VERSION}
    hostname: mysql
    container_name: mysql
    environment:
      MYSQL_ALLOW_EMPTY_PASSWORD: "yes"
    networks:
      - compute
    volumes:
      - ./database:/docker-entrypoint-initdb.d
      - ./database:/etc/mysql/conf.d
      - ./slurm/slurmdbd.conf:/etc/slurm/slurmdbd.conf
      - var_lib_mysql:/var/lib/mysql
    expose:
      - "3306"

  slurmdbd:
    image: ubccr/hpcts:slurm-${HPCTS_VERSION}
    build:
      context: ./slurm
      args:
        SLURM_VERSION: $SLURM_VERSION
        HPCTS_VERSION: $HPCTS_VERSION
    command: ["slurmdbd"]
    container_name: slurmdbd
    hostname: slurmdbd
    networks:
      - compute
    volumes:
      - etc_munge:/etc/munge
      - etc_slurm:/etc/slurm
      - slurmdbd_state:/var/lib/slurmd
    expose:
      - "22"
      - "6819"
    depends_on:
      - base
      - ldap
      - mysql

  slurmctld:
    image: ubccr/hpcts:slurm-${HPCTS_VERSION}
    command: ["slurmctld"]
    container_name: slurmctld
    hostname: slurmctld
    networks:
      - compute
    volumes:
      - etc_munge:/etc/munge
      - etc_slurm:/etc/slurm
      - home:/home
      - slurmctld_state:/var/lib/slurmd
    expose:
      - "22"
      - "6817"
    depends_on:
      - ldap
      - slurmdbd

  cpn01:
    init: true
    image: ubccr/hpcts:slurm-${HPCTS_VERSION}
    command: ["slurmd"]
    hostname: cpn01
    container_name: cpn01
    networks:
      - compute
    volumes:
      - etc_munge:/etc/munge
      - etc_slurm:/etc/slurm
      - home:/home
      - cpn01_slurmd_state:/var/lib/slurmd
    expose:
      - "22"
      - "6818"
    depends_on:
      - ldap
      - slurmctld

  cpn02:
    init: true
    image: ubccr/hpcts:slurm-${HPCTS_VERSION}
    command: ["slurmd"]
    hostname: cpn02
    container_name: cpn02
    networks:
      - compute
    volumes:
      - etc_munge:/etc/munge
      - etc_slurm:/etc/slurm
      - home:/home
      - cpn02_slurmd_state:/var/lib/slurmd
    expose:
      - "22"
      - "6818"
    depends_on:
      - ldap
      - slurmctld

  frontend:
    image: ubccr/hpcts:slurm-${HPCTS_VERSION}
    command: ["frontend"]
    hostname: frontend
    container_name: frontend
    networks:
      - compute
    volumes:
      - etc_munge:/etc/munge
      - etc_slurm:/etc/slurm
      - home:/home
    ports:
      - "0.0.0.0:6222:22"
    depends_on:
      - ldap
      - slurmctld

  coldfront:
    image: ubccr/hpcts:coldfront-${HPCTS_VERSION}
    build:
      context: ./coldfront
      args:
        HPCTS_VERSION: $HPCTS_VERSION
    command: ["serve"]
    hostname: coldfront
    container_name: coldfront
    networks:
      - compute
    volumes:
      - etc_munge:/etc/munge
      - etc_slurm:/etc/slurm
      - home:/home
      - srv_www:/srv/www
    expose:
      - "22"
    ports:
      - "0.0.0.0:2443:443"
    depends_on:
      - ldap
      - mysql
      - frontend

  ondemand:
    image: ubccr/hpcts:ondemand-${HPCTS_VERSION}
    build:
      context: ./ondemand
      args:
        HPCTS_VERSION: $HPCTS_VERSION
    command: ["serve"]
    hostname: ondemand
    container_name: ondemand
    networks:
      - compute
    volumes:
      - etc_munge:/etc/munge
      - etc_slurm:/etc/slurm
      - home:/home
    expose:
      - "22"
    ports:
      - "0.0.0.0:3443:3443"
      - "0.0.0.0:5554:5554"
    depends_on:
      - ldap
      - frontend

  xdmod:
    image: ubccr/hpcts:xdmod-${HPCTS_VERSION}
    build:
      context: ./xdmod
      args:
        HPCTS_VERSION: $HPCTS_VERSION
    command: ["serve"]
    hostname: xdmod
    container_name: xdmod
    networks:
      - compute
    volumes:
      - etc_xdmod:/etc/xdmod
      - etc_munge:/etc/munge
      - etc_slurm:/etc/slurm
      - home:/home
    expose:
      - "22"
    ports:
      - "0.0.0.0:4443:443"
    depends_on:
      - mongodb
      - ldap
      - mysql
      - frontend

volumes:
  etc_xdmod:
  etc_munge:
  etc_slurm:
  home:
  var_lib_mysql:
  cpn01_slurmd_state:
  cpn02_slurmd_state:
  slurmctld_state:
  slurmdbd_state:
  data_db:
  srv_www:

networks:
  compute:

I already have edited ondemand/install.sh

#!/bin/bash
set -e

trap 'ret=$?; test $ret -ne 0 && printf "failed\n\n" >&2; exit $ret' EXIT

log_info() {
  printf "\n\e[0;35m $1\e[0m\n\n"
}

log_info "Setting up Ondemand"
mkdir -p /etc/ood/config/clusters.d
mkdir -p /etc/ood/config/apps/shell
mkdir -p /etc/ood/config/apps/bc_desktop
mkdir -p /etc/ood/config/apps/dashboard
mkdir -p /etc/ood/config/apps/myjobs/templates
echo "DEFAULT_SSHHOST=frontend" > /etc/ood/config/apps/shell/env
echo "OOD_DEFAULT_SSHHOST=frontend" >> /etc/ood/config/apps/shell/env
echo "OOD_SSHHOST_ALLOWLIST=ondemand:cpn01:cpn02" >> /etc/ood/config/apps/shell/env
echo "OOD_DEV_SSH_HOST=ondemand" >> /etc/ood/config/apps/dashboard/env
echo "MOTD_PATH=/etc/motd" >> /etc/ood/config/apps/dashboard/env
echo "MOTD_FORMAT=markdown" >> /etc/ood/config/apps/dashboard/env
echo "OOD_BC_DYNAMIC_JS=1" >> /etc/ood/config/apps/dashboard/env

log_info "Configuring Ondemand ood_portal.yml .."

tee /etc/ood/config/ood_portal.yml <<EOF
---
#
# Portal configuration
#
listen_addr_port:
  - '3443'
servername: null
port: 3443
ssl: null
  # - 'SSLCertificateFile "/etc/pki/tls/certs/localhost.crt"'
  # - 'SSLCertificateKeyFile "/etc/pki/tls/private/localhost.key"'
node_uri: "/node"
rnode_uri: "/rnode"
oidc_scope: "openid profile email groups"
dex:
  client_redirect_uris:
    - "https://75.101.240.220:4443/simplesaml/module.php/authoidcoauth2/linkback.php"
    - "https://75.101.240.220:2443/oidc/callback/"
  client_secret: 334389048b872a533002b34d73f8c29fd09efc50
  client_id: null
  connectors:
    - type: ldap
      id: ldap
      name: LDAP
      config:
        host: ldap:636
        insecureSkipVerify: true
        bindDN: cn=admin,dc=example,dc=org
        bindPW: admin
        userSearch:
          baseDN: ou=People,dc=example,dc=org
          filter: "(objectClass=posixAccount)"
          username: uid
          idAttr: uid
          emailAttr: mail
          nameAttr: gecos
          preferredUsernameAttr: uid
        groupSearch:
          baseDN: ou=Groups,dc=example,dc=org
          filter: "(objectClass=posixGroup)"
          userMatchers:
            - userAttr: DN
              groupAttr: member
          nameAttr: cn
  # This is the default, but illustrating how to change
  frontend:
    theme: ondemand
EOF

log_info "Generating new httpd24 and dex configs.."
/opt/ood/ood-portal-generator/sbin/update_ood_portal

log_info "Adding new theme to dex"
sed -i "s/theme: ondemand/theme: hpc-coop/g" /etc/ood/dex/config.yaml

dnf clean all
rm -rf /var/cache/dnf

log_info "Cloning repos to assist with app development.."
mkdir -p /var/git
git clone https://github.com/OSC/bc_example_jupyter.git --bare /var/git/bc_example_jupyter
git clone https://github.com/OSC/ood-example-ps.git --bare /var/git/ood-example-ps

log_info "Enabling app development for hpcadmin..."
mkdir -p /var/www/ood/apps/dev/hpcadmin
ln -s /home/hpcadmin/ondemand/dev /var/www/ood/apps/dev/hpcadmin/gateway
echo 'if [[ ${HOSTNAME} == ondemand ]]; then source scl_source enable ondemand; fi' >> /home/hpcadmin/.bash_profile

Where I am doing wrong? TIA

aebruno commented 2 years ago

Check the output of docker-compose logs -f ondemand. Are there any errors in there?

SajidK25 commented 2 years ago

Check the output of docker-compose logs -f ondemand. Are there any errors in there? No error: here is the output


ondemand  | ---> Cleaning NGINX ...
ondemand  | ---> Populating /etc/ssh/ssh_known_hosts from frontend for ondemand...
ondemand  | # frontend:22 SSH-2.0-OpenSSH_8.0
ondemand  | # frontend:22 SSH-2.0-OpenSSH_8.0
ondemand  | # frontend:22 SSH-2.0-OpenSSH_8.0
ondemand  | ---> Starting SSSD on ondemand ...
ondemand  | ---> Starting the MUNGE Authentication service (munged) on ondemand ...
ondemand  | ---> Starting sshd on ondemand...
ondemand  | ---> Running update ood portal...
ondemand  | (2022-08-05 18:29:45): [sssd] [server_setup] (0x1f7c0): Starting with debug level = 0x0070
ondemand  | (2022-08-05 18:29:45): (2022-08-05 18:29:45): [be[default]] [server_setup] (0x1f7c0): Starting with debug level = 0x0070
ondemand  | [be[implicit_files]] [server_setup] (0x1f7c0): Starting with debug level = 0x0070
ondemand  | (2022-08-05 18:29:46): [nss] [server_setup] (0x1f7c0): Starting with debug level = 0x0070
ondemand  | (2022-08-05 18:29:46): [pam] [server_setup] (0x1f7c0): Starting with debug level = 0x0070
ondemand  | cp -p /etc/pki/tls/certs/localhost.crt /etc/ood/dex/localhost.crt
ondemand  | chown ondemand-dex:ondemand-dex /etc/ood/dex/localhost.crt
ondemand  | cp -p /etc/pki/tls/private/localhost.key /etc/ood/dex/localhost.key
ondemand  | chown ondemand-dex:ondemand-dex /etc/ood/dex/localhost.key
ondemand  | No change in Apache config.
ondemand  | mv /etc/ood/dex/config.yaml /etc/ood/dex/config.yaml.20220805T182947
ondemand  | mv /tmp/dex_config20220805-41-1x4ajo3 /etc/ood/dex/config.yaml
ondemand  | chown ondemand-dex:ondemand-dex /etc/ood/dex/config.yaml
ondemand  | chmod 600 /etc/ood/dex/config.yaml
ondemand  | Backing up previous Dex config to: '/etc/ood/dex/config.yaml.20220805T182947'
ondemand  | Generating new Dex config at: /etc/ood/dex/config.yaml
ondemand  | Completed successfully!
ondemand  | 
ondemand  | Restart the ondemand-dex service now.
ondemand  | 
ondemand  | Suggested command:
ondemand  |     sudo systemctl restart ondemand-dex.service
ondemand  | 
ondemand  | ---> Starting ondemand-dex...
ondemand  | ---> Starting ondemand httpd24...
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="config issuer: https://localhost:5554"
ondemand  | AH00558: httpd: Could not reliably determine the server's fully qualified domain name, using 192.168.144.10. Set the 'ServerName' directive globally to suppress this message
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="config storage: sqlite3"
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="config static client: OnDemand"
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="config connector: ldap"
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="config skipping approval screen"
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="keys expired, rotating"
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="keys rotated, next rotation: 2022-08-06 00:29:47.454819268 +0000 UTC"
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="listening (http/telemetry) on 0.0.0.0:5558"
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="listening (http) on 0.0.0.0:5556"
ondemand  | time="2022-08-05T18:29:47Z" level=info msg="listening (https) on 0.0.0.0:5554"
ondemand  | time="2022-08-06T00:29:47Z" level=info msg="keys expired, rotating"
ondemand  | time="2022-08-06T00:29:47Z" level=info msg="keys rotated, next rotation: 2022-08-06 06:29:47.930602445 +0000 UTC"

**When I tried to acces the application using this url "https://75.101.240.220:3443/", it was redirected to "https://localhost:3443/" .**
SajidK25 commented 2 years ago

following is the /etc/ood/dex/config.yml inside ondemand container:-

issuer: http://ondemand:5556
storage:
  type: sqlite3
  config:
    file: "/etc/ood/dex/dex.db"
web:
  http: 0.0.0.0:5556
telemetry:
  http: 0.0.0.0:5558
staticClients:
- id:
  redirectURIs:
  - http://:3443/oidc
  - https://75.101.240.220:4443/simplesaml/module.php/authoidcoauth2/linkback.php
  - https://75.101.240.220:2443/oidc/callback/
  name: OnDemand
  secret: 334389048b872a533002b34d73f8c29fd09efc50
connectors:
- type: ldap
  id: ldap
  name: LDAP
  config:
    host: ldap:636
    insecureSkipVerify: true
    bindDN: cn=admin,dc=example,dc=org
    bindPW: admin
    userSearch:
      baseDN: ou=People,dc=example,dc=org
      filter: "(objectClass=posixAccount)"
      username: uid
      idAttr: uid
      emailAttr: mail
      nameAttr: gecos
      preferredUsernameAttr: uid
    groupSearch:
      baseDN: ou=Groups,dc=example,dc=org
      filter: "(objectClass=posixGroup)"
      userMatchers:
      - userAttr: DN
        groupAttr: member
      nameAttr: cn
oauth2:
  skipApprovalScreen: true
enablePasswordDB: false
frontend:
  dir: "/usr/share/ondemand-dex/web"
  theme: ondemand