d2iq-archive / dcos-flink-service

11 stars 17 forks source link

Deployment is stuck when using Kerberos #50

Open djannot opened 6 years ago

djannot commented 6 years ago

Here is the JSON options file I used:

{
  "service": {
    "name": "flink",
    "slots": 1,
    "parallelism-default": 1,
    "role": "*",
    "hostattribute": "",
    "principal": "",
    "secret": "",
    "user": "root",
    "log-level": "INFO",
    "extra-args": ""
  },
  "app-master": {
    "cpus": 1,
    "memory": 1024,
    "heap": 256
  },
  "task-managers": {
    "count": 1,
    "cpus": 1,
    "memory": 1024,
    "heap": 512,
    "memory-preallocation": true
  },
  "security": {
    "kerberos": {
      "krb5conf": "W2xpYmRlZmF1bHRzXQpkZWZhdWx0X3JlYWxtID0gTUVTT1MuTEFCCgpbcmVhbG1zXQogIE1FU09TLkxBQiA9IHsKICAgIGtkYyA9IGtkYy5tYXJhdGhvbi5jb250YWluZXJpcC5kY29zLnRoaXNkY29zLmRpcmVjdG9yeTo4OAogIH0K",
      "use-ticket-cache": true,
      "keytab": "BQIAAABBAAEACU1FU09TLkxBQgAFdXNlcjEAAAABWnHlkwIAEgAgZYYGbUvyN44UuDKxJzQnxpTLKjf14OFC1M5Y0TU3YrMAAAAxAAEACU1FU09TLkxBQgAFdXNlcjEAAAABWnHlkwIAEQAQgxuDzuzWCXmoag8VafLdhAAAADkAAQAJTUVTT1MuTEFCAAV1c2VyMQAAAAFaceWTAgAQABikZJIqRhopJXz9AkNA98JX913Hj9k0tXAAAAAxAAEACU1FU09TLkxBQgAFdXNlcjEAAAABWnHlkwIAFwAQMEBv/fRwUUNGDgkial5htQ==",
      "principal": "user1@MESOS.LAB"
    },
    "ssl": {
      "enabled": false,
      "keyStoreBase64": "",
      "trustStoreBase64": "",
      "keyPassword": "",
      "keyStorePassword": "",
      "trustStorePassword": "",
      "enabledAlgorithms": "TLS_RSA_WITH_AES_128_CBC_SHA",
      "enableArtifactServerSSL": false
    }
  },
  "hdfs": {
    "config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
  }
}

Here is what I see in the task's stderr log:

+ cp /mnt/mesos/sandbox/hdfs-site.xml /etc/hadoop/conf/
+ cp /mnt/mesos/sandbox/core-site.xml /etc/hadoop/conf/
+ [[ W2xpYmRlZmF1bHRzXQpkZWZhdWx0X3JlYWxtID0gTUVTT1MuTEFCCgpbcmVhbG1zXQogIE1FU09TLkxBQiA9IHsKICAgIGtkYyA9IGtkYy5tYXJhdGhvbi5jb250YWluZXJpcC5kY29zLnRoaXNkY29zLmRpcmVjdG9yeTo4OAogIH0K != '' ]]
+ echo W2xpYmRlZmF1bHRzXQpkZWZhdWx0X3JlYWxtID0gTUVTT1MuTEFCCgpbcmVhbG1zXQogIE1FU09TLkxBQiA9IHsKICAgIGtkYyA9IGtkYy5tYXJhdGhvbi5jb250YWluZXJpcC5kY29zLnRoaXNkY29zLmRpcmVjdG9yeTo4OAogIH0K
+ base64 -d
+ exec runsvdir -P /etc/service
+ exec
+ exec
...
+ exec
+ exec
I0131 17:41:25.690531 30398 executor.cpp:293] Received killTask for task flink.72ab5556-06ad-11e8-9981-a65b58687427
+ exec
+ exec
+ exec
+ exec
+ exec
+ exec
+ exec
I0131 17:41:35.695581 30398 executor.cpp:293] Received killTask for task flink.72ab5556-06ad-11e8-9981-a65b58687427
+ exec
+ exec
+ exec
+ exec
+ exec
+ exec
+ exec
I0131 17:41:45.705524 30399 executor.cpp:293] Received killTask for task flink.72ab5556-06ad-11e8-9981-a65b58687427
I0131 17:41:45.833313 30397 executor.cpp:552] Container exited with status 137
W0131 17:41:45.833313 30393 logging.cpp:93] RAW: Received signal SIGTERM from process 3152 of user 0; exiting