alibaba / SREWorks

Cloud Native DataOps & AIOps Platform | 云原生数智运维平台
Apache License 2.0
1.79k stars 395 forks source link

runtime 为 containerd 是否影响部署?目前部分组件无法正常运行 #29

Open yuanyp8 opened 2 years ago

yuanyp8 commented 2 years ago


helm install sreworks ./ --create-namespace --namespace sreworks --set global.accessMode="nodePort" --set appmanager.home.url="http://xxx:xxxx" --set global.storageClass="my_sc" --set platformName="OneOps"


sreworks                       sreworks-appmanager-cluster-initjob-tzjms                         0/1     CrashLoopBackOff        4 (33s ago)         3m1s
sreworks                       sreworks-appmanager-operator-controller-manager-74948f9668k454s   2/2     Running                 0                   3m2s
sreworks                       sreworks-appmanager-postrun-dprtp                                 1/1     Running                 2 (44s ago)         3m2s
sreworks                       sreworks-appmanager-server-6fd5455df5-srfkw                       0/1     Init:CrashLoopBackOff   4 (81s ago)         3m2s
sreworks                       sreworks-core-init-job-gpwbx                                      0/1     CrashLoopBackOff        4 (39s ago)         3m2s
sreworks                       sreworks-kafka-0                                                  1/1     Running                 2 (2m37s ago)       3m2s
sreworks                       sreworks-minio-65f775b959-5k6x6                                   1/1     Running                 0                   3m2s
sreworks                       sreworks-mysql-0                                                  1/1     Running                 0                   3m2s
sreworks                       sreworks-redis-master-0                                           1/1     Running                 0                   3m2s
sreworks                       sreworks-saas-aiops-init-job-2llnm                                1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-app-init-job-5f8p6                                  1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-cluster-init-job-7pswz                              1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-dataops-init-job-kpkb8                              1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-demoapp-init-job-tcpc4                              1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-healing-init-job-dq79b                              1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-health-init-job-9csks                               1/1     Running                 0                   3m2s
sreworks                       sreworks-saas-help-init-job-fv7j8                                 1/1     Running                 0                   3m2s
sreworks                       sreworks-saas-job-init-job-mpcw2                                  1/1     Running                 0                   3m2s
sreworks                       sreworks-saas-ocenter-init-job-4p4tj                              1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-search-init-job-z646b                               1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-system-init-job-v877c                               1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-team-init-job-7cwjr                                 1/1     Running                 0                   3m1s
sreworks                       sreworks-saas-upload-init-job-t6qs9                               1/1     Running                 0                   3m1s
sreworks                       sreworks-zookeeper-0                                              1/1     Running                 0                   3m2s


# kubectl logs -f -n sreworks sreworks-appmanager-cluster-initjob-tzjms
+ python /app/sbin/
Traceback (most recent call last):
  File "/app/sbin/", line 98, in <module>
  File "/app/sbin/", line 74, in init_cluster
    items = requests.get("%s/clusters" % ENDPOINT, headers=HEADERS).json().get('data', {}).get('items', [])
  File "/usr/local/lib/python2.7/site-packages/requests/", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 61, in request
    return session.request(method=method, url=url, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 529, in request
    resp = self.send(prep, **send_kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 645, in send
    r = adapter.send(request, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 519, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='sreworks-appmanager', port=80): Max retries exceeded with url: /clusters (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f680c8b8190>: Failed to establish a new connection: [Errno 111] Connection refused',))

# kubectl logs -f -n sreworks sreworks-appmanager-server-6fd5455df5-srfkw
Error from server (BadRequest): container "server" in pod "sreworks-appmanager-server-6fd5455df5-srfkw" is waiting to start: PodInitializing

# kubectl logs -f -n sreworks sreworks-core-init-job-gpwbx
+ cat /swcli/swcli.yaml

endpoint: http://sreworks-appmanager
username: superuser
password: yJfIYmjAiCL0ondV3kY7e5x6kVTpvC3h
client-id: superclient
client-secret: stLCjCPKbWmki65DsAj2jPoeBLPimpJa
+ cd /root/saas/swcore/api/core/
+ '[[' false '==' true ]]
+ cat /run/secrets/
+ export 'NAMESPACE_ID=sreworks'
+ '[[' nodePort '==' ingress ]]
+ envsubst
+ /root/swcli --config /swcli/swcli.yaml app-package import '--app-id=flycore' --filepath /root/saas/swcore/ '--print-only-app-package-id=true' '--reset-version=true'
Error: Post "http://sreworks-appmanager/oauth/token": dial tcp connect: connection refused
+ result=

# kubectl logs -f -n sreworks sreworks-appmanager-postrun-dprtp
+ set -e
+ PYTHON_BIN=python
+ RUN_DIR=/app/postrun
++ awk 'BEGIN{for(v in ENVIRON) printf "${%s} ", v;}'
++ find /app/postrun
+ for file in $(find $RUN_DIR)
+ '[' trun == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' emas == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)
+ '[' json == .tpl ']'
+ for file in $(find $RUN_DIR)

+ sleep 60s
++ find /app/postrun -maxdepth 1 -type f -name '*.sh' -o -name '*.py'
++ sort
+ for script in `find $RUN_DIR -maxdepth 1 -type f -name "*.sh" -o -name "*.py" | sort`
+ SRCPWD=/app/postrun/01_init_definition_schema
+ '[' /app/postrun/01_init_definition_schema '!=' /app/postrun/ ']'
+ echo 'Execute python script: ,' /app/postrun/, /app/postrun/01_init_definition_schema
+ SRCPWD=/app/postrun/01_init_definition_schema
Execute python script: , /app/postrun/, /app/postrun/01_init_definition_schema
+ python /app/postrun/
Traceback (most recent call last):
  File "/app/postrun/", line 54, in <module>
  File "/app/postrun/", line 50, in apply_all_definition_schemas
  File "/app/postrun/", line 29, in apply
    response = + '/definition-schemas', json=post_json)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 117, in post
    return request('post', url, data=data, json=json, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 61, in request
    return session.request(method=method, url=url, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 529, in request
    resp = self.send(prep, **send_kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 645, in send
    r = adapter.send(request, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/", line 519, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='sreworks-appmanager', port=80): Max retries exceeded with url: /definition-schemas (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f5592bd84d0>: Failed to establish a new connection: [Errno 111] Connection refused',))
yuanyp8 commented 2 years ago


yuanyp8 commented 2 years ago

Any updates?

sreworks commented 2 years ago



sreworks commented 2 years ago

根据您反馈的日志来看,是 sreworks-appmanager 未正常启动,导致 postrun 和 cluster-initjob 未能正常运行,进而导致 core-init-job 核心应用的注入异常。containerd 不影响部署,我们已验证过在 containerd 场景下能够正常使用。

sreworks commented 2 years ago


yanlingsishao commented 2 years ago
