Open hari5g900 opened 2 years ago
rollsite(1) --> rollsite(10000)
This looks weird. Could you show the rollsite configuration from the cluster.yaml files of both parties?
I used the helm charts to deploy to specific worker nodes as kubefate wasn't responding to nodeSelector. (I want to force each party to one worker node).
Exchange:
partyId: 1
partyName: fate-exchange
image:
registry: federatedai
isThridParty:
tag: 1.9.0-release
pullPolicy: IfNotPresent
imagePullSecrets:
- name: myregistrykey
podSecurityPolicy:
enabled: false
partyList:
- partyId: 9999
partyIp: 192.168.122.234
partyPort: 30091
- partyId: 10000
partyIp: 192.168.122.187
partyPort: 30101
modules:
rollsite:
include: true
ip: rollsite
type: NodePort
nodePort: 30000
loadBalancerIP:
enableTLS: false
nodeSelector:
location: gb
tolerations:
affinity:
# partyList is used to configure the cluster information of all parties that join in the exchange deployment mode. (When eggroll was used as the calculation engine at the time)
partyList:
- partyId: 9999
partyIp: 10.30.173.234
partyPort: 30091
- partyId: 10000
partyIp: 10.30.173.187
partyPort: 30101
Party-9999
image:
registry: federatedai
isThridParty:
tag: 1.9.0-release
pullPolicy: IfNotPresent
imagePullSecrets:
partyId: 9999
partyName: fate-9999
# Computing : Eggroll, Spark, Spark_local
computing: Eggroll
# Federation: Eggroll(computing: Eggroll), Pulsar/RabbitMQ(computing: Spark/Spark_local)
federation: Eggroll
# Storage: Eggroll(computing: Eggroll), HDFS(computing: Spark), LocalFS(computing: Spark_local)
storage: Eggroll
# Algorithm: Basic, NN
algorithm: Basic
# Device: CPU, IPCL
device: IPCL
istio:
enabled: false
podSecurityPolicy:
enabled: false
ingressClassName: nginx
ingress:
fateboard:
# annotations:
hosts:
- name: party9999.fateboard.example.com
path: /
# tls: []
# - secretName: my-tls-secret
# hosts:
# - fateboard.example.com
client:
# annotations:
hosts:
- name: party9999.notebook.example.com
path: /
# tls: []
exchange:
partyIp: 192.168.122.234
partyPort: 30000
persistence:
enabled: false
modules:
rollsite:
include: true
ip: rollsite
type: NodePort
nodePort: 30091
loadBalancerIP:
enableTLS: false
nodeSelector:
location: gb
tolerations:
affinity:
polling:
enabled: false
partyList:
- partyId: 9999
partyIp: 192.168.122.234
partyPort: 30091
- partyId: 10000
partyIp: 192.168.122.187
partyPort: 30101
# type: client
# server:
# ip: 192.168.9.1
# port: 9370
# type: server
# clientList:
# - partID: 9999
# concurrency: 50
lbrollsite:
include: false
ip: rollsite
type: ClusterIP
nodePort: 30091
loadBalancerIP:
size: "2M"
nodeSelector:
location: gb
tolerations:
affinity:
python:
include: true
type: NodePort
httpNodePort: 30097
grpcNodePort: 30092
loadBalancerIP:
serviceAccountName:
nodeSelector:
location: gb
tolerations:
affinity:
logLevel: INFO
# subPath: ""
existingClaim:
claimName: python-data
storageClass:
accessMode: ReadWriteOnce
size: 1Gi
clustermanager:
cores_per_node: 16
nodes: 2
spark:
cores_per_node: 20
nodes: 2
master: spark://spark-master:7077
driverHost: fateflow
driverHostType:
portMaxRetries:
driverStartPort:
blockManagerStartPort:
pysparkPython:
hdfs:
name_node: hdfs://name_node:9000
path_prefix:
rabbitmq:
host: rabbitmq
mng_port: 15672
port: 5672
user: fate
password: fate
pulsar:
host: pulsar
mng_port: 8080
port: 6650
nginx:
host: nginx
http_port: 9300
grpc_port: 9310
client:
include: true
ip: client
type: ClusterIP
nodeSelector:
location: gb
tolerations:
affinity:
subPath: "client"
existingClaim:
storageClass:
accessMode: ReadWriteOnce
size: 1Gi
clustermanager:
include: true
ip: clustermanager
type: ClusterIP
nodeSelector:
location: gb
tolerations:
affinity:
nodemanager:
include: true
replicas: 2
nodeSelector:
location: gb
tolerations:
affinity:
sessionProcessorsPerNode: 2
subPath: "nodemanager"
storageClass:
accessMode: ReadWriteOnce
size: 1Gi
existingClaim:
resources:
requests:
cpu: "2"
memory: "4Gi"
client:
include: true
ip: client
type: ClusterIP
nodeSelector:
location: gb
tolerations:
affinity:
subPath: "client"
existingClaim:
storageClass:
accessMode: ReadWriteOnce
size: 1Gi
mysql:
include: true
type: ClusterIP
nodeSelector:
location: gb
tolerations:
affinity:
ip: mysql
port: 3306
database: eggroll_meta
user: fate
password: fate_dev
subPath: "mysql"
existingClaim:
claimName: mysql-data
storageClass:
accessMode: ReadWriteOnce
size: 1Gi
serving:
ip: 192.168.9.1
port: 30095
useRegistry: false
zookeeper:
hosts:
- serving-zookeeper.fate-serving-9999:2181
use_acl: false
fateboard:
include: true
type: ClusterIP
username: admin
password: admin
party10000 is the same as party9999, but with the expected changes in ips
Your exchange looks good.
For party 9999 and party 10000, in partyList, you need to configure the exchange's information:
- partyId: 1
partyIp: <exchange_ip>
partyPort: <exchange_port>
Your exchange looks good.
For party 9999 and party 10000, in partyList, you need to configure the exchange's information:
- partyId: 1
partyIp: <exchange_ip>
partyPort: <exchange_port>
Redeployed after these changes but the issue remains:
ValueError: job submit failed, err msg: {'jobId': '202211021422045066850', 'retcode': 103, 'retmsg': 'Traceback (most recent call last):\n File "/data/projects/fate/fateflow/python/fate_flow/scheduler/dag_scheduler.py", line 142, in submit\n raise Exception("create job failed", response)\nException: (\'create job failed\', {\'guest\': {9999: {\'retcode\': <RetCode.FEDERATED_ERROR: 104>, \'retmsg\': \'Federated schedule error, <_InactiveRpcError of RPC that terminated with:\\n\\tstatus = StatusCode.UNKNOWN\\n\\tdetails = "UNKNOWN: \\n[Roll Site Error TransInfo] \\n location msg=com.google.protobuf.GeneratedMessageV3.isStringEmpty(Ljava/lang/Object;)Z \\n stack info=java.lang.NoSuchMethodError: com.google.protobuf.GeneratedMessageV3.isStringEmpty(Ljava/lang/Object;)Z\\n\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Task.getSerializedSize(Proxy.java:1120)\\n\\tat com.google.protobuf.CodedOutputStream.computeMessageSizeNoTag(CodedOutputStream.java:877)\\n\\tat com.google.protobuf.CodedOutputStream.computeMessageSize(CodedOutputStream.java:661)\\n\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Metadata.getSerializedSize(Proxy.java:4893)\\n\\tat com.google.protobuf.CodedOutputStream.computeMessageSizeNoTag(CodedOutputStream.java:877)\\n\\tat com.google.protobuf.CodedOutputStream.computeMessageSize(CodedOutputStream.java:661)\\n\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Packet.getSerializedSize(Proxy.java:7465)\\n\\tat io.grpc.protobuf.lite.ProtoInputStream.available(ProtoInputStream.java:108)\\n\\tat io.grpc.internal.MessageFramer.getKnownLength(MessageFramer.java:205)\\n\\tat io.grpc.internal.MessageFramer.writePayload(MessageFramer.java:137)\\n\\tat io.grpc.internal.AbstractStream.writeMessage(AbstractStream.java:65)\\n\\tat io.grpc.internal.ForwardingClientStream.writeMessage(ForwardingClientStream.java:37)\\n\\tat io.grpc.internal.RetriableStream$1SendMessageEntry.runWith(RetriableStream.java:490)\\n\\tat 
io.grpc.internal.RetriableStream.delayOrExecute(RetriableStream.java:467)\\n\\tat io.grpc.internal.RetriableStream.sendMessage(RetriableStream.java:494)\\n\\tat io.grpc.internal.ClientCallImpl.sendMessageInternal(ClientCallImpl.java:542)\\n\\tat io.grpc.internal.ClientCallImpl.sendMessage(ClientCallImpl.java:528)\\n\\tat io.grpc.stub.ClientCalls.asyncUnaryRequestCall(ClientCalls.java:308)\\n\\tat io.grpc.stub.ClientCalls.futureUnaryCall(ClientCalls.java:218)\\n\\tat io.grpc.stub.ClientCalls.blockingUnaryCall(ClientCalls.java:146)\\n\\tat com.webank.ai.eggroll.api.networking.proxy.DataTransferServiceGrpc$DataTransferServiceBlockingStub.unaryCall(DataTransferServiceGrpc.java:348)\\n\\tat com.webank.eggroll.rollsite.EggSiteServicer.unaryCall(EggSiteServicer.scala:138)\\n\\tat com.webank.ai.eggroll.api.networking.proxy.DataTransferServiceGrpc$MethodHandlers.invoke(DataTransferServiceGrpc.java:406)\\n\\tat io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:180)\\n\\tat io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)\\n\\tat io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)\\n\\tat io.grpc.ForwardingServerCallListener$SimpleForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:40)\\n\\tat io.grpc.Contexts$ContextualizedServerCallListener.onHalfClose(Contexts.java:86)\\n\\tat io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:331)\\n\\tat io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:814)\\n\\tat io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)\\n\\tat io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)\\n\\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\\n\\tat 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\\n\\tat java.lang.Thread.run(Thread.java:750)\\n \\n\\nexception trans path: rollsite(1) --> rollsite(10000)"\\n\\tdebug_error_string = "{"created":"@1667398960.656877902","description":"Error received from peer ipv4:10.43.70.88:9370","file":"src/core/lib/surface/call.cc","file_line":952,"grpc_message":"UNKNOWN: \\\\n[Roll Site Error TransInfo] \\\\n location msg=com.google.protobuf.GeneratedMessageV3.isStringEmpty(Ljava/lang/Object;)Z \\\\n stack info=java.lang.NoSuchMethodError: com.google.protobuf.GeneratedMessageV3.isStringEmpty(Ljava/lang/Object;)Z\\\\n\\\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Task.getSerializedSize(Proxy.java:1120)\\\\n\\\\tat com.google.protobuf.CodedOutputStream.computeMessageSizeNoTag(CodedOutputStream.java:877)\\\\n\\\\tat com.google.protobuf.CodedOutputStream.computeMessageSize(CodedOutputStream.java:661)\\\\n\\\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Metadata.getSerializedSize(Proxy.java:4893)\\\\n\\\\tat com.google.protobuf.CodedOutputStream.computeMessageSizeNoTag(CodedOutputStream.java:877)\\\\n\\\\tat com.google.protobuf.CodedOutputStream.computeMessageSize(CodedOutputStream.java:661)\\\\n\\\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Packet.getSerializedSize(Proxy.java:7465)\\\\n\\\\tat io.grpc.protobuf.lite.ProtoInputStream.available(ProtoInputStream.java:108)\\\\n\\\\tat io.grpc.internal.MessageFramer.getKnownLength(MessageFramer.java:205)\\\\n\\\\tat io.grpc.internal.MessageFramer.writePayload(MessageFramer.java:137)\\\\n\\\\tat io.grpc.internal.AbstractStream.writeMessage(AbstractStream.java:65)\\\\n\\\\tat io.grpc.internal.ForwardingClientStream.writeMessage(ForwardingClientStream.java:37)\\\\n\\\\tat io.grpc.internal.RetriableStream$1SendMessageEntry.runWith(RetriableStream.java:490)\\\\n\\\\tat io.grpc.internal.RetriableStream.delayOrExecute(RetriableStream.java:467)\\\\n\\\\tat 
io.grpc.internal.RetriableStream.sendMessage(RetriableStream.java:494)\\\\n\\\\tat io.grpc.internal.ClientCallImpl.sendMessageInternal(ClientCallImpl.java:542)\\\\n\\\\tat io.grpc.internal.ClientCallImpl.sendMessage(ClientCallImpl.java:528)\\\\n\\\\tat io.grpc.stub.ClientCalls.asyncUnaryRequestCall(ClientCalls.java:308)\\\\n\\\\tat io.grpc.stub.ClientCalls.futureUnaryCall(ClientCalls.java:218)\\\\n\\\\tat io.grpc.stub.ClientCalls.blockingUnaryCall(ClientCalls.java:146)\\\\n\\\\tat com.webank.ai.eggroll.api.networking.proxy.DataTransferServiceGrpc$DataTransferServiceBlockingStub.unaryCall(DataTransferServiceGrpc.java:348)\\\\n\\\\tat com.webank.eggroll.rollsite.EggSiteServicer.unaryCall(EggSiteServicer.scala:138)\\\\n\\\\tat com.webank.ai.eggroll.api.networking.proxy.DataTransferServiceGrpc$MethodHandlers.invoke(DataTransferServiceGrpc.java:406)\\\\n\\\\tat io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:180)\\\\n\\\\tat io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)\\\\n\\\\tat io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)\\\\n\\\\tat io.grpc.ForwardingServerCallListener$SimpleForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:40)\\\\n\\\\tat io.grpc.Contexts$ContextualizedServerCallListener.onHalfClose(Contexts.java:86)\\\\n\\\\tat io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:331)\\\\n\\\\tat io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:814)\\\\n\\\\tat io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)\\\\n\\\\tat io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)\\\\n\\\\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\\\\n\\\\tat 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\\\\n\\\\tat java.lang.Thread.run(Thread.java:750)\\\\n \\\\n\\\\nexception trans path: rollsite(1) --> rollsite(10000)","grpc_status":2}"\\n>\'}}, \'host\': {10000: {\'data\': {\'components\': {\'data_transform_0\': {\'need_run\': True}, \'evaluation_0\': {\'need_run\': True}, \'hetero_secureboost_0\': {\'need_run\': True}, \'intersect_0\': {\'need_run\': True}, \'reader_0\': {\'need_run\': True}}}, \'retcode\': 0, \'retmsg\': \'success\'}}})\n'}
You need to show the configuration after the changes, specifically the rollsite section, for all 3 parties: 9999, 10000, and 1.
Party 9999:
modules:
rollsite:
include: true
ip: rollsite
type: NodePort
nodePort: 30091
loadBalancerIP:
enableTLS: false
nodeSelector:
location: gb
tolerations:
affinity:
polling:
enabled: false
partyList:
- partyId: 1
partyIp: 192.168.122.234
partyPort: 30000
- partyId: 9999
partyIp: 192.168.122.234
partyPort: 30091
- partyId: 10000
partyIp: 192.168.122.187
partyPort: 30101
Party 10000:
modules:
rollsite:
include: true
ip: rollsite
type: NodePort
nodePort: 30101
loadBalancerIP:
enableTLS: false
nodeSelector:
location: it
tolerations:
affinity:
polling:
enabled: false
partyList:
- partyId: 1
partyIp: 192.168.122.234
partyPort: 30000
- partyId: 9999
partyIp: 192.168.122.234
partyPort: 30091
- partyId: 10000
partyIp: 192.168.122.187
partyPort: 30101
Exchange (Party 1):
modules:
rollsite:
include: true
ip: rollsite
type: NodePort
nodePort: 30000
loadBalancerIP:
enableTLS: false
nodeSelector:
location: gb
tolerations:
affinity:
# partyList is used to configure the cluster information of all parties that join in the exchange deployment mode. (When eggroll was used as the calculation engine at the time)
partyList:
- partyId: 9999
partyIp: 192.168.122.234
partyPort: 30091
- partyId: 10000
partyIp: 192.168.122.187
partyPort: 30101
Well, the configuration looks good too.
I think one possible reason is that the pod in your 192.168.122.234 cannot communicate with 192.168.122.187.
192.168.122.234 is for 9999 and 1; 192.168.122.187 is for 10000.
The log indicates that 1 cannot call the gRPC API of 10000, while it seems that 1 can call the gRPC API of 9999.
Let's just do one experiment:
The IP addresses of your 2 nodes look like some kind of internal IP address, so this could be the reason.
I can ping the ip from rollsite(1) to 122.187.
Please show me all the pod status in your 2 K8s clusters.
I only have one k8s cluster. I'm assuming you mean this:
root@master:~# kubectl get pods -A -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
fate-10000 client-0 1/1 Running 0 2m6s 10.42.189.68 worker2 <none> <none>
fate-10000 clustermanager-7b5f6b54b5-nwxcq 1/1 Running 0 2m27s 10.42.189.71 worker2 <none> <none>
fate-10000 mysql-0 1/1 Running 0 2m27s 10.42.189.72 worker2 <none> <none>
fate-10000 nodemanager-0 2/2 Running 0 2m27s 10.42.189.127 worker2 <none> <none>
fate-10000 nodemanager-1 2/2 Running 0 2m16s 10.42.189.69 worker2 <none> <none>
fate-10000 python-0 2/2 Running 0 2m6s 10.42.189.125 worker2 <none> <none>
fate-10000 rollsite-7b465d6bfb-5twgb 1/1 Running 0 2m27s 10.42.189.126 worker2 <none> <none>
fate-9999 client-0 1/1 Running 0 5d1h 10.42.235.148 worker1 <none> <none>
fate-9999 clustermanager-c6dc9d5dd-ttdm8 1/1 Running 0 5d1h 10.42.235.145 worker1 <none> <none>
fate-9999 mysql-0 1/1 Running 0 5d1h 10.42.235.133 worker1 <none> <none>
fate-9999 nodemanager-0 2/2 Running 0 5d1h 10.42.235.134 worker1 <none> <none>
fate-9999 nodemanager-1 2/2 Running 0 5d1h 10.42.235.150 worker1 <none> <none>
fate-9999 python-0 2/2 Running 0 5d1h 10.42.235.140 worker1 <none> <none>
fate-9999 rollsite-677cf9c869-m2vvs 1/1 Running 0 5d1h 10.42.235.188 worker1 <none> <none>
fate-exchange rollsite-6b868bfb47-6k4v7 1/1 Running 0 5d3h 10.42.235.184 worker1 <none> <none>
ingress-nginx nginx-ingress-controller-bgw7l 1/1 Running 1 (45d ago) 46d 10.30.173.13 worker1 <none> <none>
ingress-nginx nginx-ingress-controller-tlqdx 1/1 Running 0 33d 10.30.173.16 worker2 <none> <none>
kube-fate kubefate-7f8dd7b578-7gkzj 1/1 Running 4 (45d ago) 46d 10.42.235.160 worker1 <none> <none>
kube-fate mariadb-77777bd455-s7fx9 1/1 Running 1 (45d ago) 46d 10.42.235.161 worker1 <none> <none>
kube-system calico-kube-controllers-5cc86cb94f-vngnd 1/1 Running 1 (45d ago) 46d 10.42.235.154 worker1 <none> <none>
kube-system calico-node-8rc7d 1/1 Running 0 8d 10.30.173.14 control01 <none> <none>
kube-system calico-node-shzld 1/1 Running 0 8d 10.30.173.16 worker2 <none> <none>
kube-system calico-node-x98nn 1/1 Running 0 8d 10.30.173.13 worker1 <none> <none>
kube-system coredns-59499769fb-4rsfd 1/1 Running 1 (45d ago) 46d 10.42.235.163 worker1 <none> <none>
kube-system coredns-59499769fb-j4znb 1/1 Running 0 11d 10.42.189.75 worker2 <none> <none>
kube-system coredns-autoscaler-67cbd4599c-48gdl 1/1 Running 1 (45d ago) 46d 10.42.235.151 worker1 <none> <none>
kube-system metrics-server-585b7cc746-lkp78 1/1 Running 1 (45d ago) 46d 10.42.235.162 worker1 <none> <none>
kube-system rke-coredns-addon-deploy-job-v2s9d 0/1 Completed 0 46d 10.30.173.14 control01 <none> <none>
kube-system rke-ingress-controller-deploy-job-xpj2s 0/1 Completed 0 46d 10.30.173.14 control01 <none> <none>
kube-system rke-metrics-addon-deploy-job-5rpbk 0/1 Completed 0 46d 10.30.173.14 control01 <none> <none>
kube-system rke-network-plugin-deploy-job-qjtzb 0/1 Completed 0 46d 10.30.173.14 control01 <none> <none>
You have 3 rollsite pods. Could you please use kubectl exec to get into each of them, go to /data/projects/fate/eggroll/conf/route_table/,
and then post the route_table configuration here?
Also, please run kubectl get svc -A to show the service information.
What deployment mode are you using? Kubernetes
What KubeFATE and FATE version you are using?
v1.9.0
What OS are you using for docker-compose or Kubernetes? Please also specify the OS version.
Desktop (please complete the following information):
To Reproduce
I have two parties in one k8s cluster. I can upload the data into both parties with [pipeline_tutorial_upload.ipynb].
The following error occurs when I try to run pipeline.fit() in [pipeline_tutorial_hetero_sbt.ipynb]. There are no additional logs created which is accessible through the fateboard.
What happened?
ValueError: job submit failed, err msg: {'jobId': '202210311731580815390', 'retcode': 103, 'retmsg': 'Traceback (most recent call last):\n File "/data/projects/fate/fateflow/python/fate_flow/scheduler/dag_scheduler.py", line 142, in submit\n raise Exception("create job failed", response)\nException: (\'create job failed\', {\'guest\': {9999: {\'retcode\': <RetCode.FEDERATED_ERROR: 104>, \'retmsg\': \'Federated schedule error, <_InactiveRpcError of RPC that terminated with:\n\tstatus = StatusCode.UNKNOWN\n\tdetails = "UNKNOWN: \n[Roll Site Error TransInfo] \n location msg=com.google.protobuf.GeneratedMessageV3.isStringEmpty(Ljava/lang/Object;)Z \n stack info=java.lang.NoSuchMethodError: com.google.protobuf.GeneratedMessageV3.isStringEmpty(Ljava/lang/Object;)Z\n\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Task.getSerializedSize(Proxy.java:1120)\n\tat com.google.protobuf.CodedOutputStream.computeMessageSizeNoTag(CodedOutputStream.java:877)\n\tat com.google.protobuf.CodedOutputStream.computeMessageSize(CodedOutputStream.java:661)\n\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Metadata.getSerializedSize(Proxy.java:4893)\n\tat com.google.protobuf.CodedOutputStream.computeMessageSizeNoTag(CodedOutputStream.java:877)\n\tat com.google.protobuf.CodedOutputStream.computeMessageSize(CodedOutputStream.java:661)\n\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Packet.getSerializedSize(Proxy.java:7465)\n\tat io.grpc.protobuf.lite.ProtoInputStream.available(ProtoInputStream.java:108)\n\tat io.grpc.internal.MessageFramer.getKnownLength(MessageFramer.java:205)\n\tat io.grpc.internal.MessageFramer.writePayload(MessageFramer.java:137)\n\tat io.grpc.internal.AbstractStream.writeMessage(AbstractStream.java:65)\n\tat io.grpc.internal.ForwardingClientStream.writeMessage(ForwardingClientStream.java:37)\n\tat io.grpc.internal.RetriableStream$1SendMessageEntry.runWith(RetriableStream.java:490)\n\tat 
io.grpc.internal.RetriableStream.delayOrExecute(RetriableStream.java:467)\n\tat io.grpc.internal.RetriableStream.sendMessage(RetriableStream.java:494)\n\tat io.grpc.internal.ClientCallImpl.sendMessageInternal(ClientCallImpl.java:542)\n\tat io.grpc.internal.ClientCallImpl.sendMessage(ClientCallImpl.java:528)\n\tat io.grpc.stub.ClientCalls.asyncUnaryRequestCall(ClientCalls.java:308)\n\tat io.grpc.stub.ClientCalls.futureUnaryCall(ClientCalls.java:218)\n\tat io.grpc.stub.ClientCalls.blockingUnaryCall(ClientCalls.java:146)\n\tat com.webank.ai.eggroll.api.networking.proxy.DataTransferServiceGrpc$DataTransferServiceBlockingStub.unaryCall(DataTransferServiceGrpc.java:348)\n\tat com.webank.eggroll.rollsite.EggSiteServicer.unaryCall(EggSiteServicer.scala:138)\n\tat com.webank.ai.eggroll.api.networking.proxy.DataTransferServiceGrpc$MethodHandlers.invoke(DataTransferServiceGrpc.java:406)\n\tat io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:180)\n\tat io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)\n\tat io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)\n\tat io.grpc.ForwardingServerCallListener$SimpleForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:40)\n\tat io.grpc.Contexts$ContextualizedServerCallListener.onHalfClose(Contexts.java:86)\n\tat io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:331)\n\tat io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:814)\n\tat io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)\n\tat io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat 
java.lang.Thread.run(Thread.java:750)\n \n\nexception trans path: rollsite(1) --> rollsite(10000)"\n\tdebug_error_string = "{"created":"@1667237524.262698319","description":"Error received from peer ipv4:10.43.199.241:9370","file":"src/core/lib/surface/call.cc","file_line":952,"grpc_message":"UNKNOWN: \\n[Roll Site Error TransInfo] \\n location msg=com.google.protobuf.GeneratedMessageV3.isStringEmpty(Ljava/lang/Object;)Z \\n stack info=java.lang.NoSuchMethodError: com.google.protobuf.GeneratedMessageV3.isStringEmpty(Ljava/lang/Object;)Z\\n\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Task.getSerializedSize(Proxy.java:1120)\\n\\tat com.google.protobuf.CodedOutputStream.computeMessageSizeNoTag(CodedOutputStream.java:877)\\n\\tat com.google.protobuf.CodedOutputStream.computeMessageSize(CodedOutputStream.java:661)\\n\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Metadata.getSerializedSize(Proxy.java:4893)\\n\\tat com.google.protobuf.CodedOutputStream.computeMessageSizeNoTag(CodedOutputStream.java:877)\\n\\tat com.google.protobuf.CodedOutputStream.computeMessageSize(CodedOutputStream.java:661)\\n\\tat com.webank.ai.eggroll.api.networking.proxy.Proxy$Packet.getSerializedSize(Proxy.java:7465)\\n\\tat io.grpc.protobuf.lite.ProtoInputStream.available(ProtoInputStream.java:108)\\n\\tat io.grpc.internal.MessageFramer.getKnownLength(MessageFramer.java:205)\\n\\tat io.grpc.internal.MessageFramer.writePayload(MessageFramer.java:137)\\n\\tat io.grpc.internal.AbstractStream.writeMessage(AbstractStream.java:65)\\n\\tat io.grpc.internal.ForwardingClientStream.writeMessage(ForwardingClientStream.java:37)\\n\\tat io.grpc.internal.RetriableStream$1SendMessageEntry.runWith(RetriableStream.java:490)\\n\\tat io.grpc.internal.RetriableStream.delayOrExecute(RetriableStream.java:467)\\n\\tat io.grpc.internal.RetriableStream.sendMessage(RetriableStream.java:494)\\n\\tat io.grpc.internal.ClientCallImpl.sendMessageInternal(ClientCallImpl.java:542)\\n\\tat 
io.grpc.internal.ClientCallImpl.sendMessage(ClientCallImpl.java:528)\\n\\tat io.grpc.stub.ClientCalls.asyncUnaryRequestCall(ClientCalls.java:308)\\n\\tat io.grpc.stub.ClientCalls.futureUnaryCall(ClientCalls.java:218)\\n\\tat io.grpc.stub.ClientCalls.blockingUnaryCall(ClientCalls.java:146)\\n\\tat com.webank.ai.eggroll.api.networking.proxy.DataTransferServiceGrpc$DataTransferServiceBlockingStub.unaryCall(DataTransferServiceGrpc.java:348)\\n\\tat com.webank.eggroll.rollsite.EggSiteServicer.unaryCall(EggSiteServicer.scala:138)\\n\\tat com.webank.ai.eggroll.api.networking.proxy.DataTransferServiceGrpc$MethodHandlers.invoke(DataTransferServiceGrpc.java:406)\\n\\tat io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:180)\\n\\tat io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)\\n\\tat io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)\\n\\tat io.grpc.ForwardingServerCallListener$SimpleForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:40)\\n\\tat io.grpc.Contexts$ContextualizedServerCallListener.onHalfClose(Contexts.java:86)\\n\\tat io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:331)\\n\\tat io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:814)\\n\\tat io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)\\n\\tat io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)\\n\\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\\n\\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\\n\\tat java.lang.Thread.run(Thread.java:750)\\n \\n\\nexception trans path: rollsite(1) --> rollsite(10000)","grpc_status":2}"\n>\'}}, \'host\': {10000: {\'data\': {\'components\': {\'data_transform_0\': {\'need_run\': True}, \'evaluation_0\': 
{\'need_run\': True}, \'hetero_secureboost_0\': {\'need_run\': True}, \'intersect_0\': {\'need_run\': True}, \'reader_0\': {\'need_run\': True}}}, \'retcode\': 0, \'retmsg\': \'success\'}}})\n'}