Open JiaIcecream opened 6 days ago
Install/Deploy
Yes
ubuntu 20.04
kuscia v0.8.0b0
docker
23.0.6
secretflow
secretflow 1.5.0b0
{ "job_id": "reefr2024092413473599050", "initiator": "com2023011620060497797", "max_parallelism": 1, "tasks": [ { "task_id": "reefr2024092413473599050-qagbmwrn-node-35", "app_image": "secretflow-image", "alias": "reefr2024092413473599050-qagbmwrn-node-35", "priority": 100, "parties": [ { "domain_id": "com2023011620060497797", "role": "partner" }, { "domain_id": "com2023011620072311738", "role": "partner" } ], "dependencies": [], "task_input_config": { "sf_cluster_desc": { "devices": [ { "config": "{\"runtime_config\":{\"protocol\":\"SEMI2K\",\"field\":\"FM128\"},\"party_address\":{\"com2023011620060497797\":\"172.16.16.111\",\"com2023011620072311738\":\"172.16.16.116\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}", "name": "spu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "spu" }, { "config": "{\"mode\":\"PHEU\",\"schema\":\"paillier\",\"key_size\":2048}", "name": "heu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "heu" } ], "parties": [ "com2023011620060497797", "com2023011620072311738" ], "ray_fed_config": { "cross_silo_comm_backend": "brpc_link" } }, "sf_datasource_config": { "com2023011620060497797": { "id": "default-data-source" }, "com2023011620072311738": { "id": "default-data-source" } }, "sf_input_ids": [ "res2024082209552885306", "res2024082210012837065" ], "sf_node_eval_param": { "attr_paths": [ "input/receiver_input/key", "input/sender_input/key", "protocol", "sort_result", "allow_duplicate_keys", "allow_duplicate_keys/no/skip_duplicates_check", "fill_value_int", "ecdh_curve" ], "attrs": [ { "is_na": false, "ss": [ "id1" ] }, { "is_na": false, "ss": [ "id2" ] }, { "is_na": false, "s": "PROTOCOL_RR22" }, { "b": true, "is_na": false }, { "is_na": false, "s": "no" }, { "is_na": true }, { "is_na": true }, { "is_na": false, "s": 
"CURVE_FOURQ" } ], "checkpoint_uri": "ckbeob-qagbmwrn-node-35-output-0", "domain": "data_prep", "inputs": [ { "data_refs": [ { "format": "csv", "party": "com2023011620060497797", "uri": "1724291682857_diabetes-active000.csv" } ], "meta": { "@type": "type.googleapis.com/secretflow.spec.v1.IndividualTable", "line_count": "-1" }, "type": "sf.table.individual" }, { "data_refs": [ { "format": "csv", "party": "com2023011620072311738", "uri": "1724292080572_diabetes-passive001.csv" } ], "meta": { "@type": "type.googleapis.com/secretflow.spec.v1.IndividualTable", "line_count": "-1" }, "type": "sf.table.individual" } ], "name": "psi", "version": "0.0.5" }, "sf_output_ids": [ "reefr2024092413473599050-qagbmwrn-node-35-output-0" ], "sf_output_uris": [ "reefr2024092413473599050-qagbmwrn-node-35-output-0" ] } }, { "task_id": "reefr2024092413473599050-qagbmwrn-node-36", "app_image": "secretflow-image", "alias": "reefr2024092413473599050-qagbmwrn-node-36", "priority": 100, "parties": [ { "domain_id": "com2023011620060497797", "role": "partner" }, { "domain_id": "com2023011620072311738", "role": "partner" } ], "dependencies": [ "reefr2024092413473599050-qagbmwrn-node-35" ], "task_input_config": { "sf_cluster_desc": { "devices": [ { "config": "{\"runtime_config\":{\"protocol\":\"SEMI2K\",\"field\":\"FM128\"},\"party_address\":{\"com2023011620060497797\":\"172.16.16.111\",\"com2023011620072311738\":\"172.16.16.116\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}", "name": "spu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "spu" }, { "config": "{\"mode\":\"PHEU\",\"schema\":\"paillier\",\"key_size\":2048}", "name": "heu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "heu" } ], "parties": [ "com2023011620060497797", "com2023011620072311738" ], 
"ray_fed_config": { "cross_silo_comm_backend": "brpc_link" } }, "sf_datasource_config": { "com2023011620060497797": { "id": "default-data-source" }, "com2023011620072311738": { "id": "default-data-source" } }, "sf_input_ids": [ "reefr2024092413473599050-qagbmwrn-node-35-output-0" ], "sf_node_eval_param": { "attr_paths": [ "train_size", "test_size", "random_state", "shuffle" ], "attrs": [ { "f": 0.75, "is_na": false }, { "f": 0.25, "is_na": false }, { "i64": 1024, "is_na": false }, { "b": true, "is_na": false } ], "checkpoint_uri": "ckbeob-qagbmwrn-node-36-output-0", "domain": "data_prep", "name": "train_test_split", "version": "0.0.1" }, "sf_output_ids": [ "reefr2024092413473599050-qagbmwrn-node-36-output-0", "reefr2024092413473599050-qagbmwrn-node-36-output-1" ], "sf_output_uris": [ "reefr2024092413473599050-qagbmwrn-node-36-output-0", "reefr2024092413473599050-qagbmwrn-node-36-output-1" ] } }, { "task_id": "reefr2024092413473599050-qagbmwrn-node-37", "app_image": "secretflow-image", "alias": "reefr2024092413473599050-qagbmwrn-node-37", "priority": 100, "parties": [ { "domain_id": "com2023011620060497797", "role": "partner" }, { "domain_id": "com2023011620072311738", "role": "partner" } ], "dependencies": [ "reefr2024092413473599050-qagbmwrn-node-36" ], "task_input_config": { "sf_cluster_desc": { "devices": [ { "config": "{\"runtime_config\":{\"protocol\":\"SEMI2K\",\"field\":\"FM128\"},\"party_address\":{\"com2023011620060497797\":\"172.16.16.111\",\"com2023011620072311738\":\"172.16.16.116\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}", "name": "spu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "spu" }, { "config": "{\"mode\":\"PHEU\",\"schema\":\"paillier\",\"key_size\":2048}", "name": "heu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], 
"type": "heu" } ], "parties": [ "com2023011620060497797", "com2023011620072311738" ], "ray_fed_config": { "cross_silo_comm_backend": "brpc_link" } }, "sf_datasource_config": { "com2023011620060497797": { "id": "default-data-source" }, "com2023011620072311738": { "id": "default-data-source" } }, "sf_input_ids": [ "reefr2024092413473599050-qagbmwrn-node-36-output-0" ], "sf_node_eval_param": { "attr_paths": [ "input/in_ds/features", "rules" ], "attrs": [ { "is_na": false, "ss": [ "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" ] }, { "s": "{\"op\":\"STANDARDIZE\"}" } ], "checkpoint_uri": "ckbeob-qagbmwrn-node-37-output-0", "domain": "preprocessing", "name": "feature_calculate", "version": "0.0.1" }, "sf_output_ids": [ "reefr2024092413473599050-qagbmwrn-node-37-output-0", "reefr2024092413473599050-qagbmwrn-node-37-output-1" ], "sf_output_uris": [ "reefr2024092413473599050-qagbmwrn-node-37-output-0", "reefr2024092413473599050-qagbmwrn-node-37-output-1" ] } }, { "task_id": "reefr2024092413473599050-qagbmwrn-node-38", "app_image": "secretflow-image", "alias": "reefr2024092413473599050-qagbmwrn-node-38", "priority": 100, "parties": [ { "domain_id": "com2023011620060497797", "role": "partner" }, { "domain_id": "com2023011620072311738", "role": "partner" } ], "dependencies": [ "reefr2024092413473599050-qagbmwrn-node-36" ], "task_input_config": { "sf_cluster_desc": { "devices": [ { "config": "{\"runtime_config\":{\"protocol\":\"SEMI2K\",\"field\":\"FM128\"},\"party_address\":{\"com2023011620060497797\":\"172.16.16.111\",\"com2023011620072311738\":\"172.16.16.116\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}", "name": "spu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "spu" }, { "config": "{\"mode\":\"PHEU\",\"schema\":\"paillier\",\"key_size\":2048}", "name": "heu", 
"parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "heu" } ], "parties": [ "com2023011620060497797", "com2023011620072311738" ], "ray_fed_config": { "cross_silo_comm_backend": "brpc_link" } }, "sf_datasource_config": { "com2023011620060497797": { "id": "default-data-source" }, "com2023011620072311738": { "id": "default-data-source" } }, "sf_input_ids": [ "reefr2024092413473599050-qagbmwrn-node-36-output-1" ], "sf_node_eval_param": { "attr_paths": [ "input/in_ds/features", "rules" ], "attrs": [ { "is_na": false, "ss": [ "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" ] }, { "s": "{\"op\":\"STANDARDIZE\"}" } ], "checkpoint_uri": "ckbeob-qagbmwrn-node-38-output-0", "domain": "preprocessing", "name": "feature_calculate", "version": "0.0.1" }, "sf_output_ids": [ "reefr2024092413473599050-qagbmwrn-node-38-output-0", "reefr2024092413473599050-qagbmwrn-node-38-output-1" ], "sf_output_uris": [ "reefr2024092413473599050-qagbmwrn-node-38-output-0", "reefr2024092413473599050-qagbmwrn-node-38-output-1" ] } }, { "task_id": "reefr2024092413473599050-qagbmwrn-node-39", "app_image": "secretflow-image", "alias": "reefr2024092413473599050-qagbmwrn-node-39", "priority": 100, "parties": [ { "domain_id": "com2023011620060497797", "role": "partner" }, { "domain_id": "com2023011620072311738", "role": "partner" } ], "dependencies": [ "reefr2024092413473599050-qagbmwrn-node-37" ], "task_input_config": { "sf_cluster_desc": { "devices": [ { "config": "{\"runtime_config\":{\"protocol\":\"SEMI2K\",\"field\":\"FM128\"},\"party_address\":{\"com2023011620060497797\":\"172.16.16.111\",\"com2023011620072311738\":\"172.16.16.116\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}", "name": "spu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "spu" }, { "config": 
"{\"mode\":\"PHEU\",\"schema\":\"paillier\",\"key_size\":2048}", "name": "heu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "heu" } ], "parties": [ "com2023011620060497797", "com2023011620072311738" ], "ray_fed_config": { "cross_silo_comm_backend": "brpc_link" } }, "sf_datasource_config": { "com2023011620060497797": { "id": "default-data-source" }, "com2023011620072311738": { "id": "default-data-source" } }, "sf_input_ids": [ "reefr2024092413473599050-qagbmwrn-node-37-output-0" ], "sf_node_eval_param": { "attr_paths": [ "input/train_dataset/feature_selects", "input/train_dataset/label", "epochs", "learning_rate", "batch_size", "sig_type", "reg_type", "penalty", "l2_norm", "eps", "report_weights" ], "attrs": [ { "is_na": false, "ss": [ "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" ] }, { "is_na": false, "ss": [ "y" ] }, { "i64": 10, "is_na": false }, { "f": 0.1, "is_na": false }, { "i64": 16, "is_na": false }, { "is_na": false, "s": "t1" }, { "is_na": false, "s": "linear" }, { "is_na": false, "s": "None" }, { "f": 0.5, "is_na": false }, { "f": 0.001, "is_na": false }, { "is_na": true } ], "checkpoint_uri": "ckbeob-qagbmwrn-node-39-output-0", "domain": "ml.train", "name": "ss_lgd_train", "version": "0.0.4" }, "sf_output_ids": [ "reefr2024092413473599050-qagbmwrn-node-39-output-0", "reefr2024092413473599050-qagbmwrn-node-39-output-1" ], "sf_output_uris": [ "reefr2024092413473599050-qagbmwrn-node-39-output-0", "reefr2024092413473599050-qagbmwrn-node-39-output-1" ] } }, { "task_id": "reefr2024092413473599050-qagbmwrn-node-40", "app_image": "secretflow-image", "alias": "reefr2024092413473599050-qagbmwrn-node-40", "priority": 100, "parties": [ { "domain_id": "com2023011620060497797", "role": "partner" }, { "domain_id": "com2023011620072311738", "role": "partner" } ], "dependencies": [ "reefr2024092413473599050-qagbmwrn-node-39", "reefr2024092413473599050-qagbmwrn-node-38" ], "task_input_config": { "sf_cluster_desc": { 
"devices": [ { "config": "{\"runtime_config\":{\"protocol\":\"SEMI2K\",\"field\":\"FM128\"},\"party_address\":{\"com2023011620060497797\":\"172.16.16.111\",\"com2023011620072311738\":\"172.16.16.116\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}", "name": "spu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "spu" }, { "config": "{\"mode\":\"PHEU\",\"schema\":\"paillier\",\"key_size\":2048}", "name": "heu", "parties": [ "com2023011620060497797", "com2023011620072311738" ], "type": "heu" } ], "parties": [ "com2023011620060497797", "com2023011620072311738" ], "ray_fed_config": { "cross_silo_comm_backend": "brpc_link" } }, "sf_datasource_config": { "com2023011620060497797": { "id": "default-data-source" }, "com2023011620072311738": { "id": "default-data-source" } }, "sf_input_ids": [ "reefr2024092413473599050-qagbmwrn-node-39-output-0", "reefr2024092413473599050-qagbmwrn-node-38-output-0" ], "sf_node_eval_param": { "attr_paths": [ "input/feature_dataset/saved_features", "batch_size", "receiver", "pred_name", "save_ids", "save_label" ], "attrs": [ { "is_na": true, "ss": [] }, { "i64": 16, "is_na": false }, { "is_na": false, "ss": [ "com2023011620060497797" ] }, { "is_na": false, "s": "pred" }, { "b": true, "is_na": false }, { "is_na": true } ], "checkpoint_uri": "ckreefr2024092413473599050-qagbmwrn-node-40-output-0", "domain": "ml.predict", "name": "ss_lgd_predict", "version": "0.0.4" }, "sf_output_ids": [ "reefr2024092413473599050-qagbmwrn-node-40-output-0" ], "sf_output_uris": [ "reefr2024092413473599050-qagbmwrn-node-40-output-0" ] } } ] }
你好!我们使用 kuscia 开启了一个任务,但是出现了以下报错:
"err_msg": "0/1 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling., reject the pod reefr2024092414135423423-qagbmwrn-node-35-partner-0 even after PostFilter.;",
从报错日志看是磁盘空间不足,我们检查了机器的磁盘空间,仅有 34G 可用。我们定位了 kuscia 容器内主要占用空间的目录,如下:
[root@idata-kuscia-autonomy-com2023011620060497797 root]# ls
bin io.containerd.metadata.v1.bolt io.containerd.snapshotter.v1.aufs io.containerd.snapshotter.v1.native lib io.containerd.content.v1.content io.containerd.runtime.v1.linux io.containerd.snapshotter.v1.btrfs io.containerd.snapshotter.v1.overlayfs tmpmounts io.containerd.grpc.v1.cri io.containerd.runtime.v2.task io.containerd.snapshotter.v1.fuse-overlayfs io.containerd.snapshotter.v1.stargz
[root@idata-kuscia-autonomy-com2023011620060497797 root]# du -h --max-depth=1
8.0K ./io.containerd.snapshotter.v1.native
4.0K ./bin
100G ./io.containerd.snapshotter.v1.overlayfs
540K ./io.containerd.grpc.v1.cri
52K ./io.containerd.runtime.v2.task
4.0K ./lib
8.0K ./io.containerd.snapshotter.v1.fuse-overlayfs
8.0K ./io.containerd.snapshotter.v1.aufs
4.0K ./io.containerd.snapshotter.v1.btrfs
4.0K ./io.containerd.runtime.v1.linux
71G ./io.containerd.content.v1.content
5.4M ./io.containerd.metadata.v1.bolt
4.0K ./tmpmounts
32K ./io.containerd.snapshotter.v1.stargz
171G .
[root@idata-kuscia-autonomy-com2023011620060497797 root]# pwd
/home/kuscia/containerd/root
如上
请问,这些大量占用空间的文件是什么?能不能删除?谢谢。
这些文件和目录主要是 containerd 这个容器运行时的数据和配置,不建议直接删除。可以通过 docker system prune 回收磁盘空间,并移除不再使用的资源。
docker system prune作用
Issue Type
Install/Deploy
Search for existing issues similar to yours
Yes
OS Platform and Distribution
ubuntu 20.04
Kuscia Version
kuscia v0.8.0b0
Deployment
docker
deployment Version
23.0.6
App Running type
secretflow
App Running version
secretflow 1.5.0b0
Configuration file used to run kuscia.
What happened and what you expected to happen.
Kuscia log output.