Closed phoenixdong closed 2 months ago
Example parallelism_to_groups.json
{
"dp": [
[
0,
4
],
[
1,
5
],
[
2,
6
],
[
3,
7
],
[
8,
12
],
[
9,
13
],
[
10,
14
],
[
11,
15
]
],
"dp-cp": [
[
0,
4
],
[
1,
5
],
[
2,
6
],
[
3,
7
],
[
8,
12
],
[
9,
13
],
[
10,
14
],
[
11,
15
]
],
"cp": [
[
0
],
[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
],
[
7
],
[
8
],
[
9
],
[
10
],
[
11
],
[
12
],
[
13
],
[
14
],
[
15
]
],
"tp-pp": [
[
0,
1,
2,
3,
8,
9,
10,
11
],
[
4,
5,
6,
7,
12,
13,
14,
15
]
],
"tp-ep-pp": [
[
0,
1,
2,
3,
8,
9,
10,
11
],
[
4,
5,
6,
7,
12,
13,
14,
15
]
],
"tp": [
[
0,
1,
2,
3
],
[
4,
5,
6,
7
],
[
8,
9,
10,
11
],
[
12,
13,
14,
15
]
],
"pp": [
[
0,
8
],
[
1,
9
],
[
2,
10
],
[
3,
11
],
[
4,
12
],
[
5,
13
],
[
6,
14
],
[
7,
15
]
],
"tp-dp-cp": [
[
0,
1,
2,
3,
4,
5,
6,
7
],
[
8,
9,
10,
11,
12,
13,
14,
15
]
],
"tp-dp": [
[
0,
1,
2,
3,
4,
5,
6,
7
],
[
8,
9,
10,
11,
12,
13,
14,
15
]
],
"tp-cp": [
[
0,
1,
2,
3
],
[
4,
5,
6,
7
],
[
8,
9,
10,
11
],
[
12,
13,
14,
15
]
],
"tp-ep": [
[
0,
1,
2,
3
],
[
4,
5,
6,
7
],
[
8,
9,
10,
11
],
[
12,
13,
14,
15
]
],
"ep": [
[
0
],
[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
],
[
7
],
[
8
],
[
9
],
[
10
],
[
11
],
[
12
],
[
13
],
[
14
],
[
15
]
]
}
Example rank_to_host_name_and_ip.json
{
"0": {
"host_name": "node_0",
"host_ip": "127.0.0.0"
},
"1": {
"host_name": "node_0",
"host_ip": "127.0.0.0"
},
"2": {
"host_name": "node_0",
"host_ip": "127.0.0.0"
},
"3": {
"host_name": "node_0",
"host_ip": "127.0.0.0"
},
"4": {
"host_name": "node_0",
"host_ip": "127.0.0.0"
},
"5": {
"host_name": "node_0",
"host_ip": "127.0.0.0"
},
"6": {
"host_name": "node_0",
"host_ip": "127.0.0.0"
},
"7": {
"host_name": "node_0",
"host_ip": "127.0.0.0"
},
"8": {
"host_name": "node_1",
"host_ip": "127.0.0.1"
},
"9": {
"host_name": "node_1",
"host_ip": "127.0.0.1"
},
"10": {
"host_name": "node_1",
"host_ip": "127.0.0.1"
},
"11": {
"host_name": "node_1",
"host_ip": "127.0.0.1"
},
"12": {
"host_name": "node_1",
"host_ip": "127.0.0.1"
},
"13": {
"host_name": "node_1",
"host_ip": "127.0.0.1"
},
"14": {
"host_name": "node_1",
"host_ip": "127.0.0.1"
},
"15": {
"host_name": "node_1",
"host_ip": "127.0.0.1"
}
}
Example rank_to_parallelism_to_group_id.json
{
"0": {
"dp": 0,
"dp-cp": 0,
"cp": 0,
"tp-pp": 0,
"tp-ep-pp": 0,
"tp": 0,
"pp": 0,
"tp-dp-cp": 0,
"tp-dp": 0,
"tp-cp": 0,
"tp-ep": 0,
"ep": 0
},
"4": {
"dp": 0,
"dp-cp": 0,
"cp": 4,
"tp-pp": 1,
"tp-ep-pp": 1,
"tp": 1,
"pp": 4,
"tp-dp-cp": 0,
"tp-dp": 0,
"tp-cp": 1,
"tp-ep": 1,
"ep": 4
},
"1": {
"dp": 1,
"dp-cp": 1,
"cp": 1,
"tp-pp": 0,
"tp-ep-pp": 0,
"tp": 0,
"pp": 1,
"tp-dp-cp": 0,
"tp-dp": 0,
"tp-cp": 0,
"tp-ep": 0,
"ep": 1
},
"5": {
"dp": 1,
"dp-cp": 1,
"cp": 5,
"tp-pp": 1,
"tp-ep-pp": 1,
"tp": 1,
"pp": 5,
"tp-dp-cp": 0,
"tp-dp": 0,
"tp-cp": 1,
"tp-ep": 1,
"ep": 5
},
"2": {
"dp": 2,
"dp-cp": 2,
"cp": 2,
"tp-pp": 0,
"tp-ep-pp": 0,
"tp": 0,
"pp": 2,
"tp-dp-cp": 0,
"tp-dp": 0,
"tp-cp": 0,
"tp-ep": 0,
"ep": 2
},
"6": {
"dp": 2,
"dp-cp": 2,
"cp": 6,
"tp-pp": 1,
"tp-ep-pp": 1,
"tp": 1,
"pp": 6,
"tp-dp-cp": 0,
"tp-dp": 0,
"tp-cp": 1,
"tp-ep": 1,
"ep": 6
},
"3": {
"dp": 3,
"dp-cp": 3,
"cp": 3,
"tp-pp": 0,
"tp-ep-pp": 0,
"tp": 0,
"pp": 3,
"tp-dp-cp": 0,
"tp-dp": 0,
"tp-cp": 0,
"tp-ep": 0,
"ep": 3
},
"7": {
"dp": 3,
"dp-cp": 3,
"cp": 7,
"tp-pp": 1,
"tp-ep-pp": 1,
"tp": 1,
"pp": 7,
"tp-dp-cp": 0,
"tp-dp": 0,
"tp-cp": 1,
"tp-ep": 1,
"ep": 7
},
"8": {
"dp": 4,
"dp-cp": 4,
"cp": 8,
"tp-pp": 0,
"tp-ep-pp": 0,
"tp": 2,
"pp": 0,
"tp-dp-cp": 1,
"tp-dp": 1,
"tp-cp": 2,
"tp-ep": 2,
"ep": 8
},
"12": {
"dp": 4,
"dp-cp": 4,
"cp": 12,
"tp-pp": 1,
"tp-ep-pp": 1,
"tp": 3,
"pp": 4,
"tp-dp-cp": 1,
"tp-dp": 1,
"tp-cp": 3,
"tp-ep": 3,
"ep": 12
},
"9": {
"dp": 5,
"dp-cp": 5,
"cp": 9,
"tp-pp": 0,
"tp-ep-pp": 0,
"tp": 2,
"pp": 1,
"tp-dp-cp": 1,
"tp-dp": 1,
"tp-cp": 2,
"tp-ep": 2,
"ep": 9
},
"13": {
"dp": 5,
"dp-cp": 5,
"cp": 13,
"tp-pp": 1,
"tp-ep-pp": 1,
"tp": 3,
"pp": 5,
"tp-dp-cp": 1,
"tp-dp": 1,
"tp-cp": 3,
"tp-ep": 3,
"ep": 13
},
"10": {
"dp": 6,
"dp-cp": 6,
"cp": 10,
"tp-pp": 0,
"tp-ep-pp": 0,
"tp": 2,
"pp": 2,
"tp-dp-cp": 1,
"tp-dp": 1,
"tp-cp": 2,
"tp-ep": 2,
"ep": 10
},
"14": {
"dp": 6,
"dp-cp": 6,
"cp": 14,
"tp-pp": 1,
"tp-ep-pp": 1,
"tp": 3,
"pp": 6,
"tp-dp-cp": 1,
"tp-dp": 1,
"tp-cp": 3,
"tp-ep": 3,
"ep": 14
},
"11": {
"dp": 7,
"dp-cp": 7,
"cp": 11,
"tp-pp": 0,
"tp-ep-pp": 0,
"tp": 2,
"pp": 3,
"tp-dp-cp": 1,
"tp-dp": 1,
"tp-cp": 2,
"tp-ep": 2,
"ep": 11
},
"15": {
"dp": 7,
"dp-cp": 7,
"cp": 15,
"tp-pp": 1,
"tp-ep-pp": 1,
"tp": 3,
"pp": 7,
"tp-dp-cp": 1,
"tp-dp": 1,
"tp-cp": 3,
"tp-ep": 3,
"ep": 15
}
}
Add a print_ranks method that outputs the rank information during initialization:
Enable this feature by adding the following fields to the YAML configuration file, for example:
If these fields are not configured, the information shown in the files above is printed to stdout by default.