else if (prompt_config_.pp_depth_ != token_config_.pp_depth_) {
int tp_layers_per_stage = (local_num_layer_ * prompt_config_.pp_depth_) / token_config_.pp_depth_;
std::vector<std::vector<int>> tp_ranks(token_config_.pp_depth_, std::vector<int>{});
for (auto l : args.layer_ids) {
int dst_rank = l / tp_layers_per_stage + token_config_.pp_depth_ * token_config_.tp_size_;
tp_ranks[dst_rank].push_back(l);
}
...
}
`
If `prompt_config_.pp_depth_ != token_config_.pp_depth_`, this branch recalculates `dst_rank` for each layer.
`tp_ranks` is initialized with `token_config_.pp_depth_` entries, but `dst_rank` includes the offset `token_config_.pp_depth_ * token_config_.tp_size_`, so it is always greater than or equal to `token_config_.pp_depth_`; `tp_ranks[dst_rank]` therefore indexes out of bounds here.
`void BaseCacheManager::stream_out(StreamInfo args) {
  if (!args.is_prompt || prompt_config_.is_same(token_config_)) {
    // TODO: check this
    args.peer_rank = token_config_.pp_depth_ * token_config_.tp_size_ + myrank;
    scatter(args);
  }
} `
If `prompt_config_.pp_depth_ != token_config_.pp_depth_`, this branch recalculates `dst_rank` for each layer.
`tp_ranks` is initialized with `token_config_.pp_depth_` entries, but `dst_rank` includes the offset `token_config_.pp_depth_ * token_config_.tp_size_`, so it is always greater than or equal to `token_config_.pp_depth_`; `tp_ranks[dst_rank]` therefore indexes out of bounds here.