dmlc / dmlc-core

A common bricks library for building scalable and portable distributed machine learning.
Apache License 2.0
865 stars 519 forks source link

Bug in InputSplitBase::Chunk::Append() ? #648

Open ztma opened 3 years ago

ztma commented 3 years ago

When split->ReadChunk(...) returns a true value and size is set to zero, the code double the buffer size, but in the next loop, variable 'size' should also be adjust accordingly before call split->ReadChunk again.

bool InputSplitBase::Chunk::Append(InputSplitBase *split, size_t buffer_size) {
  size_t previous_size = end - begin;
  data.resize(data.size() + buffer_size);
  while (true) {
    // leave one tail chunk
    size_t size = buffer_size * sizeof(uint32_t);
    // set back to 0 for string safety
    data.back() = 0;
    if (!split->ReadChunk(reinterpret_cast<char *>(BeginPtr(data)) + previous_size, &size))
      return false;
    if (size == 0) {
      data.resize(data.size() * 2);
    } else {
      begin = reinterpret_cast<char *>(BeginPtr(data));
      end = begin + previous_size + size;
      break;
    }
  }
  return true;
}