apache / arrow

Apache Arrow is a multi-language toolbox for accelerated data interchange and in-memory processing
https://arrow.apache.org/
Apache License 2.0
13.87k stars 3.38k forks source link

[C++] Potential issue in `CompareColumnsToRows` on AVX2 platforms when row table contains more than 2^31 rows #43046

Open zanmato1984 opened 6 days ago

zanmato1984 commented 6 days ago

Describe the bug, including details regarding any error messages, version, and platform.

See this TODO: https://github.com/apache/arrow/blob/e635cc23f2f39055ff463730bf1e328af791f85c/cpp/src/arrow/compute/row/compare_internal_avx2.cc#L239-L240

When the right row ids in irow_right are bigger than 2^31 (0x80000000) - and that's very possible - the AVX2 gather intrinsic may have the same issue as https://github.com/apache/arrow/issues/41813#issuecomment-2168286330.

Constructing tests to verify that, and fix followed if necessary.

Component(s)

C++

zanmato1984 commented 5 days ago

Gotcha! The following test crashes exactly the way as this issue suspected.

// Compare columns to rows of row ids larger than 2^31 within a row table.
// Certain AVX2 instructions may behave unexpectedly causing troubles like GH-43046.
TEST(KeyCompare, CompareColumnsToRowsMany) {
  if constexpr (sizeof(void*) == 4) {
    GTEST_SKIP() << "Test only works on 64-bit platforms";
  }

  // The idea of this case is to create a row table using one fixed length column and one
  // var length column (so the row is hence var length and has offset buffer), with more
  // than 2^31 rows. Then compare the rows with row ids larger than 2^31.

  // A small batch to append to the row table repeatedly to grow the row table to big
  // enough.
  constexpr int64_t num_rows_batch = std::numeric_limits<uint16_t>::max() + 1ll;
  // The number of rows in the row table is one batch larger than 2^31, and we'll compare
  // the last num_rows_batch rows.
  constexpr int64_t num_rows_row_table =
      std::numeric_limits<int32_t>::max() + 1ll + num_rows_batch;

  MemoryPool* pool = default_memory_pool();

  // The left side columns with num_rows_batch rows.
  std::vector<KeyColumnArray> columns_left;
  ExecBatch batch_left;
  {
    std::vector<Datum> values;

    // A fixed length array containing random values.
    ASSERT_OK_AND_ASSIGN(auto value_fixed_length,
                         ::arrow::gen::Random(uint32())->Generate(num_rows_batch));
    values.push_back(std::move(value_fixed_length));

    // A var length array containing small var length values ("X").
    ASSERT_OK_AND_ASSIGN(auto value_var_length,
                         ::arrow::gen::Constant(std::make_shared<BinaryScalar>("X"))
                             ->Generate(num_rows_batch));
    values.push_back(std::move(value_var_length));

    batch_left = ExecBatch(std::move(values), num_rows_batch);
    ASSERT_OK(ColumnArraysFromExecBatch(batch_left, &columns_left));
  }

  // The right side row table with num_rows_row_table rows.
  RowTableImpl row_table_right;
  {
    // Encode the row table with the left columns repeatedly.
    std::vector<KeyColumnMetadata> column_metadatas;
    ASSERT_OK(ColumnMetadatasFromExecBatch(batch_left, &column_metadatas));
    RowTableMetadata table_metadata;
    table_metadata.FromColumnMetadataVector(column_metadatas, sizeof(uint64_t),
                                            sizeof(uint64_t));
    ASSERT_OK(row_table_right.Init(pool, table_metadata));
    RowTableImpl row_table_batch;
    ASSERT_OK(row_table_batch.Init(pool, table_metadata));
    std::vector<uint16_t> row_ids(num_rows_batch);
    std::iota(row_ids.begin(), row_ids.end(), 0);
    RowTableEncoder row_encoder;
    row_encoder.Init(column_metadatas, sizeof(uint64_t), sizeof(uint64_t));
    row_encoder.PrepareEncodeSelected(0, num_rows_batch, columns_left);
    ASSERT_OK(row_encoder.EncodeSelected(
        &row_table_batch, static_cast<uint32_t>(num_rows_batch), row_ids.data()));
    for (int i = 0; i < num_rows_row_table / num_rows_batch; ++i) {
      ASSERT_OK(row_table_right.AppendSelectionFrom(row_table_batch, num_rows_batch,
                                                    /*source_row_ids=*/NULLPTR));
    }

    // The row table must contain an offset buffer.
    ASSERT_NE(row_table_right.offsets(), NULLPTR);
    ASSERT_EQ(row_table_right.length(), num_rows_row_table);
  }

  // The rows to compare, left row i to right row 2^31 + i.
  std::vector<uint32_t> row_ids_to_compare(num_rows_batch);
  std::iota(row_ids_to_compare.begin(), row_ids_to_compare.end(),
            num_rows_row_table - num_rows_batch);

  TempVectorStack stack;
  ASSERT_OK(
      stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows_batch)));
  LightContext ctx{CpuInfo::GetInstance()->hardware_flags(), &stack};

  {
    // No selection, output no match row ids.
    uint32_t num_rows_no_match;
    std::vector<uint16_t> row_ids_out(num_rows_batch);
    KeyCompare::CompareColumnsToRows(num_rows_batch, /*sel_left_maybe_null=*/NULLPTR,
                                     row_ids_to_compare.data(), &ctx, &num_rows_no_match,
                                     row_ids_out.data(), columns_left, row_table_right,
                                     /*are_cols_in_encoding_order=*/true,
                                     /*out_match_bitvector_maybe_null=*/NULLPTR);
    ASSERT_EQ(num_rows_no_match, 0);
  }

  {
    // No selection, output match bit vector.
    std::vector<uint8_t> match_bitvector(BytesForBits(num_rows_batch));
    KeyCompare::CompareColumnsToRows(
        num_rows_batch, /*sel_left_maybe_null=*/NULLPTR, row_ids_to_compare.data(), &ctx,
        /*out_num_rows=*/NULLPTR, /*out_sel_left_maybe_same=*/NULLPTR, columns_left,
        row_table_right,
        /*are_cols_in_encoding_order=*/true, match_bitvector.data());
    ASSERT_EQ(arrow::internal::CountSetBits(match_bitvector.data(), 0, num_rows_batch),
              num_rows_batch);
  }

  std::vector<uint16_t> selection_left(num_rows_batch);
  std::iota(selection_left.begin(), selection_left.end(), 0);

  {
    // With selection, output no match row ids.
    uint32_t num_rows_no_match;
    std::vector<uint16_t> row_ids_out(num_rows_batch);
    KeyCompare::CompareColumnsToRows(num_rows_batch, selection_left.data(),
                                     row_ids_to_compare.data(), &ctx, &num_rows_no_match,
                                     row_ids_out.data(), columns_left, row_table_right,
                                     /*are_cols_in_encoding_order=*/true,
                                     /*out_match_bitvector_maybe_null=*/NULLPTR);
    ASSERT_EQ(num_rows_no_match, 0);
  }

  {
    // With selection, output match bit vector.
    std::vector<uint8_t> match_bitvector(BytesForBits(num_rows_batch));
    KeyCompare::CompareColumnsToRows(
        num_rows_batch, selection_left.data(), row_ids_to_compare.data(), &ctx,
        /*out_num_rows=*/NULLPTR, /*out_sel_left_maybe_same=*/NULLPTR, columns_left,
        row_table_right,
        /*are_cols_in_encoding_order=*/true, match_bitvector.data());
    ASSERT_EQ(arrow::internal::CountSetBits(match_bitvector.data(), 0, num_rows_batch),
              num_rows_batch);
  }
}

Result:

[==========] Running 1 test from 1 test suite.
[----------] Global test environment set-up.
[----------] 1 test from KeyCompare
[ RUN      ] KeyCompare.CompareColumnsToRowsMany
AddressSanitizer:DEADLYSIGNAL
=================================================================
==1649==ERROR: AddressSanitizer: SEGV on unknown address 0x0004256d0800 (pc 0x000113702525 bp 0x7ff7bc6ff660 sp 0x7ff7bc6fefc0 T0)
==1649==The signal is caused by a READ memory access.
    #0 0x113702525 in unsigned int arrow::compute::KeyCompare::CompareBinaryColumnToRowHelper_avx2<false, unsigned int arrow::compute::KeyCompare::CompareBinaryColumnToRowImp_avx2<false>(unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*)::'lambda2'(unsigned char const*, unsigned char const*, unsigned int, long long vector[4], long long vector[4])>(unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*, unsigned int arrow::compute::KeyCompare::CompareBinaryColumnToRowImp_avx2<false>(unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*)::'lambda2'(unsigned char const*, unsigned char const*, unsigned int, long long vector[4], long long vector[4])) compare_internal_avx2.cc:242
    #1 0x1136f3506 in unsigned int arrow::compute::KeyCompare::CompareBinaryColumnToRowImp_avx2<false>(unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*) compare_internal_avx2.cc:504
    #2 0x1136f273d in arrow::compute::KeyCompare::CompareBinaryColumnToRow_avx2(bool, unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*) compare_internal_avx2.cc:664
    #3 0x113668a89 in void arrow::compute::KeyCompare::CompareBinaryColumnToRow<false>(unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*) compare_internal.cc:135
    #4 0x11366628d in arrow::compute::KeyCompare::CompareColumnsToRows(unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, unsigned int*, unsigned short*, std::__1::vector<arrow::compute::KeyColumnArray, std::__1::allocator<arrow::compute::KeyColumnArray>> const&, arrow::compute::RowTableImpl const&, bool, unsigned char*) compare_internal.cc:381
    #5 0x103bc261b in arrow::compute::KeyCompare_CompareColumnsToRowsMany_Test::TestBody() compare_test.cc:389
    #6 0x10536076f in void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*)+0x47 (libarrow_testing.1700.0.0.dylib:x86_64+0x3ff76f)
    #7 0x1053606db in testing::Test::Run()+0xc5 (libarrow_testing.1700.0.0.dylib:x86_64+0x3ff6db)
    #8 0x105361286 in testing::TestInfo::Run()+0x11e (libarrow_testing.1700.0.0.dylib:x86_64+0x400286)
    #9 0x105361c2a in testing::TestSuite::Run()+0x1d4 (libarrow_testing.1700.0.0.dylib:x86_64+0x400c2a)
    #10 0x10536c694 in testing::internal::UnitTestImpl::RunAllTests()+0x34c (libarrow_testing.1700.0.0.dylib:x86_64+0x40b694)
    #11 0x10536c22f in bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*)+0x47 (libarrow_testing.1700.0.0.dylib:x86_64+0x40b22f)
    #12 0x10536c1bc in testing::UnitTest::Run()+0x68 (libarrow_testing.1700.0.0.dylib:x86_64+0x40b1bc)
    #13 0x103bd9c6d in main+0x41 (arrow-compute-internals-test:x86_64+0x1003dcc6d)
    #14 0x7ff805c7f385 in start+0x795 (dyld:x86_64+0xfffffffffff5c385)

==1649==Register values:
rax = 0x00000006256d0800  rbx = 0x00007ff7bc6ff000  rcx = 0x00000000ffffffff  rdx = 0x00000000ffffffff
rdi = 0x00007ff7bc6fefe0  rsi = 0x00000000ffffffff  rbp = 0x00007ff7bc6ff660  rsp = 0x00007ff7bc6fefc0
 r8 = 0x00000000ffffffff   r9 = 0x00000000ffffffff  r10 = 0x00000000ffffffff  r11 = 0x000060d000004620
r12 = 0x0000000103bbe670  r13 = 0x0000616000000980  r14 = 0x0000602000002590  r15 = 0x0000000000000000
AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV compare_internal_avx2.cc:242 in unsigned int arrow::compute::KeyCompare::CompareBinaryColumnToRowHelper_avx2<false, unsigned int arrow::compute::KeyCompare::CompareBinaryColumnToRowImp_avx2<false>(unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*)::'lambda2'(unsigned char const*, unsigned char const*, unsigned int, long long vector[4], long long vector[4])>(unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*, unsigned int arrow::compute::KeyCompare::CompareBinaryColumnToRowImp_avx2<false>(unsigned int, unsigned int, unsigned short const*, unsigned int const*, arrow::compute::LightContext*, arrow::compute::KeyColumnArray const&, arrow::compute::RowTableImpl const&, unsigned char*)::'lambda2'(unsigned char const*, unsigned char const*, unsigned int, long long vector[4], long long vector[4]))
==1649==ABORTING