apache / arrow

Apache Arrow is a multi-language toolbox for accelerated data interchange and in-memory processing
https://arrow.apache.org/
Apache License 2.0
14.42k stars 3.51k forks source link

[C++][Gandiva] InExpression for Decimal128 segfaults. #43198

Open vivek-kumar-Niel opened 3 months ago

vivek-kumar-Niel commented 3 months ago

Describe the bug, including details regarding any error messages, version, and platform.

// Reproduction case: builds a Gandiva filter for `f0 IN (6, 12, 11)` over a
// Decimal128(38, 5) column and evaluates it against a 5-row batch. The whole
// build-and-evaluate sequence is repeated five times inside a loop.
TEST_F(TestIn, TestInDecimal) {
  // Number of times the sequence below is repeated.
  int test = 5;
  while(test--) {
  int32_t precision = 38;
  int32_t scale = 5;
  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);

  // schema for input fields: a single decimal column named "f0"
  auto field0 = field("f0", arrow::decimal(precision, scale));
  auto schema = arrow::schema({field0});

  // Build the condition: f0 IN (6, 12, 11)
  auto node_f0 = TreeExprBuilder::MakeField(field0);

  // The IN-list constants, all with the same precision/scale as the column.
  gandiva::DecimalScalar128 d0("6", precision, scale);
  gandiva::DecimalScalar128 d1("12", precision, scale);
  gandiva::DecimalScalar128 d2("11", precision, scale);
  std::unordered_set<gandiva::DecimalScalar128> in_constants({d0, d1, d2});
  auto in_expr = TreeExprBuilder::MakeInExpressionDecimal(node_f0, in_constants);
  auto condition = TreeExprBuilder::MakeCondition(in_expr);

  // A fresh filter is created on every loop iteration.
  std::shared_ptr<Filter> filter;
  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
  EXPECT_TRUE(status.ok());

  // Create a row-batch with some sample data
  // (the boolean list is presumably the per-row validity; row 3 is marked false)
  int num_records = 5;
  auto values0 = MakeDecimalVector({"1", "2", "0", "-6", "6"});
  auto array0 =
      MakeArrowArrayDecimal(decimal_type, values0, {true, true, true, false, true});
  // expected output (indices for which condition matches): only row 4 ("6")
  auto exp = MakeArrowArrayUint16({4});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Selection vector that receives the indices of matching rows.
  std::shared_ptr<SelectionVector> selection_vector;
  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
  EXPECT_TRUE(status.ok());

  // Evaluate expression
  status = filter->Evaluate(*in_batch, selection_vector);
  EXPECT_TRUE(status.ok());

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
  }
}

Running this test multiple times in a loop segfaults.

Cause: in the function

bool gdv_fn_in_expr_lookup_decimal(int64_t ptr, int64_t value_high, int64_t value_low,
                                   int32_t precision, int32_t scale, bool in_validity) {
  if (!in_validity) {
    return false;
  }
  gandiva::DecimalScalar128 value(value_high, value_low, precision, scale);
  gandiva::InHolder<gandiva::DecimalScalar128>* holder =
      reinterpret_cast<gandiva::InHolder<gandiva::DecimalScalar128>*>(ptr);
  return holder->HasValue(value);
}

Here, the InHolder data pointed to by ptr is getting corrupted.

Component(s)

C++ C++ - Gandiva

kou commented 3 months ago

@niyue @js8544 Could you take a look at this?

vivek-kumar-Niel commented 2 months ago

Hello, are there any updates on this issue?