intel / hyperscan

High-performance regular expression matching library
https://www.hyperscan.io
Other
4.71k stars 705 forks source link

memory leak found by address sanitizer #398

Open niyue opened 1 year ago

niyue commented 1 year ago

I am using Hyperscan 5.4.1 and have a code snippet like the one below:

class PatternScanner {
public:
  explicit PatternScanner(const std::string &pattern, unsigned int flags = CH_FLAG_DOTALL,
                     unsigned int mode = CH_MODE_GROUPS) {
    ch_compile_error_t *compile_err;
    auto compile_result =
        ch_compile(pattern.c_str(), flags, mode, nullptr, &_database, &compile_err);
    if (compile_result != HS_SUCCESS) {
      ch_free_database(_database);
      ch_free_compile_error(compile_err);
      throw std::runtime_error("Failed to compile regular expression pattern");
    }
    if (ch_alloc_scratch(_database, &_scratch) != HS_SUCCESS) {
      ch_free_database(_database);
      ch_free_scratch(_scratch);
      throw std::runtime_error("Failed to allocate scratch space for hyperscan");
    }
  }

  ~PatternScanner() {
    ch_free_scratch(_scratch);
    ch_free_database(_database);
  }

  std::tuple<unsigned long long, unsigned long long> scan(const std::string &haystack) const {
    unsigned long long context[2];
    context[0] = PATTERN_NOT_FOUND;
    context[1] = PATTERN_NOT_FOUND;
    const int scan_result = ch_scan(_database, haystack.c_str(), haystack.size(), 0, _scratch,
                                    on_pattern_match, on_error, context);
    if (scan_result != HS_SUCCESS && scan_result != HS_SCAN_TERMINATED) {
      throw std::runtime_error("Failed to scan the given string");
    }
    return std::make_tuple(context[0], context[1]);
  }
  static const int PATTERN_NOT_FOUND = -1;

private:
  ch_database_t *_database = nullptr;
  ch_scratch_t *_scratch = nullptr;
  static int on_pattern_match(unsigned int id, unsigned long long from, unsigned long long to,
                              unsigned int flags, unsigned int size, const ch_capture_t *captured,
                              void *ctx) {
    auto tuple = static_cast<unsigned long long *>(ctx);
    tuple[0] = from;
    tuple[1] = to;
    return HS_SCAN_TERMINATED;
  }

  static ch_callback_t on_error(ch_error_event_t error_type, unsigned int id, void *info,
                                void *ctx) {
    return -1;
  }
};

When I check the code with AddressSanitizer (-fsanitize=address), it reports something like this:

==6464==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 96 byte(s) in 1 object(s) allocated from:
    #0 0x4ef7ad in malloc (/....../build-cmake-debug-linux/utility-tests+0x4ef7ad)
    #1 0xd55426 in _pcre_jit_compile /vcpkg/buildtrees/pcre/src/71f246c0ab-0bfdac5640.clean/pcre_jit_compile.c:11551:15
    #2 0xd5836d in pcre_study /vcpkg/buildtrees/pcre/src/71f246c0ab-0bfdac5640.clean/pcre_study.c:1630:5
    #3 0x7dca7a in buildPcre /vcpkg/buildtrees/chimera/src/1327c0c999-5e957771f8.clean/chimera/ch_compile.cpp:349:23
    #4 0x7dca7a in PatternData /vcpkg/buildtrees/chimera/src/1327c0c999-5e957771f8.clean/chimera/ch_compile.cpp:285:14
    #5 0x7dca7a in make_unique<(anonymous namespace)::PatternData, char const*&, unsigned int&, unsigned int&, unsigned int&, unsigned int&, long unsigned int&, long unsigned int&, const hs_platform_info*&> /vcpkg/installed/x64-linux-haswell/include/boost/smart_ptr/make_unique.hpp:33:31
    #6 0x7dca7a in ch::ch_compile_multi_int(char const* const*, unsigned int const*, unsigned int const*, unsigned int, unsigned int, unsigned long, unsigned long, hs_platform_info const*, ch_database**) /vcpkg/buildtrees/chimera/src/1327c0c999-5e957771f8.clean/chimera/ch_compile.cpp:499:74
    #7 0x7ddb7f in ch_compile /vcpkg/buildtrees/chimera/src/1327c0c999-5e957771f8.clean/chimera/ch_compile.cpp:707:33
    #8 0x61e0fb in PatternScanner::PatternScanner(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, unsigned int, unsigned int) /....../pattern_scanner.h:14:9

I checked my usage and it seems correct, and the test case only exercises the normal code path. Do you have any idea whether this is an issue in Hyperscan (or PCRE)?

hongyang7 commented 1 year ago

Hi @niyue, can you reproduce the issue with the latest version, 5.4.2?