PCRE2Project / pcre2

PCRE2 development is now based here.
Other
879 stars 183 forks source link

Suggest fuzzer for pcre2. #150

Closed autofuzzoss closed 1 month ago

autofuzzoss commented 1 year ago

I suggest this fuzzer for continuous vulnerability checks. It is a slightly modified version of the existing pcre2_fuzzsupport.c that tests random input.

/*
 * This fuzzer is generated by UTopia with some manual modifications.
 * (UTopia Project: https://github.com/Samsung/UTopia)
 */

#include "FuzzedDataProvider.h"

#include <errno.h>
#include <gtest/gtest.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"

/* Upper bound on the number of subject bytes passed to the matching functions
in test(); longer inputs are capped at this length for matching only. */

#define MAX_MATCH_SIZE 1000

/* Number of ints in the workspace vector handed to pcre2_dfa_match(). */

#define DFA_WORKSPACE_COUNT 100

/* Compile-time option bits that the randomly generated option word is allowed
to keep; all other bits are masked off before pcre2_compile() is called. */

#define ALLOWED_COMPILE_OPTIONS                                                \
  (PCRE2_ANCHORED | PCRE2_ALLOW_EMPTY_CLASS | PCRE2_ALT_BSUX |                 \
   PCRE2_ALT_CIRCUMFLEX | PCRE2_ALT_VERBNAMES | PCRE2_AUTO_CALLOUT |           \
   PCRE2_CASELESS | PCRE2_DOLLAR_ENDONLY | PCRE2_DOTALL | PCRE2_DUPNAMES |     \
   PCRE2_ENDANCHORED | PCRE2_EXTENDED | PCRE2_FIRSTLINE |                      \
   PCRE2_MATCH_UNSET_BACKREF | PCRE2_MULTILINE | PCRE2_NEVER_BACKSLASH_C |     \
   PCRE2_NO_AUTO_CAPTURE | PCRE2_NO_AUTO_POSSESS | PCRE2_NO_DOTSTAR_ANCHOR |   \
   PCRE2_NO_START_OPTIMIZE | PCRE2_UCP | PCRE2_UNGREEDY |                      \
   PCRE2_USE_OFFSET_LIMIT | PCRE2_UTF)

/* Match-time option bits that the randomly generated option word is allowed
to keep. PCRE2_NO_JIT is stripped again in test() before the DFA matches. */

#define ALLOWED_MATCH_OPTIONS                                                  \
  (PCRE2_ANCHORED | PCRE2_ENDANCHORED | PCRE2_NOTBOL | PCRE2_NOTEOL |          \
   PCRE2_NOTEMPTY | PCRE2_NOTEMPTY_ATSTART | PCRE2_PARTIAL_HARD |              \
   PCRE2_PARTIAL_SOFT | PCRE2_NO_JIT)

/* Callout handler used to bound the time spent on a single match. The callout
data points at a uint32_t counter: each invocation increments it, and once the
count exceeds 100 the match is abandoned by returning PCRE2_ERROR_CALLOUT.
Otherwise 0 is returned so that matching continues. */

static int callout_function(pcre2_callout_block *cb, void *callout_data) {
  uint32_t *counter = (uint32_t *)callout_data;

  (void)cb; /* The callout block itself is not inspected */

  *counter += 1;
  if (*counter > 100)
    return PCRE2_ERROR_CALLOUT;
  return 0;
}

/* Fuzz one input. The bytes are compiled as a pattern and, when compilation
succeeds, matched against themselves as the subject using both pcre2_match()
and pcre2_dfa_match(). The compile is done twice (with and without the random
options), and each kind of match is done twice in the same way.

Arguments:
  data   the input bytes, used both as the pattern and as the subject
  size   number of bytes in data

Returns: always 0. abort() is called if a compile error message contains the
text "internal error". */

int test(const char *data, size_t size) {
  uint32_t compile_options;
  uint32_t match_options;
  /* The callout counter lives at function scope because its address is stored
  in the match context, which is created once and then reused by both passes
  of the compile loop below. */
  uint32_t callout_count = 0;
  pcre2_match_data *match_data = NULL;
  pcre2_match_context *match_context = NULL;
  size_t match_size;
  int dfa_workspace[DFA_WORKSPACE_COUNT];
  int r1, r2;
  int i;

  if (size < 1)
    return 0;

  /* Limiting the length of the subject for matching stops fruitless searches
  in large trees taking too much time. */

  match_size = (size > MAX_MATCH_SIZE) ? MAX_MATCH_SIZE : size;

  /* Figure out some options to use. Initialize the random number to ensure
  repeatability. The seed byte is read as unsigned char so that the seed does
  not depend on whether plain char is signed on this platform. Ensure that we
  get a 32-bit unsigned random number for testing options. (RAND_MAX is
  required to be at least 32767, but is commonly 2147483647, which excludes
  the top bit.) */

  srand((unsigned int)(unsigned char)data[size / 2]);
  r1 = rand();
  r2 = rand();

  /* Ensure that all undefined option bits are zero (waste of time trying them)
  and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
  input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
  reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
  because \C in random patterns is highly likely to cause a crash. */

  compile_options = ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) &
                     ALLOWED_COMPILE_OPTIONS) |
                    PCRE2_NEVER_BACKSLASH_C;

  match_options = ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) &
                   ALLOWED_MATCH_OPTIONS);

  /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
  allowed together and just give an immediate error return. */

  if (((compile_options | match_options) & PCRE2_ENDANCHORED) != 0)
    match_options &= ~(PCRE2_PARTIAL_HARD | PCRE2_PARTIAL_SOFT);

  /* Do the compile with and without the options, and after a successful
  compile, likewise do the match with and without the options. */

  for (i = 0; i < 2; i++) {
    int errorcode;
    PCRE2_SIZE erroroffset;
    pcre2_code *code;

    code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
                         &errorcode, &erroroffset, NULL);

    /* Compilation succeeded */

    if (code != NULL) {
      int j;
      uint32_t save_match_options = match_options;

#ifdef SUPPORT_JIT
      pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
#endif

      /* Create match data and context blocks only when we first need them. Set
      low match and depth limits to avoid wasting too much time searching large
      pattern trees. Almost all matches are going to fail. If either block
      cannot be created, release the compiled pattern and leave the loop so
      that the common cleanup at the end of the function still runs. */

      if (match_data == NULL) {
        match_data = pcre2_match_data_create(32, NULL);
        if (match_data == NULL) {
          pcre2_code_free(code);
          break;
        }
      }

      if (match_context == NULL) {
        match_context = pcre2_match_context_create(NULL);
        if (match_context == NULL) {
          pcre2_code_free(code);
          break;
        }
        (void)pcre2_set_match_limit(match_context, 100);
        (void)pcre2_set_depth_limit(match_context, 100);
        (void)pcre2_set_callout(match_context, callout_function,
                                &callout_count);
      }

      /* Match twice, with and without options. */

      for (j = 0; j < 2; j++) {
        callout_count = 0;
        errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size,
                                0, match_options, match_data, match_context);

        match_options = 0; /* For second time */
      }

      /* Match with DFA twice, with and without options. */

      match_options =
          save_match_options & ~PCRE2_NO_JIT; /* Not valid for DFA */

      for (j = 0; j < 2; j++) {
        callout_count = 0;
        errorcode = pcre2_dfa_match(
            code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0, match_options,
            match_data, match_context, dfa_workspace, DFA_WORKSPACE_COUNT);

        match_options = 0; /* For second time */
      }

      match_options = save_match_options; /* Reset for the second compile */
      pcre2_code_free(code);
    }

    /* Compilation failed. An ordinary pattern error is expected for random
    input; only a message containing "internal error" is treated as fatal. */

    else {
      unsigned char buffer[256];
      pcre2_get_error_message(errorcode, buffer, 256);
      if (strstr((const char *)buffer, "internal error") != NULL)
        abort();
    }

    compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
  }

  if (match_data != NULL)
    pcre2_match_data_free(match_data);
  if (match_context != NULL)
    pcre2_match_context_free(match_context);

  return 0;
}

/* libFuzzer entry point. The fuzzer-supplied bytes are copied into a
std::string through FuzzedDataProvider and handed to test().

The size parameter is declared as size_t so that this definition matches the
prototype through which libFuzzer calls the function.

Arguments:
  data   pointer to the fuzzer-generated input
  size   number of bytes at data

Returns: always 0. */

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  FuzzedDataProvider provider(data, size);
  std::string fuzz_input = provider.ConsumeRemainingBytesAsString();

  test(fuzz_input.c_str(), fuzz_input.length());

  return 0;
}
PhilipHazel commented 1 year ago

I'm not sure what you are suggesting here. Are you suggesting including this file in the repo and/or distribution as generally useful? The existing pcre2_fuzzsupport.c was specifically created for the use of the oss-fuzz (Cluster-Fuzz) service that Google runs, and I would not want to change that. Your file includes "gtest/gtest.h" and "FuzzedDataProvider.h", which are not standard headers, and you have also removed the standalone facility, which is useful for checking out fuzz failure reports. Please give more background information.

PhilipHazel commented 1 month ago

As there has been no further information on this, I am going to close the issue.