hanickadot / compile-time-regular-expressions

Compile Time Regular Expression in C++
https://twitter.com/hankadusikova
Apache License 2.0
3.32k stars 183 forks source link

How to get the match group for a match from tokenize? #309

Closed alexios-angel closed 3 months ago

alexios-angel commented 3 months ago

I have a regex that has 14 capture groups, and I would like to know which capture group produced each resulting match in order to tokenize the string.

Is there any way to do something like the following?

#include <iostream>
#include <string_view>
#include <ctre.hpp>

using namespace std;
using namespace ctre::literals;

int main() {
    constexpr auto pattern = r"r((\d+)|([a-zA-Z]+))r"_ctre;
    constexpr string_veiw input = "123 abc 456 xyz";

    for (auto match : ctre::tokenize<pattern>(input)) {
        court << "Match is from group " << match.get_group();
    }

    return 0;
}
hanickadot commented 3 months ago
// Decompose every match into the whole match plus one binding per capture
// group, then report which group produced it.
for (auto match : ctre::tokenize<pattern>(input)) {
  auto && [_, number, identifier] = match;
  // Presumably the pattern has only these two alternatives, so "not number"
  // is treated as "identifier" (i.e. assert(identifier) is expected to hold).
  const char *label = number ? "Match is from group [number]\n"
                             : "Match is from group [identifier]\n";
  cout << label;
}
alexios-angel commented 3 months ago

For anyone in the future who would like to get the index of the matching capture group, use the code sample below. It has only been tested with Clang.

#include <cstddef>
#include <iostream>
#include <string>
#include <type_traits>

#include <ctre.hpp>

// Primary template
template <typename... Args>
struct RegexResultsNumberOfTemplateArgs;

// Specialization for variadic template
template <typename... Args>
struct RegexResultsNumberOfTemplateArgs<const ctre::regex_results<Args...>> {
    static constexpr std::size_t value = sizeof...(Args);
};

/// Finds the first index in [N, MaxDepth) whose `match.get<index>()` is truthy.
///
/// @tparam MaxDepth  Exclusive upper bound on the indices that are probed.
/// @tparam N         Index to probe first (defaults to 1).
/// @param  match     Object exposing a `get<I>()` member template whose result
///                   is contextually convertible to `bool`.
/// @return The first index that tested true, or -1 if none did.
template <size_t MaxDepth, size_t N = 1, typename Match>
int get_matching_group(const Match& match) {
    if constexpr (N < MaxDepth) {
        // Probe the current index; on a miss, continue with the next one.
        return match.template get<N>()
                   ? static_cast<int>(N)
                   : get_matching_group<MaxDepth, N + 1>(match);
    } else {
        // Ran past the last index without finding a truthy group.
        return -1;
    }
}

void lexicalize(std::string &filedata) {
  static constexpr ctll::fixed_string TokenRegex {"([a-zA-Z])|(\d)"};
  const auto &matches = ctre::tokenize<TokenRegex>(filedata);

  for (const auto &match : matches) {
        constexpr size_t num_of_regex_groups = RegexResultsNumberOfTemplateArgs<typeof(match)>::value;
        size_t group = get_matching_group<num_of_regex_groups>(match);
        std::cout << group << "\n";
  }
}