boostorg / wave

Boost.org wave module
http://boost.org/libs/wave
21 stars 49 forks source link

Simple unknown directive => found_unknown_directive is not called, stripped of pound. #137

Closed realazthat closed 2 years ago

realazthat commented 2 years ago

My minimal reproducible example and output:

// g++-11 boost-wave-reproduction.cxx  --std=c++17 -lboost_wave -lboost_thread -lboost_filesystem
// ./a.out

#include <filesystem>
#include <iostream>
#include <sstream>
#include <string>

#include "boost/wave.hpp"
#include "boost/wave/cpp_context.hpp"
#include "boost/wave/cpp_exceptions.hpp"
#include "boost/wave/cpplexer/cpp_lex_iterator.hpp"
#include "boost/wave/language_support.hpp"

// Hooks policy used by the reproduction: each overridden hook writes a trace
// line to std::cout and then delegates to the eat_whitespace base policy, so
// the value returned to the caller is whatever the base hook returns.
template<typename Token>
struct ContextPolicy : boost::wave::context_policies::eat_whitespace<Token> {
  using base_type = boost::wave::context_policies::eat_whitespace<Token>;

  // Traces the text of the directive token, then forwards to the base hook.
  template <typename ContextT, typename TokenT>
  bool found_directive(ContextT const &ctx,
      TokenT const &directive) {
    std::cout << "found_directive, directive.get_value(): ";
    std::cout << directive.get_value() << std::endl;
    return base_type::found_directive(ctx, directive);
  }

  // Traces every token value in `line` (each followed by a comma), then
  // forwards `line` and `pending` unchanged to the base hook.
  template <typename ContextT, typename ContainerT>
  bool found_unknown_directive(ContextT const& ctx,
      ContainerT const& line, ContainerT& pending) {
    std::cout << "found_unknown_directive, line: ";
    typename ContainerT::const_iterator const stop = line.end();
    for (typename ContainerT::const_iterator it = line.begin(); it != stop; ++it) {
      std::cout << (*it).get_value() << ",";
    }
    std::cout << std::endl;
    return base_type::found_unknown_directive(ctx, line, pending);
  }

  // Traces validity and numeric id of the token; for valid tokens also its
  // symbolic name and text. The base hook's result is returned as-is.
  template <typename ContextT, typename TokenT>
  TokenT const& generated_token(ContextT const &ctx,
      TokenT const& token) {
    std::cout << "generated_token()" << std::endl;

    char const* const validity = token.is_valid() ? "true" : "false";
    std::cout << " token.is_valid(): " << validity << std::endl;

    boost::wave::token_id const id = boost::wave::token_id(token);
    std::cout << " token_id: " << id << std::endl;

    // Name and value are only printed for valid tokens.
    if (token.is_valid()) {
      std::cout << " boost::wave::get_token_name(token_id): ";
      std::cout << boost::wave::get_token_name(id) << std::endl;
      std::cout << " token.get_value(): ";
      std::cout << token.get_value() << std::endl;
    }
    return base_type::generated_token(ctx, token);
  }
};

// Reproduction driver: runs Boost.Wave over a small in-memory source using
// ContextPolicy as the hooks type, with the macro TEST predefined as
// "#version", and prints the concatenated text of all produced tokens.
//
// Returns 0 on success, 1 if the macro definition is rejected. Exceptions
// thrown by Wave during preprocessing are intentionally not caught here.
int main() {

  typedef boost::wave::cpplexer::lex_iterator<
      boost::wave::cpplexer::lex_token<>>
      lex_iterator_type;
  typedef boost::wave::context<
    /*Iterator=*/std::string::const_iterator,
    /*LexIterator=*/lex_iterator_type,
    /*InputPolicy=*/boost::wave::iteration_context_policies::load_file_to_string,
    /*ContextPolicy=*/ContextPolicy<typename lex_iterator_type::token_type>>
      context_type;

  std::filesystem::path source_path = "/path/to/source.cxx";
  std::string source = R"source(
// This gets replaced by "#version" as expected.
TEST
// The pound sign doesn't appear, and found_unknown_directive is not called.
#version
)source";

  // cbegin()/cend() yield std::string::const_iterator directly, matching the
  // iterator type the context was instantiated with.
  context_type ctx(/*first_=*/source.cbegin(),
                      /*last_=*/source.cend(),
                      /*fname=*/source_path.string().c_str());
  ctx.set_language(/*language=*/boost::wave::language_support::support_cpp11);

  // The call must not live inside assert(): when NDEBUG is defined the
  // assert argument is not evaluated, so the macro would never be defined.
  if (!ctx.add_macro_definition(/*macrostring=*/"TEST=#version")) {
    std::cerr << "add_macro_definition failed for TEST=#version" << std::endl;
    return 1;
  }

  context_type::iterator_type first = ctx.begin();
  context_type::iterator_type last = ctx.end();

  // Advancing the iterator drives the preprocessor; the policy hooks fire
  // as a side effect while the token text is collected.
  std::ostringstream sstr;
  while (first != last) {
    sstr << (*first).get_value();
    ++first;
  }

  std::string preprocessed_source = sstr.str();

  std::cout << preprocessed_source << std::endl;
  return 0;
}

Output:

$ ./a.out 
generated_token()
 token.is_valid(): true
 token_id: 402915724
 boost::wave::get_token_name(token_id): POUND
 token.get_value(): #
generated_token()
 token.is_valid(): true
 token_id: 134480252
 boost::wave::get_token_name(token_id): IDENTIFIER
 token.get_value(): version
generated_token()
 token.is_valid(): true
 token_id: 1476395402
 boost::wave::get_token_name(token_id): NEWLINE
 token.get_value(): 

generated_token()
 token.is_valid(): true
 token_id: 134480252
 boost::wave::get_token_name(token_id): IDENTIFIER
 token.get_value(): version
generated_token()
 token.is_valid(): true
 token_id: 1476395402
 boost::wave::get_token_name(token_id): NEWLINE
 token.get_value(): 

generated_token()
 token.is_valid(): true
 token_id: 1610613137
 boost::wave::get_token_name(token_id): EOF
 token.get_value(): 
generated_token()
 token.is_valid(): false
 token_id: 1610613138
#version
version

Expected output:

...
found_unknown_directive ...
...
#version
#version
jefftrull commented 2 years ago

I don't know what the problem is yet, but I do have a workaround for you to try:

Replace this line:

ctx.set_language(/*language=*/boost::wave::language_support::support_cpp11);

with these two:

ctx.set_language(boost::wave::enable_long_long(ctx.get_language()));
ctx.set_language(boost::wave::enable_variadics(ctx.get_language()));

C++11 support consists of three things: long long, variadics, and a removal of a warning when the input does not end in a newline. It seems like the third feature is interacting with the found_unknown_directive hook.

jefftrull commented 2 years ago

I think I have a fix, and a tentative root cause... the "ignore lack of newline before EOF" feature is checked in one place unconditionally, regardless of whether we are actually looking at EOF. Can you try changing this line to:

    return need_no_newline_at_end_of_file(ctx.get_language()) &&
        ((it == end) || (T_EOF == token_id(*it)));

and let me know if it works for you?

realazthat commented 2 years ago

> and let me know if it works for you?

Nope, I don't think it is working :(

jefftrull commented 2 years ago

OK! Did the behavior change in any way? For me, this causes the found_unknown_directive callback to start firing. New output:

generated_token()
 token.is_valid(): true
 token_id: 402915724
 boost::wave::get_token_name(token_id): POUND
 token.get_value(): #
generated_token()
 token.is_valid(): true
 token_id: 134480252
 boost::wave::get_token_name(token_id): IDENTIFIER
 token.get_value(): version
generated_token()
 token.is_valid(): true
 token_id: 1476395402
 boost::wave::get_token_name(token_id): NEWLINE
 token.get_value(): 

found_unknown_directive, line: #,version,
,
terminate called after throwing an instance of 'boost::wrapexcept<boost::wave::preprocess_exception>'
  what():  boost::wave::preprocess_exception
Aborted
realazthat commented 2 years ago

OK, my bad: it was still using the system Boost. I uninstalled that, and now it works.

$ LD_LIBRARY_PATH=$HOME/boost/stage/lib/ ./a.out
found_unknown_directive, line: #,version,
,
terminate called after throwing an instance of 'boost::wrapexcept<boost::wave::preprocess_exception>'
  what():  boost::wave::preprocess_exception
Aborted

Note, I commented out the generated_token() logging, so this is consistent with your output.

jefftrull commented 2 years ago

Great! OK, what else is left? Any other behavior that seems wrong?

realazthat commented 2 years ago

> Great! OK, what else is left? Any other behavior that seems wrong?

I think that does it.