Open mingodad opened 9 months ago
Using the EBNF
syntax to also give a high-level view of the actual parser (not finished):
//
// EBNF to be viewed at https://www.bottlecaps.de/rr/ui
//
// Copy and paste this at https://www.bottlecaps.de/rr/ui in the 'Edit Grammar' tab
// then click the 'View Diagram' tab.
//
parse_translation_unit ::=
parse_module_unit
| parse_top_level_declaration_seq
parse_module_unit ::=
parse_module_head parse_global_module_fragment
parse_module_declaration parse_declaration_seq parse_private_module_fragment
parse_top_level_declaration_seq ::=
parse_declaration+
parse_declaration_seq ::=
parse_maybe_module
| parse_declaration+
parse_declaration ::=
parse_empty_declaration
| parse_explicit_instantiation
| parse_template_declaration
| parse_linkage_specification
| parse_namespace_definition
| parse_deduction_guide
| parse_export_declaration
| parse_module_import_declaration
| parse_attribute_declaration
| parse_block_declaration
parse_empty_declaration ::=
T_SEMICOLON
parse_explicit_instantiation ::=
T_EXTERN T_TEMPLATE parse_declaration
parse_template_declaration ::=
T_TEMPLATE T_LESS parse_template_parameter_list T_GREATER
parse_requires_clause parse_template_declaration?
parse_concept_definition?
parse_template_declaration_body
parse_linkage_specification ::=
T_EXTERN parse_optional_attribute_specifier_seq T_STRING_LITERAL
(T_LBRACE parse_declaration_seq T_RBRACE | parse_declaration )
parse_namespace_definition ::=
T_INLINE? T_NAMESPACE parse_optional_attribute_specifier_seq T_IDENTIFIER T_COLON_COLON
parse_namespace_body
parse_deduction_guide ::=
parse_explicit_specifier T_IDENTIFIER T_LPAREN parse_parameter_declaration_clause T_RPAREN
parse_simple_template_id T_SEMICOLON
parse_export_declaration ::=
T_EXPORT (T_LBRACE parse_declaration_seq T_RBRACE | parse_module_import_declaration? parse_declaration )
parse_module_import_declaration ::=
parse_import_keyword parse_import_name parse_optional_attribute_specifier_seq T_SEMICOLON
parse_attribute_declaration ::=
parse_attribute_specifier_seq T_SEMICOLON
parse_block_declaration ::=
parse_asm_declaration
| parse_namespace_alias_definition
| parse_static_assert_declaration
| parse_opaque_enum_declaration
| parse_using_enum_declaration
| parse_using_directive
| parse_alias_declaration
| parse_using_declaration
| parse_simple_declaration
// asm declaration: qualifiers, the string literal, then an optional output
// operand list followed by an optional input operand list.
parse_asm_declaration ::=
T_ASM AsmQualifier* T_LPAREN T_STRING_LITERAL
AsmOutputOperandList? AsmInputOperandList?
T_RPAREN T_SEMICOLON
AsmQualifier ::=
T_INLINE
| T_VOLATILE
| T_GOTO
// Output and input operand lists share the same shape (AsmInOutOperandList).
AsmOutputOperandList ::=
AsmInOutOperandList
AsmInputOperandList ::=
AsmInOutOperandList
// A ':' followed by one or more comma-separated operands.
AsmInOutOperandList ::=
T_COLON parse_asm_operand (T_COMMA parse_asm_operand)*
parse_namespace_alias_definition ::=
T_NAMESPACE T_IDENTIFIER T_EQUAL parse_qualified_namespace_specifier T_SEMICOLON
parse_static_assert_declaration ::=
T_STATIC_ASSERT T_LPAREN parse_constant_expression (T_COMMA T_STRING_LITERAL)? T_RPAREN T_SEMICOLON
parse_opaque_enum_declaration ::=
parse_optional_attribute_specifier_seq parse_enum_key parse_enum_head_name parse_enum_base T_SEMICOLON
parse_using_enum_declaration ::=
T_USING parse_elaborated_enum_specifier T_SEMICOLON
parse_using_directive ::=
T_USING T_NAMESPACE parse_optional_nested_name_specifier parse_name_id T_SEMICOLON
parse_alias_declaration ::=
T_USING T_IDENTIFIER parse_optional_attribute_specifier_seq T_EQUAL parse_defining_type_id T_SEMICOLON
parse_using_declaration ::=
T_USING parse_using_declarator_list T_SEMICOLON
parse_simple_declaration ::=
parse_one_simple_declaration+
parse_one_simple_declaration ::=
T___EXTENSION__? parse_optional_attribute_specifier_seq
(
T_SEMICOLON
| parse_template_class_declaration
| parse_empty_or_attribute_declaration
| parse_notypespec_function_definition
| parse_decl_specifier_seq
| parse_type_or_forward_declaration
| parse_structured_binding
)
parse_module_head ::=
T_EXPORT parse_id
parse_global_module_fragment ::=
parse_module_keyword T_SEMICOLON parse_declaration_seq
parse_module_declaration ::=
parse_export_keyword parse_module_keyword parse_module_name parse_module_partition
parse_optional_attribute_specifier_seq T_SEMICOLON
parse_private_module_fragment ::=
parse_module_keyword T_COLON T_PRIVATE T_SEMICOLON parse_declaration_seq
// NOTE(review): parse_block_declaration is already defined earlier in this
// grammar (the asm / namespace-alias / static_assert / using / simple-declaration
// alternatives). This second production with the same name presumably needs a
// distinct name (it describes a class specifier followed by attributes and ';')
// -- TODO confirm against the parser.
parse_block_declaration ::=
parse_class_specifier parse_attribute_specifier_seq T_SEMICOLON
parse_class_specifier ::=
parse_class_head T_LBRACE parse_class_body T_RBRACE
parse_class_head ::=
(T_CLASS | T_STRUCT | T_UNION) parse_optional_attribute_specifier_seq
parse_class_head_name parse_class_virt_specifier parse_base_clause
parse_class_head_name ::=
parse_optional_nested_name_specifier check_type_traits parse_type_name
parse_class_virt_specifier ::=
parse_final
parse_base_clause ::=
T_COLON parse_base_specifier_list
parse_base_specifier_list ::=
parse_base_specifier T_DOT_DOT_DOT ( T_COMMA parse_base_specifier T_DOT_DOT_DOT)*
parse_base_specifier ::=
parse_optional_attribute_specifier_seq
(T_VIRTUAL parse_access_specifier | parse_access_specifier T_VIRTUAL)?
parse_class_or_decltype
parse_class_body ::=
parse_member_specification*
parse_member_specification ::=
parse_member_declaration
parse_member_declaration ::=
parse_access_specifier T_COLON
| parse_empty_declaration
| parse_using_enum_declaration
| parse_alias_declaration
| parse_using_declaration
| parse_static_assert_declaration
| parse_deduction_guide
| parse_opaque_enum_declaration
| parse_template_declaration
| parse_member_declaration_helper
parse_member_declaration_helper ::=
T___EXTENSION__? parse_optional_attribute_specifier_seq parse_decl_specifier_seq_no_typespecs
(parse_notypespec_function_definition | parse_decl_specifier_seq T_SEMICOLON )
parse_notypespec_function_definition ::=
parse_declarator_id parse_function_declarator parse_requires_clause parse_virt_specifier_seq
parse_optional_attribute_specifier_seq parse_pure_specifier (T_SEMICOLON | parse_function_body)
parse_function_body ::=
parse_function_try_block
| T_EQUAL T_DEFAULT T_SEMICOLON
| T_EQUAL T_DELETE T_SEMICOLON
| parse_ctor_initializer parse_compound_statement
parse_compound_statement ::=
T_LBRACE finish_compound_statement? T_RBRACE
finish_compound_statement ::=
( parse_maybe_statement | parse_skip_statement )+
parse_maybe_statement ::=
T___EXTENSION__? (
parse_case_statement
| parse_default_statement
| parse_while_statement
| parse_do_statement
| parse_for_statement
| parse_if_statement
| parse_switch_statement
| parse_break_statement
| parse_continue_statement
| parse_return_statement
| parse_goto_statement
| parse_coroutine_return_statement
| parse_try_block
| parse_maybe_compound_statement
| parse_labeled_statement
| parse_declaration_statement
| parse_expression_statement
)
parse_case_statement ::=
T_CASE parse_constant_expression T_COLON
parse_default_statement ::=
T_DEFAULT T_COLON
// while ( condition ) statement -- the loop body is included, matching the
// do/for/switch productions in this grammar.
parse_while_statement ::=
T_WHILE T_LPAREN parse_condition T_RPAREN parse_statement
parse_do_statement ::=
T_DO parse_statement T_WHILE T_LPAREN parse_expression T_RPAREN T_SEMICOLON
parse_for_statement ::=
parse_for_range_statement
| T_FOR T_LPAREN parse_init_statement T_SEMICOLON parse_condition T_SEMICOLON parse_expression T_RPAREN
parse_statement
parse_for_range_statement ::=
T_FOR T_LPAREN parse_init_statement parse_for_range_declaration T_COLON parse_for_range_initializer T_RPAREN
parse_statement
parse_init_statement ::=
parse_simple_declaration parse_maybe_expression
parse_for_range_declaration ::=
parse_decl_specifier_seq ( parse_structured_binding | parse_declarator )
parse_for_range_initializer ::=
parse_expr_or_braced_init_list
parse_if_statement ::=
T_IF (
T_EXCLAIM T_CONSTEVAL parse_compound_statement (T_ELSE parse_statement)?
| T_CONSTEXPR? T_LPAREN parse_init_statement parse_condition T_RPAREN parse_statement (T_ELSE parse_statement)?
)
parse_switch_statement ::=
T_SWITCH T_LPAREN parse_init_statement parse_condition T_RPAREN parse_statement
parse_break_statement ::=
T_BREAK T_SEMICOLON
parse_continue_statement ::=
T_CONTINUE T_SEMICOLON
parse_return_statement ::=
T_RETURN parse_expr_or_braced_init_list? T_SEMICOLON
parse_goto_statement ::=
T_GOTO T_IDENTIFIER T_SEMICOLON
parse_coroutine_return_statement ::=
T_CO_RETURN parse_expr_or_braced_init_list? T_SEMICOLON
parse_try_block ::=
T_TRY parse_compound_statement parse_handler_seq
parse_handler_seq ::=
T_CATCH parse_handler
parse_maybe_compound_statement ::=
parse_compound_statement?
parse_labeled_statement ::=
T_IDENTIFIER T_COLON
parse_declaration_statement ::=
parse_block_declaration
parse_expression_statement ::=
T_SEMICOLON
| parse_maybe_expression T_SEMICOLON
parse_final ::=
parse_id
parse_optional_attribute_specifier_seq ::=
parse_attribute_specifier*
parse_attribute_specifier ::=
parse_cxx_attribute_specifier
| parse_gcc_attribute
| parse_alignment_specifier
| parse_asm_specifier
parse_cxx_attribute_specifier ::=
T_LBRACKET T_LBRACKET parse_attribute_using_prefix parse_attribute_list T_RBRACKET T_RBRACKET
parse_gcc_attribute ::=
T___ATTRIBUTE__ T_LPAREN parse_skip_balanced T_RPAREN
parse_alignment_specifier ::=
T_ALIGNAS T_LPAREN parse_type_id T_DOT_DOT_DOT T_RPAREN
parse_asm_specifier ::=
T_ASM T_LPAREN T_STRING_LITERAL T_RPAREN
//
//Tokens
//
T_SEMICOLON ::= ';'
T_EXTERN ::= "extern"
T_TEMPLATE ::= "template"
T_LESS ::= '<'
T_GREATER ::= '>'
T_LBRACE ::= '{'
T_RBRACE ::= '}'
T_LPAREN ::= '('
T_RPAREN ::= ')'
T_LBRACKET ::= '['
T_RBRACKET ::= ']'
T_COLON ::= ':'
T_COMMA ::= ','
T_EQUAL ::= '='
T_EXCLAIM ::= '!'
T_COLON_COLON ::= "::"
T_NAMESPACE ::= "namespace"
T_INLINE ::= "inline"
T_EXPORT ::= "export"
T_ASM ::= "asm"
T_VOLATILE ::= "volatile"
T_GOTO ::= "goto"
T_STATIC_ASSERT ::= "static_assert"
T_USING ::= "using"
T___EXTENSION__ ::= "__extension__"
T_PRIVATE ::= "private"
T___ATTRIBUTE__ ::= "__attribute__"
T_ALIGNAS ::= "alignas"
T_DOT_DOT_DOT ::= "..."
T_VIRTUAL ::= "virtual"
T_CASE ::= "case"
T_DEFAULT ::= "default"
T_WHILE ::= "while"
T_IF ::= "if"
T_ELSE ::= "else"
T_CONSTEVAL ::= "consteval"
T_SWITCH ::= "switch"
T_BREAK ::= "break"
T_RETURN ::= "return"
T_CO_RETURN ::= "co_return"
T_TRY ::= "try"
T_CATCH ::= "catch"
T_FOR ::= "for"
T_CLASS ::= "class"
T_STRUCT ::= "struct"
T_UNION ::= "union"
T_DELETE ::= "delete"
T_DO ::= "do"
T_CONSTEXPR ::= "constexpr"
T_CONTINUE ::= "continue"
T_STRING_LITERAL ::= '"' ('\' . | [^"\\n\\r\\])* '"'
T_IDENTIFIER ::= [A-Za-z_][A-Za-z0-9_]*
I figured out that the content of specs/grammar.txt
comes from https://github.com/cplusplus/draft
so I made a script to scrape/convert it to an EBNF
understood by https://www.bottlecaps.de/rr/ui ; see the result here: https://github.com/cplusplus/draft/issues/6742 .
Thanks for your suggestion but I'm not sure it is a good idea to diverge too much from the official C++ grammar, the C++ language is still evolving. In the past I had a C++ grammar in BNF for pgen, a simple generalized parser generator that I wrote, but it was way too much work to maintain.
https://github.com/robertoraggi/pgen https://github.com/robertoraggi/cplusplus/blob/554e41b4c07b88f7cf51787e37d6b21dcf7481ef/src/cxx/parser.pgen
Not at all ! This one is mechanically extracted from the files at https://github.com/cplusplus/draft/
I've just added the C++11
, C++14
, C++17
, C++20
, C++23-draft
converted to the EBNF
understood by https://www.bottlecaps.de/rr/ui and the navigable railroad diagrams ready to view here https://mingodad.github.io/cpp-grammars/ .
Maybe you'll also be interested in https://github.com/thradams/cake and its playground http://thradams.com/cake/playground.html ; it has an interesting borrow checker extension.
@mingodad Thanks for the suggestion, it looks like an interesting project to follow. Once I’m done with the core language, I plan to implement a few extensions for C++, with reflection and memory safety being at the top of my list.
Today I found this project https://github.com/srcML/srcML and they have a wasm playground here http://www.srcml.org/doc/playground.html that is very interesting and I think that it can be interesting to you too.
If we change "specs/grammar.txt" to an EBNF understood by https://www.bottlecaps.de/rr/ui we can get a nice navigable railroad diagram representation of it that can help document/develop/debug this project's grammar. Follow the instructions shown below: