tree-sitter / tree-sitter-c

C grammar for tree-sitter
MIT License
225 stars 100 forks source link

bug: Parser does not recognize macro attribute in function declaration #183

Open edogrigqv2 opened 7 months ago

edogrigqv2 commented 7 months ago

Did you check existing issues?

Tree-Sitter CLI Version, if relevant (output of tree-sitter --version)

9f398ca955d3329110d31ed523dac5275074948f

Describe the bug

The parser does not recognize macro attributes in function declarations.

Steps To Reproduce/Bad Parse Tree

extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...);
extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list);
extern __printf(3, 4)
int snprintf(char *buf, size_t size, const char *fmt, ...);

This outputs

(translation_unit [0, 0] - [4, 0]
  (declaration [0, 0] - [1, 69]
    (storage_class_specifier [0, 0] - [0, 6])
    type: (macro_type_specifier [0, 7] - [1, 21]
      name: (identifier [0, 7] - [0, 15])
      (ERROR [0, 16] - [1, 6]
        (number_literal [0, 16] - [0, 17])
        (number_literal [0, 19] - [0, 20])
        (primitive_type [0, 22] - [0, 25])
        (type_descriptor [0, 26] - [0, 67]
          type: (type_identifier [0, 26] - [0, 33])
          declarator: (abstract_function_declarator [0, 33] - [0, 67]
            parameters: (parameter_list [0, 33] - [0, 67]
              (parameter_declaration [0, 34] - [0, 43]
                type: (primitive_type [0, 34] - [0, 38])
                declarator: (pointer_declarator [0, 39] - [0, 43]
                  declarator: (identifier [0, 40] - [0, 43])))
              (parameter_declaration [0, 45] - [0, 61]
                (type_qualifier [0, 45] - [0, 50])
                type: (primitive_type [0, 51] - [0, 55])
                declarator: (pointer_declarator [0, 56] - [0, 61]
                  declarator: (identifier [0, 58] - [0, 61])))
              (variadic_parameter [0, 63] - [0, 66])))))
      type: (type_descriptor [1, 7] - [1, 15]
        type: (type_identifier [1, 7] - [1, 15]))
      (ERROR [1, 15] - [1, 20]
        (number_literal [1, 16] - [1, 17])
        (number_literal [1, 19] - [1, 20])))
    (ERROR [1, 22] - [1, 25]
      (identifier [1, 22] - [1, 25]))
    declarator: (function_declarator [1, 26] - [1, 68]
      declarator: (identifier [1, 26] - [1, 34])
      parameters: (parameter_list [1, 34] - [1, 68]
        (parameter_declaration [1, 35] - [1, 44]
          type: (primitive_type [1, 35] - [1, 39])
          declarator: (pointer_declarator [1, 40] - [1, 44]
            declarator: (identifier [1, 41] - [1, 44])))
        (parameter_declaration [1, 46] - [1, 58]
          (type_qualifier [1, 46] - [1, 51])
          type: (primitive_type [1, 52] - [1, 56])
          declarator: (abstract_pointer_declarator [1, 57] - [1, 58]))
        (parameter_declaration [1, 60] - [1, 67]
          type: (type_identifier [1, 60] - [1, 67])))))
  (ERROR [2, 0] - [3, 59]
    (storage_class_specifier [2, 0] - [2, 6])
    (identifier [2, 7] - [2, 15])
    (ERROR [2, 16] - [3, 3]
      (number_literal [2, 16] - [2, 17])
      (number_literal [2, 19] - [2, 20])
      (primitive_type [3, 0] - [3, 3]))
    (type_descriptor [3, 4] - [3, 58]
      type: (type_identifier [3, 4] - [3, 12])
      declarator: (abstract_function_declarator [3, 12] - [3, 58]
        parameters: (parameter_list [3, 12] - [3, 58]
          (parameter_declaration [3, 13] - [3, 22]
            type: (primitive_type [3, 13] - [3, 17])
            declarator: (pointer_declarator [3, 18] - [3, 22]
              declarator: (identifier [3, 19] - [3, 22])))
          (parameter_declaration [3, 24] - [3, 35]
            type: (primitive_type [3, 24] - [3, 30])
            declarator: (identifier [3, 31] - [3, 35]))
          (parameter_declaration [3, 37] - [3, 52]
            (type_qualifier [3, 37] - [3, 42])
            type: (primitive_type [3, 43] - [3, 47])
            declarator: (pointer_declarator [3, 48] - [3, 52]
              declarator: (identifier [3, 49] - [3, 52])))
          (variadic_parameter [3, 54] - [3, 57]))))))

Expected Behavior/Parse Tree

I believe it is possible for the parser to recognize the macro calls here and to recognize that they are part of the function declaration, without producing ERROR nodes.

Repro

No response

mingodad commented 7 months ago

This is another example from preprocessed sqlite3:

extern int _IO_vfprintf (_IO_FILE *__restrict, const char *__restrict,
    __gnuc_va_list);

If we add a declarator to __gnuc_va_list, then it parses fine:

extern int _IO_vfprintf (_IO_FILE *__restrict, const char *__restrict,
    __gnuc_va_list va);
mingodad commented 7 months ago

And here another related error:

*__builtin_va_arg(ap, int*) = val;

It seems that va_list and va_arg need special treatment by the parser/grammar.

mingodad commented 7 months ago

There are several others like the above:

pArgList = __builtin_va_arg(ap, PrintfArguments*);
v = __builtin_va_arg(ap, long int);
longvalue = __builtin_va_arg(ap, unsigned long int);
*(__builtin_va_arg(ap, int*)) = pAccum->nChar;
...
mingodad commented 7 months ago

With these changes I can get this project to parse va_arg expressions:

---------------------------------- grammar.js ----------------------------------
index 2a9fe20..2ff12e2 100644
@@ -904,6 +904,7 @@ module.exports = grammar({
       $.sizeof_expression,
       $.alignof_expression,
       $.offsetof_expression,
+      $.va_arg_expression,
       $.generic_expression,
       $.subscript_expression,
       $.call_expression,
@@ -1045,6 +1046,11 @@ module.exports = grammar({
       seq('(', field('type', $.type_descriptor), ',', field('member', $._field_identifier), ')'),
     )),

+    va_arg_expression: $ => prec(PREC.CALL, seq(
+      choice('va_arg', '__builtin_va_arg'),
+      seq('(', field('member', $._field_identifier), ',', field('type', $.type_descriptor), ')'),
+    )),
+
     generic_expression: $ => prec(PREC.CALL, seq(
       '_Generic',
       '(',
sqlite3Config.mutex = *__builtin_va_arg(ap, sqlite3_mutex_methods*);
tree-sitter parse cparser-nb/test-va.c
(translation_unit [0, 0] - [1, 0]
  (expression_statement [0, 0] - [0, 68]
    (assignment_expression [0, 0] - [0, 67]
      left: (field_expression [0, 0] - [0, 19]
        argument: (identifier [0, 0] - [0, 13])
        field: (field_identifier [0, 14] - [0, 19]))
      right: (pointer_expression [0, 22] - [0, 67]
        argument: (va_arg_expression [0, 23] - [0, 67]
          member: (field_identifier [0, 40] - [0, 42])
          type: (type_descriptor [0, 44] - [0, 66]
            type: (type_identifier [0, 44] - [0, 65])
            declarator: (abstract_pointer_declarator [0, 65] - [0, 66])))))))