tree-sitter / tree-sitter-verilog

SystemVerilog grammar for tree-sitter
MIT License
91 stars 35 forks source link

parse error simple valid verilog syntax #54

Open renau opened 3 years ago

renau commented 3 years ago

There seems to be a problem in the concat grammar. Given this simple verilog tests:

module test(input [4:0]b, input [4:0]c, output [8:0] a);

  assign        a = { b[1:0], c[2:1] };

endmodule

WHen I run:

./node_modules/tree-sitter-cli/tree-sitter parse test.v

I get this error at the end:

test.v  0 ms    (MISSING "++" [3, 38] - [3, 38])

but the verilog is fine.

If I change to this, it parses:

module test(input [4:0]b, input [4:0]c, output [8:0] a);

  assign        a = { b, c[2:1] };

endmodule
drom commented 2 years ago

PR is welcome!

Wren6991 commented 1 year ago

I also hit this. Reducing the testcase further:

module test(input [1:0] b, output [1:0] a);

  assign        a = { b[1:0] };

endmodule

This is the relevant part of the parse tree:

(expression [2, 20] - [2, 30]
  (inc_or_dec_expression [2, 20] - [2, 30]
    (variable_lvalue [2, 20] - [2, 30]
      (variable_lvalue [2, 22] - [2, 28]
        (simple_identifier [2, 22] - [2, 23])
        (select1 [2, 23] - [2, 28]
          (constant_range [2, 24] - [2, 27]
            (constant_expression [2, 24] - [2, 25]
              (constant_primary [2, 24] - [2, 25]
                (primary_literal [2, 24] - [2, 25]
                  (integral_number [2, 24] - [2, 25]
                    (decimal_number [2, 24] - [2, 25]
                      (unsigned_number [2, 24] - [2, 25]))))))
            (constant_expression [2, 26] - [2, 27]
              (constant_primary [2, 26] - [2, 27]
                (primary_literal [2, 26] - [2, 27]
                  (integral_number [2, 26] - [2, 27]
                    (decimal_number [2, 26] - [2, 27]
                      (unsigned_number [2, 26] - [2, 27]))))))))))
    (inc_or_dec_operator [2, 30] - [2, 30])))))))))

I think the correct parse would be: expression -> primary -> concatenation -> expression -> primary -> simple_identifier, select1

Instead it has gone down the garden path with expression -> inc_or_dec_expression. Seems like a conflict between the primary and inc_or_dec_expression options under expression.

Wren6991 commented 1 year ago

Here's an actual testcase:

diff --git a/corpus/assign.txt b/corpus/assign.txt
index 81e897a..cc3a9ab 100644
--- a/corpus/assign.txt
+++ b/corpus/assign.txt
@@ -222,6 +222,34 @@ endmodule
   ))
 ))

+
+============================================
+assign - concatenation RHS with bit_select
+============================================
+
+module mod ();
+  assign a = {b[0]};
+endmodule
+
+----
+
+
+(source_file (module_declaration
+  (module_header (module_keyword) (simple_identifier))
+  (module_nonansi_header (list_of_ports))
+  (module_or_generate_item (continuous_assign
+    (list_of_net_assignments (net_assignment
+      (net_lvalue (simple_identifier))
+      (expression (primary (concatenation
+        (expression (primary
+          (simple_identifier)
+          (select1 (bit_select1 (expression (primary (primary_literal (integral_number (decimal_number (unsigned_number))))))))
+        ))
+      )))
+    ))
+  ))
+))
+
 ============================================
 assign - bit_select
 ============================================

The failing RHS expression output from tree-sitter test:

(inc_or_dec_expression
  (variable_lvalue
    (variable_lvalue
      (simple_identifier)
      (select1
        (bit_select1
          (expression
            (primary
              (primary_literal
                (integral_number
                  (decimal_number
                    (unsigned_number))))))))))
  (inc_or_dec_operator
    (MISSING "++"))))))))))

So the ambiguity is that simple concatenation lists match both variable_lvalue and primary. For some reason it is pressing ahead with the expression -> inc_or_dec_expression -> variable_lvalue option even after seeing the missing inc_or_dec_operator, instead of backtracking to expression -> primary. I wasn't able to debug why, there are a lot of conflict rules that have been added in bulk.

I only need Verilog parsing, not SystemVerilog, so this patch works for me (and all the tests in corpus/ pass, plus my new test):

diff --git a/grammar.js b/grammar.js
index d1980f9..3d7d67b 100644
--- a/grammar.js
+++ b/grammar.js
@@ -4123,7 +4123,6 @@ const rules = {
     prec.left(PREC.UNARY, seq(
       $.unary_operator, repeat($.attribute_instance), $.primary
     )),
-    prec.left(PREC.UNARY, $.inc_or_dec_expression),
     prec.left(PREC.PARENT, seq('(', $.operator_assignment, ')')),

     exprOp($, PREC.ADD, choice('+', '-')),

Not suitable for a PR, but maybe useful to someone else who stumbles across this issue.