tree-sitter / tree-sitter-javascript

Javascript grammar for tree-sitter
MIT License
314 stars 108 forks source link

Fail to recognize regex /<!--/ #295

Closed mingodad closed 3 months ago

mingodad commented 3 months ago

The following piece of code is valid but it is parsed incorrectly:

var a = /<!--/;

I suspect that this problem started with this commit https://github.com/tree-sitter/tree-sitter-javascript/commit/4f279cc39fc7de694258b9e63e4f5c47a872189a . The playground parses it correctly (it is probably using an older version), but if we build from this repo right now we get an error.

Here is the code that I'm using to test:

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

/* 
 * File:   main.c
 * Author: mingo
 *
 * Created on February 6, 2024, 10:47 AM
 */

#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "tree_sitter/api.h"
#include "tree_sitter/parser.h"

// Declare the `tree_sitter_javascript` function, which is
// implemented by the `tree-sitter-javascript` library.
// NOTE(review): generated grammars may declare this as returning
// `const TSLanguage *` - confirm against the installed tree-sitter headers.
TSLanguage *tree_sitter_javascript();

/* Contents of a file loaded into memory by readcontent(). */
typedef struct {
    size_t size;   /* number of bytes in str, not counting the trailing '\0' */
    char *str;     /* malloc'ed, NUL-terminated buffer; NULL when loading failed */
} StrData;

/*
 * Read the whole file `filename` into a newly allocated buffer.
 *
 * On success, `str` points to a NUL-terminated copy of the file and `size`
 * is its length in bytes; the caller owns the buffer and must free() it.
 * On any failure (open, seek, size query, allocation or short read) the
 * result is { .size = 0, .str = NULL }.
 */
static StrData readcontent(const char *filename)
{
    StrData data = { 0, NULL };

    /*
     * Binary mode: the length is measured with ftell() and then compared with
     * the byte count returned by fread(). In text mode, platforms that
     * translate line endings would return fewer bytes and the read would be
     * reported as a failure.
     */
    FILE *fp = fopen(filename, "rb");
    if (!fp) {
        return data;
    }

    /* ftell() returns -1 on error; never let that reach malloc()/fread(). */
    long length = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        length = ftell(fp);
    }

    if (length >= 0 && fseek(fp, 0, SEEK_SET) == 0) {
        size_t wanted = (size_t)length;
        char *buf = malloc(wanted + 1);   /* +1 for the terminating '\0' */
        if (buf) {
            if (fread(buf, 1, wanted, fp) == wanted) {
                buf[wanted] = '\0';
                data.str = buf;
                data.size = wanted;
            } else {
                /* Short read: report failure instead of partial content. */
                free(buf);
            }
        }
    }

    fclose(fp);
    return data;
}

/*
 * Walk the syntax tree rooted at `node` and print one
 * "<fname>:<line>:<column>: Syntax error" line to stderr for every ERROR node.
 *
 * The node passed in is checked as well as its descendants, so an ERROR node
 * at the root of the tree is also reported.
 *
 * Parameters:
 *   fname - name shown as the prefix of each diagnostic.
 *   node  - root of the (sub)tree to inspect.
 *
 * Returns true when at least one ERROR node was found in the subtree,
 * false otherwise.
 */
bool traverse_tree(const char *fname, TSNode node)
{
    bool found_error = false;

    if(ts_node_is_error(node))
    {
        TSPoint lc = ts_node_start_point(node);
        /* row/column are unsigned and zero-based; print them one-based. */
        fprintf(stderr, "%s:%u:%u: Syntax error\n", fname,
                (unsigned)(lc.row+1), (unsigned)(lc.column+1));
        found_error = true;
    }

    uint32_t child_count = ts_node_child_count(node);
    for(uint32_t i=0; i < child_count; ++i)
    {
        /* Recurse unconditionally: ERROR nodes may be nested inside others. */
        if(traverse_tree(fname, ts_node_child(node, i)))
        {
            found_error = true;
        }
    }
    return found_error;
}

/*
 * Parse the JavaScript file named on the command line with tree-sitter and
 * report every syntax error node through traverse_tree().
 *
 * Exit status: EXIT_SUCCESS once the file has been parsed and traversed
 * (whether or not syntax errors were reported); 1 for a usage error, an
 * unreadable or oversized input file, a rejected language, or a failed parse.
 */
int main(int argc, char* argv[]) {
    if(argc < 2)
    {
        printf("usage: %s input_file_name\n", argv[0]);
        return 1;
    }
    const char *input_pathname = argv[1];
    int status = 1;
    TSParser *parser = NULL;
    TSTree *tree = NULL;

    // Load the source code to parse into memory.
    StrData data = readcontent(input_pathname);
    if (!data.str)
    {
        fprintf(stderr, "Error: failed to open %s\n", input_pathname);
        return 1;
    }

    // ts_parser_parse_string() takes a 32-bit length; refuse to truncate.
    if (data.size > UINT32_MAX)
    {
        fprintf(stderr, "Error: %s is too large to parse\n", input_pathname);
        goto cleanup;
    }

    // Create a parser.
    parser = ts_parser_new();

    // Set the parser's language (Javascript in this case).
    if (!ts_parser_set_language(parser, tree_sitter_javascript()))
    {
        fprintf(stderr, "Error: incompatible tree-sitter language version\n");
        goto cleanup;
    }

    // Build a syntax tree based on source code stored in a string.
    tree = ts_parser_parse_string(
        parser,
        NULL,
        data.str,
        (uint32_t)data.size
    );

    if(!tree)
    {
        fprintf(stderr, "Error: parsing %s\n", input_pathname);
        goto cleanup;
    }

    // Get the root node of the syntax tree and report its error nodes.
    // (For debugging, ts_node_string(root_node) yields the tree as an
    // S-expression; the returned string must be released with free().)
    TSNode root_node = ts_tree_root_node(tree);
    traverse_tree(input_pathname, root_node);
    status = EXIT_SUCCESS;

cleanup:
    // Free all of the heap-allocated memory on every exit path.
    if (tree)
    {
        ts_tree_delete(tree);
    }
    if (parser)
    {
        ts_parser_delete(parser);
    }
    free(data.str);
    return status;
}