tree-sitter / node-tree-sitter

Node.js bindings for tree-sitter
https://www.npmjs.com/package/tree-sitter
MIT License
649 stars 114 forks source link

Unsure how to recurse tree with `walk` #94

Closed bramses closed 3 years ago

bramses commented 3 years ago

Hi,

I'm trying to walk the entire tree and "visit" each node but I can't figure it out using a traditional DFS algortihm. Neither of the below work:

Case 1 - Infinite Loop:

const nodeWalk = (node) => {
  if (!node) return
  while (node) {
    console.log(node.nodeText)
    if (node.gotoFirstChild()) {
      nodeWalk(node)
    }
    node.gotoNextSibling()
  }
}

Case 2 - Incomplete Tree:

const nodeWalk = (node) => {
  while (node.gotoFirstChild()) {
    console.log(node.nodeText)

    node.gotoNextSibling()
  }
}
bramses commented 3 years ago

Note: I can DFS the tree normally, but it's just Tree Cursor that doesn't seem to work. My guess is its because the boolean of gotoNextChild() and gotoNextSibling() also move the cursor.

Running DFS without TreeCursor:

const nodeWalk = (node) => {
  if (!node || node === undefined) return
  while (node) {
    console.log(node.text)

    if (node.children && node.children.length > 0) {
      nodeWalk(node.children[0])
    }

    node = node.nextSibling
  }
}
talbergs commented 3 years ago

Here is one usage of TreeCursor in php https://github.com/talbergs/php-tree-sitter/blob/master/examples/04-walking-tree/example.php, maybe this helps / inspires.

milahu commented 3 years ago

this should be in examples ...

// https://github.com/tree-sitter/node-tree-sitter/blob/master/tree-sitter.d.ts
const TreeSitter = require('tree-sitter');
const TreeSitterC = require('tree-sitter-c');
const fs = require('fs');

process.argv.shift(); // argv[0] is node interpreter ...
const inputPath = process.argv[1];
const basename = path => path.split("/").pop();
if (inputPath == undefined) {
  console.log(`usage: node ${basename(process.argv[0])} path/to/inputfile.c`)
  process.exit(1);
}
if (!fs.existsSync(inputPath)) {
  console.log(`no such file: ${inputPath}`)
  process.exit(1);
}

const sourceCode = fs.readFileSync(inputPath, 'utf8');

const parser = new TreeSitter();
parser.setLanguage(TreeSitterC);
const tree = parser.parse(sourceCode);

const textStartLength = 50;

const walk_cursor = (cursor, level = 0) => {
  // top-down = handle node -> go to next node
  // depth-first = gotoFirstChild -> gotoNextSibling
  while (true) {

    // handle this node
    const textEscaped = cursor.nodeText.replace(/\n/g, '\\n');
    const typeEscaped = cursor.nodeType.replace('\n', '\\n');
    const textStart = (textEscaped.length < textStartLength)
      ? textEscaped : (textEscaped.slice(0, textStartLength) + ' ...');
    const textLocation = `${cursor.startIndex} ${cursor.endIndex}`; // offset in utf8 chars (or offset in bytes? which is it?)
    //const textLocation = `${cursor.startPosition.row}:${cursor.startPosition.column} ${cursor.endPosition.row}:${cursor.endPosition.column}`;
    const levelString = Array.from({ length: (level + 1) }).map(_ => '+').join('');
    console.log(`${levelString} ${textLocation} ${typeEscaped}: ${textStart}`);

    // go to next node
    if (cursor.gotoFirstChild()) {
      walk_cursor(cursor, level + 1);
      cursor.gotoParent();
    }
    if (!cursor.gotoNextSibling()) break;
  }
}

var cursor = tree.walk();
walk_cursor(cursor);
sample input + output ```c // say hello #include int main() { printf("hello\n"); return 0; } ``` output ``` + 0 83 translation_unit: // say hello\n\n#include \n\nint main() { ... ++ 0 12 comment: // say hello ++ 14 34 preproc_include: #include \n\n +++ 14 22 #include: #include +++ 23 32 system_lib_string: +++ 32 34 \n: \n\n ++ 34 81 function_definition: int main() {\n printf("hello\n");\n return 0;\n} ... +++ 34 37 primitive_type: int +++ 38 44 function_declarator: main() ++++ 38 42 identifier: main ++++ 42 44 parameter_list: () +++++ 42 43 (: ( +++++ 43 44 ): ) +++ 45 81 compound_statement: {\n printf("hello\n");\n return 0;\n} ++++ 45 46 {: { ++++ 49 67 expression_statement: printf("hello\n"); +++++ 49 66 call_expression: printf("hello\n") ++++++ 49 55 identifier: printf ++++++ 55 66 argument_list: ("hello\n") +++++++ 55 56 (: ( +++++++ 56 65 string_literal: "hello\n" ++++++++ 56 57 ": " ++++++++ 62 64 escape_sequence: \n ++++++++ 64 65 ": " +++++++ 65 66 ): ) +++++ 66 67 ;: ; ++++ 70 79 return_statement: return 0; +++++ 70 76 return: return +++++ 77 78 number_literal: 0 +++++ 78 79 ;: ; ++++ 80 81 }: } ```
bramses commented 3 years ago

Oh! I wouldn't have thought to use while (true), thank you for the code block @milahu