tree-sitter / node-tree-sitter

Node.js bindings for tree-sitter
https://www.npmjs.com/package/tree-sitter
MIT License
625 stars 107 forks source link

Tree sitter Node bindings segfault when accessing certain properties #157

Closed connor4312 closed 1 year ago

connor4312 commented 1 year ago

First, clone tree-sitter-typescript

git clone git@github.com:tree-sitter/tree-sitter-typescript.git --depth=1
cd tree-sitter-typescript
npm install && npm install tree-sitter segfault-handler

then run the following code

const Parser = require('tree-sitter');
const TypeScript = require('./typescript');

const parser = new Parser();
parser.setLanguage(TypeScript);

const sourceCode = 'let x = 1; console.log(x);';
const tree = parser.parse(sourceCode);

const SegfaultHandler = require('segfault-handler');
SegfaultHandler.registerHandler('crash.log');

console.log('get type', tree.rootNode.__proto__.type);
console.log('the program should not have crashed');
  1. Observe a segfault
PID 32120 received SIGSEGV for address: 0xef835346
SymInit: Symbol-SearchPath: '.;C:\Users\conno\Github\tree-sitter-typescript;C:\Program Files\nodejs;C:\WINDOWS;C:\WINDOWS\system32;SRV*C:\websymbols*http://msdl.microsoft.com/download/symbols;', symOptions: 530, UserName: 'conno'
OS-Version: 10.0.22621 () 0x100-0x1
C:\Users\conno\Github\tree-sitter-typescript\node_modules\segfault-handler\src\StackWalker.cpp (941): StackWalker::ShowCallstack
C:\Users\conno\Github\tree-sitter-typescript\node_modules\segfault-handler\src\segfault-handler.cpp (242): segfault_handler
00007FFA3B907A2A (ntdll): (filename not available): RtlGetLengthWithoutLastFullDosOrNtPathElement
00007FFA3B8AE232 (ntdll): (filename not available): RtlFindCharInUnicodeString
00007FFA3B932CEE (ntdll): (filename not available): KiUserExceptionDispatcher
C:\Users\conno\Github\tree-sitter-typescript\node_modules\tree-sitter\src\node.cc (121): node_tree_sitter::node_methods::UnmarshalNode
C:\Users\conno\Github\tree-sitter-typescript\node_modules\tree-sitter\src\node.cc (271): node_tree_sitter::node_methods::Type
C:\Users\conno\Github\tree-sitter-typescript\node_modules\nan\nan_callbacks_12_inl.h (177): Nan::imp::FunctionCallbackWrapper
00007FF71F54E646 (node): (filename not available): v8::internal::Builtins::code_handle
00007FF71F54E239 (node): (filename not available): v8::internal::Builtins::code_handle
00007FF71F54E4FC (node): (filename not available): v8::internal::Builtins::code_handle
00007FF71F54E360 (node): (filename not available): v8::internal::Builtins::code_handle
00007FF71F621A41 (node): (filename not available): v8::internal::SetupIsolateDelegate::SetupHeap
00007FF71F5B3F0E (node): (filename not available): v8::internal::SetupIsolateDelegate::SetupHeap
00007FF71F5BCD0F (node): (filename not available): v8::internal::SetupIsolateDelegate::SetupHeap
00007FF71F6A7BF3 (node): (filename not available): v8::internal::SetupIsolateDelegate::SetupHeap

First reported in https://github.com/microsoft/vscode-js-debug/issues/1537?notification_referrer_id=NT_kwDOACIKybI1NDY2NzAyNjkxOjIyMzA5ODU#issuecomment-1504099813

amaanq commented 1 year ago

Seems like an issue for node-tree-sitter

verhovsky commented 1 year ago

109 fixes this, but I don't like the error message and I'm not sure it's the best way to do it.

Your code accesses these properties on the prototype which doesn't make sense, but it of course shouldn't segfault.

The issue is that it calls this code:

https://github.com/tree-sitter/node-tree-sitter/blob/0bee0f97aaa3d51c2373567a896e3eda91863d15/index.js#L61-L64

but on a prototype, this is an empty object:

/tmp/tree-sitter-typescript$ node
Welcome to Node.js v20.4.0.
Type ".help" for more information.
> class SyntaxNode {
...   constructor(tree) {
...     this.tree = tree;
...   }
... 
...   get type() {
...     console.log(this)
...   }
... }
undefined
> 
> const sn = new SyntaxNode({'a': 22222})
undefined
> sn.type
SyntaxNode { tree: { a: 22222 } }
undefined
> Object.getPrototypeOf(sn).type
{}
undefined

so it calls marshalNode({}):

https://github.com/tree-sitter/node-tree-sitter/blob/0bee0f97aaa3d51c2373567a896e3eda91863d15/index.js#L630-L635

and tries to set an ArrayBuffer of 6 integers to undefined (whatever that becomes) 6 (NODE_FIELD_COUNT) times

https://github.com/tree-sitter/node-tree-sitter/blob/0bee0f97aaa3d51c2373567a896e3eda91863d15/src/node.cc#L39

connor4312 commented 1 year ago

I think throwing some kind of error in this case is fine, but a segfault is not. In this case of the issue, the debugger will try to access the getter to see if there's a type that it should show. Maybe it shouldn't, that would be a good patch to have. But accesses are already wrapped in a try/catch and throwing an error is quite reasonable in this case.

verhovsky commented 1 year ago

Closed in #109.

Thanks for reporting this, I also ran into this issue constantly while working with tree-sitter in VSCode.

verhovsky commented 1 year ago

I checked that no other properties of the prototypes can segfault with this:

diff --git a/test/node_test.js b/test/node_test.js
index b81c47b..b2e6490 100644
--- a/test/node_test.js
+++ b/test/node_test.js
@@ -412,12 +412,92 @@ describe("Node", () => {
   });

   describe("VSCode", () => {
-    it("shouldn't segfault when accessing properties on the prototype", () => {
+    it("shouldn't segfault when accessing properties/methods on prototype", () => {
+      const parserPrototype = Object.getPrototypeOf(parser);
+      const parserMethods = [
+        // C++ methods
+        "getLogger",
+        "setLogger",
+        "printDotGraphs",
+        // Overwritten methods
+        "setLanguage",
+        "parse",
+        // Node methods
+        "getLanguage",
+      ]
+      // for (const property of parserProperties) {
+      //   assert.throws(() => { parserPrototype[property]; }, TypeError)
+      // }
+      for (const method of parserMethods) {
+        try {
+          parserPrototype[method];
+        } catch {}
+      }
+
+
       const tree = parser.parse('2 + 2');
+      const treePrototype = Object.getPrototypeOf(tree);
+      const treeProperties = [
+        "rootNode"
+      ]
+      const treeMethods = [
+        // C++
+        "edit",
+        // "rootNode",
+        "printDotGraph",
+        "getChangedRanges",
+        "getEditedRange",
+        "_cacheNode",
+        "_cacheNodes",
+        // Redefined
+        "edit",
+        // Node
+        "walk",
+      ]
+      for (const property of treeProperties) {
+        assert.throws(() => { treePrototype[property]; }, TypeError)
+      }
+      for (const method of treeMethods) {
+        assert.throws(treePrototype[method], TypeError)
+      }
+
+      const treeCursorPrototype = Object.getPrototypeOf(tree.walk());
+      const treeCursorProperties = [
+        // C++
+        "startIndex",
+        "endIndex",
+        "nodeType",
+        "nodeIsNamed",
+        "nodeIsMissing",
+        "currentFieldName",
+
+        // Redefined
+        "currentNode",
+        "startPosition",
+        "endPosition",
+      ]
+      const treeCursorMethods = [
+        // C++
+        "gotoParent",
+        "gotoFirstChild",
+        "gotoFirstChildForIndex",
+        "gotoNextSibling",
+        // Redefined
+        "reset",
+      ]
+      for (const property of treeCursorProperties) {
+        try {
+          treeCursorPrototype[property]
+        } catch {}
+      }
+      for (const method of treeCursorMethods) {
+        assert.throws(treeCursorPrototype[method], TypeError)
+      }
+
       const nodePrototype = Object.getPrototypeOf(tree.rootNode);
       // const nodePrototype = tree.rootNode.__proto__;

-      const properties = [
+      const nodeProperties = [
         "type",
         "typeId",
         "isNamed",
@@ -440,11 +520,7 @@ describe("Node", () => {
         "previousSibling",
         "previousNamedSibling",
       ];
-      for (const property of properties) {
-        assert.throws(() => { nodePrototype[property]; }, TypeError)
-      }
-
-      const methods = [
+      const nodeMethods = [
         "hasChanges",
         "hasError",
         "isMissing",
@@ -462,7 +538,10 @@ describe("Node", () => {
         "descendantForPosition",
         "closest",
       ];
-      for (const method of methods) {
+      for (const property of nodeProperties) {
+        assert.throws(() => { nodePrototype[property]; }, TypeError)
+      }
+      for (const method of nodeMethods) {
         assert.throws(nodePrototype[method], TypeError)
       }
     });

But the properties that start out as native code and then are replaced with JS code that calls the original native code might segfault if they are made accessible as they might need to be for #53.