rust-analyzer / rowan

Apache License 2.0
689 stars 57 forks source link

NodeCache hashes twice #80

Closed lnicola closed 3 years ago

lnicola commented 3 years ago

NodeCache does get, then insert if it didn't find the node. We could avoid that by doing something like

diff --git i/src/green/builder.rs w/src/green/builder.rs
index 35ecbc3..82ce808 100644
--- i/src/green/builder.rs
+++ w/src/green/builder.rs
@@ -1,4 +1,6 @@
-use rustc_hash::FxHashSet;
+use std::collections::hash_map::Entry;
+
+use rustc_hash::FxHashMap;

 use crate::{
     green::{GreenElement, GreenNode, GreenToken, SyntaxKind},
@@ -7,8 +9,8 @@ use crate::{

 #[derive(Default, Debug)]
 pub struct NodeCache {
-    nodes: FxHashSet<GreenNode>,
-    tokens: FxHashSet<GreenToken>,
+    nodes: FxHashMap<GreenNode, ()>,
+    tokens: FxHashMap<GreenToken, ()>,
 }

 impl NodeCache {
@@ -17,7 +19,7 @@ impl NodeCache {
         I: IntoIterator<Item = GreenElement>,
         I::IntoIter: ExactSizeIterator,
     {
-        let mut node = GreenNode::new(kind, children);
+        let node = GreenNode::new(kind, children);
         // Green nodes are fully immutable, so it's ok to deduplicate them.
         // This is the same optimization that Roslyn does
         // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
@@ -27,21 +29,29 @@ impl NodeCache {
         // 17% of the memory for green nodes!
         // Future work: make hashing faster by avoiding rehashing of subtrees.
         if node.children().len() <= 3 {
-            match self.nodes.get(&node) {
-                Some(existing) => node = existing.clone(),
-                None => assert!(self.nodes.insert(node.clone())),
+            match self.nodes.entry(node) {
+                Entry::Occupied(entry) => entry.key().clone(),
+                Entry::Vacant(entry) => {
+                    let node = entry.key().clone();
+                    entry.insert(());
+                    node
+                }
             }
+        } else {
+            node
         }
-        node
     }

     fn token(&mut self, kind: SyntaxKind, text: SmolStr) -> GreenToken {
-        let mut token = GreenToken::new(kind, text);
-        match self.tokens.get(&token) {
-            Some(existing) => token = existing.clone(),
-            None => assert!(self.tokens.insert(token.clone())),
+        let token = GreenToken::new(kind, text);
+        match self.tokens.entry(token) {
+            Entry::Occupied(existing) => existing.key().clone(),
+            Entry::Vacant(entry) => {
+                let token = entry.key().clone();
+                entry.insert(());
+                token
+            }
         }
-        token
     }
 }

But I can't confirm that this is actually faster.

lnicola commented 3 years ago

Presumably fixed in #86.