rraguso / ajaxslt

Automatically exported from code.google.com/p/ajaxslt
Other
0 stars 0 forks source link

Performance improvement to use getElementsByTagName #18

Closed GoogleCodeExporter closed 9 years ago

GoogleCodeExporter commented 9 years ago
As implemented, in an HTML xpath like "//div", xpath.js attempts to
recursively collect all descendants before running the node test.  This
performs slowly because it will first attempt to iterate through every
child node in the document, and only then filter out those that are not divs.

Applying the nodetest earlier would help, but it's even better to run the
universal JS function written in C to do precisely this: getElementsByTagName.

I've attached a patch that will handle this case.  Instead of blindly
recursively collecting descendants, we attempt to extract a tag name from
the node test if possible.  (If the node test is NodeTestName, use the
name; if it's NodeTestAny or NodeTestElementOrAttribute, use "*".)  Then we
skip the node test loop at the end of StepExpr, in order to avoid
needlessly cloning the context.

To make this work, I had to do a bit of fancy footwork in the parser as
well.  As currently implemented, gathering descendants happens in one step,
and then running the node test happens in a following step (on the child
axis).  I modified makeLocationExpr2, makeLocationExpr7 and makePathExpr2
to unify cases like these into a single step that applies the nodetest on
the descendant-or-self axis.

This patch is more ambitious than some of the others, so I'm not checking
it in right away...  I'd like someone to code review it before I apply it.
 On my machine it results in a big performance increase; 70% in some cases.

Original issue reported on code.google.com by DanFabul...@gmail.com on 6 Nov 2007 at 12:56

Attachments:

GoogleCodeExporter commented 9 years ago
Well, the patch is *tricky*, but it's also pretty short.  So, here's the patch
inline, generated with "svn di":

Index: xpath.js
===================================================================
--- xpath.js    (revision 17)
+++ xpath.js    (working copy)
@@ -699,11 +699,17 @@
     copyArray(nodelist, input.childNodes);

   } else if (this.axis == xpathAxis.DESCENDANT_OR_SELF) {
-    nodelist.push(input);
-    xpathCollectDescendants(nodelist, input);
+    if (this.nodetest.evaluate(ctx).booleanValue()) {
+      nodelist.push(input);
+    }
+    var tagName = xpathExtractTagNameFromNodeTest(this.nodetest);
+    xpathCollectDescendants(nodelist, input, tagName);
+    if (tagName) skipNodeTest = true;

   } else if (this.axis == xpathAxis.DESCENDANT) {
-    xpathCollectDescendants(nodelist, input);
+    var tagName = xpathExtractTagNameFromNodeTest(this.nodetest);
+    xpathCollectDescendants(nodelist, input, tagName);
+    if (tagName) skipNodeTest = true;

   } else if (this.axis == xpathAxis.FOLLOWING) {
     for (var n = input; n; n = n.parentNode) {
@@ -1466,6 +1472,11 @@

 function makeLocationExpr2(dslash, rel) {
   rel.absolute = true;
+  if (rel.steps.length > 0 && rel.steps[0].axis == xpathAxis.CHILD) {
+    // DGF perf enhancement: why take two steps when you could take one?
+    rel.steps[0].axis = xpathAxis.DESCENDANT_OR_SELF;
+    return rel;
+  }
   rel.prependStep(makeAbbrevStep(dslash.value));
   return rel;
 }
@@ -1496,6 +1507,12 @@
 }

 function makeLocationExpr7(rel, dslash, step) {
+  if (step.axis == xpathAxis.CHILD) {
+    // DGF perf enhancement: why take two steps when you could take one?
+    step.axis = xpathAxis.DESCENDANT_OR_SELF;
+    rel.appendStep(step);
+    return rel;
+  }
   rel.appendStep(makeAbbrevStep(dslash.value));
   rel.appendStep(step);
   return rel;
@@ -1610,6 +1627,11 @@
 }

 function makePathExpr2(filter, dslash, rel) {
+  if (rel.steps.length > 0 && rel.steps[0].axis == xpathAxis.CHILD) {
+    // DGF perf enhancement: why take two steps when you could take one?
+    rel.steps[0].axis = xpathAxis.DESCENDANT_OR_SELF;
+    return new PathExpr(filter, rel);
+  }
   rel.prependStep(makeAbbrevStep(dslash.value));
   return new PathExpr(filter, rel);
 }
@@ -2128,13 +2150,26 @@

 // Local utility functions that are used by the lexer or parser.

-function xpathCollectDescendants(nodelist, node) {
+function xpathCollectDescendants(nodelist, node, opt_tagName) {
+  if (opt_tagName && node.getElementsByTagName) {
+    copyArray(nodelist, node.getElementsByTagName(opt_tagName));
+    return;
+  }
   for (var n = node.firstChild; n; n = n.nextSibling) {
     nodelist.push(n);
     arguments.callee(nodelist, n);
   }
 }

+// DGF extract a tag name suitable for getElementsByTagName
+function xpathExtractTagNameFromNodeTest(nodetest) {
+  if (nodetest instanceof NodeTestName) {
+    return nodetest.name;
+  } else if (nodetest instanceof NodeTestAny || nodetest instanceof
NodeTestElementOrAttribute) {
+    return "*";
+  }
+}
+
 function xpathCollectDescendantsReverse(nodelist, node) {
   for (var n = node.lastChild; n; n = n.previousSibling) {
     nodelist.push(n);
Index: util.js
===================================================================
--- util.js (revision 17)
+++ util.js (working copy)
@@ -216,6 +216,7 @@

 // Shallow-copies an array.
 function copyArray(dst, src) {
+  if (!src) return;
   for (var i = 0; i < src.length; ++i) {
     dst.push(src[i]);
   }

Original comment by DanFabul...@gmail.com on 6 Nov 2007 at 12:57

GoogleCodeExporter commented 9 years ago
Oops, running the unit tests turned up a bug in dom.js's implementation of
getElementsByTagName.  If you called x.getElementsByTagName, the implementation 
was
incorrectly willing to return x itself if it had the right name.  Also, it 
didn't
support "*" which the tests now use.

Index: dom.js
===================================================================
--- dom.js      (revision 17)
+++ dom.js      (working copy)
@@ -491,11 +491,20 @@

 XNode.prototype.getElementsByTagName = function(name) {
   var ret = [];
-  domTraverseElements(this, function(node) {
-    if (node.nodeName == name) {
+  var self = this;
+  if ("*" == name) {
+    domTraverseElements(this, function(node) {
+      if (self == node) return;
       ret.push(node);
-    }
-  }, null);
+    }, null);
+  } else {
+    domTraverseElements(this, function(node) {
+      if (self == node) return;
+      if (node.nodeName == name) {
+        ret.push(node);
+      }
+    }, null);
+  }
   return ret;
 }

Original comment by DanFabul...@gmail.com on 6 Nov 2007 at 2:00

GoogleCodeExporter commented 9 years ago
Fixed in revision 21

Original comment by DanFabul...@gmail.com on 7 Nov 2007 at 2:48