gildas-lormeau / SingleFile

Web Extension for saving a faithful copy of a complete web page in a single HTML file
GNU Affero General Public License v3.0
14.3k stars 945 forks source link

Improper handling of range offsets by `markSelectedContent` #1417

Closed Foair closed 3 months ago

Foair commented 3 months ago

Describe the bug Using Range API to make a selection, the 'Save selection' menu results in a wider range than expected.

To Reproduce I‘ve made an example to illustrate this problem:

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Document</title>
  <style>p[data-single-file-selected-content] { color: coral; font-weight: bold; }</style>
  <script>
    const SINGLE_FILE_PREFIX = "single-file-";
    const SELECTED_CONTENT_ATTRIBUTE_NAME = "data-" + SINGLE_FILE_PREFIX + "selected-content";
  </script>
  <script>
// Identical to https://github.com/gildas-lormeau/SingleFile/blob/62a4c9f535a528a8ba026bc8a3b7d8d11c067c43/src/ui/content/content-ui.js#L198-L243
function markSelectedContent() {
    const selection = getSelection();
    let selectionFound;
    for (let indexRange = 0; indexRange < selection.rangeCount; indexRange++) {
        let range = selection.getRangeAt(indexRange);
        if (range && range.commonAncestorContainer) {
            const treeWalker = document.createTreeWalker(range.commonAncestorContainer);
            let rangeSelectionFound = false;
            let finished = false;
            while (!finished) {
                if (rangeSelectionFound || treeWalker.currentNode == range.startContainer || treeWalker.currentNode == range.endContainer) {
                    rangeSelectionFound = true;
                    if (range.startContainer != range.endContainer || range.startOffset != range.endOffset) {
                        selectionFound = true;
                        markSelectedNode(treeWalker.currentNode);
                    }
                }
                if (selectionFound && treeWalker.currentNode == range.startContainer) {
                    markSelectedParents(treeWalker.currentNode);
                }
                if (treeWalker.currentNode == range.endContainer) {
                    finished = true;
                } else {
                    treeWalker.nextNode();
                }
            }
            if (selectionFound && treeWalker.currentNode == range.endContainer && treeWalker.currentNode.querySelectorAll) {
                treeWalker.currentNode.querySelectorAll("*").forEach(descendantElement => markSelectedNode(descendantElement));
            }
        }
    }
    return selectionFound;
}

function markSelectedNode(node) {
    const element = node.nodeType == Node.ELEMENT_NODE ? node : node.parentElement;
    element.setAttribute(SELECTED_CONTENT_ATTRIBUTE_NAME, "");
}

function markSelectedParents(node) {
    if (node.parentElement) {
        markSelectedNode(node);
        markSelectedParents(node.parentElement);
    }
}
  </script>
  <script type="module">
    function selectAndMark(withRange) {
      const range = new Range();
      range.setStart(container1, 0);
      range.setEnd(container1, 3);
      withRange(range);
      console.log(range);
      const selection = getSelection();
      selection.removeAllRanges();
      selection.addRange(range);
      markSelectedContent();
    }

    button1.onclick = () => {
      selectAndMark(range => {
        range.setStart(container1, 1);
        range.setEnd(container1, 4);
      });
    };
    button2.onclick = () => {
      selectAndMark(range => {
        range.setStartAfter(container2.firstChild);
        range.setEndAfter(container2.childNodes[3]);
      });
    };
    button3.onclick = () => {
      selectAndMark(range => {
        range.setStart(container3.childNodes[1], 0);
        range.setEnd(container3.childNodes[3], 1);
      });
    };
  </script>
</head>
<body>
  <div id="container1"><p>1</p><p>2</p><p>3</p><p>4</p><p>5</p></div>
  <hr>
  <div id="container2"><p>1</p><p>2</p><p>3</p><p>4</p><p>5</p></div>
  <hr>
  <div id="container3"><p>1</p><p>2</p><p>3</p><p>4</p><p>5</p></div>
  <hr>
  <button id="button1">Mark contents in #1</button>
  <button id="button2">Mark contents in #2</button>
  <button id="button3">Mark contents in #3</button>
</body>
</html>

As you can see, clicking button1 and button2 results in a wider range than expected, while clicking button3 works as expected.

Expected behavior

Currently, markSelectedContent incorrectly handle situations when range.startContainer === range.endContainer, which commonly occurs with the output of Range#selectNode. This problem should be fixed to facilitate automation.

One feasible fix might be like:

  if (selectionFound && treeWalker.currentNode == range.endContainer && treeWalker.currentNode.querySelectorAll) {
-   treeWalker.currentNode.querySelectorAll("*").forEach(descendantElement => markSelectedNode(descendantElement));
+   for (
+     let offset =
+       range.startContainer === range.endContainer ? range.startOffset : 0;
+     offset < range.endOffset;
+     offset++
+   ) {
+     const node = range.endContainer.childNodes[offset];
+     markSelectedNode(node);
+     node?.querySelectorAll('*').forEach(markSelectedNode);
+   }
  }

Environment

gildas-lormeau commented 3 months ago

Thank you very much for the bug report and the fix. I merged the code, the fix will be available in the next version.