gannonh / chatgpt-pgvector

ChatGPT (gpt3.5-turbo) starter app
https://astro-labs.app/docs
934 stars 130 forks source link

"error - RangeError: Maximum call stack size exceeded" for massive 150 page pdf file. #10

Open akero opened 1 year ago

akero commented 1 year ago

The link is https://arxiv.org/pdf/2303.12712.pdf, and it seems like the text is too much for this to handle. (Screenshot: WindowsTerminal_E4vfpwNz8W)

akero commented 1 year ago

OK, I fixed it. Replace stringify.js at "gpt3.5-turbo-pgvector\node_modules\domutils\lib\esm\stringify.js" with the code below. Note that this file only shows up after the setup has finished, so a fresh clone doesn't have it yet.


import renderHTML from "dom-serializer";
import { ElementType } from "domelementtype";

/**
 * Walk a node, or an arbitrarily nested array of nodes, without recursion and
 * concatenate whatever `processor` returns for each visited node.
 *
 * Nodes are visited in document order (a node first, then its children from
 * first to last). Children are visited for every node for which
 * `hasChildren` is true and `isComment` is false.
 *
 * NOTE(review): `hasChildren` and `isComment` are not imported in the visible
 * part of this file — confirm they are in scope (upstream domutils takes the
 * node type guards from "domhandler").
 *
 * @param node - A single node or a (possibly nested) array of nodes.
 * @param processor - Called once per non-array node; its return value is
 *     appended to the output (`undefined`/`null` contribute nothing, as with
 *     `Array.prototype.join`).
 * @returns The processor results joined into one string.
 */
function processNodes(node, processor) {
    const pending = [node];
    const pieces = [];

    // Push items last-to-first so that popping yields them first-to-last.
    // An index loop is used instead of `push(...items)` because spreading a
    // very large array into call arguments can itself exhaust the call stack.
    const pushReversed = (items) => {
        for (let i = items.length - 1; i >= 0; i--) {
            pending.push(items[i]);
        }
    };

    while (pending.length > 0) {
        const current = pending.pop();

        if (Array.isArray(current)) {
            pushReversed(current);
            continue;
        }

        pieces.push(processor(current));
        if (hasChildren(current) && !isComment(current)) {
            pushReversed(current.children);
        }
    }

    return pieces.join("");
}

/**
 * Render a node to markup by delegating to `renderHTML` (the default export
 * of "dom-serializer").
 *
 * @param node - Node (or nodes) to render; forwarded unchanged.
 * @param options - Rendering options; forwarded unchanged.
 * @returns The value produced by `renderHTML(node, options)`.
 */
export function getOuterHTML(node, options) {
    const markup = renderHTML(node, options);
    return markup;
}

/**
 * Render the children of a node and concatenate the results.
 *
 * @param node - Node whose children should be rendered.
 * @param options - Options forwarded to `getOuterHTML` for every child.
 * @returns The joined `getOuterHTML` output of each child, or `""` when
 *     `hasChildren(node)` is false.
 */
export function getInnerHTML(node, options) {
    if (!hasChildren(node)) {
        return "";
    }

    const rendered = [];
    node.children.forEach((child) => {
        rendered.push(getOuterHTML(child, options));
    });
    return rendered.join("");
}

/**
 * Collect the text of a node (or array of nodes), emitting a newline for
 * every `br` tag. Traversal is delegated to `processNodes`.
 *
 * @param node - Node or nodes to read text from.
 * @returns The concatenated text.
 */
export function getText(node) {
    // Per-node contribution: "\n" for <br>, nothing for other tags, `data`
    // for CDATA/text nodes, nothing for anything else.
    const pieceFor = (visited) => {
        if (isTag(visited)) {
            return visited.name === "br" ? "\n" : "";
        }
        return isCDATA(visited) || isText(visited) ? visited.data : "";
    };

    return processNodes(node, pieceFor);
}

/**
 * Collect the `data` of every text node reached by `processNodes` for the
 * given node (or array of nodes); all other nodes contribute nothing.
 *
 * @param node - Node or nodes to read text from.
 * @returns The concatenated text.
 */
export function textContent(node) {
    const dataOfTextNode = (visited) => (isText(visited) ? visited.data : "");
    return processNodes(node, dataOfTextNode);
}

/**
 * Collect the text inside a node (or array of nodes), descending only into
 * nodes whose `type` is `ElementType.Tag` and into CDATA sections.
 *
 * Any other node that has children is skipped together with its whole
 * subtree (presumably this is what keeps `<script>`/`<style>` content out,
 * since those carry their own element types — TODO confirm against
 * domelementtype). Comments contribute nothing.
 *
 * The walk uses an explicit stack rather than recursion so that very deep
 * trees cannot overflow the call stack, and visits nodes in document order.
 *
 * @param node - Node or (possibly nested) array of nodes to read text from.
 * @returns The concatenated `data` of all reached text nodes.
 */
export function innerText(node) {
    const pending = [node];
    const pieces = [];

    // Push last-to-first so pop() yields items first-to-last; an index loop
    // avoids spreading a huge array into call arguments.
    const pushReversed = (items) => {
        for (let i = items.length - 1; i >= 0; i--) {
            pending.push(items[i]);
        }
    };

    while (pending.length > 0) {
        const current = pending.pop();

        if (Array.isArray(current)) {
            pushReversed(current);
        } else if (
            hasChildren(current) &&
            (current.type === ElementType.Tag || isCDATA(current))
        ) {
            pushReversed(current.children);
        } else if (isText(current)) {
            pieces.push(current.data);
        }
    }

    return pieces.join("");
}

//# sourceMappingURL=stringify.js.map