sentientwaffle / feed-read

fetch & parse ATOM & RSS feeds with Node.js
MIT License
74 stars 58 forks source link

parsing xhtml in atom feeds #3

Open eRGoon opened 11 years ago

eRGoon commented 11 years ago

I ran into a problem parsing XHTML in Atom feeds. If a feed contains XHTML content, the parsing doesn't work as expected: it returns "[object Object]" for the tag instead of the actual content. Here is the helper function that solved the problem for me:

/**
 * Serialize a parsed XHTML node (and its subtree) back into an HTML string.
 *
 * @param {string|Object} tag - Either a text node (plain string) or an element
 *   node with `name`, and optionally `attributes` (name -> value map),
 *   `children` (array of nodes) and `isSelfClosing`.
 * @returns {string} The text itself for string input; otherwise the markup
 *   for the element, its attributes and all of its descendants.
 */
var getHTMLData = function(tag) {
    // Text nodes are emitted unchanged.
    if (typeof(tag) === "string") {
        return tag;
    }

    var content = "<" + tag.name;
    var children = tag.children || [];
    var attributes = tag.attributes || {};

    for (var attr in attributes) {
        // Skip anything inherited through the prototype chain.
        if (!Object.prototype.hasOwnProperty.call(attributes, attr)) {
            continue;
        }

        // Values are wrapped in single quotes, so an embedded single quote
        // must be escaped or it would terminate the attribute early.
        var value = String(attributes[attr]).replace(/'/g, "&#39;");

        content += " " + attr + "='" + value + "'";

        // Links open in a new window; emit the href exactly once and put
        // the extra target attribute right after it.
        if (tag.name === "a" && attr === "href") {
            content += " target='_blank'";
        }
    }

    // A self-closing element has no body and must not get a closing tag.
    if (tag.isSelfClosing) {
        return content + "/>";
    }

    content += ">";

    for (var i = 0, l = children.length; i < l; i++) {
        content += getHTMLData(children[i]);
    }

    return content + "</" + tag.name + ">";
};

I call it in the child_by_name function:

for (var i = 0, l = children.length; i < l; i++) {
    if (children[i].name === name) {
        if (children[i].attributes.type === "xhtml") {
            var _children = children[i].children || [];

            for (var j = 0, k = _children.length; j < k; j++) {
                _children[j] = getHTMLData(_children[j]);
            }
        }

        return children[i];
    }
}