VerifiedJoseph / Save-to-the-Wayback-Machine

Browser extension for quickly saving web pages to the Internet Archive's Wayback Machine.
GNU General Public License v3.0
152 stars 17 forks source link

auto save #159

Open skapytek opened 4 years ago

skapytek commented 4 years ago

Hi, is it possible to auto-save every visited website to the Internet Archive? I have a userscript which can do this, but its functionality is limited. Here's the script; maybe you can use it to include this feature in your add-on:

(function() {
    /* SETTINGS */
    // NOTE(review): declared as a user setting but not read anywhere in this block.
    var SHOW_BADGES = true;
    // A page is re-archived once its newest snapshot is older than this (30 days).
    var MAX_SNAPSHOT_AGE_MS = 30 * 24 * 60 * 60 * 1000;

    // Ask the Wayback availability API whether the current page already has a
    // snapshot, then either archive it (no snapshot / snapshot too old) or
    // report that archiving is unnecessary.
    // An alternative endpoint (https://web.archive.org/__wb/sparkline, field
    // `last_ts`) was previously tried here and is no longer used.
    console.log("[IA Saver] Asking for archiving necessity...");
    GM_xmlhttpRequest({
        method: 'GET',
        url: 'https://archive.org/wayback/available?url=' + encodeURIComponent(location.href),
        onload: function(response){
            console.log(response);

            // The endpoint may answer with a non-JSON error page; do not let
            // the parse error escape the callback unnoticed.
            var data;
            try {
                data = JSON.parse(response.responseText);
            } catch (err) {
                reportLookupFailure("invalid JSON: " + err.message);
                return;
            }

            var snapshots = data && data.archived_snapshots;
            if (!snapshots){
                reportLookupFailure("unexpected response");
                return;
            }

            // An empty snapshot object means the page was never archived.
            if (isEmpty(snapshots)){
                archive(location.href, true);
                return;
            }

            var closest = snapshots.closest;
            if (!closest || typeof closest.timestamp !== "string"){
                reportLookupFailure("unexpected response");
                return;
            }

            var last_save = timestampConvert(closest.timestamp);
            if (Date.now() - last_save > MAX_SNAPSHOT_AGE_MS){
                archive(location.href, false);
                return;
            }

            var log = "[IA Saver] Archiving unnecessary, latest save: " + new Date(last_save).toString() + " (" + closest.timestamp + ")";
            console.log(log);
            showBadge("U", "darkorange", log);
        },
        // Transport-level failures never reach onload.
        onerror: function(response){
            reportLookupFailure(response.status + " - " + response.statusText);
        }
    });

    // Log a failed availability lookup and surface it as an error badge.
    function reportLookupFailure(reason){
        var log = "[IA Saver] Availability check failed (" + reason + ")";
        console.error(log);
        showBadge("E", "#ff2e2e", log);
    }

    /**
     * Ask the Wayback Machine to save `url` and report the outcome through
     * the console and a status badge.
     *
     * @param {string} url - Page address; appended verbatim to the save endpoint.
     * @param {boolean} first - True when no earlier snapshot was found; only
     *     changes the wording of the success message and badge.
     */
    function archive(url, first){
        console.log("[IA Saver] Archiving...");

        // Shared by non-200 responses and by transport failures (which never
        // reach onload), so that no failed save goes unreported.
        function reportFailure(response){
            var log = "[IA Saver] Archiving error (" + response.status + " - " + response.statusText + ")";
            console.error(log);
            showBadge("E", "#ff2e2e", log);
        }

        GM_xmlhttpRequest({
            method: 'GET',
            url: 'https://web.archive.org/save/' + url,
            onload: function(data){
                if (data.status !== 200){
                    reportFailure(data);
                    return;
                }
                var log = "[IA Saver] " + (first ? "FIRST ARCHIVAL!" : "Archived!") + " (https://web.archive.org/web/" + url + ")";
                console.log(log);
                showBadge(first ? "FIRST" : "A", "green", log);
            },
            onerror: reportFailure,
            ontimeout: reportFailure
        });
    }

    /**
     * Show a small fixed badge in the bottom-right corner of the page.
     *
     * @param {string} status - Short label shown inside the badge.
     * @param {string} color - CSS colour used as the badge background.
     * @param {string} title - Tooltip text (the full log message).
     */
    function showBadge(status, color, title){
        // Honour the SHOW_BADGES setting declared at the top of the script;
        // it was previously never consulted.
        if (!SHOW_BADGES){
            return;
        }

        var el = document.createElement("div");
        el.setAttribute("title", title);
        el.setAttribute("style", "position: fixed; display: block; bottom: 0; right: 0; background: " + color + "; color: #fff; padding: 3px 5px; font-size: 12.5px; border-radius: 0; border-top-left-radius: 5px; z-index: 1000000000000000000; font-family: Arial; width: initial; box-shadow: none; margin: 0;");
        // The label is plain text, so it must not be parsed as markup.
        el.textContent = status;
        // The badge is inserted as the first child of the root element.
        var root = document.documentElement;
        root.insertBefore(el, root.firstChild);
    }

    /**
     * Convert a 14-digit Wayback timestamp (YYYYMMDDhhmmss, interpreted as
     * UTC) into milliseconds since the Unix epoch.
     *
     * @param {string} ts - Timestamp string; anything that is not exactly 14
     *     digits is handed to Date.parse unchanged.
     * @returns {number} Epoch milliseconds, or NaN when the string cannot be parsed.
     */
    function timestampConvert(ts){
        // Rewrite into the ISO 8601 form that Date.parse is required to
        // understand; other layouts are parsed implementation-dependently.
        return Date.parse(ts.replace(
            /^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/,
            '$1-$2-$3T$4:$5:$6Z'
        ));
    }

    /**
     * Tell whether a value has no own enumerable string-keyed properties.
     *
     * @param {Object} obj - Value to inspect; null or undefined makes
     *     Object.keys throw.
     * @returns {boolean} True when Object.keys(obj) yields no keys.
     */
    function isEmpty(obj) {
        var ownKeys = Object.keys(obj);
        return ownKeys.length === 0;
    }
})();
tonglil commented 4 years ago

This would be a great feature.

To prevent things from going bananas, it could provide the following controls:

So that people have to turn it on every once in a while/reasonably control the period of pages they want to archive

My personal preference is in the order of the above list.

EDIT:

Actually, I see your comments in #38.

jasikpark commented 4 years ago

I really like this idea, though I would worry about accidentally DDoSing the API, no?