nexcess / magento-turpentine

A Varnish extension for Magento.
GNU General Public License v2.0
520 stars 252 forks source link

Page 404 with Google bot and other #1382

Open jeremydevos opened 7 years ago

jeremydevos commented 7 years ago

Hi,

Hi, I just saw that "GoogleBot" when it goes first on a good page, it put it in 404

Headers with a UserAgent GoogleBot :

HTTP/1.1 404 Not Found
Content-Type: text/html; charset=UTF-8
Content-Encoding: gzip
P3p: CP="CAO PSA OUR"
Date: Tue, 21 Mar 2017 16:28:50 GMT
X-Xss-Protection: 1; mode=block, 1; mode=block, 1; mode=block
Transfer-Encoding: chunked
X-Content-Type-Options: nosniff, nosniff, nosniff
Connection: Keep-Alive
Set-Cookie: X-Mapping-dccobkdh=49D0B809437359ADCD9BA65EA420A027; path=/
X-Frame-Options: SAMEORIGIN
Age: 0 

Headers with a Normal UserAgent :

HTTP/1.1 200 OK
X-Mod-Pagespeed: 1.11.33.5-0
Content-Type: text/html; charset=UTF-8
Content-Encoding: gzip
P3p: CP="CAO PSA OUR"
Date: Tue, 21 Mar 2017 16:41:08 GMT
X-Xss-Protection: 1; mode=block
Transfer-Encoding: chunked
X-Content-Type-Options: nosniff
Connection: Keep-Alive
Set-Cookie: X-Mapping-dccobkdh=7B08BD5E016A6FE9A19AFE13A32F70B0; path=/
Set-Cookie: frontendv3=XXXXXXXX; expires=Tue, 21-Mar-2017 17:41:08 UTC; path=/; domain=XXXXXXXXXXX.XX; httponly
X-Frame-Options: SAMEORIGIN
Age: 0 

VCL :

backend default {

}
backend admin {
}
acl crawler_acl {
}
acl debug_acl {
}
sub generate_session {
    if (req.url ~ ".*[&?]SID=([^&]+).*") {
        set req.http.X-Varnish-Faked-Session = regsub(
        req.url, ".*[&?]SID=([^&]+).*", "frontendv3=\1");
    } else {
        C{
            char uuid_buf [50];
            generate_uuid(uuid_buf);
            VRT_SetHdr(sp, HDR_REQ,
            "\030X-Varnish-Faked-Session:",
            uuid_buf,
            vrt_magic_string_end
            );
        }C
    }
    if (req.http.Cookie) {
        std.collect(req.http.Cookie);
        set req.http.Cookie = req.http.X-Varnish-Faked-Session +
        "; " + req.http.Cookie;
    } else {
        set req.http.Cookie = req.http.X-Varnish-Faked-Session;
    }
}
sub generate_session_expires {
    C{
        time_t now = time(NULL);
        struct tm now_tm = *gmtime(&now);
        now_tm.tm_sec += 3600;
        mktime(&now_tm);
        char date_buf [50];
        strftime(date_buf, sizeof(date_buf)-1, "%a, %d-%b-%Y %H:%M:%S %Z", &now_tm);
        VRT_SetHdr(sp, HDR_RESP,
        "\031X-Varnish-Cookie-Expires:",
        date_buf,
        vrt_magic_string_end
        );
    }C
}
sub vcl_recv {
    if (req.restarts == 0) {
        if (req.http.X-Forwarded-For) {
            set req.http.X-Forwarded-For =
            req.http.X-Forwarded-For + ", " + client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }
    if (!true || req.http.Authorization ||
        req.request !~ "^(GET|HEAD)$" ||
    req.http.Cookie ~ "varnish_bypass=1") {
        return (pipe);
    }
    set req.url = regsuball(req.url, "(.*)//+(.*)", "\1/\2");
    if (req.http.Accept-Encoding) {
        if (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } else if (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            unset req.http.Accept-Encoding;
        }
    }
    if (req.url ~ "^(/media/|/skin/|/js/|/)(?:(?:index|litespeed)\.php/)?") {
        set req.http.X-Turpentine-Secret-Handshake = "1";
        if (req.url ~ "^(/media/|/skin/|/js/|/)(?:(?:index|litespeed)\.php/)?admin") {
            set req.backend = admin;
            return (pipe);
        }
        if (req.http.Cookie ~ "\bcurrency=") {
            set req.http.X-Varnish-Currency = regsub(
            req.http.Cookie, ".*\bcurrency=([^;]*).*", "\1");
        }
        if (req.http.Cookie ~ "\bstore=") {
            set req.http.X-Varnish-Store = regsub(
            req.http.Cookie, ".*\bstore=([^;]*).*", "\1");
        }
        if (req.url ~ "/turpentine/esi/get(?:Block|FormKey)/") {
            set req.http.X-Varnish-Esi-Method = regsub(
            req.url, ".*/method/(\w+)/.*", "\1");
            set req.http.X-Varnish-Esi-Access = regsub(
            req.url, ".*/access/(\w+)/.*", "\1");
            if (req.http.X-Varnish-Esi-Method == "esi" && req.esi_level == 0 &&
            !(false || client.ip ~ debug_acl)) {
                error 403 "External ESI requests are not allowed";
            }
        }
        if (req.http.Cookie !~ "frontendv3=") {
            if (client.ip ~ crawler_acl ||
            req.http.User-Agent ~ "^(?:ApacheBench/.*|.*Googlebot.*|JoeDog/.*Siege.*|magespeedtest\.com|Nexcessnet_Turpentine/.*)$") {
                set req.http.Cookie = "frontendv3=crawler-session";
            } else {
                call generate_session;
            }
        }
        if (true &&
        req.url ~ ".*\.(?:css|js|jpe?g|png|Png|gif|ico|swf|woff)(?=\?|&|$)") {
            unset req.http.Cookie;
            unset req.http.X-Varnish-Faked-Session;
            return (lookup);
        }
        if (req.url ~ "^(/media/|/skin/|/js/|/)(?:(?:index|litespeed)\.php/)?(?:XXXXXX)" ||
        req.url ~ "\?.*__from_store=") {
            return (pipe);
        }
        if (true &&
        req.url ~ "(?:[?&](?:__SID|XDEBUG_PROFILE)(?=[&=]|$))") {
            return (pass);
        }
        return (lookup);
    }
}
sub vcl_pipe {
    unset bereq.http.X-Turpentine-Secret-Handshake;
    set bereq.http.Connection = "close";
}
sub vcl_hash {
    hash_data(req.url);
    if (req.http.Host) {
        hash_data(req.http.Host);
    } else {
        hash_data(server.ip);
    }
    hash_data(req.http.Ssl-Offloaded);
    if (req.http.X-Normalized-User-Agent) {
        hash_data(req.http.X-Normalized-User-Agent);
    }
    if (req.http.Accept-Encoding) {
        hash_data(req.http.Accept-Encoding);
    }
    if (req.http.X-Varnish-Store || req.http.X-Varnish-Currency) {
        hash_data("s=" + req.http.X-Varnish-Store + "&c=" + req.http.X-Varnish-Currency);
    }
    if (req.http.X-Varnish-Esi-Access == "private" &&
    req.http.Cookie ~ "frontendv3=") {
        hash_data(regsub(req.http.Cookie, "^.*?frontendv3=([^;]*);*.*$", "\1"));
    }
    if (req.http.X-Forwarded-Proto) {
        hash_data(req.http.X-Forwarded-Proto);
    }
    return (hash);
}
sub vcl_hit {
}
sub vcl_fetch {
    set req.grace = 15s;
    set beresp.http.X-Varnish-Host = req.http.host;
    set beresp.http.X-Varnish-URL = req.url;
    if (req.url ~ "^(/media/|/skin/|/js/|/)(?:(?:index|litespeed)\.php/)?") {
        unset beresp.http.Vary;
        set beresp.do_gzip = true;
        if (beresp.status != 200 && beresp.status != 404) {
            set beresp.ttl = 15s;
            unset beresp.http.Cache-Control;
            return (hit_for_pass);
        } else {
            if (beresp.http.Set-Cookie) {
                set beresp.http.X-Varnish-Set-Cookie = beresp.http.Set-Cookie;
                unset beresp.http.Set-Cookie;
            }
            unset beresp.http.Cache-Control;
            unset beresp.http.Expires;
            unset beresp.http.Pragma;
            unset beresp.http.Cache;
            unset beresp.http.Age;
            if (beresp.status == 404
                && true &&
            bereq.url ~ ".*\.(?:css|js|jpe?g|png|Png|gif|ico|swf|woff)(?=\?|&|$)") {
                set beresp.ttl = 0s;
                return (hit_for_pass);
            }
            if (beresp.http.X-Turpentine-Esi == "1") {
                set beresp.do_esi = true;
            }
            if (beresp.http.X-Turpentine-Cache == "0") {
                set beresp.ttl = 15s;
                unset beresp.http.Cache-Control;
                return (hit_for_pass);
            } else {
                if (true &&
                bereq.url ~ ".*\.(?:css|js|jpe?g|png|Png|gif|ico|swf|woff)(?=\?|&|$)") {
                    set beresp.ttl = 1296000s;
                    set beresp.http.Cache-Control = "max-age=1296000";
                } elseif (req.http.X-Varnish-Esi-Method) {
                    if (req.http.X-Varnish-Esi-Access == "private" &&
                    req.http.Cookie ~ "frontendv3=") {
                        set beresp.http.X-Varnish-Session = regsub(req.http.Cookie,
                        "^.*?frontendv3=([^;]*);*.*$", "\1");
                    }
                    if (req.http.X-Varnish-Esi-Method == "ajax" &&
                    req.http.X-Varnish-Esi-Access == "public") {
                        set beresp.http.Cache-Control = "max-age=" + regsub(
                        req.url, ".*/ttl/(\d+)/.*", "\1");
                    }
                    set beresp.ttl = std.duration(
                    regsub(
                    req.url, ".*/ttl/(\d+)/.*", "\1s"),
                    300s);
                    unset beresp.http.Cache-Control;
                    if (beresp.ttl == 0s) {
                        set beresp.ttl = 15s;
                        unset beresp.http.Cache-Control;
                        return (hit_for_pass);
                    }
                } else {
                    set beresp.ttl = 3600s;
                }
            }
        }
        return (deliver);
    }
}
sub vcl_deliver {
    if (req.http.X-Varnish-Faked-Session) {
        call generate_session_expires;
        set resp.http.Set-Cookie = req.http.X-Varnish-Faked-Session +
        "; expires=" + resp.http.X-Varnish-Cookie-Expires + "; path=/";
        if (req.http.Host) {
            if ("domain.com" == "empty") {
                set resp.http.Set-Cookie = resp.http.Set-Cookie +
                "; domain=" + regsub(req.http.Host, ":\d+$", "");
            } else {
                set resp.http.Set-Cookie = resp.http.Set-Cookie +
                "; domain=domain.com";
            }
        }
        set resp.http.Set-Cookie = resp.http.Set-Cookie + "; httponly";
        unset resp.http.X-Varnish-Cookie-Expires;
    }
    if (req.http.X-Varnish-Esi-Method == "ajax" && req.http.X-Varnish-Esi-Access == "private") {
        set resp.http.Cache-Control = "no-cache";
    }
    if (false || client.ip ~ debug_acl) {
        set resp.http.X-Varnish-Hits = obj.hits;
        set resp.http.X-Varnish-Esi-Method = req.http.X-Varnish-Esi-Method;
        set resp.http.X-Varnish-Esi-Access = req.http.X-Varnish-Esi-Access;
        set resp.http.X-Varnish-Currency = req.http.X-Varnish-Currency;
        set resp.http.X-Varnish-Store = req.http.X-Varnish-Store;
    } else {
        unset resp.http.X-Varnish;
        unset resp.http.Via;
        unset resp.http.X-Powered-By;
        unset resp.http.Server;
        unset resp.http.X-Turpentine-Cache;
        unset resp.http.X-Turpentine-Esi;
        unset resp.http.X-Turpentine-Flush-Events;
        unset resp.http.X-Turpentine-Block;
        unset resp.http.X-Varnish-Session;
        unset resp.http.X-Varnish-Host;
        unset resp.http.X-Varnish-URL;
        unset resp.http.X-Varnish-Set-Cookie;
    }
}
<Nexcessnet_Turpentine>
            <version>0.6.6</version>
        </Nexcessnet_Turpentine>

Thank you!

jeremydevos commented 7 years ago

Varnishlog KO (404 google bot) :

 12 ReqStart     c XX.XX.XX.XX 6138 346826277
   12 RxRequest    c GET
   12 RxURL        c /xxxxxxxxxxx/xxxxxxx/xxxxxxx.html
   12 RxProtocol   c HTTP/1.1
   12 RxHeader     c User-Agent: Googlebot/2.1 (+http://www.googlebot.com/bot.html)
   12 RxHeader     c X-Forwarded-For: XX.XX.XX.XX
   12 RxHeader     c Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
   12 RxHeader     c Cache-Control: max-stale=0
   12 RxHeader     c XXXXXXXXX: FR
   12 RxHeader     c X-Forwarded-Proto: http
   12 RxHeader     c Accept-Language: en-US,en;q=0.5
   12 RxHeader     c X-Server-IP: XX.XX.XX.XX
   12 RxHeader     c Host: xxx.xxxxxx.xxxx
   12 RxHeader     c X-Cluster-Client-Ip:XX.XX.XX.XX 
   12 RxHeader     c Cookie: ABTasty=XXXXX; ABTastySession=XXXXXXXX
   12 RxHeader     c Connection: Keep-Alive
   12 RxHeader     c X-BlueCoat-Via: 0854cb1014a9e09d
   12 RxHeader     c Upgrade-Insecure-Requests: 1
   12 RxHeader     c Accept-Encoding: gzip, deflate
   12 VCL_call     c recv
   12 VCL_acl      c NO_MATCH crawler_acl
   12 VCL_return   c lookup
   12 VCL_call     c hash
   12 Hash         c /xxxxxxxxxxx/xxxxxxx/xxxxxxx.html
   12 Hash         c xxx.xxxxxx.xxxx
   12 Hash         c 
   12 Hash         c gzip
   12 Hash         c http
   12 VCL_return   c hash
   12 VCL_call     c miss fetch
   12 Backend      c 25 default default
   12 TTL          c 346826277 RFC 0 -1 -1 1490186981 0 1490186980 375007920 0
   12 VCL_call     c fetch
   12 TTL          c 346826277 VCL 3600 -1 -1 1490186980 -0
   12 VCL_return   c deliver
   12 ObjProtocol  c HTTP/1.1
   12 ObjResponse  c Not Found

Varnishlog OK (User agent normal) :

  18 ReqStart     c XX.XX.XX.XX 6138 346826277
   18 RxRequest    c GET
   18 RxURL        c /xxxxxxxxxxx/xxxxxxx/xxxxxxx.html
   18 RxProtocol   c HTTP/1.1
   18 RxHeader     c User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0
   18 RxHeader     c X-Forwarded-For: xxxxxxxxxxx
   18 RxHeader     c Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
   18 RxHeader     c Cache-Control: max-stale=0
   18 RxHeader     c XXXXXXXXX: FR
   18 RxHeader     c X-Forwarded-Proto: http
   18 RxHeader     c Accept-Language: en-US,en;q=0.5
   18 RxHeader     c X-Server-IP: XXXXXXXXX
   18 RxHeader     c Host: XXXXXXXXX
   18 RxHeader     c X-Cluster-Client-Ip: XXXXXXXXX
   18 RxHeader     c Cookie: ABTasty=uXXXXXXXXX; ABTastySession=XXXXXXXXX
   18 RxHeader     c Connection: Keep-Alive
   18 RxHeader     c X-BlueCoat-Via: 0854cb1014a9e09d
   18 RxHeader     c Upgrade-Insecure-Requests: 1
   18 RxHeader     c Accept-Encoding: gzip, deflate
   18 VCL_call     c recv
   18 VCL_acl      c NO_MATCH crawler_acl
   18 VCL_return   c lookup
   18 VCL_call     c hash
   18 Hash         c /xxxxxxxxxxx/xxxxxxx/xxxxxxx.html
   18 Hash         c xxx.xxxxxx.xxxxx
   18 Hash         c 
   18 Hash         c gzip
   18 Hash         c http
   18 VCL_return   c hash
   18 VCL_call     c miss fetch
   18 Backend      c 16 default default
   18 TTL          c 346827009 RFC 0 -1 -1 1490187039 0 1490187038 375007920 0
   18 VCL_call     c fetch
   18 TTL          c 346827009 VCL 3600 -1 -1 1490187039 -0
   18 VCL_return   c deliver
   18 ObjProtocol  c HTTP/1.1
   18 ObjResponse  c OK
miguelbalparda commented 7 years ago

Just in case, don't you have another extension that might be interfering with this? I see you have a cookie called frontendv3, can you check what happens if you do a request without it?

jeremydevos commented 7 years ago

Hey, It's the same with or without the frontend cookie, I have a redis, I have the problem only on the production servers behind a load balancing. Magento Enterprise 1.14.3.1

jeremydevos commented 7 years ago

Hello,

Its possible to explain this :

 if (req.http.Cookie !~ "frontendv3=") {
            if (client.ip ~ crawler_acl ||
            req.http.User-Agent ~ "^(?:ApacheBench/.*|.*Googlebot.*|JoeDog/.*Siege.*|magespeedtest\.com|Nexcessnet_Turpentine/.*)$") {
                set req.http.Cookie = "frontendv3=crawler-session";
            } else {
                call generate_session;
            }
        }

I have this problem with only user agent mentioned on this function.

What happens if I remove Googlebot? What is the effect?

jeremydevos commented 7 years ago

public function fixCmRedisSessionLocks($eventObject) { if (Mage::helper('core')->isModuleEnabled('Cm_RedisSession')) { if ( ! empty($_COOKIE['frontend']) && 'crawler-session' == $_COOKIE['frontend'] && ! defined('CM_REDISSESSION_LOCKING_ENABLED')) { define('CM_REDISSESSION_LOCKING_ENABLED', false); } } }

Event never call ?

miguelbalparda commented 7 years ago

For your first comment, it should be explained here plus the code you mentioned. For your last comment, that is #1334.

jeremydevos commented 7 years ago

I think the problem comes from the couple varnish / redis, in the Turpentine module I use I do not have this function fixCmRedisSessionLocks, here is an OK frame on a preprod environment without the problem and a production KO frame or i Have the problem :

Trame OK (Preprod, no 404, rep 200 OK) :

redis-cli monitor | grep sess_c
1490282143.713491 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "lock" "1"
1490282143.713678 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "wait" "1"
1490282144.224114 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "lock" "1"
1490282144.724729 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "lock" "1"
1490282145.225291 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "lock" "1"
1490282145.725888 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "lock" "1"
1490282146.226457 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "lock" "1"
1490282146.226688 [1 10.1.34.12:44252] "hget" "sess_crawler-session" "pid"
1490282146.727223 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "lock" "1"
1490282146.727466 [1 10.1.34.12:44252] "hget" "sess_crawler-session" "pid"
1490282146.727695 [1 10.1.34.12:44252] "hmget" "sess_crawler-session" "data" "writes"
1490282146.728414 [1 10.1.34.12:44252] "hincrby" "sess_crawler-session" "wait" "-1"
1490282146.728431 [1 10.1.34.12:44252] "hget" "sess_crawler-session" "req"
1490282146.728441 [1 10.1.34.12:44252] "hmset" "sess_crawler-session" "pid" "xxxxx|10260" "lock" "1" "req" "GET xxxx.xxxxx.xxxx /xxxxxxxx.html"
1490282146.728468 [1 10.1.34.12:44252] "expire" "sess_crawler-session" "1200"
1490282147.227452 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "lock" "1"
1490282147.227664 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "wait" "1"
1490282147.738142 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "lock" "1"
1490282148.238741 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "lock" "1"
1490282148.739441 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "lock" "1"
1490282149.240637 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "lock" "1"
1490282149.741213 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "lock" "1"
1490282149.741456 [1 10.1.34.12:44258] "hget" "sess_crawler-session" "pid"
1490282150.242004 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "lock" "1"
1490282150.242225 [1 10.1.34.12:44258] "hget" "sess_crawler-session" "pid"
1490282150.242429 [1 10.1.34.12:44258] "hmget" "sess_crawler-session" "data" "writes"
1490282150.243238 [1 10.1.34.12:44258] "hincrby" "sess_crawler-session" "wait" "-1"
1490282150.243252 [1 10.1.34.12:44258] "hget" "sess_crawler-session" "req"
1490282150.243262 [1 10.1.34.12:44258] "hmset" "sess_crawler-session" "pid" "xxxx|10259" "lock" "1" "req" "GET xxxx.xxxxx.xxxx/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/xxxxxx"
1490282150.243330 [1 10.1.34.12:44258] "expire" "sess_crawler-session" "1200"

Trame KO (Production) Rep 404 with googelbot :

redis-cli monitor | grep sess_c
1490282232.926519 [1 10.1.34.15:49251] "hincrby" "sess_crawler-session" "lock" "1"
1490282232.926716 [1 10.1.34.15:49251] "hmget" "sess_crawler-session" "data" "writes"
1490282232.927033 [1 10.1.34.15:49251] "hmset" "sess_crawler-session" "pid" "xxxx|30316" "lock" "1"
1490282232.927048 [1 10.1.34.15:49251] "expire" "sess_crawler-session" "3600"
1490282233.191461 [1 10.1.34.15:49251] "hget" "sess_crawler-session" "pid"
1490282233.191807 [1 10.1.34.15:49251] "hmset" "sess_crawler-session" "data" ":gz:x\x01\xedX\xeb\x8e\xd3F...4rw7\xban\xa0\x96,5>\xf6ux\xcd\xd1\xc4U0\xe9'\x95\xe1\xff\xabI#t\x8b}\x14\x99-/\x1a\xd9R\x03:\xb3\x04\xfdS\xed\x05\"?c\x13\xe6\xe3\x15\xa2\xed=C\xc3\xe7\x9f\xc7 \xc3\x8d\xe5\xd8]\xbf\x1cL\xbe\x1c\xb3\t\xe2\bl\x1f\x02\xdc\xddA\a\x9c\xa2$\tc\xe8\xfe7C\xbc\b\xe2" "lock" "0"
1490282233.192020 [1 10.1.34.15:49251] "hincrby" "sess_crawler-session" "writes" "1"
1490282233.192029 [1 10.1.34.15:49251] "expire" "sess_crawler-session" "3600"
1490282233.298189 [1 10.1.34.15:49257] "hincrby" "sess_crawler-session" "lock" "1"
1490282233.298375 [1 10.1.34.15:49257] "hmget" "sess_crawler-session" "data" "writes"
1490282233.298744 [1 10.1.34.15:49257] "hmset" "sess_crawler-session" "pid" "xxxx|30188" "lock" "1"
1490282233.298770 [1 10.1.34.15:49257] "expire" "sess_crawler-session" "3600"
1490282233.406263 [1 10.1.34.15:49257] "hget" "sess_crawler-session" "pid"
1490282233.406647 [1 10.1.34.15:49257] "hmset" "sess_crawler-session" "data" ":gz:x\x01\xedX\xe9n\xdbF\x10\xce\xa3...f8\xfc\xf3\x18d\xb8\xb1\x1c\xba\xeb\xe7\x83\xc9\x97c6A\x1c\x81\xedC\x80\xdb;\xe8\x80S\x94$a\x0c\xdd\xff\x02X^\b\xe3" "lock" "0"
1490282233.406857 [1 10.1.34.15:49257] "hincrby" "sess_crawler-session" "writes" "1"
1490282233.406871 [1 10.1.34.15:49257] "expire" "sess_crawler-session" "3600"
1490282233.509104 [1 10.1.34.15:49261] "hincrby" "sess_crawler-session" "lock" "1"
1490282233.509293 [1 10.1.34.15:49261] "hmget" "sess_crawler-session" "data" "writes"
1490282233.509660 [1 10.1.34.15:49261] "hmset" "sess_crawler-session" "pid" "xxxx|31327" "lock" "1"
1490282233.509684 [1 10.1.34.15:49261] "expire" "sess_crawler-session" "3600"
1490282234.032038 [1 10.1.34.15:49261] "hget" "sess_crawler-session" "pid"
1490282234.032480 [1 10.1.34.15:49261] "hmset" "sess_crawler-session" "data" ":gz:x\x01\xedX\xe9n\xdbF\x10\xce\xa3\b\...xbdgh\xf8\xfc\xf3\x18d\xb8\xb1\x1c\xba\xeb\xe7\x83\xc9\x97c6A\x1c\x81\xedC\x80\xdb;\xe8\x80S\x94$a\x0c\xdd\xff\x02X^\b\xe3" "lock" "0"
1490282234.032725 [1 10.1.34.15:49261] "hincrby" "sess_crawler-session" "writes" "1"
1490282234.032740 [1 10.1.34.15:49261] "expire" "sess_crawler-session" "3600"
1490282234.163337 [1 10.1.34.15:49298] "hincrby" "sess_crawler-session" "lock" "1"
1490282234.166357 [1 10.1.34.15:49298] "hmget" "sess_crawler-session" "data" "writes"
1490282234.170257 [1 10.1.34.15:49298] "hmset" "sess_crawler-session" "pid" "xxxxx|30902" "lock" "1"
1490282234.170268 [1 10.1.34.15:49298] "expire" "sess_crawler-session" "3600"
1490282234.282801 [1 10.1.34.15:49298] "hget" "sess_crawler-session" "pid"
1490282234.283111 [1 10.1.34.15:49298] "hmset" "sess_crawler-session" "data" ":gz:x\x01\xedX\xe9n\xdbF\x10\x...8\xcc\x12\xf4\x8f\xb5\x17\x88\xfc\x8cM\x98\x8fW\x88\xb6\xf7\x0c\r\x9f\x7f\x1e\x83\x0c7\x96cw\xfd|0\xf9r\xcc&\x88#\xb0}\bp{\a\x1dp\x8a\x92$\x8c\xa1\xfb\x9f\x8e%\b\xe6" "lock" "0"
1490282234.283321 [1 10.1.34.15:49298] "hincrby" "sess_crawler-session" "writes" "1"
1490282234.283329 [1 10.1.34.15:49298] "expire" "sess_crawler-session" "3600"
1490282234.397586 [1 10.1.34.15:49304] "hincrby" "sess_crawler-session" "lock" "1"
1490282234.397826 [1 10.1.34.15:49304] "hmget" "sess_crawler-session" "data" "writes"
1490282234.398139 [1 10.1.34.15:49304] "hmset" "sess_crawler-session" "pid" "xx|30902" "lock" "1"
1490282234.398154 [1 10.1.34.15:49304] "expire" "sess_crawler-session" "3600"
1490282234.675492 [1 10.1.34.15:49304] "hget" "sess_crawler-session" "pid"
1490282234.675919 [1 10.1.34.15:49304] "hmset" "sess_crawler-session" "data" ":gz:x\x01\xedX\xe9n\x...fb\x9f\x8e%\b\xe6" "lock" "0"
1490282234.676286 [1 10.1.34.15:49304] "hincrby" "sess_crawler-session" "writes" "1"
1490282234.676306 [1 10.1.34.15:49304] "expire" "sess_crawler-session" "3600"
miguelbalparda commented 7 years ago

Have you tried upgrading to the latest version of Turpentine?

bartlubbersen commented 7 years ago

Today I got the exact same issue for one of our customers, all bots including the turpentine crawler itself gave 404 errors on all product pages. After a while I decided to remove the crawler session file and this solved the problem for me.

Why the crawler session would trigger 404's and why it got corrupted I couldn't find.