nexcess / magento-turpentine

A Varnish extension for Magento.
GNU General Public License v2.0
519 stars 253 forks source link

enabling Varnish ESI Blocks causes loop #834

Open keebler-elf opened 9 years ago

keebler-elf commented 9 years ago

Enabling Varnish ESI Blocks in cache management settings causes any page to repeatedly show the 404 error page repeating down the page with no header or footer spiking resources until you force the browser to stop loading the page. The default varnish install default.vcl works fine and the other PageCache extension default.vcl works fine so I believe varnish is working properly. This is the vcl generated by the extension.

C{
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <pthread.h>
static pthread_mutex_t lrand_mutex = PTHREAD_MUTEX_INITIALIZER;
void generate_uuid(char* buf) {
pthread_mutex_lock(&lrand_mutex);
long a = lrand48();
long b = lrand48();
long c = lrand48();
long d = lrand48();
pthread_mutex_unlock(&lrand_mutex);
sprintf(buf, "frontend=%08lx%04lx%04lx%04lx%04lx%08lx",
a,
b & 0xffff,
(b & ((long)0x0fff0000) >> 16) | 0x4000,
(c & 0x0fff) | 0x8000,
(c & (long)0xffff0000) >> 16,
d
);
return;
}
}C
import std;
backend default {
.host = "127.0.0.1";
.port = "8080";
.first_byte_timeout = 300s;
.between_bytes_timeout = 300s;
}
backend admin {
.host = "127.0.0.1";
.port = "8080";
.first_byte_timeout = 21600s;
.between_bytes_timeout = 21600s;
}
acl crawler_acl {
"127.0.0.1";
}
acl debug_acl {
}
sub generate_session {
if (req.url ~ ".*[&?]SID=([^&]+).*") {
set req.http.X-Varnish-Faked-Session = regsub(
req.url, ".*[&?]SID=([^&]+).*", "frontend=\1");
} else {
C{
char uuid_buf [50];
generate_uuid(uuid_buf);
VRT_SetHdr(sp, HDR_REQ,
"\030X-Varnish-Faked-Session:",
uuid_buf,
vrt_magic_string_end
);
}C
}
if (req.http.Cookie) {
std.collect(req.http.Cookie);
set req.http.Cookie = req.http.X-Varnish-Faked-Session +
"; " + req.http.Cookie;
} else {
set req.http.Cookie = req.http.X-Varnish-Faked-Session;
}
}
sub generate_session_expires {
C{
time_t now = time(NULL);
struct tm now_tm = *gmtime(&now);
now_tm.tm_sec += 86400;
mktime(&now_tm);
char date_buf [50];
strftime(date_buf, sizeof(date_buf)-1, "%a, %d-%b-%Y %H:%M:%S %Z", &now_tm);
VRT_SetHdr(sp, HDR_RESP,
"\031X-Varnish-Cookie-Expires:",
date_buf,
vrt_magic_string_end
);
}C
}
sub vcl_recv {
if (req.restarts == 0) {
if (req.http.X-Forwarded-For) {
set req.http.X-Forwarded-For =
req.http.X-Forwarded-For + ", " + client.ip;
} else {
set req.http.X-Forwarded-For = client.ip;
}
}
if (req.http.Accept-Encoding) {
if (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} else if (req.http.Accept-Encoding ~ "deflate") {
set req.http.Accept-Encoding = "deflate";
} else {
unset req.http.Accept-Encoding;
}
}
if (req.http.User-Agent ~ "iP(?:hone|ad|od)|BlackBerry|Palm|Googlebot-Mobile|Mobile|mobile|mobi|Windows Mobile|Safari Mobile|Android|Opera (?:Mini|Mobi)") {
set req.http.X-Normalized-User-Agent = "mobile";
} else if (req.http.User-Agent ~ "MSIE") {
set req.http.X-Normalized-User-Agent = "msie";
} else if (req.http.User-Agent ~ "Firefox") {
set req.http.X-Normalized-User-Agent = "firefox";
} else if (req.http.User-Agent ~ "Chrome") {
set req.http.X-Normalized-User-Agent = "chrome";
} else if (req.http.User-Agent ~ "Safari") {
set req.http.X-Normalized-User-Agent = "safari";
} else if (req.http.User-Agent ~ "Opera") {
set req.http.X-Normalized-User-Agent = "opera";
} else {
set req.http.X-Normalized-User-Agent = "other";
}
if (!true || req.http.Authorization ||
req.request !~ "^(GET|HEAD|OPTIONS)$" ||
req.http.Cookie ~ "varnish_bypass=1") {
return (pipe);
}
set req.url = regsuball(req.url, "(.*)//+(.*)", "\1/\2");
if (req.url ~ "^(/media/|/skin/|/js/|/)(?:(?:index|litespeed)\.php/)?") {
set req.http.X-Turpentine-Secret-Handshake = "1";
if (req.url ~ "^(/media/|/skin/|/js/|/)(?:(?:index|litespeed)\.php/)?myadmin") {
set req.backend = admin;
return (pipe);
}
if (req.http.Cookie ~ "\bcurrency=") {
set req.http.X-Varnish-Currency = regsub(
req.http.Cookie, ".*\bcurrency=([^;]*).*", "\1");
}
if (req.http.Cookie ~ "\bstore=") {
set req.http.X-Varnish-Store = regsub(
req.http.Cookie, ".*\bstore=([^;]*).*", "\1");
}
if (req.url ~ "/turpentine/esi/get(?:Block|FormKey)/") {
set req.http.X-Varnish-Esi-Method = regsub(
req.url, ".*/method/(\w+)/.*", "\1");
set req.http.X-Varnish-Esi-Access = regsub(
req.url, ".*/access/(\w+)/.*", "\1");
if (req.http.X-Varnish-Esi-Method == "esi" && req.esi_level == 0 &&
!(true || client.ip ~ debug_acl)) {
error 403 "External ESI requests are not allowed";
}
}
if (req.http.Cookie !~ "frontend=" && !req.http.X-Varnish-Esi-Method) {
if (client.ip ~ crawler_acl ||
req.http.User-Agent ~ "^(?:ApacheBench/.*|.*Googlebot.*|JoeDog/.*Siege.*|magespeedtest\.com|Nexcessnet_Turpentine/.*)$") {
set req.http.Cookie = "frontend=crawler-session";
} else {
call generate_session;
}
}
if (true &&
req.url ~ ".*\.(?:css|js|jpe?g|png|gif|ico|swf)(?=\?|&|$)") {
unset req.http.Cookie;
unset req.http.X-Varnish-Faked-Session;
return (lookup);
}
if (req.url ~ "^(/media/|/skin/|/js/|/)(?:(?:index|litespeed)\.php/)?(?:myadmin|api|cron\.php)" ||
req.url ~ "\?.*__from_store=") {
return (pipe);
}
if (true &&
req.url ~ "(?:[?&](?:__SID|XDEBUG_PROFILE)(?=[&=]|$))") {
return (pass);
}
if (true && req.url ~ "[?&](utm_source|utm_medium|utm_campaign|utm_content|utm_term|gclid|cx|ie|cof|siteurl)=") {
set req.url = regsuball(req.url, "(?:(\?)?|&)(?:utm_source|utm_medium|utm_campaign|utm_content|utm_term|gclid|cx|ie|cof|siteurl)=[^&]+", "\1");
set req.url = regsuball(req.url, "(?:(\?)&|\?$)", "\1");
}
return (lookup);
}
}
sub vcl_pipe {
unset bereq.http.X-Turpentine-Secret-Handshake;
set bereq.http.Connection = "close";
}
sub vcl_hash {
hash_data(req.url);
if (req.http.Host) {
hash_data(req.http.Host);
} else {
hash_data(server.ip);
}
hash_data(req.http.Ssl-Offloaded);
if (req.http.X-Normalized-User-Agent) {
hash_data(req.http.X-Normalized-User-Agent);
}
if (req.http.Accept-Encoding) {
hash_data(req.http.Accept-Encoding);
}
if (req.http.X-Varnish-Store || req.http.X-Varnish-Currency) {
hash_data("s=" + req.http.X-Varnish-Store + "&c=" + req.http.X-Varnish-Currency);
}
if (req.http.X-Varnish-Esi-Access == "private" &&
req.http.Cookie ~ "frontend=") {
hash_data(regsub(req.http.Cookie, "^.*?frontend=([^;]*);*.*$", "\1"));
}
return (hash);
}
sub vcl_hit {
}
sub vcl_fetch {
set req.grace = 15s;
set beresp.http.X-Varnish-Host = req.http.host;
set beresp.http.X-Varnish-URL = req.url;
if (req.url ~ "^(/media/|/skin/|/js/|/)(?:(?:index|litespeed)\.php/)?") {
unset beresp.http.Vary;
set beresp.do_gzip = true;
if (beresp.status != 200 && beresp.status != 404) {
set beresp.ttl = 15s;
return (hit_for_pass);
} else {
if (beresp.http.Set-Cookie) {
set beresp.http.X-Varnish-Set-Cookie = beresp.http.Set-Cookie;
unset beresp.http.Set-Cookie;
}
unset beresp.http.Cache-Control;
unset beresp.http.Expires;
unset beresp.http.Pragma;
unset beresp.http.Cache;
unset beresp.http.Age;
if (beresp.http.X-Turpentine-Esi == "1") {
set beresp.do_esi = true;
}
if (beresp.http.X-Turpentine-Cache == "0") {
set beresp.ttl = 15s;
return (hit_for_pass);
} else {
if (true &&
bereq.url ~ ".*\.(?:css|js|jpe?g|png|gif|ico|swf)(?=\?|&|$)") {
set beresp.ttl = 28800s;
set beresp.http.Cache-Control = "max-age=28800";
} elseif (req.http.X-Varnish-Esi-Method) {
if (req.http.X-Varnish-Esi-Access == "private" &&
req.http.Cookie ~ "frontend=") {
set beresp.http.X-Varnish-Session = regsub(req.http.Cookie,
"^.*?frontend=([^;]*);*.*$", "\1");
}
if (req.http.X-Varnish-Esi-Method == "ajax" &&
req.http.X-Varnish-Esi-Access == "public") {
set beresp.http.Cache-Control = "max-age=" + regsub(
req.url, ".*/ttl/(\d+)/.*", "\1");
}
set beresp.ttl = std.duration(
regsub(
req.url, ".*/ttl/(\d+)/.*", "\1s"),
300s);
if (beresp.ttl == 0s) {
set beresp.ttl = 15s;
return (hit_for_pass);
}
} else {
set beresp.ttl = 3600s;
}
}
}
return (deliver);
}
}
sub vcl_deliver {
if (req.http.X-Varnish-Faked-Session) {
call generate_session_expires;
set resp.http.Set-Cookie = req.http.X-Varnish-Faked-Session +
"; expires=" + resp.http.X-Varnish-Cookie-Expires + "; path=/";
if (req.http.Host) {
set resp.http.Set-Cookie = resp.http.Set-Cookie +
"; domain=" + regsub(req.http.Host, ":\d+$", "");
}
set resp.http.Set-Cookie = resp.http.Set-Cookie + "; httponly";
unset resp.http.X-Varnish-Cookie-Expires;
}
if (req.http.X-Varnish-Esi-Method == "ajax" && req.http.X-Varnish-Esi-Access == "private") {
set resp.http.Cache-Control = "no-cache";
}
if (true || client.ip ~ debug_acl) {
set resp.http.X-Varnish-Hits = obj.hits;
set resp.http.X-Varnish-Esi-Method = req.http.X-Varnish-Esi-Method;
set resp.http.X-Varnish-Esi-Access = req.http.X-Varnish-Esi-Access;
set resp.http.X-Varnish-Currency = req.http.X-Varnish-Currency;
set resp.http.X-Varnish-Store = req.http.X-Varnish-Store;
} else {
unset resp.http.X-Varnish;
unset resp.http.Via;
unset resp.http.X-Powered-By;
unset resp.http.Server;
unset resp.http.X-Turpentine-Cache;
unset resp.http.X-Turpentine-Esi;
unset resp.http.X-Turpentine-Flush-Events;
unset resp.http.X-Turpentine-Block;
unset resp.http.X-Varnish-Session;
unset resp.http.X-Varnish-Host;
unset resp.http.X-Varnish-URL;
unset resp.http.X-Varnish-Set-Cookie;
}
}

System.log only shows lots of 2015-06-30T09:47:30+00:00 INFO (6): TURPENTINE: Checking ESI block candidate: messages and failed attempts at warping up the cache Error crawling URL (https://www.xxx.com/xxx.html): Unable to read response, or response is empty. No exceptions thrown. At a loss on where to start debugging here.

aricwatson commented 9 years ago

The default varnish install default.vcl works fine and the other PageCache extension default.vcl works fine so I believe varnish is working properly. This is the vcl generated by the extension.

Other caching extensions have been known to cause problems with Turpentine - can you try turning off PageCache and seeing if things work better?

keebler-elf commented 9 years ago

I have turned them off via cache settings and then set them to false in the xml modules. [magento root]/var/cache is empty. Still getting the non stop loop of the 404 page with no header or footer.

matt-johnston-ecommistry commented 9 years ago

I've got the identical issue, with no caching extensions installed - is it worth me posting a full list of installed extensions? And I have checked all installed extensions for rewrite conflicts or other obvious issues with no luck.

ichalov commented 9 years ago

It looks like magento returns 404 on ESI requests (i.e. it can't handle URLs like /turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/... ). It would probably help to diagnose it if you enabled Debug Info in 'Varnish Options' tab and then looked what URLs it shows in ESI URL comments in Page Source of those cycled pages.

matt-johnston-ecommistry commented 9 years ago

I have identified this issue on our install as Varnish executing requests to the server (for ESI includes) like the follows:

"GET /https:/domain.com/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/{redacted}/data/{redacted}-/https:/domain.com/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/{redacted}/data/{redacted}-/https:/domain.com/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/{redacted}/data/{redacted}-/https:/domain.com/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/{redacted}/data/{redacted}-/https:/domain.com/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/{redacted}/data/{redacted}-/ HTTP/1.1"

For obvious reasons, this doesn't work - it appears to be caused by us using https:// on both insecure and secure URLs in Magento. Apparently there is a bug in Varnish that it does not recognize https:// ESI urls, and treats them as relative URLs instead - hence the issue above. https://www.varnish-cache.org/trac/ticket/1333 covers the issue - it's a confirmed bug in Varnish which is resolved, but not in Varnish 3 as far as I can tell.

I'll be submitting a pull request shortly with a patch for Turpentine to get around this issue - please give it a go and see if it solves it for you, if you are also running in a similar (HTTPS-only) setup.

miguelbalparda commented 9 years ago

@keebler-elf @ichalov did you have the chance to check the proposed fix by @matt-johnston-ecommistry?

ichalov commented 9 years ago

I can tell I needed to do a similar fix in app/code/community/Nexcessnet/Turpentine/Model/Observer/Esi.php when I was implementing full site SSL. It worked for me, so I didn't continue to search for a more graceful solution. I also believe #808 describes the same issue.

My procedure for fixing ESI URLs is:

    protected function _stripBaseUrl ($url) {
        $res = preg_replace("~^".Mage::getStoreConfig(Mage_Core_Model_Store::XML_PATH_SECURE_BASE_URL)."~", "/", $url);
        return $res?$res:$url;
    }

and I call it from replaceFormKeyPlaceholder and injectEsi methods. It worked both in normal non-SSL mode and with SSL terminator in place.

miguelbalparda commented 9 years ago

@ichalov can you create a PR with your changes please?

ichalov commented 9 years ago

@miguelbalparda , You may get an idea of what my change was from this commit: https://github.com/ichalov/magento-turpentine/commit/ef3738f806c2d9ffea6621dd63213f7859c286d3 . I'm not really sure it should be merged into master like that. I didn't test it with varnish and backend installed on different servers for example. Maybe it's worth investing some time in finding a better solution.

keebler-elf commented 9 years ago

Applied the fix suggested by @matt-johnston-ecommistry and could never get a page to load. Only 503 errors. Had to restart the nginx service as the worker processes were all over the place slowing things down to a crawl. Applied the fix suggested by @ichalov and server responds after about 55 seconds and then the page loads according to chrome dev tools. No looping of the 404 page just the homepage loading one time as intended. For full SSL one part of the wiki stats not to use https in the unsecure settings and then another part states to use https in the unsecure settings. I'm still unclear of what do put in the settings for unsecure base url. Would anybody care to share what settings actually work?

miguelbalparda commented 9 years ago

@keebler-elf all the sites/servers are different and this is an ongoing development project, I suggest you to try using both https and http in the usecure settings and test your case in particular. Once we can get to fix your issue we can compare this scenario with other scenarios and see if we can create a permanent fix. So far we are progressing with the use of SSL and there are several leads to pursue, please keep sharing your experience with this.

ichalov commented 9 years ago

@keebler-elf , you can use the same https base URL for both unsecure and secure setting. I have live sites configured like this, they work.

As to the slowness, it's seems to be more difficult to diagnose. Do you use nginx as SSL terminator? Or both SSL terminator and backend? Do you have varnish on the same server as backend? What is the ports configuration? For example I typically configure it like this: 80 port is occupied by apache to serve http->https redirects and varnish also configured to use it as backend, 443 - nginx doing SSL termination and using varnish on 8080 port as backend. Also, are you completely sure you flushed all the caches after applying patches before doing the testing?

keebler-elf commented 9 years ago

@miguelbalparda I will. @ichalov Nginx as both ssl terminator and backend. proxy_pass http://127.0.0.1:6081 to varnish back to nginx on 8080. everything on one server. dedicated with decent specs. Stack: ubuntu 14.04, nginx, percona, hhvm with php-fpm fallback All caches were fluched via admin and then rm -rf magento_root/var/cache/* via terminal

ichalov commented 9 years ago

@keebler-elf , can you look at nginx logs and check what URLs were accessed during these tests? Does nginx also serve on port 80? I guess the first solution didn't work because replacing 'https' with 'http' in ESI URL actually makes varnish access port 80 and if it does redirect it back to https then you have a loop. The second patch removes proto, host, and port from ESI URL completely, so hopefully varnish asks for it from port 8080 in your setup. But I have no ideas as to why it works slow. Did you notice what processes were consuming CPU during that test? Can you remake it or it's difficult because you're experimenting on the live site and you don't want downtime?

keebler-elf commented 9 years ago

I have nginx listening on port 80 as well at least until i get things sorted out. URLs accessed:

2015/07/02 17:53:52 [error] 7258#0: *114 connect() failed (111: Connection refused) while connecting to upstream, client: 184.77.54.86, server: www.mysite.com, request: "GET /https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/43df7ef730ac2f292b82a7e39ae6a7c81894892d20a040eb512a31ea5f32b68b/data/FfF-jPB7eB0xM1zD76c7ZRRFDH9eoHsjiQGJZFYxG4jSfjnQs8BPvrbLjeJ1GU09ZbZIMLXGHR.cOHlK.8A8r1OcY7P-Z-Zhq6nrkHDkGGTzPpXX5ez47Q3BmyaVjGwTKU8Yjd.RzjnDvtYZ3Mllh3seXtZsDrp1D-i6RhJuOnZToOboNjFT2rxU6ppCT1MXaTZEMl.stHUW-PsNvI5LGOVP6ZTvzAEpOnpwk9X0z73lIWSQ7zYEIkN77dx7s12PRyU7KOVUFLcyq7vW1.P7xIEi7EbR2bAQZc6B0Zw8mfUPlZ13cINXsdYuzha5D2wwsds.mR3rWuOPqYjcoFj8JCyxYLT3FFLcqlvt8wXBtWGsoD2yz-uLlzDP4Vi9BLRx-zLso8Jk076GsJKHQCYSj-3HwBhWzPZR/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/86e5a1f22e0db542f03102bc839ead951170597ee63088a67d39da5ac8e3be64/data/8RjebvuYV1hBhqjvil5cBBrf4kHf3vurDjXadPwfHu7I8UP2LLTBzZSNnPY8CjCsmJieM4mxD6-6lca9lym72XKCGdUpo6ueS7Kieq5FPU-NdNmw052xteggqfh.aZ0OTqEVTw8GW-jI.FaefyrD5u42TGsVqdedCudgrcK2MCaLZW5.FrV69hfLhMg0HaisvygOldI1m0azgmhY33PAKI31WP71H1yg37OLjQmcP-XCgUPokaBi7sOboUX23WUKjuF5VLOK8dgx0l4eGRzggFHLpf7BJJqJxUwYblTxeuhHcar1Loaw1IEH-G0PgkD1tDZNdXj88yEfPCOKDPxxhcpBBiyPLVhG7jNpWGdomIC-adlal93A.i4N6iN30ESt/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/c15331584906ffe494c541fc21cca09b054f6b5d67933a653651fc5c9aeb9060/data/pjh5Z7TKDLqnydCSauUDPsO5Mv3debZz-8Oue2oRXw8fozIzmyAzOojx582cOa9xfXhRfkxKBvD-86PFFsX3WUvoE.Z7rEiN1nYwrr8VpxUaWUUA1JaHUrBdLDODOGLlcdEKy232QdEEtCDaa5WekfTgb6J97a4NAjxDWi4VeSgLeMQsqJKTTdKu8yLAwScM9Qn7itwfTtyTtQBe6AsonrWK5m6urHMNQtiv9s3zxFEmjX.eOStMfoY-MaxefvPyHAJBKtl1-HoYrO6KIsNPOgjwilfNgB8P8Al3hjncKmuRhapSP1rn15grxZ1vh6cNcrvd-9v308uAwWrHH1E95.4SuUiy5iJBLZdHg4xDO7T-jgCrzPbegFmVCisXLZy7/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/43df7ef730ac2f292b82a7e39ae6a7c81894892d20a040eb512a31ea5f32b68b/data/FfF-jPB7eB0xM1zD76c7ZRRFDH9

2015/07/02 17:54:03 [error] 7258#0: *114 connect() failed (111: Connection refused) while connecting to upstream, client: 184.77.54.86, server: www.mysite.com, request: "GET /https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/43df7ef730ac2f292b82a7e39ae6a7c81894892d20a040eb512a31ea5f32b68b/data/FfF-jPB7eB0xM1zD76c7ZRRFDH9eoHsjiQGJZFYxG4jSfjnQs8BPvrbLjeJ1GU09ZbZIMLXGHR.cOHlK.8A8r1OcY7P-Z-Zhq6nrkHDkGGTzPpXX5ez47Q3BmyaVjGwTKU8Yjd.RzjnDvtYZ3Mllh3seXtZsDrp1D-i6RhJuOnZToOboNjFT2rxU6ppCT1MXaTZEMl.stHUW-PsNvI5LGOVP6ZTvzAEpOnpwk9X0z73lIWSQ7zYEIkN77dx7s12PRyU7KOVUFLcyq7vW1.P7xIEi7EbR2bAQZc6B0Zw8mfUPlZ13cINXsdYuzha5D2wwsds.mR3rWuOPqYjcoFj8JCyxYLT3FFLcqlvt8wXBtWGsoD2yz-uLlzDP4Vi9BLRx-zLso8Jk076GsJKHQCYSj-3HwBhWzPZR/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/86e5a1f22e0db542f03102bc839ead951170597ee63088a67d39da5ac8e3be64/data/8RjebvuYV1hBhqjvil5cBBrf4kHf3vurDjXadPwfHu7I8UP2LLTBzZSNnPY8CjCsmJieM4mxD6-6lca9lym72XKCGdUpo6ueS7Kieq5FPU-NdNmw052xteggqfh.aZ0OTqEVTw8GW-jI.FaefyrD5u42TGsVqdedCudgrcK2MCaLZW5.FrV69hfLhMg0HaisvygOldI1m0azgmhY33PAKI31WP71H1yg37OLjQmcP-XCgUPokaBi7sOboUX23WUKjuF5VLOK8dgx0l4eGRzggFHLpf7BJJqJxUwYblTxeuhHcar1Loaw1IEH-G0PgkD1tDZNdXj88yEfPCOKDPxxhcpBBiyPLVhG7jNpWGdomIC-adlal93A.i4N6iN30ESt/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/c15331584906ffe494c541fc21cca09b054f6b5d67933a653651fc5c9aeb9060/data/pjh5Z7TKDLqnydCSauUDPsO5Mv3debZz-8Oue2oRXw8fozIzmyAzOojx582cOa9xfXhRfkxKBvD-86PFFsX3WUvoE.Z7rEiN1nYwrr8VpxUaWUUA1JaHUrBdLDODOGLlcdEKy232QdEEtCDaa5WekfTgb6J97a4NAjxDWi4VeSgLeMQsqJKTTdKu8yLAwScM9Qn7itwfTtyTtQBe6AsonrWK5m6urHMNQtiv9s3zxFEmjX.eOStMfoY-MaxefvPyHAJBKtl1-HoYrO6KIsNPOgjwilfNgB8P8Al3hjncKmuRhapSP1rn15grxZ1vh6cNcrvd-9v308uAwWrHH1E95.4SuUiy5iJBLZdHg4xDO7T-jgCrzPbegFmVCisXLZy7/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/a3a3a90e952638e923ae1e68e1d74088e5816d550bdab6636c267e236c5ad28f/data/C3vI0nIdQ1ko-pV51IJl-HnxTKO8l

2015/07/02 17:54:14 [error] 7258#0: *114 connect() failed (111: Connection refused) while connecting to upstream, client: 184.77.54.86, server: www.mysite.com, request: "GET /https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/43df7ef730ac2f292b82a7e39ae6a7c81894892d20a040eb512a31ea5f32b68b/data/FfF-jPB7eB0xM1zD76c7ZRRFDH9eoHsjiQGJZFYxG4jSfjnQs8BPvrbLjeJ1GU09ZbZIMLXGHR.cOHlK.8A8r1OcY7P-Z-Zhq6nrkHDkGGTzPpXX5ez47Q3BmyaVjGwTKU8Yjd.RzjnDvtYZ3Mllh3seXtZsDrp1D-i6RhJuOnZToOboNjFT2rxU6ppCT1MXaTZEMl.stHUW-PsNvI5LGOVP6ZTvzAEpOnpwk9X0z73lIWSQ7zYEIkN77dx7s12PRyU7KOVUFLcyq7vW1.P7xIEi7EbR2bAQZc6B0Zw8mfUPlZ13cINXsdYuzha5D2wwsds.mR3rWuOPqYjcoFj8JCyxYLT3FFLcqlvt8wXBtWGsoD2yz-uLlzDP4Vi9BLRx-zLso8Jk076GsJKHQCYSj-3HwBhWzPZR/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/private/ttl/86400/hmac/86e5a1f22e0db542f03102bc839ead951170597ee63088a67d39da5ac8e3be64/data/8RjebvuYV1hBhqjvil5cBBrf4kHf3vurDjXadPwfHu7I8UP2LLTBzZSNnPY8CjCsmJieM4mxD6-6lca9lym72XKCGdUpo6ueS7Kieq5FPU-NdNmw052xteggqfh.aZ0OTqEVTw8GW-jI.FaefyrD5u42TGsVqdedCudgrcK2MCaLZW5.FrV69hfLhMg0HaisvygOldI1m0azgmhY33PAKI31WP71H1yg37OLjQmcP-XCgUPokaBi7sOboUX23WUKjuF5VLOK8dgx0l4eGRzggFHLpf7BJJqJxUwYblTxeuhHcar1Loaw1IEH-G0PgkD1tDZNdXj88yEfPCOKDPxxhcpBBiyPLVhG7jNpWGdomIC-adlal93A.i4N6iN30ESt/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/a3a3a90e952638e923ae1e68e1d74088e5816d550bdab6636c267e236c5ad28f/data/C3vI0nIdQ1ko-pV51IJl-HnxTKO8lIDiU3Zv0VFiVtdSApTdzvxu7Eku9Lvh.YmrkiVudOGOYWnpTojxRMeRej1AwWpnOL0GEV-hNgkkyjxEcVKbA1RslTb4btOXPki0SNKZzYVonMevFxCqqxB9Osu9CGS8vTsljMmmlr3hxbfyObJlU8Xb7H7LCQ2jyfSLYVQdMl8LdRCzur2TBnCh9rkODDgSveDqQFM318PTJhkMLydD9HXUWTxVQOeQ2p28XJhCk5wXFVMws1x95WgYqn7Ml8e.Qx4V/https:/www.mysite.com/turpentine/esi/getBlock/method/esi/access/public/ttl/3600/hmac/a3a3a90e952638e923ae1e68e1d74088e5816d550bdab6636c267e236c5ad28f/data/C3vI0nIdQ1ko-pV51IJl-HnxTKO8lIDiU3Zv0VFiVtdSApTdzvxu7Eku9Lvh.YmrkiVudOGOYWnpTojxRMeRej1AwWpnOL0GEV-hNgkkyjxEcVKbA1RslTb4btOXPki

I disabled hhvm to run php-fpm for a while and enable slow log. executed the top command and php-fpm had 4 processes in the low 20% range of cpu usage. Slow log showed:

[02-Jul-2015 17:52:19]  [pool www] pid 7230
script_filename = /var/www/mysite.com/htdocs/index.php
[0x00007f5f5c13a548] sleep() /var/www/mysite.com/htdocs/app/code/community/Cm/RedisSession/Model/Session.php:393
[0x00007ffd6eb6ce80] read() unknown:0
[0x00007f5f5c13a330] session_start() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/Session/Abstract/Varien.php:133
[0x00007f5f5c13a1d0] start() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/Session/Abstract/Varien.php:222
[0x00007f5f5c13a088] init() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/Session/Abstract.php:84
[0x00007f5f5c139f90] init() /var/www/mysite.com/htdocs/app/code/local/Mage/Core/Model/Session.php:42
[0x00007f5f5c139e60] __construct() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/Config.php:1348
[0x00007f5f5c139d48] getModelInstance() /var/www/mysite.com/htdocs/app/Mage.php:463
[0x00007f5f5c139c18] getModel() /var/www/mysite.com/htdocs/app/Mage.php:477
[0x00007f5f5c139ab0] getSingleton() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/Url.php:1066
[0x00007f5f5c1399a8] _prepareSessionUrlWithParams() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/Url.php:1048
[0x00007f5f5c139868] _prepareSessionUrl() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/Url.php:1011
[0x00007f5f5c139750] getUrl() /var/www/mysite.com/htdocs/app/Mage.php:394
[0x00007f5f5c139680] getUrl() /var/www/mysite.com/htdocs/app/code/community/Nexcessnet/Turpentine/Helper/Esi.php:167
[0x00007f5f5c139528] getDummyUrl() /var/www/mysite.com/htdocs/app/code/community/Nexcessnet/Turpentine/Model/Observer/Esi.php:82
[0x00007f5f5c1393d8] checkRedirectUrl() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/App.php:1338
[0x00007f5f5c1391d8] _callObserverMethod() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Model/App.php:1317
[0x00007f5f5c139050] dispatchEvent() /var/www/mysite.com/htdocs/app/Mage.php:448
[0x00007f5f5c138ef8] dispatchEvent() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Controller/Response/Http.php:105
[0x00007f5f5c138d70] setRedirect() /var/www/mysite.com/htdocs/app/code/core/Mage/Core/Controller/Varien/Front.php:345
ichalov commented 9 years ago

I believe those 'GET /https:/www.mysite.com/turpentine/esi/getBlock/' requests in the log can't be from the test where you get normal page in 1 minute. It's impossible to get unbroken page when ESI URLs are like that.

After some more thinking here is my version of what have happened: It somehow got into internal redirection loop in the first test and overloaded the server. That loop was still active and consuming CPU during the second test, that's why it was slow.

It's probably better to restart nginx and varnish along with cache flushes when you do full page cache configuration changes.

matt-johnston-ecommistry commented 9 years ago

When I implemented my fix I also got redirection loops and traffic still from varnish - I agree with the above its a better idea to restart everything before testing.

One good diagnostic is commenting out the enable esi line in the vcl so you can see the esi requests in the html - if they show https then something is still off in Magento but if showing http you have issues elsewhere. Note you will need to clear cache pretty thoroughly after making a change to the internals On 3/07/2015 10:35 am, "ichalov" notifications@github.com wrote:

I believe those 'GET /https:/www.mysite.com/turpentine/esi/getBlock/' requests in the log can't be from the test where you get normal page in 1 minute. It's impossible to get unbroken page when ESI URLs are like that.

After some more thinking here is my version of what have happened: It somehow got into internal redirection loop in the first test and overloaded the server. That loop was still active and consuming CPU during the second test, that's why it was slow.

It's probably better to restart nginx and varnish along with cache flushes when you do full page cache configuration changes.

— Reply to this email directly or view it on GitHub https://github.com/nexcess/magento-turpentine/issues/834#issuecomment-118185542 .

keebler-elf commented 9 years ago

@matt-johnston-ecommistry I will try that test now. @ichalov not sure if you wanted from access log or error log. restarted all services and cleared caches as suggested. Homepage loaded much faster then 55 seconds. More like 3~4 seconds. ran a tail on the error log for the site when accessing the homepage:

2015/07/02 18:41:43 [error] 8297#0: *6 connect() failed (111: Connection refused) while connecting to upstream, client: 184.xx.xx.xx, server: www.mysite.com, request: "GET /index.php/admin/cache/flushSystem/key/392781fc35ad4e54fd349d6ee149f416/?SID=ia2im0dr4d6sje6jeo312kmo54 HTTP/1.0", upstream: "fastcgi://127.0.0.1:8000", host: "www.mysite.com", referrer: "https://www.mysite.com/index.php/admin/cache/index/key/b8472e39d72ad1801f693819db9622bb/?SID=ia2im0dr4d6sje6jeo312kmo54"
2015/07/02 18:41:56 [error] 8297#0: *10 connect() failed (111: Connection refused) while connecting to upstream, client: 184.xx.xx.xx, server: www.mysite.com, request: "GET /index.php/admin/cache/index/key/b8472e39d72ad1801f693819db9622bb/ HTTP/1.0", upstream: "fastcgi://127.0.0.1:8000", host: "www.mysite.com", referrer: "https://www.mysite.com/index.php/admin/cache/index/key/b8472e39d72ad1801f693819db9622bb/?SID=ia2im0dr4d6sje6jeo312kmo54"
2015/07/02 18:42:11 [error] 8297#0: *14 connect() failed (111: Connection refused) while connecting to upstream, client: 184.xx.xx.xx, server: www.mysite.com, request: "POST /index.php/admin/cache/massRefresh/key/1e4349c4e6861c133727ba321bd54ab8/ HTTP/1.0", upstream: "fastcgi://127.0.0.1:8000", host: "www.mysite.com", referrer: "https://www.mysite.com/index.php/admin/cache/index/key/b8472e39d72ad1801f693819db9622bb/"
2015/07/02 18:42:30 [error] 8297#0: *21 connect() failed (111: Connection refused) while connecting to upstream, client: 184.xx.xx.xx, server: www.mysite.com, request: "GET /index.php/admin/compiler_process/index/key/82124db667dc0ae3c173eba689891753/ HTTP/1.0", upstream: "fastcgi://127.0.0.1:8000", host: "www.mysite.com", referrer: "https://www.mysite.com/index.php/admin/cache/index/key/b8472e39d72ad1801f693819db9622bb/"
2015/07/02 18:42:45 [error] 8297#0: *26 connect() failed (111: Connection refused) while connecting to upstream, client: 184.xx.xx.xx, server: www.mysite.com, request: "GET / HTTP/1.1", upstream: "fastcgi://127.0.0.1:8000", host: "www.mysite.com"
aricwatson commented 9 years ago

I suspect PR https://github.com/nexcess/magento-turpentine/pull/844 will resolve these problems - let us know!

dende commented 9 years ago

Hi, I just wanted to add, that we encounter similair behaviour to what @keebler-elf described. Our setup looks like this:

For HTTPS:
[Amazon load balancer as a SSL terminator on Port 443] -> [varnish on port 1443 -> nginx as backend on port 2443 -> hhvm/php-fpm on a unix socket]

For HTTP:
[Amazon load balancer on port 80] -> [nginx on port 80] -> Redirects to HTTPS ( so basically back to the load balancer )

To be able to use full site SSL I implemented the patch proposed by ichalov@ef3738f

When we were using hhvm everything worked fine, and the site was delivered really fast, both from varnish/nginx and from varnish/cache. Because an extension we rely on is not compatible with hhvm we switched to php-fpm. With php-fpm varnish is really slow when delivering from the backend and not from the cache. For a page to get served through varnish/nginx/php-fpm it takes around 50 seconds. Once the page can be delivered from varnish's cache, everything is stupid fast again.

Just like @keebler-elf I enabled php-fpm's slow log, and this is what I get:

[22-Jul-2015 15:20:20]  [pool www] pid 28648
script_filename = /home/mage/magento/index.php
[0x00007fbbb7981e90] session_start() /home/mage/magento/app/code/core/Mage/Core/Model/Session/Abstract/Varien.php:133
[0x00007fbbb7981d30] start() /home/mage/magento/app/code/core/Mage/Core/Model/Session/Abstract/Varien.php:222
[0x00007fbbb7981be8] init() /home/mage/magento/app/code/core/Mage/Core/Model/Session/Abstract.php:84
[0x00007fbbb7981af0] init() /home/mage/magento/app/code/local/Mage/Core/Model/Session.php:42
[0x00007fbbb79819c0] __construct() /home/mage/magento/app/code/core/Mage/Core/Model/Config.php:1348
[0x00007fbbb79818a8] getModelInstance() /home/mage/magento/app/Mage.php:463
[0x00007fbbb7981778] getModel() /home/mage/magento/app/Mage.php:477
[0x00007fbbb7981620] getSingleton() /home/mage/magento/app/code/core/Mage/Core/Controller/Varien/Action.php:493
[0x00007fbbb7981508] preDispatch() /home/mage/magento/app/code/core/Mage/Core/Controller/Front/Action.php:69
[0x00007fbbb7981350] preDispatch() /home/mage/magento/app/code/core/Mage/Core/Controller/Varien/Action.php:407
[0x00007fbbb7981140] dispatch() /home/mage/magento/app/code/core/Mage/Core/Controller/Varien/Router/Standard.php:254
[0x00007fbbb7980ff0] match() /home/mage/magento/app/code/core/Mage/Core/Controller/Varien/Front.php:172
[0x00007fbbb7980e98] dispatch() /home/mage/magento/app/code/core/Mage/Core/Model/App.php:365
[0x00007fbbb7980cf8] run() /home/mage/magento/app/Mage.php:684
[0x00007fbbb7980b98] run() /home/mage/magento/index.php:120

The only difference I can see is, that we're not using RedisCache but Memcached for session storage and therefore the RedisCache related lines are missing from the log.

I did a little research and maybe the PHP locking sessions might be the problem like described here http://stackoverflow.com/a/1914266 ?

As soon as we disable varnish and let nginx listen on port 1443 there there are no session related problems any more.

miguelbalparda commented 9 years ago

844 is merged, please take a look and let us know if it fixed this issue.

keebler-elf commented 9 years ago

@miguelbalparda still having issues here