errorception / redis-lock

Implements a locking primitive using redis. An implementation of the algorithm described at https://redis.io/commands/setnx
211 stars 48 forks source link

Need better diagnostics/warnings/reporting when locks time out #6

Open nickminutello opened 11 years ago

nickminutello commented 11 years ago

I have found that when trying to diagnose issues with lock timeouts in a multi-process/multi-host environment, it was kind of essential to know "who" took the lock that timed out (that way you can then know where to start looking as to why that lock timed out).

To do this I made the lock value slightly more sophisticated than a timeout value - I added the host/pid.

I cant do a pull-request - but I can paste the code here: (this has a configurable delay patch mixed in too)

var hostName;
try {
    var OS = require('os');
    hostName = OS.hostname();
} catch (e) {
    hostName = process.env.HOST || process.env.HOSTNAME;
}
hostName = hostName ? hostName.split('.')[0] : 'unknown-host';

function createLockValue(lockTimeoutValue) {
    var callerId = hostName + ":" + process.pid;
    return callerId + "|" + lockTimeoutValue;
}

function getTimeoutValue(lockValue) {
    if (lockValue) {
        var parts = lockValue.split("|");
        if (parts.length ===2) {
            return parseFloat(parts[1]);
        }
    }
    return 0;
}

function getCallerId(lockValue) {
    if (lockValue) {
        var parts = lockValue.split("|");
        if (parts.length ===2) {
            return parts[0];
        }
    }
    return "unknown-caller-id";
}

function acquireLock(client, lockName, timeout, retryDelay, onLockAquired) {
    function retry() {
        acquireLock(client, lockName, timeout, retryDelay, onLockAquired);
    }

    var lockTimeoutValue = (Date.now() + timeout + 1);

    client.setnx(lockName, createLockValue(lockTimeoutValue), function(err, result) {
        if(err) {
            return setTimeout(retry, retryDelay);
        }
        if(result === 0) {
            client.get(lockName, function(err, lockValue) {
                if(err) {
                    return setTimeout(retry, retryDelay);
                }

                var timeStamp = getTimeoutValue(lockValue);

                var now = Date.now();
                if(timeStamp > now) {
                    setTimeout(retry, retryDelay);
                } else {
                    lockTimeoutValue = (Date.now() + timeout + 1);
                    client.getset(lockName, createLockValue(lockTimeoutValue), function(err, result) {
                        if(err) {
                            return setTimeout(retry, retryDelay);
                        }

                        if(result === lockValue) {
                            if (lockValue) {
                                console.log("[PID %s] [%s] Warning : Lock '%s' timed out by %dms - stealing lock from %s ...", process.pid, new Date().toISOString(), lockName, (now - timeStamp), getCallerId(lockValue));
                            }
                            onLockAquired(lockTimeoutValue);
                        } else {
                            setTimeout(retry, retryDelay);
                        }
                    });
                }
            });
        } else {
            onLockAquired(lockTimeoutValue);
        }
    });
}

module.exports = function(client, retryDelay) {
    if(!(client && client.setnx)) {
        throw new Error("You must specify a client instance of http://github.com/mranney/node_redis");
    }

    retryDelay = retryDelay || 50;

    return function(lockName, timeout, taskToPerform) {
        if(!lockName) {
            throw new Error("You must specify a lock string. It is on the basis on this the lock is acquired.");
        }

        if(!taskToPerform) {
            taskToPerform = timeout;
            timeout = 5000;
        }

        lockName = "lock." + lockName;

        acquireLock(client, lockName, timeout, retryDelay, function(lockTimeoutValue) {
            var start  = Date.now();
            taskToPerform(function(done) {
                if(lockTimeoutValue > Date.now()) {
                    client.del(lockName, done);
                } else {
                    console.log("[PID %s] [%s] Warning : Lock '%s' timed out before lock could be released. Locked operation took %dms", process.pid, new Date().toISOString(),lockName, Date.now() - start);
                    done();
                }
            });
        });
    };
};
rakeshpai commented 11 years ago

@nickminutello Thanks for this.

Couple of questions.

Rest looks fine, though I shall break these up into multiple commits. Also, tests broken. Will fix. Thanks again.

nickminutello commented 11 years ago
nickminutello commented 11 years ago

Also. The unknown-hostname and unknown-caller-id also handle the case when the lock value happens to be undefined (e.g. the lock has been deleted after the call to .setnx has failed)

nickminutello commented 11 years ago

Make sense?

toblerpwn commented 11 years ago

just to add: had a long, long, long troubleshooting session where it ended up that this was the issue (timeout && my callbacks and promise chains not completing). very hard to track down.

i'd suggest even creating an error condition/handler that is triggered when timeout occurs and may optionally be handled in its own function/etc. perhaps like:

lock(lockName, timeout, callback(), timeoutHandler())

to summarize: i wish there was a way to receive a notification & react appropriately to a timeout condition

boxbag commented 9 years ago

Any update on this issue? I'm evaluating this library and would love a way to be notified when the callback times out for debugging purposes

LoicMahieu commented 9 years ago

It is a really old issue, created long before I join the project. I don't think anybody is working on this anymore. If you think that is interesting and valuable, pull request or discussion are welcome :rocket: