Open sumitchawla opened 10 years ago
This is a good bug. I'm not sure if node natively resumes streams after a connection goes down. I'll do some investigation and update the issue.
Can you elaborate on one part of this for me: Are you asking about resuming an in-progress upload? Or re-issuing the upload after?
This is about re-issuing the upload after.
So, I've attempted to reproduce this, and I've been unable. I'd love for your help to try and isolate this. The following code creates a pkgcloud client for rackspace, and then attempts to send a 1m local file to the cloud 100 times.
During the test, I disable my local network, and then re-enable it. pkgcloud continued automatically after the network returned.
My test:
var pkgcloud = require('pkgcloud'),
  logging = require('../../../common/logging'),
  config = require('../../../common/config'),
  fs = require('fs'),
  async = require('async'),
  _ = require('underscore');
var log = logging.getLogger(process.env.PKGCLOUD_LOG_LEVEL || 'debug');
var client = pkgcloud.providers.rackspace.storage.createClient(config.getConfig('rackspace', 3));
client.on('log::*', logging.logFunction);
// Upload the same local file (argv[4]) 100 times in series to container argv[2].
var items = [];
for (var i = 0; i < 100; i++) {
  items.push(i);
}
async.forEachSeries(items, function(item, next) {
  var finished = false;
  // Call next() exactly once no matter which event (callback/error/end) fires first.
  function finish(err) {
    if (finished) { return; }
    finished = true;
    next(err);
  }
  var source = fs.createReadStream(process.argv[4]);
  var dest = client.upload({
    container: process.argv[2],
    remote: process.argv[3]
  }, function (err) {
    if (err) {
      log.error(err);
      // BUG FIX: a failed upload previously never advanced the series —
      // next() was only wired to the 'end' event, so the loop hung forever.
      finish(err);
    }
  });
  // BUG FIX: without 'error' handlers a stream failure (e.g. network drop)
  // left the series stalled with next() uncalled.
  source.on('error', finish);
  dest.on('error', finish);
  dest.on('end', function () {
    log.info('Uploaded');
    finish();
  });
  source.pipe(dest);
}, function(err) {
  // BUG FIX: the final callback used to swallow errors silently.
  if (err) {
    log.error(err);
  }
});
Hi Ken
Sorry for the late reply. My code is not much different; the only thing I tried differently was throttling the number of connections. The problem is not easy to reproduce, but I am able to reproduce it about 1 in 15 times. I run the program and pull out my Ethernet cable at random intervals.
Please let me know if I can provide any additional information, or I can do some extended logging and try to reproduce it.
// --- dependencies --------------------------------------------------------
var pkgcloud = require('pkgcloud');
var fs = require('fs');
var async = require('async');
var https = require("https");
var http = require("http");
var StringDecoder = require('string_decoder').StringDecoder;
var decoder = new StringDecoder('utf8');
// Throttle the shared HTTPS agent to a single socket to reproduce the
// problem more reliably; log the limit before and after the change.
console.log(https.globalAgent.maxSockets);
https.globalAgent.maxSockets = 1;
console.log(https.globalAgent.maxSockets);
// Rackspace Cloud Files client under test (credentials redacted).
var storageClient = pkgcloud.storage.createClient({
  provider : 'rackspace',
  username : 'USER_NAME',
  apiKey : 'API_KEY',
  region : 'dfw'
});
// Forward every pkgcloud log event to the console, tagged with its level.
storageClient.on('log::*', function (message, object) {
  var level = this.event.split('::')[1];
  if (object) {
    console.log(level, message, object);
  } else {
    console.log(level, message);
  }
});
var containerName = 'sumit_test';
var time_now = (new Date()).toISOString();
// Ensures the target container exists, creating it (with metadata and log
// retention) when the initial lookup fails, then re-fetching it so the
// caller always receives a looked-up container. Calls done(err) or
// done(null, container).
function ensureContainer(done) {
  if (!storageClient) {
    done(new Error("Invalid Storage Client for Cloud Upload"));
    return;
  }
  // FIX: helper renamed from the misspelled "ensuerContainerInternal"; its
  // second parameter renamed so it no longer shadows the outer `done`.
  function ensureContainerInternal(settings, callback) {
    console.log("***** ensure client ************");
    storageClient.getContainer(settings.name, function (err, container) {
      if (err) {
        // NOTE(review): any getContainer error (not only a 404) triggers a
        // create attempt here — confirm that is the intended behavior.
        console.log(" ************ ensure client - Trying to create container ************");
        storageClient.createContainer(settings, function(createErr, created) {
          if (createErr) {
            console.log("ensure client - Create container - error");
            callback(createErr);
          } else {
            console.log("ensure client - Create container - success");
            // Re-run the lookup after a successful create.
            ensureContainerInternal(settings, callback);
          }
        });
      } else {
        console.log("ensure client - container exists");
        callback(null, container);
      }
    });
  }
  ensureContainerInternal({
    name: containerName,
    metadata: {
      "Timestamp": (new Date()).toISOString(),
      "Access-Log-Delivery": true
    },
    logRetention: 'true'
  }, done);
}
// Streams the local file at filePath to `containerName` as remoteFileName.
// `done(err, result)` is guaranteed to be invoked at most once.
function uploadFile(filePath, fileName, remoteFileName, done){
  var finished = false;
  function finish(err, result) {
    if (finished) { return; }
    finished = true;
    done(err, result);
  }
  var ustream = storageClient.upload({
    container: containerName,
    headers: {'Access-Control-Allow-Origin': '*', 'content-type': 'application/octet-stream',
        "Content-disposition": ('attachment; filename=' + fileName)},
    remote: remoteFileName
  }, function(err, result) {
    finish(err, result);
  });
  ustream.on('error', function(err){
    console.log("********** ustream error **********", err);
    // BUG FIX: previously this called done() a second time when the upload
    // callback had already reported the same error.
    finish(err);
  });
  var rstream = fs.createReadStream(filePath);
  // BUG FIX: a read error (e.g. missing local file) used to leave `done`
  // uncalled forever, stalling the async.parallel batch.
  rstream.on('error', finish);
  rstream.pipe(ustream);
}
// Returns an async.parallel task that ensures the container exists and then
// uploads 'example.txt' under a timestamped remote name for this index.
function getNewCallback(index) {
  return function(callback) {
    ensureContainer(function(err, container){
      if (err) {
        console.log("error in create container", index , err);
        callback(err);
        return;
      }
      var fileName = 'example.txt';
      uploadFile(fileName, fileName, time_now + "_" + index + "_" + fileName ,function(er, result){
        if(er) {
          console.log("error in upload file", index, er);
          // BUG FIX: this used to call callback(err) — the (falsy) error from
          // ensureContainer — so upload failures were reported as successes.
          callback(er);
          return;
        }
        console.log("File upload result", index, result);
        callback(null, true);
      });
    });
  };
}
var iterations = 100;
// Build one task per upload and run them all concurrently.
var tasks = [];
for (var i = 0; i < iterations; i++) {
  tasks.push(getNewCallback(i));
}
async.parallel(tasks, function(err, result) {
  console.log("Upload Complete", err);
});
I have a similar problem. I have a piece of code that uploads thousands of small files keeping 30 concurrent uploads all the time. While the upload is in progress I swap my laptop's WLAN on and off repeatedly. This frequently results in 1 - N upload requests that never finish. The callback is never called. I can repeat this easily. I can try to write a small standalone program that reproduces this if it helps?
@koskimas that would be a huge help!
Was just able to reproduce with this code. The file to send is given as the first argument and the rackspace container name as second. I ran this on OSX over a good quality 3G connection. The file I used was a small 40kb jpeg.
While this was running I swapped wlan on and off many times. Really fast for a while and then slower and I even left the WLAN off for a minute or two.
// CLI arguments: node repro.js <file-to-send> <container-name>
var filePath = process.argv[2];
var container = process.argv[3];
var pkgCloud = require('pkgcloud');
var Promise = require('bluebird');
var path = require('path');
var _ = require('lodash');
// Rackspace client (credentials redacted).
var client = pkgCloud.storage.createClient({
  provider: 'rackspace',
  username: 'bar',
  apiKey: 'baz',
  region: 'DFW'
});
// Number of uploads currently in flight (for logging only).
var requestsRunning = 0;
// Uploads the local file under `fileName`, resolving once the request
// settles. Errors are logged but still resolve (with undefined) so the
// surrounding batch keeps running.
function upload(fileName) {
  return new Promise(function(resolve) {
    requestsRunning++;
    console.log('sending file', fileName, 'requests running:', requestsRunning);
    var options = {container:container, local:filePath, remote:fileName};
    client.upload(options, function(err, result) {
      requestsRunning--;
      if (err) {
        console.log('sending file', fileName, 'finished with error', err);
      } else {
        console.log('successfully finished sending file', fileName, 'requests running:', requestsRunning);
      }
      resolve(result);
    });
  });
}
// Fire 999 uploads of the same file, keeping at most 30 requests in flight.
// Bluebird's Promise.map(iterable, fn, opts) is equivalent to
// Promise.resolve(iterable).map(fn, opts).
Promise.map(_.range(1, 1000), function(index) {
  return upload(index + path.extname(filePath));
}, {concurrency: 30});
So basically, you just kept sending the same file over and over. ok, I'll try and reproduce.
Yeah this sends the same file, but the original code obviously sends different files. All about the same size.
The sockets just seem to hang for some reason. Setting a timeout on the socket solves this in my case. Is there any way of setting the timeout value for the request? Is there a way to pass options to the request object?
Maybe? I haven't looked into this yet, as I'm pretty involved in a major aws refactor.
What I meant was that is there currently any way to pass options to the request object? Something like this:
// (API proposal, not implemented): per-call request options — e.g. a socket
// timeout in milliseconds — passed through to the underlying request object.
client.upload({
container: self.container,
local: localFilePath,
remote: fileName,
requestOptions: {
timeout: 60000
}
}, function(err, result) {
});
or something like this:
// (Alternative API proposal): set default request options once on the client.
client.setRequestOptions({
timeout: 60000
});
@koskimas nope; open an issue?
Hi, I would like to know whether there has been any progress on resolving this issue. Enabling a timeout setting for the requests would be highly appreciated.
Hi, I have a similar problem. To reproduce it, pipe the uploading file through a Transform stream:
const { Transform } = require('stream');

// Pass-through Transform that counts bytes; once the running total is between
// 1,000,000 and 1,100,000 bytes it delays each chunk by 120 s to simulate a
// stalled connection.
class TransformStream extends Transform {
  constructor() {
    super();
    this.size = 0; // total bytes seen so far
  }

  _transform(chunk, encoding, cb) {
    this.size += chunk.length;
    if (this.size > 1000000 && this.size < 1100000) {
      console.log('waiting');
      setTimeout(() => {
        console.log('continue');
        cb(null, chunk);
      }, 120000);
    } else {
      cb(null, chunk);
    }
  }
}
Error: rackspace Error (404): Item not found — is there any way to avoid this behavior? Maybe a request timeout?
Hi, as it has been almost a year, is there a way to relaunch an upload after an internet outage? I am currently facing this problem and searching for a good way to handle it.
We hit this issue in Rackspace where Ethernet Interface went down, and came back, but cloud uploads completely stalled. The app server process had to be restarted in order to resume cloud uploads. I was able to reproduce this scenario in my local environment using the following scenario:
I am using linux mint here.
Any help would be appreciated.