Closed: chacliff closed this issue 8 years ago.
Hi, there are 2 issues spotted.
~Keyang
On 8 March 2016 at 02:07, chacliff notifications@github.com wrote:
I created a transform to remove some data from my big files in a stream.
```js
var Transform = require('stream').Transform;
var util = require('util');

// Transform stream to remove non-printable characters
function RemoveNonPrintable(args) {
  if (!(this instanceof RemoveNonPrintable)) {
    return new RemoveNonPrintable(args);
  }
  Transform.call(this, args);
  this._buff = '';
}
util.inherits(RemoveNonPrintable, Transform);

RemoveNonPrintable.prototype._transform = function(chunk, encoding, done) {
  // collect string into buffer
  this._buff += chunk.toString();
  // push to the stream with non-printable characters stripped
  var tempData = this._buff.replace(/[^\x20-\x7E]+/g, "");
  this.push(tempData);
  // clear string buffer
  this._buff = null;
  done();
};
```
Then I thought that since you are using streams I could pipe to your module like so:
```js
readStream.pipe(removed).pipe(csvConverter).pipe(writeStream);
```
But there is no output from your module when I pipe the data to it. What might be the problem? I thought this was the right approach.
I also tried converting the data back to a Buffer before pushing it back to the stream. That didn't seem to work.
OK, I'll see about not removing the line feeds and carriage returns. Besides that, is there any way to make the parser not convert some of the columns from strings to integers? I'm having a problem where I need them to remain strings.
Yup, just set `checkType: false`, which will keep everything as a string.
~Keyang
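For reference, a minimal sketch of that option applied to the converter (the require path assumes csvtojson's `Converter` export of that era; the other option names match the ones used later in this thread):

```js
// Keep every parsed value as a string instead of auto-casting to numbers
var Converter = require("csvtojson").Converter;
var csvConverter = new Converter({ checkType: false });
```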
OK, yeah, that seems to have fixed it. Thanks.
Thank you for your help. In case you wanted to see the final solution to this:
```js
var tempData = this._buff.replace(/[^\x20-\x7E\n]+/g, "");
this.push(tempData);
```
That's for the transform; I just had to make sure it didn't delete newlines.
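For completeness, a sketch of the full corrected `_transform` with that regex applied (one assumption beyond the original post: `_buff` is reset to an empty string rather than `null`, so later chunks are not prefixed with the text "null"):

```js
RemoveNonPrintable.prototype._transform = function(chunk, encoding, done) {
  // collect string into buffer
  this._buff += chunk.toString();
  // strip non-printable characters but keep newlines so the CSV parser still sees row boundaries
  var tempData = this._buff.replace(/[^\x20-\x7E\n]+/g, "");
  this.push(tempData);
  // clear string buffer ('' rather than null, so += doesn't concatenate "null")
  this._buff = '';
  done();
};
```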
```js
var prom = [];
for (var index = 0; index < read.length; index++) {
  prom.push(changeFile(read[index], write[index]));
}
Promise.all(prom).then(function() {
  console.log("done");
});

function changeFile(readfile, writefile) {
  return new Promise(function(resolve, reject) {
    var removed = new RemoveNonPrintable();
    var csvConverter = new Converter({
      constructResult: false,
      ignoreEmpty: true,
      checkType: false
    });
    var readStream = require("fs").createReadStream(readfile);
    var writeStream = require("fs").createWriteStream(writefile);
    readStream.pipe(removed).pipe(csvConverter).pipe(writeStream).on('finish', function() {
      console.log("done writing " + writefile);
      resolve();
    });
  });
}
```
This is how I'm streaming multiple files in the same script. The next jump-off point for me will be to read each JSON file into the db, which I could do with command-line mongoimports, or I could use Waterline to do the import manually.
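For the mongoimport route, a hypothetical sketch in the same Promise style (the database and collection names are placeholders; add `--jsonArray` to the arguments only if the written file turns out to be a single JSON array rather than one JSON document per line):

```js
// Hypothetical follow-up: shell out to mongoimport for each written JSON file
var spawn = require('child_process').spawn;

function importFile(writefile) {
  return new Promise(function(resolve, reject) {
    var child = spawn('mongoimport', [
      '--db', 'mydb',               // placeholder database name
      '--collection', 'mycollection', // placeholder collection name
      '--file', writefile
    ]);
    child.on('close', function(code) {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error('mongoimport exited with code ' + code));
      }
    });
  });
}
```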
But thanks again, you were a huge help. -Chase
Glad to be helpful.
~Keyang