-
Notifications
You must be signed in to change notification settings - Fork 48
Open
Description
I have an issue when writing a file in stream mode: the EOL does not seem to be respected when writing a new line.
This is the sync version, which works as expected:
// Synchronous version: read the training file through a fixed 1 MiB buffer,
// normalise every non-empty line and write the result to the temporary file.
var StringDecoder = require('string_decoder').StringDecoder;

var i = fs.openSync(self._options.trainFile, 'r');
var o = fs.openSync(tmpFilePath, 'w');
// Buffer.alloc replaces the deprecated `new Buffer(size)` constructor.
var buf = Buffer.alloc(1024 * 1024);
// The decoder holds back an incomplete multi-byte UTF-8 sequence at the end of
// a chunk and completes it with the next chunk, instead of decoding each half
// to a replacement character.
var decoder = new StringDecoder('utf8');
var prev = ''; // unterminated tail of the previous chunk
var len;

// Normalises a single line: lower-cases it, prefixes it with '__label__',
// removes double quotes, pads ' . , ( ) ! with spaces and turns ';' and ':'
// into a space.
var labelLine = function(text) {
  return text.toLowerCase()
    .replace(/^/gm, '__label__')
    .replace(/'/g, " ' ")
    .replace(/"/g, '')
    .replace(/\./g, ' . ')
    .replace(/,/g, ' , ')
    .replace(/\(/g, ' ( ')
    .replace(/\)/g, ' ) ')
    .replace(/!/g, ' ! ')
    // NOTE(review): '?' is mapped to ' ! ' rather than ' ? ' - looks like a
    // copy-paste slip; behaviour kept as-is, confirm the intent.
    .replace(/\?/g, ' ! ')
    .replace(/;/g, ' ')
    .replace(/:/g, ' ');
};

// Normalises the given lines (skipping empty ones) and appends them to the
// output file, each terminated by '\n'.
var writeLines = function(lines) {
  var out = '';
  lines.forEach(function(text) {
    if (!text) return;
    out += labelLine(text) + '\n';
  });
  if (!out) return;
  var bout = Buffer.from(out, 'utf-8');
  fs.writeSync(o, bout, 0, bout.length);
};

while ((len = fs.readSync(i, buf, 0, buf.length)) > 0) {
  // Splitting on /\r?\n/ also strips the '\r' of CRLF input; a '\r' left in
  // the line would make /^/gm insert a second '__label__' after it.
  var a = (prev + decoder.write(buf.slice(0, len))).split(/\r?\n/);
  // The last element is either '' (chunk ended on a newline) or a partial
  // line; always carry it over so it is joined with the next chunk.
  prev = a.pop();
  writeLines(a);
}
// Flush whatever is left: a final line without a trailing newline would
// otherwise be dropped when the file size is a multiple of the buffer size.
prev += decoder.end();
if (prev) {
  writeLines([prev]);
}
fs.closeSync(o);
fs.closeSync(i);
While this is the stream mode with byline:
var os= require("os");
var Transform = require('stream').Transform
var writeStream = fs.createWriteStream(tmpFilePath, {flags: 'w', encoding: 'utf-8'});
var stream = byline(fs.createReadStream(self._options.trainFile, { flags: 'r', encoding: 'utf8'}));
//stream.pipe(writeStream);
stream.on('end', function() {
return resolve({
trainFile: tmpFilePath
});
});
stream.on('data', function(text) { /// read line by line
text=text.toLowerCase()
.replace(/^/gm, '__label__')
.replace(/'/g, " ' ")
.replace(/"/g, '')
.replace(/\./g, ' \. ')
.replace(/,/g, ' \, ')
.replace(/\(/g, ' ( ')
.replace(/\)/g, ' ) ')
.replace(/!/g, ' ! ')
.replace(/\?/g, ' ! ')
.replace(/;/g, ' ')
.replace(/:/g, ' ');
writeStream.write(text + os.EOL);
});Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels