Skip to content

Issue when writing EOL in stream mode #47

@loretoparisi

Description

@loretoparisi

I have an issue when writing a file in stream mode. The EOL seems not to be respected when writing a new line.

This is the sync version, that works as expected.

var i = fs.openSync(self._options.trainFile, 'r');
        var o;

        // Turns one raw input line into a labeled training line: lower-cases
        // it, prefixes it with '__label__' and pads/strips punctuation.
        var toLabeledLine = function(text) {
            return text.toLowerCase()
                .replace(/^/gm, '__label__')
                .replace(/'/g, " ' ")
                .replace(/"/g, '')
                .replace(/\./g, ' \. ')
                .replace(/,/g, ' \, ')
                .replace(/\(/g, ' ( ')
                .replace(/\)/g, ' ) ')
                .replace(/!/g, ' ! ')
                // NOTE(review): '?' is rewritten to '!' here; looks like it
                // may have been meant to be ' ? ' - confirm before changing.
                .replace(/\?/g, ' ! ')
                .replace(/;/g, ' ')
                .replace(/:/g, ' ');
        };

        // Converts every non-empty line in `lines` and appends the result,
        // '\n'-terminated, to the output file descriptor `o`.
        var writeLines = function(lines) {
            var out = '';
            lines.forEach(function(text) {
                if(!text) return;
                out += toLabeledLine(text) + '\n';
            });
            if(!out) return;
            var bout = Buffer.from(out, 'utf-8');
            fs.writeSync(o, bout, 0, bout.length);
        };

        try {
            o = fs.openSync(tmpFilePath, 'w');

            // StringDecoder keeps the bytes of a multi-byte UTF-8 character
            // that straddles two chunks instead of decoding them separately.
            var StringDecoder = require('string_decoder').StringDecoder;
            var decoder = new StringDecoder('utf8');
            var buf = Buffer.alloc(1024 * 1024);
            var len;
            var prev = '';

            while((len = fs.readSync(i, buf, 0, buf.length)) > 0) {
                var a = (prev + decoder.write(buf.slice(0, len))).split('\n');
                // The last element is either '' (chunk ended on '\n') or an
                // incomplete line; carry it over to the next chunk in both cases.
                prev = a.pop();
                writeLines(a);
            }

            // Flush what is left after EOF: a final line without a trailing
            // newline would otherwise be dropped.
            prev += decoder.end();
            writeLines([prev]);
        } finally {
            // Release both descriptors even if reading or writing threw.
            if(o !== undefined) fs.closeSync(o);
            fs.closeSync(i);
        }

While this is the stream-mode version, using byline:

var os = require("os");
        var Transform = require('stream').Transform;
        var writeStream = fs.createWriteStream(tmpFilePath, {flags: 'w', encoding: 'utf-8'});
        var stream = byline(fs.createReadStream(self._options.trainFile, { flags: 'r', encoding: 'utf8'}));

        // NOTE(review): no 'error' handlers are attached to either stream;
        // a reject callback is not visible in this snippet - wire one up if
        // the enclosing promise provides it.

        stream.on('end', function() {
            // All input lines have been read, but writes may still be
            // buffered. End the writer and resolve only once it has flushed,
            // so the caller never sees a partially written file.
            writeStream.end(function() {
                resolve({
                    trainFile: tmpFilePath
                });
            });
        });
        stream.on('data', function(text) { /// read line by line
            text=text.toLowerCase()
            .replace(/^/gm, '__label__')
            .replace(/'/g, " ' ")
            .replace(/"/g, '')
            .replace(/\./g, ' \. ')
            .replace(/,/g, ' \, ')
            .replace(/\(/g, ' ( ')
            .replace(/\)/g, ' ) ')
            .replace(/!/g, ' ! ')
            // NOTE(review): '?' is rewritten to '!' here; looks like it may
            // have been meant to be ' ? ' - confirm before changing.
            .replace(/\?/g, ' ! ')
            .replace(/;/g, ' ')
            .replace(/:/g, ' ');
            // write() returns false when the writer's internal buffer is
            // full; pause reading until it drains to bound memory use.
            if(!writeStream.write(text + os.EOL)) {
                stream.pause();
                writeStream.once('drain', function() {
                    stream.resume();
                });
            }
        });

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions