From 14e7c670e3aaf7b5b70cf20983961cd3e61a9b79 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Tue, 2 Apr 2024 10:07:38 -0500 Subject: [PATCH 01/18] Update snapshot --- typeset/tests/__snapshots__/typeset.test.js.snap | 10 +++++----- typeset/tests/seed/test-code.output.xhtml | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/typeset/tests/__snapshots__/typeset.test.js.snap b/typeset/tests/__snapshots__/typeset.test.js.snap index 90df575..0245b78 100644 --- a/typeset/tests/__snapshots__/typeset.test.js.snap +++ b/typeset/tests/__snapshots__/typeset.test.js.snap @@ -4,17 +4,17 @@ exports[`Convert inline code tags and block pre tags 1`] = ` " -
my_integer = 5
my_floating_point = 26.2
my_Boolean = True
my_string = 'characters'
- -

-
hello = 5
-

+
1
my_integer = 5
2
my_floating_point = 26.2
3
my_Boolean = True
4
my_string = 'characters'
   print('Hi')
   a = b + c
 
+

+
1
hello = 5
+

+

hello = 5

diff --git a/typeset/tests/seed/test-code.output.xhtml b/typeset/tests/seed/test-code.output.xhtml index 5447469..2676acf 100644 --- a/typeset/tests/seed/test-code.output.xhtml +++ b/typeset/tests/seed/test-code.output.xhtml @@ -1,17 +1,17 @@ -
my_integer = 5
my_floating_point = 26.2
my_Boolean = True
my_string = 'characters'
- -

-
hello = 5
-

+
1
my_integer = 5
2
my_floating_point = 26.2
3
my_Boolean = True
4
my_string = 'characters'
   print('Hi')
   a = b + c
 
+

+
1
hello = 5
+

+

hello = 5

From 66fc70a6b4696f02834f13054641d8de67a7a860 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Tue, 2 Apr 2024 17:18:27 -0500 Subject: [PATCH 02/18] Mathify JSON Example usage: find . -name content.json -exec grep -lF data-math {} \+ | node typeset/start.js -i - -f mathml --- typeset/converter.js | 65 ++-------- typeset/helpers.js | 117 ++++++++++++++++++ typeset/start.js | 98 ++++++++++++--- .../tests/__snapshots__/typeset.test.js.snap | 9 ++ typeset/tests/typeset.test.js | 51 +++++--- 5 files changed, 257 insertions(+), 83 deletions(-) create mode 100644 typeset/helpers.js diff --git a/typeset/converter.js b/typeset/converter.js index 73b9fab..8743b56 100644 --- a/typeset/converter.js +++ b/typeset/converter.js @@ -1,14 +1,12 @@ -const path = require('path') -const fileExists = require('file-exists') -const { DOMParser, XMLSerializer } = require('@xmldom/xmldom') +const { XMLSerializer } = require('@xmldom/xmldom') const { scanXML, looseTagEq } = require('./scan-xml') const { PARAS } = require('./paras') const sax = require('sax') -const fs = require('fs') const mjnodeConverter = require('./mjnode') const hljs = require('highlight.js') const hljsLineNumbers = require('./hljs-line-numbers') +const { parseXML } = require('./helpers') // Status codes const STATUS_CODE = { @@ -16,54 +14,17 @@ const STATUS_CODE = { ERROR: 111 } -class ParseError extends Error { } - -function parseXML (xmlString) { - const locator = { lineNumber: 0, columnNumber: 0 } - const cb = () => { - const pos = { - line: locator.lineNumber - 1, - character: locator.columnNumber - 1 - } - throw new ParseError(`ParseError: ${JSON.stringify(pos)}`) - } - const p = new DOMParser({ - locator, - errorHandler: { - warning: console.warn, - error: cb, - fatalError: cb - } - }) - const doc = p.parseFromString(xmlString) - return doc -} - -const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, outputFormat, batchSize, highlight) => { +const createMapOfMathMLElements = async (log, getInputStream, cssPath, getOutputStream, outputFormat, batchSize, highlight) => { const timeOfStart = new Date().getTime() - // Check that the XHTML and CSS files exist - if (!fileExists.sync(inputPath)) { - log.error(`Input XHTML file not found: "${inputPath}"`) - return STATUS_CODE.ERROR - } - if (cssPath && !fileExists.sync(cssPath)) { - log.error(`Input CSS file not found: "${cssPath}"`) - return STATUS_CODE.ERROR - } - const parser = sax.parser(true) - const output = path.resolve(outputPath) const mathEntries = [] const codeEntries = [] // Keep an array of all the replacements in the order they appeared in within the file const sortedReplacements = [] let head - log.info('Opening XHTML file (may take a few minutes)') - log.debug(`Opening "${inputPath}"`) - const inputContent = fs.createReadStream(inputPath).setEncoding('utf8') - log.debug(`Opened "${inputPath}"`) + const inputContent = getInputStream().setEncoding('utf8') const matchers = [ { attr: 'data-math' }, @@ -79,12 +40,12 @@ const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, ou if (highlight) { log.debug('Adding matchers for code highlighting...') - const tags = ['pre', 'code']; - const attributes = ['data-lang', 'lang']; + const tags = ['pre', 'code'] + const attributes = ['data-lang', 'lang'] for (let i = 0; i < tags.length; i++) { for (let j = 0; j < attributes.length; j++) { - matchers.push({ tag: tags[i], attr: attributes[j] }); + matchers.push({ tag: tags[i], attr: attributes[j] }) } } } @@ -109,9 +70,12 @@ const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, ou } head = match sortedReplacements.push(match) - } else { + } else if (looseTagEq(match.node.name, 'pre') || looseTagEq(match.node.name, 'code')) { codeEntries.push(match) sortedReplacements.push(match) + } else { + const attr = JSON.stringify(match.node.attributes) + throw new Error(`Got unexpected node: ${match.node.name} ${attr}`) } } ) @@ -121,7 +85,6 @@ const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, ou inputContent.on('data', chunk => parser.write(chunk)) inputContent.on('end', () => resolve()) }) - log.debug(`Parsed "${inputPath}"`) // Prepare code highlighting await highlightCodeElements(codeEntries) @@ -140,16 +103,14 @@ const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, ou } log.info('Updating content...') await new Promise((resolve, reject) => { - const reader = fs.createReadStream(inputPath).setEncoding('utf8') - const writer = fs.createWriteStream(outputPath) + const reader = getInputStream().setEncoding('utf8') + const writer = getOutputStream() writer.on('error', err => reject(err)) reader.on('error', err => reject(err)) writer.on('finish', () => resolve()) PARAS(sortedReplacements, reader, writer) }) - log.info(`Content saved. Open "${output}" to see converted file.`) - const timeOfEndInSec = (new Date().getTime() - timeOfStart) / 1000 const timeOfEndInMin = timeOfEndInSec > 60 ? Math.round(timeOfEndInSec / 60) : 0 let timeOfEnd = '' diff --git a/typeset/helpers.js b/typeset/helpers.js new file mode 100644 index 0000000..3ee54ed --- /dev/null +++ b/typeset/helpers.js @@ -0,0 +1,117 @@ +const { EventEmitter } = require("events") + +const { DOMParser } = require('@xmldom/xmldom') + +class ParseError extends Error { } + +function parseXML (xmlString, warn = console.warn) { + const locator = { lineNumber: 0, columnNumber: 0 } + const cb = () => { + const pos = { + line: locator.lineNumber - 1, + character: locator.columnNumber - 1 + } + throw new ParseError(`ParseError: ${JSON.stringify(pos)}`) + } + const p = new DOMParser({ + locator, + errorHandler: { + warning: warn, + error: cb, + fatalError: cb + } + }) + const doc = p.parseFromString(xmlString) + return doc +} + +class MemoryStream extends EventEmitter { + setEncoding (encoding) { + if (encoding !== 'utf8' && encoding !== 'utf-8') { + throw new Error('Memory stream only supported utf-8 encoding') + } + return this; + } +} + +class MemoryReadStream extends MemoryStream { + constructor (content, chunkSize = 1<<20) { + super() + this.content = content + this.chunkSize = chunkSize + } + + on (evt, callback) { + super.on(evt, callback) + if (evt === 'data') { + this._start() + } + } + + _start () { + const content = this.content + const chunkSize = this.chunkSize + + process.nextTick(() => { + let offset = 0 + const chunks = Math.ceil(content.length / chunkSize) + for (let i = 0; i < chunks; i++) { + this.emit('data', content.slice(offset, offset + chunkSize)) + offset += chunkSize + } + this.emit('end') + }); + return this + } +} + +class MemoryWriteStream extends MemoryStream { + constructor () { + super() + this.sb = [] + } + + write (chunk) { + this.sb.push(chunk) + } + + end () { + this.emit('finish', {}); + } + + getValue () { + return this.sb.join('') + } +} + +async function walkJSON (content, handler) { + const recurse = async (name, value, parent, prevPath) => { + const fqPath = [...prevPath, name] + const jsType = typeof value + switch (jsType) { + case 'string': + case 'number': + case 'boolean': + await handler({ name, fqPath, value, parent, type: jsType }) + return + case 'object': { + const type = Array.isArray(value) ? 'array' : 'object' + await handler({ name, fqPath, value, parent, type }) + if (value != null) { + for (const [k, v] of Object.entries(value)) { + await recurse(k, v, value, fqPath) + } + } + } + } + } + await recurse('', content, undefined, []) +} + +module.exports = { + MemoryReadStream, + MemoryWriteStream, + walkJSON, + parseXML, +} + diff --git a/typeset/start.js b/typeset/start.js index aa9fe33..25290e1 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -1,20 +1,24 @@ const path = require('path') +const fs = require('fs') const yargs = require('yargs') require('dotenv').config() const bunyan = require('bunyan') const BunyanFormat = require('bunyan-format') const converter = require('./converter') +const { createInterface } = require('readline') +const { walkJSON, MemoryWriteStream, MemoryReadStream, parseXML } = require('./helpers') +const { XMLSerializer } = require('@xmldom/xmldom') const log = bunyan.createLogger({ name: 'node-typeset', - level: process.env.LOG_LEVEL || 'info', + level: process.env.LOG_LEVEL || 'warn', stream: new BunyanFormat({ outputMode: process.env.LOG_FORMAT || 'short' }) }) const argv = yargs - .option('xhtml', { + .option('input', { alias: 'i', - describe: 'Input XHTML File' + describe: 'Input File (xhtml, json, or \'-\' to read file list from stdin)' }) .option('css', { alias: 'c', @@ -37,11 +41,10 @@ const argv = yargs alias: 'b', describe: 'Number of math elements to convert as a batch. Default: 3000' }) - .demandOption(['xhtml', 'output']) + .demandOption(['input']) .help() .argv -const pathToInput = path.resolve(argv.xhtml) const pathToCss = argv.css ? path.resolve(argv.css) : null let outputFormat = 'html' const batchSize = Number(argv.batchSize) || 3000 @@ -61,18 +64,81 @@ if (argv.format) { log.warn('No output format. It will be set to default (html).') } -if (!/\.xhtml$/.test(pathToInput)) { - throw new Error('The input file must end with \'.xhtml\' so Chrome parses it as XML (strict) rather than HTML') -} - -if (!/\.xhtml$/.test(argv.output)) { - throw new Error('The output file should end with \'.xhtml\'') -} -log.debug(`Converting Math Using XHTML="${argv.xhtml}" and CSS="${argv.css}"`) -converter.createMapOfMathMLElements(log, pathToInput.replace(/\\/g, '/'), pathToCss, argv.output, outputFormat, batchSize, argv.highlight) - .then(exitStatus => process.exit(exitStatus)) - .catch(err => { +if (argv.input === '-') { + const readline = createInterface({ input: process.stdin }) + const inner = async () => { + for await (const line of readline) { + if (line.endsWith('.json')) { + const inputJSON = JSON.parse(fs.readFileSync(line, { encoding: 'utf-8' })) + log.info(line) + await walkJSON(inputJSON, async ({ parent, name, value }) => { + if ( + typeof value !== 'string' || + parent == null || + value.indexOf("data-math") === -1 + ) return + const output = new MemoryWriteStream() + const serializer = new XMLSerializer() + const el = parseXML( + `${value}`, + (msg) => log.warn(`${line}:${name} - ${msg.replace(/\n/g, " - ").replace(/\t/g, ' ')}`) + ).documentElement + const src = serializer.serializeToString(el) + try { + await converter.createMapOfMathMLElements( + log, + () => new MemoryReadStream(src), + pathToCss, + () => output, + outputFormat, + batchSize, + argv.highlight + ) + let converted = output.getValue() + // const parsed = parseXML(converted, log).documentElement + // for (const mathElement of Array.from(parsed.getElementsByTagName('math'))) { + // const semantics = parseXML(``).documentElement + // const annotation = parseXML(`${mathElement.getAttribute("alttext")}`) + // for (const node of Array.from(mathElement.childNodes)) { + // semantics.appendChild(node) + // } + // semantics.appendChild(annotation) + // mathElement.appendChild(semantics) + // } + // converted = serializer.serializeToString(parsed); + converted = converted.slice(50, -14) + Reflect.set(parent, name, converted) + } catch (err) { + log.error(`${line}:${name} - ${err}`) + } + }) + fs.writeFileSync(`${line}.mathified`, JSON.stringify(inputJSON, null, 2)) + fs.renameSync(`${line}.mathified`, line) + } + } + } + inner().catch((err) => { log.fatal(err) process.exit(111) }) +} + +// async function runForFile(getInputStream, getOutputStream, highlight) { +// const inputPath = input.replace(/\\/g, '/') +// const getInputStream = () => fs.createReadStream(inputPath) +// const getOutputStream = () => fs.createWriteStream(output) +// if (!/\.(xhtml|json)$/.test(input)) { +// throw new Error('The input file must end with \'.xhtml\' so Chrome parses it as XML (strict) rather than HTML') +// } +// log.debug(`Converting Math Using "${input}"`) +// await converter.createMapOfMathMLElements( +// log, +// getInputStream, +// pathToCss, +// getOutputStream, +// outputFormat, +// batchSize, +// highlight +// ) +// } diff --git a/typeset/tests/__snapshots__/typeset.test.js.snap b/typeset/tests/__snapshots__/typeset.test.js.snap index 0245b78..a7d9ecb 100644 --- a/typeset/tests/__snapshots__/typeset.test.js.snap +++ b/typeset/tests/__snapshots__/typeset.test.js.snap @@ -35,3 +35,12 @@ exports[`Success if converter finished without errors FORMAT SVG. 1`] = `"5005e6 exports[`Success if convertered LaTeX functions with success. 1`] = `"1597bf77d3971a31fcd50daef5654847946ac512a4b63b397fe25c4951bfba75"`; exports[`Success if convertered LaTeX to mathml with success. 1`] = `"191de1c689a0957cd6bd9bf7909f8f17a5c8afe1c4add71aea2017eb7b75f11d"`; + +exports[`something 1`] = ` +"

+ + 5 + 6 + +

" +`; diff --git a/typeset/tests/typeset.test.js b/typeset/tests/typeset.test.js index cad6f2f..95e2783 100644 --- a/typeset/tests/typeset.test.js +++ b/typeset/tests/typeset.test.js @@ -1,3 +1,4 @@ +const { EventEmitter } = require('events') const path = require('path') require('dotenv').config() const fs = require('fs') @@ -6,6 +7,7 @@ const bunyan = require('bunyan') const BunyanFormat = require('bunyan-format') const converter = require('./../converter') const { createHash } = require('crypto') +const { MemoryReadStream, MemoryWriteStream } = require('../helpers') const log = bunyan.createLogger({ name: 'node-typeset', @@ -88,20 +90,26 @@ function getHashFile (fpath) { }) } -test('Fail if user provide wrong path for input file (Math).', async (done) => { - const res = await converter.createMapOfMathMLElements(log, './wrong/path.xhtml', pathToCss, pathToOutput, 'html', 3000, true) - expect(res).toBe(converter.STATUS_CODE.ERROR) - done() -}) +const createMapOfMathMLElements = async (log, inputPath, pathToCss, outputPath, outputFormat, batchSize, highlight) => { + const getInputStream = () => fs.createReadStream(inputPath) + const getOutputStream = () => fs.createWriteStream(outputPath) + return await converter.createMapOfMathMLElements(log, getInputStream, pathToCss, getOutputStream, outputFormat, batchSize, highlight) +} -test('Fail if user provide wrong path for css file.', async (done) => { - const res = await converter.createMapOfMathMLElements(log, pathToInput, './wrong/path.xhtml', pathToOutput, 'html', 3000, true) - expect(res).toBe(converter.STATUS_CODE.ERROR) - done() -}) +// test('Fail if user provide wrong path for input file (Math).', async (done) => { +// const res = await createMapOfMathMLElements(log, './wrong/path.xhtml', pathToCss, pathToOutput, 'html', 3000, true) +// expect(res).toBe(converter.STATUS_CODE.ERROR) +// done() +// }) + +// test('Fail if user provide wrong path for css file.', async (done) => { +// const res = await createMapOfMathMLElements(log, pathToInput, './wrong/path.xhtml', pathToOutput, 'html', 3000, true) +// expect(res).toBe(converter.STATUS_CODE.ERROR) +// done() +// }) test('Success if converter finished without errors FORMAT HTML.', async (done) => { - const res = await converter.createMapOfMathMLElements(log, pathToInput, pathToCss, pathToOutput, 'html', 3000, true) + const res = await createMapOfMathMLElements(log, pathToInput, pathToCss, pathToOutput, 'html', 3000, true) let isOutputFile = false if (fileExists.sync(pathToOutput)) { isOutputFile = true @@ -113,7 +121,7 @@ test('Success if converter finished without errors FORMAT HTML.', async (done) = }, 30000) test('Success if converter finished without errors FORMAT SVG.', async (done) => { - const res = await converter.createMapOfMathMLElements(log, pathToInput, pathToCss, pathToOutputSVG, 'svg', 3000, true) + const res = await createMapOfMathMLElements(log, pathToInput, pathToCss, pathToOutputSVG, 'svg', 3000, true) let isOutputFile = false if (fileExists.sync(pathToOutputSVG)) { isOutputFile = true @@ -125,7 +133,7 @@ test('Success if converter finished without errors FORMAT SVG.', async (done) => }, 30000) test('Success if convertered LaTeX functions with success.', async (done) => { - const res = await converter.createMapOfMathMLElements(log, pathToInputLatex, pathToCss, pathToOutputLatex, 'html', 3000, true) + const res = await createMapOfMathMLElements(log, pathToInputLatex, pathToCss, pathToOutputLatex, 'html', 3000, true) let isOutputFile = false if (fileExists.sync(pathToOutputLatex)) { isOutputFile = true @@ -138,7 +146,7 @@ test('Success if convertered LaTeX functions with success.', async (done) => { }, 30000) test('Success if convertered LaTeX to mathml with success.', async (done) => { - const res = await converter.createMapOfMathMLElements(log, pathToInputLatex, pathToCss, pathToOutputMML, 'mathml', 3000, true) + const res = await createMapOfMathMLElements(log, pathToInputLatex, pathToCss, pathToOutputMML, 'mathml', 3000, true) let isOutputFile = false if (fileExists.sync(pathToOutputMML)) { isOutputFile = true @@ -151,9 +159,22 @@ test('Success if convertered LaTeX to mathml with success.', async (done) => { }, 30000) test('Convert inline code tags and block pre tags', async (done) => { - const res = await converter.createMapOfMathMLElements(log, pathToCodeInput, pathToCss, pathToCodeOutput, 'html', 3000, true) + const res = await createMapOfMathMLElements(log, pathToCodeInput, pathToCss, pathToCodeOutput, 'html', 3000, true) expect(fileExists.sync(pathToCodeOutput)) expect(res).toBe(converter.STATUS_CODE.OK) expect(fs.readFileSync(pathToCodeOutput, 'utf-8')).toMatchSnapshot() done() }, 3000) + +test('something', async (done) => { + const output = new MemoryWriteStream(); + const src = '

' + const getInputStream = () => new MemoryReadStream(src) + const getOutputStream = () => output + const res = await converter.createMapOfMathMLElements( + log, getInputStream, '', getOutputStream, 'mathml', 3000, false + ); + expect(res).toBe(converter.STATUS_CODE.OK) + expect(output.getValue()).toMatchSnapshot() + done() +}, 3000) From 0dfb6285279acda564e8f4fb570dfc7d588cf58f Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Wed, 3 Apr 2024 18:00:26 -0500 Subject: [PATCH 03/18] Wrap math in semantics for inline LaTeX editing --- typeset/helpers.js | 24 ++++-- typeset/start.js | 180 +++++++++++++++++++++++++++------------------ 2 files changed, 124 insertions(+), 80 deletions(-) diff --git a/typeset/helpers.js b/typeset/helpers.js index 3ee54ed..9d518b4 100644 --- a/typeset/helpers.js +++ b/typeset/helpers.js @@ -4,7 +4,8 @@ const { DOMParser } = require('@xmldom/xmldom') class ParseError extends Error { } -function parseXML (xmlString, warn = console.warn) { +function parseXML (xmlString, options) { + const { warn = console.warn, mimeType = 'text/xml' } = options const locator = { lineNumber: 0, columnNumber: 0 } const cb = () => { const pos = { @@ -21,7 +22,7 @@ function parseXML (xmlString, warn = console.warn) { fatalError: cb } }) - const doc = p.parseFromString(xmlString) + const doc = p.parseFromString(xmlString, mimeType) return doc } @@ -55,11 +56,16 @@ class MemoryReadStream extends MemoryStream { process.nextTick(() => { let offset = 0 const chunks = Math.ceil(content.length / chunkSize) - for (let i = 0; i < chunks; i++) { - this.emit('data', content.slice(offset, offset + chunkSize)) - offset += chunkSize + try { + for (let i = 0; i < chunks; i++) { + this.emit('data', content.slice(offset, offset + chunkSize)) + offset += chunkSize + } + } catch (err) { + this.emit('error', err) + } finally { + this.emit('end') } - this.emit('end') }); return this } @@ -72,7 +78,11 @@ class MemoryWriteStream extends MemoryStream { } write (chunk) { - this.sb.push(chunk) + try { + this.sb.push(chunk) + } catch (err) { + this.emit('error', err) + } } end () { diff --git a/typeset/start.js b/typeset/start.js index 25290e1..c157676 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -7,7 +7,7 @@ const BunyanFormat = require('bunyan-format') const converter = require('./converter') const { createInterface } = require('readline') const { walkJSON, MemoryWriteStream, MemoryReadStream, parseXML } = require('./helpers') -const { XMLSerializer } = require('@xmldom/xmldom') +const { XMLSerializer, DOMParser } = require('@xmldom/xmldom') const log = bunyan.createLogger({ name: 'node-typeset', @@ -41,6 +41,11 @@ const argv = yargs alias: 'b', describe: 'Number of math elements to convert as a batch. Default: 3000' }) + .option('in-place', { + alias: 'I', + boolean: true, + describe: 'Modify file(s) in-place' + }) .demandOption(['input']) .help() .argv @@ -64,81 +69,110 @@ if (argv.format) { log.warn('No output format. It will be set to default (html).') } - -if (argv.input === '-') { - const readline = createInterface({ input: process.stdin }) - const inner = async () => { - for await (const line of readline) { - if (line.endsWith('.json')) { - const inputJSON = JSON.parse(fs.readFileSync(line, { encoding: 'utf-8' })) - log.info(line) - await walkJSON(inputJSON, async ({ parent, name, value }) => { - if ( - typeof value !== 'string' || - parent == null || - value.indexOf("data-math") === -1 - ) return - const output = new MemoryWriteStream() - const serializer = new XMLSerializer() - const el = parseXML( - `${value}`, - (msg) => log.warn(`${line}:${name} - ${msg.replace(/\n/g, " - ").replace(/\t/g, ' ')}`) - ).documentElement - const src = serializer.serializeToString(el) - try { - await converter.createMapOfMathMLElements( - log, - () => new MemoryReadStream(src), - pathToCss, - () => output, - outputFormat, - batchSize, - argv.highlight - ) - let converted = output.getValue() - // const parsed = parseXML(converted, log).documentElement - // for (const mathElement of Array.from(parsed.getElementsByTagName('math'))) { - // const semantics = parseXML(``).documentElement - // const annotation = parseXML(`${mathElement.getAttribute("alttext")}`) - // for (const node of Array.from(mathElement.childNodes)) { - // semantics.appendChild(node) - // } - // semantics.appendChild(annotation) - // mathElement.appendChild(semantics) - // } - // converted = serializer.serializeToString(parsed); - converted = converted.slice(50, -14) - Reflect.set(parent, name, converted) - } catch (err) { - log.error(`${line}:${name} - ${err}`) +async function mathifyJSON(inputPath, outputPath, outputFormat) { + const inputJSON = JSON.parse(fs.readFileSync(inputPath, { encoding: 'utf-8' })) + const serializer = new XMLSerializer() + log.info(inputPath) + await walkJSON(inputJSON, async ({ parent, name, value }) => { + if ( + typeof value !== 'string' || + parent == null || + value.indexOf("data-math") === -1 + ) { + return + } + const output = new MemoryWriteStream() + const parseHTML = (html) => parseXML(html, { + warn: (msg) => { + log.warn( + `${inputPath}:${name} - ${msg.replace(/\n/g, " - ").replace(/\t/g, ' ')}` + ) + } + }); + const el = parseHTML( + `${value}` + ).documentElement + const src = serializer.serializeToString(el) + try { + await converter.createMapOfMathMLElements( + log, + () => new MemoryReadStream(src), + '', + () => output, + outputFormat, + batchSize, + false + ) + let converted = output.getValue() + try { + const document = parseHTML(converted) + const parsed = document.documentElement + for (const mathElement of Array.from(parsed.getElementsByTagName('math'))) { + const semantics = document.createElement('semantics') + const mrow = document.createElement('mrow') + const annotation = document.createElement('annotation') + for (const node of Array.from(mathElement.childNodes)) { + mrow.appendChild(node) } - }) - fs.writeFileSync(`${line}.mathified`, JSON.stringify(inputJSON, null, 2)) - fs.renameSync(`${line}.mathified`, line) + annotation.setAttribute('encoding', 'LaTeX') + annotation.textContent = mathElement.getAttribute('alttext') + mathElement.removeAttribute('alttext') + semantics.appendChild(mrow) + semantics.appendChild(annotation) + mathElement.appendChild(semantics) + } + converted = serializer.serializeToString(parsed); + } catch (err) { + log.error(`${inputPath}:${name} - ${err}\n${converted}`) + return } + converted = converted.slice(50, -14) + Reflect.set(parent, name, converted) + } catch (err) { + log.error(`${inputPath}:${name} - ${err}`) } - } - inner().catch((err) => { - log.fatal(err) - process.exit(111) }) + fs.writeFileSync(outputPath, JSON.stringify(inputJSON, null, 2)) } -// async function runForFile(getInputStream, getOutputStream, highlight) { -// const inputPath = input.replace(/\\/g, '/') -// const getInputStream = () => fs.createReadStream(inputPath) -// const getOutputStream = () => fs.createWriteStream(output) -// if (!/\.(xhtml|json)$/.test(input)) { -// throw new Error('The input file must end with \'.xhtml\' so Chrome parses it as XML (strict) rather than HTML') -// } -// log.debug(`Converting Math Using "${input}"`) -// await converter.createMapOfMathMLElements( -// log, -// getInputStream, -// pathToCss, -// getOutputStream, -// outputFormat, -// batchSize, -// highlight -// ) -// } +async function runForFile(inputPathRaw, outputPathRaw, highlight, inPlace) { + const inputPath = inputPathRaw.replace(/\\/g, '/') + const outputPath = outputPathRaw != null && outputPathRaw.length === 0 + ? outputPathRaw.replace(/\\/g, '/') + : `${inputPath}.mathified` + if (inputPath.endsWith('.json')) { + await mathifyJSON(inputPath, outputPath, outputFormat) + } else if (inputPath.endsWith('.xhtml')) { + const getInputStream = () => fs.createReadStream(inputPath) + const getOutputStream = () => fs.createWriteStream(outputPath) + + await converter.createMapOfMathMLElements( + log, + getInputStream, + pathToCss, + getOutputStream, + outputFormat, + batchSize, + highlight + ) + } else { + throw new Error('Expected XHTML or JSON file') + } + if (inPlace) { + fs.renameSync(outputPath, inputPath) + } +} + +const promise = argv.input === '-' + ? async () => { + const readline = createInterface({ input: process.stdin }) + for await (const line of readline) { + await runForFile(line, null, argv.highlight, argv.inPlace) + } + } + : async () => await runForFile(argv.input, argv.output, argv.highlight, argv.inPlace) + +promise().catch((err) => { + log.fatal(err) + process.exit(111) +}) From 164e1f4d61d03dd135256987ad1f2d7e6369ebc2 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 11:06:34 -0500 Subject: [PATCH 04/18] Set exit code to 111 when on error in mathifyJSON --- typeset/start.js | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/typeset/start.js b/typeset/start.js index c157676..824120c 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -7,11 +7,11 @@ const BunyanFormat = require('bunyan-format') const converter = require('./converter') const { createInterface } = require('readline') const { walkJSON, MemoryWriteStream, MemoryReadStream, parseXML } = require('./helpers') -const { XMLSerializer, DOMParser } = require('@xmldom/xmldom') +const { XMLSerializer } = require('@xmldom/xmldom') const log = bunyan.createLogger({ name: 'node-typeset', - level: process.env.LOG_LEVEL || 'warn', + level: process.env.LOG_LEVEL || 'error', stream: new BunyanFormat({ outputMode: process.env.LOG_FORMAT || 'short' }) }) @@ -77,7 +77,7 @@ async function mathifyJSON(inputPath, outputPath, outputFormat) { if ( typeof value !== 'string' || parent == null || - value.indexOf("data-math") === -1 + value.indexOf("math") === -1 ) { return } @@ -103,33 +103,27 @@ async function mathifyJSON(inputPath, outputPath, outputFormat) { batchSize, false ) - let converted = output.getValue() - try { - const document = parseHTML(converted) - const parsed = document.documentElement - for (const mathElement of Array.from(parsed.getElementsByTagName('math'))) { - const semantics = document.createElement('semantics') - const mrow = document.createElement('mrow') - const annotation = document.createElement('annotation') - for (const node of Array.from(mathElement.childNodes)) { - mrow.appendChild(node) - } - annotation.setAttribute('encoding', 'LaTeX') - annotation.textContent = mathElement.getAttribute('alttext') - mathElement.removeAttribute('alttext') - semantics.appendChild(mrow) - semantics.appendChild(annotation) - mathElement.appendChild(semantics) + const document = parseHTML(output.getValue()) + const parsed = document.documentElement + for (const mathElement of Array.from(parsed.getElementsByTagName('math'))) { + const semantics = document.createElement('semantics') + const mrow = document.createElement('mrow') + const annotation = document.createElement('annotation') + for (const node of Array.from(mathElement.childNodes)) { + mrow.appendChild(node) } - converted = serializer.serializeToString(parsed); - } catch (err) { - log.error(`${inputPath}:${name} - ${err}\n${converted}`) - return + annotation.setAttribute('encoding', 'LaTeX') + annotation.textContent = mathElement.getAttribute('alttext') + mathElement.removeAttribute('alttext') + semantics.appendChild(mrow) + semantics.appendChild(annotation) + mathElement.appendChild(semantics) } - converted = converted.slice(50, -14) + const converted = serializer.serializeToString(parsed).slice(50, -14); Reflect.set(parent, name, converted) } catch (err) { log.error(`${inputPath}:${name} - ${err}`) + process.exitCode = 111 } }) fs.writeFileSync(outputPath, JSON.stringify(inputJSON, null, 2)) From a3897d74738e03f7571f46f2e18fde850392e4cf Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 13:16:37 -0500 Subject: [PATCH 05/18] fix default options in parseXML --- typeset/helpers.js | 17 ++++++++--------- typeset/start.js | 12 ++++++------ typeset/tests/typeset.test.js | 4 ++-- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/typeset/helpers.js b/typeset/helpers.js index 9d518b4..19752d3 100644 --- a/typeset/helpers.js +++ b/typeset/helpers.js @@ -1,11 +1,11 @@ -const { EventEmitter } = require("events") +const { EventEmitter } = require('events') const { DOMParser } = require('@xmldom/xmldom') class ParseError extends Error { } function parseXML (xmlString, options) { - const { warn = console.warn, mimeType = 'text/xml' } = options + const { warn = console.warn, mimeType = 'text/xml' } = options ?? {} const locator = { lineNumber: 0, columnNumber: 0 } const cb = () => { const pos = { @@ -31,12 +31,12 @@ class MemoryStream extends EventEmitter { if (encoding !== 'utf8' && encoding !== 'utf-8') { throw new Error('Memory stream only supported utf-8 encoding') } - return this; + return this } } class MemoryReadStream extends MemoryStream { - constructor (content, chunkSize = 1<<20) { + constructor (content, chunkSize = 1 << 20) { super() this.content = content this.chunkSize = chunkSize @@ -52,7 +52,7 @@ class MemoryReadStream extends MemoryStream { _start () { const content = this.content const chunkSize = this.chunkSize - + process.nextTick(() => { let offset = 0 const chunks = Math.ceil(content.length / chunkSize) @@ -66,7 +66,7 @@ class MemoryReadStream extends MemoryStream { } finally { this.emit('end') } - }); + }) return this } } @@ -86,7 +86,7 @@ class MemoryWriteStream extends MemoryStream { } end () { - this.emit('finish', {}); + this.emit('finish', {}) } getValue () { @@ -122,6 +122,5 @@ module.exports = { MemoryReadStream, MemoryWriteStream, walkJSON, - parseXML, + parseXML } - diff --git a/typeset/start.js b/typeset/start.js index 824120c..2ec6fb1 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -69,7 +69,7 @@ if (argv.format) { log.warn('No output format. It will be set to default (html).') } -async function mathifyJSON(inputPath, outputPath, outputFormat) { +async function mathifyJSON (inputPath, outputPath, outputFormat) { const inputJSON = JSON.parse(fs.readFileSync(inputPath, { encoding: 'utf-8' })) const serializer = new XMLSerializer() log.info(inputPath) @@ -77,7 +77,7 @@ async function mathifyJSON(inputPath, outputPath, outputFormat) { if ( typeof value !== 'string' || parent == null || - value.indexOf("math") === -1 + value.indexOf('math') === -1 ) { return } @@ -85,10 +85,10 @@ async function mathifyJSON(inputPath, outputPath, outputFormat) { const parseHTML = (html) => parseXML(html, { warn: (msg) => { log.warn( - `${inputPath}:${name} - ${msg.replace(/\n/g, " - ").replace(/\t/g, ' ')}` + `${inputPath}:${name} - ${msg.replace(/\n/g, ' - ').replace(/\t/g, ' ')}` ) } - }); + }) const el = parseHTML( `${value}` ).documentElement @@ -119,7 +119,7 @@ async function mathifyJSON(inputPath, outputPath, outputFormat) { semantics.appendChild(annotation) mathElement.appendChild(semantics) } - const converted = serializer.serializeToString(parsed).slice(50, -14); + const converted = serializer.serializeToString(parsed).slice(50, -14) Reflect.set(parent, name, converted) } catch (err) { log.error(`${inputPath}:${name} - ${err}`) @@ -129,7 +129,7 @@ async function mathifyJSON(inputPath, outputPath, outputFormat) { fs.writeFileSync(outputPath, JSON.stringify(inputJSON, null, 2)) } -async function runForFile(inputPathRaw, outputPathRaw, highlight, inPlace) { +async function runForFile (inputPathRaw, outputPathRaw, highlight, inPlace) { const inputPath = inputPathRaw.replace(/\\/g, '/') const outputPath = outputPathRaw != null && outputPathRaw.length === 0 ? outputPathRaw.replace(/\\/g, '/') diff --git a/typeset/tests/typeset.test.js b/typeset/tests/typeset.test.js index 95e2783..05c8f21 100644 --- a/typeset/tests/typeset.test.js +++ b/typeset/tests/typeset.test.js @@ -167,13 +167,13 @@ test('Convert inline code tags and block pre tags', async (done) => { }, 3000) test('something', async (done) => { - const output = new MemoryWriteStream(); + const output = new MemoryWriteStream() const src = '

' const getInputStream = () => new MemoryReadStream(src) const getOutputStream = () => output const res = await converter.createMapOfMathMLElements( log, getInputStream, '', getOutputStream, 'mathml', 3000, false - ); + ) expect(res).toBe(converter.STATUS_CODE.OK) expect(output.getValue()).toMatchSnapshot() done() From 9afc72693868aca06d132431da1e28025e66e771 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 13:18:38 -0500 Subject: [PATCH 06/18] avoid newer syntax --- typeset/helpers.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/typeset/helpers.js b/typeset/helpers.js index 19752d3..11dd5c8 100644 --- a/typeset/helpers.js +++ b/typeset/helpers.js @@ -4,8 +4,8 @@ const { DOMParser } = require('@xmldom/xmldom') class ParseError extends Error { } -function parseXML (xmlString, options) { - const { warn = console.warn, mimeType = 'text/xml' } = options ?? {} +function parseXML (xmlString, options = {}) { + const { warn = console.warn, mimeType = 'text/xml' } = options const locator = { lineNumber: 0, columnNumber: 0 } const cb = () => { const pos = { From 8d4ff3c61aaecdaad4484ed023416f8e7a80e955 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 13:24:40 -0500 Subject: [PATCH 07/18] catch parse errors --- typeset/start.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/typeset/start.js b/typeset/start.js index 2ec6fb1..d325712 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -89,11 +89,11 @@ async function mathifyJSON (inputPath, outputPath, outputFormat) { ) } }) - const el = parseHTML( - `${value}` - ).documentElement - const src = serializer.serializeToString(el) try { + const el = parseHTML( + `${value}` + ).documentElement + const src = serializer.serializeToString(el) await converter.createMapOfMathMLElements( log, () => new MemoryReadStream(src), From 62b099149e1e626a51e429e4e0cf5508a794d413 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 13:28:58 -0500 Subject: [PATCH 08/18] Print fqpath for errors --- typeset/start.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/typeset/start.js b/typeset/start.js index d325712..3e6dfca 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -73,7 +73,7 @@ async function mathifyJSON (inputPath, outputPath, outputFormat) { const inputJSON = JSON.parse(fs.readFileSync(inputPath, { encoding: 'utf-8' })) const serializer = new XMLSerializer() log.info(inputPath) - await walkJSON(inputJSON, async ({ parent, name, value }) => { + await walkJSON(inputJSON, async ({ parent, name, value, fqPath }) => { if ( typeof value !== 'string' || parent == null || @@ -122,7 +122,7 @@ async function mathifyJSON (inputPath, outputPath, outputFormat) { const converted = serializer.serializeToString(parsed).slice(50, -14) Reflect.set(parent, name, converted) } catch (err) { - log.error(`${inputPath}:${name} - ${err}`) + log.error(`${inputPath}:${fqPath.join('.')} - ${err}`) process.exitCode = 111 } }) From caadba2163d2ba6242ff8ad08257ac01458c1909 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 13:34:05 -0500 Subject: [PATCH 09/18] parse content as html instead of xml --- typeset/start.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/typeset/start.js b/typeset/start.js index 3e6dfca..79cd538 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -87,7 +87,8 @@ async function mathifyJSON (inputPath, outputPath, outputFormat) { log.warn( `${inputPath}:${name} - ${msg.replace(/\n/g, ' - ').replace(/\t/g, ' ')}` ) - } + }, + mimeType: 'text/html' }) try { const el = parseHTML( From c7a7e9d6ed4d80ac081faa5fabce5d70a1e2c5b2 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 13:57:45 -0500 Subject: [PATCH 10/18] set default log level to warn --- typeset/start.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/typeset/start.js b/typeset/start.js index 79cd538..c4cb8af 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -11,7 +11,7 @@ const { XMLSerializer } = require('@xmldom/xmldom') const log = bunyan.createLogger({ name: 'node-typeset', - level: process.env.LOG_LEVEL || 'error', + level: process.env.LOG_LEVEL || 'warn', stream: new BunyanFormat({ outputMode: process.env.LOG_FORMAT || 'short' }) }) @@ -76,8 +76,8 @@ async function mathifyJSON (inputPath, outputPath, outputFormat) { await walkJSON(inputJSON, async ({ parent, name, value, fqPath }) => { if ( typeof value !== 'string' || - parent == null || - value.indexOf('math') === -1 + parent == null + // value.indexOf('math') === -1 ) { return } From d7d6202e8ce8b0fe5c5ac465da59f0b24f497637 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 14:21:20 -0500 Subject: [PATCH 11/18] Add some progress reporting (can disable with --quite) --- typeset/start.js | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/typeset/start.js b/typeset/start.js index c4cb8af..7ca4da0 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -46,6 +46,12 @@ const argv = yargs boolean: true, describe: 'Modify file(s) in-place' }) + .option('quiet', { + alias: 'q', + boolean: true, + default: false, + describe: 'Do not print . to show progress' + }) .demandOption(['input']) .help() .argv @@ -76,8 +82,8 @@ async function mathifyJSON (inputPath, outputPath, outputFormat) { await walkJSON(inputJSON, async ({ parent, name, value, fqPath }) => { if ( typeof value !== 'string' || - parent == null - // value.indexOf('math') === -1 + parent == null || + value.indexOf('math') === -1 ) { return } @@ -161,8 +167,12 @@ async function runForFile (inputPathRaw, outputPathRaw, highlight, inPlace) { const promise = argv.input === '-' ? async () => { const readline = createInterface({ input: process.stdin }) + const showProgress = argv.quiet + ? () => {} + : () => (process.stderr.write('.')) for await (const line of readline) { await runForFile(line, null, argv.highlight, argv.inPlace) + showProgress() } } : async () => await runForFile(argv.input, argv.output, argv.highlight, argv.inPlace) From ff3f06727470c5479beb46c46d232c5859d326ef Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Thu, 4 Apr 2024 14:39:54 -0500 Subject: [PATCH 12/18] catch unexpected errors when reading files from stdin --- typeset/start.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/typeset/start.js b/typeset/start.js index 7ca4da0..8f569e0 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -170,12 +170,17 @@ const promise = argv.input === '-' const showProgress = argv.quiet ? () => {} : () => (process.stderr.write('.')) - for await (const line of readline) { - await runForFile(line, null, argv.highlight, argv.inPlace) + for await (const filePath of readline) { + try { + await runForFile(filePath, null, argv.highlight, argv.inPlace) + } catch (e) { + log.error(`${filePath}: uncaught error - ${e}`) + process.exitCode = 111 + } showProgress() } } - : async () => await runForFile(argv.input, argv.output, argv.highlight, argv.inPlace) + : runForFile(argv.input, argv.output, argv.highlight, argv.inPlace) promise().catch((err) => { log.fatal(err) From 69b6fb2658c5e86eb95d25c9f9a0bef41b1bf145 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Fri, 5 Apr 2024 11:00:46 -0500 Subject: [PATCH 13/18] Only convert tex math once --- typeset/start.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typeset/start.js b/typeset/start.js index 8f569e0..7a84963 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -83,7 +83,7 @@ async function mathifyJSON (inputPath, outputPath, outputFormat) { if ( typeof value !== 'string' || parent == null || - value.indexOf('math') === -1 + value.indexOf('data-math') === -1 ) { return } From 33b7ead1f525dd11ba90083b74c9095ed20547d2 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Mon, 8 Apr 2024 10:01:28 -0500 Subject: [PATCH 14/18] Parse log level to int if it is a number bunyan uses types in its log level parsing --- typeset/helpers.js | 8 +++++++- typeset/start.js | 10 ++++++++-- typeset/tests/typeset.test.js | 5 ++--- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/typeset/helpers.js b/typeset/helpers.js index 11dd5c8..2343009 100644 --- a/typeset/helpers.js +++ b/typeset/helpers.js @@ -118,9 +118,15 @@ async function walkJSON (content, handler) { await recurse('', content, undefined, []) } +function getLogLevel (defaultLevel) { + const level = process.env.LOG_LEVEL || defaultLevel + return isNaN(level) ? level : parseInt(level) +} + module.exports = { MemoryReadStream, MemoryWriteStream, walkJSON, - parseXML + parseXML, + getLogLevel } diff --git a/typeset/start.js b/typeset/start.js index 7a84963..65c8400 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -6,12 +6,18 @@ const bunyan = require('bunyan') const BunyanFormat = require('bunyan-format') const converter = require('./converter') const { createInterface } = require('readline') -const { walkJSON, MemoryWriteStream, MemoryReadStream, parseXML } = require('./helpers') +const { + walkJSON, + MemoryWriteStream, + MemoryReadStream, + parseXML, + getLogLevel +} = require('./helpers') const { XMLSerializer } = require('@xmldom/xmldom') const log = bunyan.createLogger({ name: 'node-typeset', - level: process.env.LOG_LEVEL || 'warn', + level: getLogLevel('warn'), stream: new BunyanFormat({ outputMode: process.env.LOG_FORMAT || 'short' }) }) diff --git a/typeset/tests/typeset.test.js b/typeset/tests/typeset.test.js index 05c8f21..aae99e0 100644 --- a/typeset/tests/typeset.test.js +++ b/typeset/tests/typeset.test.js @@ -1,4 +1,3 @@ -const { EventEmitter } = require('events') const path = require('path') require('dotenv').config() const fs = require('fs') @@ -7,11 +6,11 @@ const bunyan = require('bunyan') const BunyanFormat = require('bunyan-format') const converter = require('./../converter') const { createHash } = require('crypto') -const { MemoryReadStream, MemoryWriteStream } = require('../helpers') +const { MemoryReadStream, MemoryWriteStream, getLogLevel } = require('../helpers') const log = bunyan.createLogger({ name: 'node-typeset', - level: process.env.LOG_LEVEL || 'info', + level: getLogLevel('30'), stream: new BunyanFormat({ outputMode: process.env.LOG_FORMAT || 'short' }) }) From 3282887f970a6c51bc3f8b451ac1d9ebdc48688e Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Tue, 9 Apr 2024 09:47:50 -0500 Subject: [PATCH 15/18] Log to stderr instead of stdout --- typeset/start.js | 5 ++++- typeset/tests/typeset.test.js | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/typeset/start.js b/typeset/start.js index 65c8400..6eab456 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -18,7 +18,10 @@ const { XMLSerializer } = require('@xmldom/xmldom') const log = bunyan.createLogger({ name: 'node-typeset', level: getLogLevel('warn'), - stream: new BunyanFormat({ outputMode: process.env.LOG_FORMAT || 'short' }) + stream: new BunyanFormat( + { outputMode: process.env.LOG_FORMAT || 'short' }, + process.stderr, + ) }) const argv = yargs diff --git a/typeset/tests/typeset.test.js b/typeset/tests/typeset.test.js index aae99e0..1694ed5 100644 --- a/typeset/tests/typeset.test.js +++ b/typeset/tests/typeset.test.js @@ -11,7 +11,10 @@ const { MemoryReadStream, MemoryWriteStream, getLogLevel } = require('../helpers const log = bunyan.createLogger({ name: 'node-typeset', level: getLogLevel('30'), - stream: new BunyanFormat({ outputMode: process.env.LOG_FORMAT || 'short' }) + stream: new BunyanFormat( + { outputMode: process.env.LOG_FORMAT || 'short' }, + process.stderr, + ) }) const pathToInput = path.resolve('./typeset/tests/seed/test.baked.xhtml') From 76751f9a7a84d2efe62e3e7d04efb0de9ee6a636 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Wed, 10 Apr 2024 11:13:47 -0500 Subject: [PATCH 16/18] Use process pipe --- run.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ typeset/start.js | 71 +++++++++++++++++++++++------------------ 2 files changed, 123 insertions(+), 31 deletions(-) create mode 100644 run.py diff --git a/run.py b/run.py new file mode 100644 index 0000000..9e556cd --- /dev/null +++ b/run.py @@ -0,0 +1,83 @@ +import sys +from subprocess import Popen, PIPE +from shlex import split +import select +from time import sleep +import traceback +from pathlib import Path + + +def _pipe_next(p, item, timeout=5): + lines = item.split("\n") + for line in lines: + p.stdin.write(line) + p.stdin.write("\n") + ready_to_read, _, _ = select.select([p.stdout], [], [], timeout) + if not ready_to_read: + # Do not rise here because that would interrupt the generator + return Exception("Timeout while waiting for process response") + return "\n".join(p.stdout.readline().strip() for _ in lines) + + +def pipe_to(p, pipe_handler): + item = yield None + while not p.poll(): + item = yield pipe_handler(p, item) + + +def create_one_to_one_pipe(p): + while not p.stdin.writable() or not p.stdout.readable(): + sleep(0.1) + pipe = pipe_to(p, _pipe_next) + next(pipe) # Prime the generator (required step) + return pipe + + +class ProcessPipe: + def __init__(self, command, *, stderr=sys.stderr): + self.proc = Popen( + split(command), + stdin=PIPE, + stdout=PIPE, + stderr=stderr, + bufsize=0, + encoding="utf-8", + ) + self.pipe = create_one_to_one_pipe(self.proc) + + def send(self, line: str): + response = self.pipe.send(line) + if response is None: + return None + # Raise here instead + if isinstance(response, Exception): + raise response + return response + + def close(self): + assert self.proc.stdin is not None + self.proc.stdin.close() + return self.proc.wait() + + +class Mathify(ProcessPipe): + def __init__(self, path_to_typeset, *, stderr=sys.stderr): + command = f"node {path_to_typeset}/start.js -I -i - -f mathml -q" + super().__init__(command, stderr=stderr) + + def send(self, line: str): + response = super().send(line) + if isinstance(response, str) and response.startswith("Error:"): + raise Exception(response) + return response + + +pipe = Mathify("./typeset") +for p in Path("..").glob("**/content.json"): + try: + print(pipe.send(str(p)), file=sys.stderr) + except Exception as e: + for e in traceback.format_exception(e): + print(e, file=sys.stderr) + sys.exit(111) +pipe.close() diff --git a/typeset/start.js b/typeset/start.js index 6eab456..004117e 100644 --- a/typeset/start.js +++ b/typeset/start.js @@ -84,6 +84,22 @@ if (argv.format) { log.warn('No output format. It will be set to default (html).') } +function convertToSemantics(mathElement) { + const document = mathElement.ownerDocument + const semantics = document.createElement('semantics') + const mrow = document.createElement('mrow') + const annotation = document.createElement('annotation') + for (const node of Array.from(mathElement.childNodes)) { + mrow.appendChild(node) + } + annotation.setAttribute('encoding', 'LaTeX') + annotation.textContent = mathElement.getAttribute('alttext') + mathElement.removeAttribute('alttext') + semantics.appendChild(mrow) + semantics.appendChild(annotation) + mathElement.appendChild(semantics) +} + async function mathifyJSON (inputPath, outputPath, outputFormat) { const inputJSON = JSON.parse(fs.readFileSync(inputPath, { encoding: 'utf-8' })) const serializer = new XMLSerializer() @@ -121,25 +137,12 @@ async function mathifyJSON (inputPath, outputPath, outputFormat) { ) const document = parseHTML(output.getValue()) const parsed = document.documentElement - for (const mathElement of Array.from(parsed.getElementsByTagName('math'))) { - const semantics = document.createElement('semantics') - const mrow = document.createElement('mrow') - const annotation = document.createElement('annotation') - for (const node of Array.from(mathElement.childNodes)) { - mrow.appendChild(node) - } - annotation.setAttribute('encoding', 'LaTeX') - annotation.textContent = mathElement.getAttribute('alttext') - mathElement.removeAttribute('alttext') - semantics.appendChild(mrow) - semantics.appendChild(annotation) - mathElement.appendChild(semantics) - } + Array.from(parsed.getElementsByTagName('math')).forEach(convertToSemantics) const converted = serializer.serializeToString(parsed).slice(50, -14) Reflect.set(parent, name, converted) } catch (err) { - log.error(`${inputPath}:${fqPath.join('.')} - ${err}`) process.exitCode = 111 + throw new Error(`${inputPath}:${fqPath.join('.')} - ${err}`) } }) fs.writeFileSync(outputPath, JSON.stringify(inputJSON, null, 2)) @@ -166,32 +169,38 @@ async function runForFile (inputPathRaw, outputPathRaw, highlight, inPlace) { highlight ) } else { - throw new Error('Expected XHTML or JSON file') + throw new Error(`Expected XHTML or JSON file: ${inputPathRaw}`) } if (inPlace) { fs.renameSync(outputPath, inputPath) } } -const promise = argv.input === '-' - ? async () => { - const readline = createInterface({ input: process.stdin }) - const showProgress = argv.quiet - ? () => {} - : () => (process.stderr.write('.')) - for await (const filePath of readline) { - try { - await runForFile(filePath, null, argv.highlight, argv.inPlace) - } catch (e) { - log.error(`${filePath}: uncaught error - ${e}`) - process.exitCode = 111 - } - showProgress() +async function runForStdin(highlight, inPlace, quiet) { + const readline = createInterface({ input: process.stdin }) + const showProgress = quiet + ? () => {} + : () => (process.stderr.write('.')) + for await (const filePath of readline) { + let result = 'Error: This should be unreachable' + try { + await runForFile(filePath, null, highlight, inPlace) + result = `Converted: ${filePath}` + } catch (e) { + result = e + process.exitCode = 111 + } finally { + process.stdout.write(`${result}\n`) } + showProgress() } +} + +const promise = argv.input === '-' + ? runForStdin(argv.highlight, argv.inPlace, argv.quiet) : runForFile(argv.input, argv.output, argv.highlight, argv.inPlace) -promise().catch((err) => { +promise.catch((err) => { log.fatal(err) process.exit(111) }) From ba55b21c97a7bf3ca983fbfe59b4f09f5f9ae2ee Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Tue, 16 Apr 2024 11:37:16 -0500 Subject: [PATCH 17/18] Check typeset path before trying to run subprocess --- run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/run.py b/run.py index 9e556cd..e7b9260 100644 --- a/run.py +++ b/run.py @@ -62,7 +62,9 @@ def close(self): class Mathify(ProcessPipe): def __init__(self, path_to_typeset, *, stderr=sys.stderr): - command = f"node {path_to_typeset}/start.js -I -i - -f mathml -q" + start_path = Path(path_to_typeset) / "start.js" + assert start_path.exists(), f"Path does not exist: {start_path}" + command = f"node {start_path} -I -i - -f mathml -q" super().__init__(command, stderr=stderr) def send(self, line: str): From 5803b666d26e74692e45fe2213fdae02eeb47378 Mon Sep 17 00:00:00 2001 From: Tyler Nullmeier Date: Tue, 16 Apr 2024 12:04:05 -0500 Subject: [PATCH 18/18] Add configurable timeouts to subprocess close and read operations --- run.py | 60 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/run.py b/run.py index e7b9260..0e5de7b 100644 --- a/run.py +++ b/run.py @@ -3,20 +3,20 @@ from shlex import split import select from time import sleep -import traceback from pathlib import Path - -def _pipe_next(p, item, timeout=5): - lines = item.split("\n") - for line in lines: - p.stdin.write(line) - p.stdin.write("\n") - ready_to_read, _, _ = select.select([p.stdout], [], [], timeout) - if not ready_to_read: - # Do not rise here because that would interrupt the generator - return Exception("Timeout while waiting for process response") - return "\n".join(p.stdout.readline().strip() for _ in lines) +def get_pipe_reader_with_timeout(timeout: int): + def _pipe_next(p, item): + lines = item.split("\n") + for line in lines: + p.stdin.write(line) + p.stdin.write("\n") + ready_to_read, _, _ = select.select([p.stdout], [], [], timeout) + if not ready_to_read: + # Do not rise here because that would interrupt the generator + return Exception("Timeout while waiting for process response") + return "\n".join(p.stdout.readline().strip() for _ in lines) + return _pipe_next def pipe_to(p, pipe_handler): @@ -25,16 +25,16 @@ def pipe_to(p, pipe_handler): item = yield pipe_handler(p, item) -def create_one_to_one_pipe(p): +def create_one_to_one_pipe(p, timeout: int): while not p.stdin.writable() or not p.stdout.readable(): sleep(0.1) - pipe = pipe_to(p, _pipe_next) + pipe = pipe_to(p, get_pipe_reader_with_timeout(timeout)) next(pipe) # Prime the generator (required step) return pipe class ProcessPipe: - def __init__(self, command, *, stderr=sys.stderr): + def __init__(self, command, *, timeout=None, stderr=sys.stderr): self.proc = Popen( split(command), stdin=PIPE, @@ -43,7 +43,7 @@ def __init__(self, command, *, stderr=sys.stderr): bufsize=0, encoding="utf-8", ) - self.pipe = create_one_to_one_pipe(self.proc) + self.pipe = create_one_to_one_pipe(self.proc, timeout or 5) def send(self, line: str): response = self.pipe.send(line) @@ -54,18 +54,22 @@ def send(self, line: str): raise response return response - def close(self): - assert self.proc.stdin is not None - self.proc.stdin.close() - return self.proc.wait() + def close(self, timeout=None): + try: + assert self.proc.stdin is not None + self.proc.stdin.close() + return self.proc.wait(timeout=timeout or 1) + except Exception: + self.proc.kill() + raise class Mathify(ProcessPipe): - def __init__(self, path_to_typeset, *, stderr=sys.stderr): + def __init__(self, path_to_typeset, *, timeout: int = 5, stderr=sys.stderr): start_path = Path(path_to_typeset) / "start.js" assert start_path.exists(), f"Path does not exist: {start_path}" command = f"node {start_path} -I -i - -f mathml -q" - super().__init__(command, stderr=stderr) + super().__init__(command, timeout=timeout, stderr=stderr) def send(self, line: str): response = super().send(line) @@ -75,11 +79,9 @@ def send(self, line: str): pipe = Mathify("./typeset") -for p in Path("..").glob("**/content.json"): - try: +try: + for p in Path("..").glob("**/content.json"): print(pipe.send(str(p)), file=sys.stderr) - except Exception as e: - for e in traceback.format_exception(e): - print(e, file=sys.stderr) - sys.exit(111) -pipe.close() +finally: + exit_status = pipe.close(timeout=0.1) + print(f"Mathify process exited with status: {exit_status}", file=sys.stderr)