87 changes: 87 additions & 0 deletions run.py
@@ -0,0 +1,87 @@
import sys
from subprocess import Popen, PIPE
from shlex import split
import select
from time import sleep
from pathlib import Path

def get_pipe_reader_with_timeout(timeout: int):
def _pipe_next(p, item):
lines = item.split("\n")
for line in lines:
p.stdin.write(line)
p.stdin.write("\n")
ready_to_read, _, _ = select.select([p.stdout], [], [], timeout)
if not ready_to_read:
            # Do not raise here because that would terminate the generator
return Exception("Timeout while waiting for process response")
return "\n".join(p.stdout.readline().strip() for _ in lines)
return _pipe_next


def pipe_to(p, pipe_handler):
item = yield None
    while p.poll() is None:  # keep piping while the process is still running
item = yield pipe_handler(p, item)


def create_one_to_one_pipe(p, timeout: int):
while not p.stdin.writable() or not p.stdout.readable():
sleep(0.1)
pipe = pipe_to(p, get_pipe_reader_with_timeout(timeout))
next(pipe) # Prime the generator (required step)
return pipe


class ProcessPipe:
def __init__(self, command, *, timeout=None, stderr=sys.stderr):
self.proc = Popen(
split(command),
stdin=PIPE,
stdout=PIPE,
stderr=stderr,
bufsize=0,
encoding="utf-8",
)
self.pipe = create_one_to_one_pipe(self.proc, timeout or 5)

def send(self, line: str):
response = self.pipe.send(line)
if response is None:
return None
        # Raise here, outside the generator, instead
if isinstance(response, Exception):
raise response
return response

def close(self, timeout=None):
try:
assert self.proc.stdin is not None
self.proc.stdin.close()
return self.proc.wait(timeout=timeout or 1)
except Exception:
self.proc.kill()
raise


class Mathify(ProcessPipe):
def __init__(self, path_to_typeset, *, timeout: int = 5, stderr=sys.stderr):
start_path = Path(path_to_typeset) / "start.js"
assert start_path.exists(), f"Path does not exist: {start_path}"
command = f"node {start_path} -I -i - -f mathml -q"
super().__init__(command, timeout=timeout, stderr=stderr)

def send(self, line: str):
response = super().send(line)
if isinstance(response, str) and response.startswith("Error:"):
raise Exception(response)
return response


pipe = Mathify("./typeset")
try:
for p in Path("..").glob("**/content.json"):
print(pipe.send(str(p)), file=sys.stderr)
finally:
exit_status = pipe.close(timeout=0.1)
print(f"Mathify process exited with status: {exit_status}", file=sys.stderr)
65 changes: 13 additions & 52 deletions typeset/converter.js
@@ -1,69 +1,30 @@
const path = require('path')
const fileExists = require('file-exists')
const { DOMParser, XMLSerializer } = require('@xmldom/xmldom')
const { XMLSerializer } = require('@xmldom/xmldom')
const { scanXML, looseTagEq } = require('./scan-xml')
const { PARAS } = require('./paras')
const sax = require('sax')

const fs = require('fs')
const mjnodeConverter = require('./mjnode')
const hljs = require('highlight.js')
const hljsLineNumbers = require('./hljs-line-numbers')
const { parseXML } = require('./helpers')

// Status codes
const STATUS_CODE = {
OK: 0,
ERROR: 111
}

class ParseError extends Error { }

function parseXML (xmlString) {
const locator = { lineNumber: 0, columnNumber: 0 }
const cb = () => {
const pos = {
line: locator.lineNumber - 1,
character: locator.columnNumber - 1
}
throw new ParseError(`ParseError: ${JSON.stringify(pos)}`)
}
const p = new DOMParser({
locator,
errorHandler: {
warning: console.warn,
error: cb,
fatalError: cb
}
})
const doc = p.parseFromString(xmlString)
return doc
}

const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, outputFormat, batchSize, highlight) => {
const createMapOfMathMLElements = async (log, getInputStream, cssPath, getOutputStream, outputFormat, batchSize, highlight) => {
const timeOfStart = new Date().getTime()

// Check that the XHTML and CSS files exist
if (!fileExists.sync(inputPath)) {
log.error(`Input XHTML file not found: "${inputPath}"`)
return STATUS_CODE.ERROR
}
if (cssPath && !fileExists.sync(cssPath)) {
log.error(`Input CSS file not found: "${cssPath}"`)
return STATUS_CODE.ERROR
}

const parser = sax.parser(true)
const output = path.resolve(outputPath)
const mathEntries = []
const codeEntries = []
  // Keep an array of all the replacements in the order they appear in the file
const sortedReplacements = []
let head

log.info('Opening XHTML file (may take a few minutes)')
log.debug(`Opening "${inputPath}"`)
const inputContent = fs.createReadStream(inputPath).setEncoding('utf8')
log.debug(`Opened "${inputPath}"`)
const inputContent = getInputStream().setEncoding('utf8')

const matchers = [
{ attr: 'data-math' },
@@ -79,12 +40,12 @@ const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, ou
if (highlight) {
log.debug('Adding matchers for code highlighting...')

const tags = ['pre', 'code'];
const attributes = ['data-lang', 'lang'];
const tags = ['pre', 'code']
const attributes = ['data-lang', 'lang']

for (let i = 0; i < tags.length; i++) {
for (let j = 0; j < attributes.length; j++) {
matchers.push({ tag: tags[i], attr: attributes[j] });
matchers.push({ tag: tags[i], attr: attributes[j] })
}
}
}
@@ -109,9 +70,12 @@ const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, ou
}
head = match
sortedReplacements.push(match)
} else {
} else if (looseTagEq(match.node.name, 'pre') || looseTagEq(match.node.name, 'code')) {
codeEntries.push(match)
sortedReplacements.push(match)
} else {
const attr = JSON.stringify(match.node.attributes)
throw new Error(`Got unexpected node: ${match.node.name} ${attr}`)
}
}
)
@@ -121,7 +85,6 @@ const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, ou
inputContent.on('data', chunk => parser.write(chunk))
inputContent.on('end', () => resolve())
})
log.debug(`Parsed "${inputPath}"`)

// Prepare code highlighting
await highlightCodeElements(codeEntries)
@@ -140,16 +103,14 @@ const createMapOfMathMLElements = async (log, inputPath, cssPath, outputPath, ou
}
log.info('Updating content...')
await new Promise((resolve, reject) => {
const reader = fs.createReadStream(inputPath).setEncoding('utf8')
const writer = fs.createWriteStream(outputPath)
const reader = getInputStream().setEncoding('utf8')
const writer = getOutputStream()
writer.on('error', err => reject(err))
reader.on('error', err => reject(err))
writer.on('finish', () => resolve())
PARAS(sortedReplacements, reader, writer)
})

log.info(`Content saved. Open "${output}" to see converted file.`)

const timeOfEndInSec = (new Date().getTime() - timeOfStart) / 1000
const timeOfEndInMin = timeOfEndInSec > 60 ? Math.round(timeOfEndInSec / 60) : 0
let timeOfEnd = ''
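The signature change above replaces the inputPath/outputPath file paths with stream factories, so the converter no longer needs to touch the filesystem at all. Below is a minimal sketch (not part of this diff) of how a caller could run a conversion entirely in memory, assuming converter.js exports createMapOfMathMLElements (the export is not shown here) and using the MemoryReadStream/MemoryWriteStream helpers added in the next file; the log object and argument values are placeholders.

const { MemoryReadStream, MemoryWriteStream } = require('./helpers')
const { createMapOfMathMLElements } = require('./converter') // assumed export

// `log` must provide info/debug/error methods, matching how the converter uses it.
async function convertInMemory (xhtml, log) {
  const writer = new MemoryWriteStream()
  const status = await createMapOfMathMLElements(
    log,
    // getInputStream is called once for the scanning pass and again for the
    // rewrite pass, so it must return a fresh stream on every call.
    () => new MemoryReadStream(xhtml),
    null,          // cssPath (already optional before this change)
    () => writer,  // getOutputStream
    'mathml',      // outputFormat (placeholder)
    100,           // batchSize (placeholder)
    false          // highlight
  )
  return { status, output: writer.getValue() }
}

The factory indirection keeps the converter agnostic about whether content comes from disk, memory, or somewhere else, which is what makes the in-memory helpers below usable.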
132 changes: 132 additions & 0 deletions typeset/helpers.js
@@ -0,0 +1,132 @@
const { EventEmitter } = require('events')

const { DOMParser } = require('@xmldom/xmldom')

class ParseError extends Error { }

function parseXML (xmlString, options = {}) {
const { warn = console.warn, mimeType = 'text/xml' } = options
const locator = { lineNumber: 0, columnNumber: 0 }
const cb = () => {
const pos = {
line: locator.lineNumber - 1,
character: locator.columnNumber - 1
}
throw new ParseError(`ParseError: ${JSON.stringify(pos)}`)
}
const p = new DOMParser({
locator,
errorHandler: {
warning: warn,
error: cb,
fatalError: cb
}
})
const doc = p.parseFromString(xmlString, mimeType)
return doc
}

class MemoryStream extends EventEmitter {
setEncoding (encoding) {
if (encoding !== 'utf8' && encoding !== 'utf-8') {
      throw new Error('MemoryStream only supports utf-8 encoding')
}
return this
}
}

class MemoryReadStream extends MemoryStream {
constructor (content, chunkSize = 1 << 20) {
super()
this.content = content
this.chunkSize = chunkSize
}

  on (evt, callback) {
    super.on(evt, callback)
    if (evt === 'data') {
      this._start()
    }
    return this
  }

_start () {
const content = this.content
const chunkSize = this.chunkSize

process.nextTick(() => {
let offset = 0
const chunks = Math.ceil(content.length / chunkSize)
try {
for (let i = 0; i < chunks; i++) {
this.emit('data', content.slice(offset, offset + chunkSize))
offset += chunkSize
}
} catch (err) {
this.emit('error', err)
} finally {
this.emit('end')
}
})
return this
}
}

class MemoryWriteStream extends MemoryStream {
constructor () {
super()
this.sb = []
}

write (chunk) {
try {
this.sb.push(chunk)
} catch (err) {
this.emit('error', err)
}
}

end () {
this.emit('finish', {})
}

getValue () {
return this.sb.join('')
}
}

async function walkJSON (content, handler) {
const recurse = async (name, value, parent, prevPath) => {
const fqPath = [...prevPath, name]
const jsType = typeof value
switch (jsType) {
case 'string':
case 'number':
case 'boolean':
await handler({ name, fqPath, value, parent, type: jsType })
return
case 'object': {
const type = Array.isArray(value) ? 'array' : 'object'
await handler({ name, fqPath, value, parent, type })
if (value != null) {
for (const [k, v] of Object.entries(value)) {
await recurse(k, v, value, fqPath)
}
}
}
}
}
await recurse('', content, undefined, [])
}

function getLogLevel (defaultLevel) {
const level = process.env.LOG_LEVEL || defaultLevel
return isNaN(level) ? level : parseInt(level)
}

module.exports = {
MemoryReadStream,
MemoryWriteStream,
walkJSON,
parseXML,
getLogLevel
}
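walkJSON visits every value in a parsed JSON document and hands the handler its name, fully qualified path, parent, and type. A small, hypothetical usage sketch follows (the input shape is invented for illustration): collect the dotted path of every string value, the kind of traversal a content.json document might need before typesetting.

const { walkJSON } = require('./helpers')

// Collect the dotted path of every string value in a parsed JSON document.
async function collectStringPaths (doc) {
  const found = []
  await walkJSON(doc, ({ fqPath, value, type }) => {
    if (type === 'string') {
      found.push({ path: fqPath.join('.'), value })
    }
  })
  return found
}

// collectStringPaths({ title: 'Intro', items: [{ body: '<p>x</p>' }] })
//   resolves to [{ path: '.title', value: 'Intro' },
//                { path: '.items.0.body', value: '<p>x</p>' }]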