|
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Character encoding is declared first so it is known before any text is parsed. -->
  <meta charset="utf-8">
  <!-- Viewport directives are a comma-separated list. -->
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
  <title>MANO - XML Converter</title>
  <meta name="keywords" content="MANO, Manuscripts Online, Converter">
  <meta name="description" content="Convert one or more PAGE-XML files into a single TEI-XML file.">
  <meta name="google" content="notranslate">
  <link rel="canonical" href="https://mano-project.github.io/converter.html">
  <link rel="icon" type="image/png" href="images/MANO.png">

  <!-- Bootstrap 5.3.3; the script is deferred so it does not block HTML parsing. -->
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
  <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js" defer></script>

  <link rel="stylesheet" href="css/style.css" media="screen">
  <style>
    /* Full-height column layout so the footer can be pushed to the bottom. */
    html, body {
      height: 100%;
      margin: 0;
      display: flex;
      flex-direction: column;
    }

    .container {
      flex: 1; /* Push footer down */
    }

    footer.footer {
      margin-top: auto; /* Always stick footer at bottom */
    }

    /* Click / drag-and-drop target for the PAGE-XML files. */
    .dropzone {
      border: 2px dashed #ccc;
      padding: 2rem;
      text-align: center;
      border-radius: .5rem;
      color: #888;
      cursor: pointer;
    }

    .file-list { margin-top: 1rem; }
  </style>
</head>
<body>
<!-- Top navigation bar: centered logo plus an offcanvas menu opened from the right. -->
<nav class="navbar bg-body-tertiary" aria-label="Main">
  <div class="container-fluid d-flex justify-content-between align-items-center position-relative py-2">

    <!-- Invisible spacer to balance layout -->
    <div style="width: 80px;"></div>

    <!-- Centered logo -->
    <div class="position-absolute start-50 translate-middle-x text-center">
      <a class="navbar-brand" href="index.html">
        <!-- The alt text names the link destination because the image is the link's only content. -->
        <img src="images/MANO.png" alt="MANO home" width="80" class="d-inline-block align-text-top">
      </a>
    </div>

    <!-- Offcanvas toggle aligned right -->
    <button class="navbar-toggler" type="button" data-bs-toggle="offcanvas" data-bs-target="#offcanvasNavbar"
            aria-controls="offcanvasNavbar" aria-label="Toggle navigation">
      <span class="navbar-toggler-icon"></span>
    </button>

    <div class="offcanvas offcanvas-end" tabindex="-1" id="offcanvasNavbar" aria-labelledby="offcanvasNavbarLabel">
      <div class="offcanvas-header">
        <!-- The angle brackets are escaped so the name is shown as text instead of being parsed as a tag. -->
        <h5 class="offcanvas-title" id="offcanvasNavbarLabel">&lt;MANO&gt;</h5>
        <button type="button" class="btn-close" data-bs-dismiss="offcanvas" aria-label="Close"></button>
      </div>
      <div class="offcanvas-body">
        <ul class="navbar-nav justify-content-end flex-grow-1 pe-3">
          <li class="nav-item"><a class="nav-link" href="index.html">Home</a></li>
          <li class="nav-item"><a class="nav-link" href="resources.html">Resources</a></li>
          <li class="nav-item"><a class="nav-link" href="editor.html">Metadata Editor</a></li>
          <li class="nav-item"><a class="nav-link" href="collection.html">Metadata Collection</a></li>
          <li class="nav-item"><a class="nav-link active" href="viewer.html">Transcription Viewer</a></li>
          <li class="nav-item"><a class="nav-link" href="about.html">About</a></li>
        </ul>
      </div>
    </div>
  </div>
</nav>
<!-- Primary page content: upload area, list of selected files and the conversion actions. -->
<main class="container mt-4 mb-3">
  <div class="container">
    <button type="button" class="btn btn-sm btn-outline-secondary" onclick="history.back()">← Back</button>
  </div>

  <h1 class="page-title">Convert PAGE-XML into TEI-XML</h1>
  <p class="text-center">Upload one or more PAGE-XML files. The converter will merge them into a single TEI file.</p>

  <!-- Upload area: the hidden file input is opened by the page script when the dropzone is activated. -->
  <div class="dropzone" id="dropzone">
    <p>Click or drag &amp; drop PAGE-XML files here</p>
    <input type="file" id="fileInput" accept=".xml" multiple hidden>
  </div>

  <!-- File list (filled in by the page script) -->
  <ul id="fileList" class="list-group file-list"></ul>

  <!-- Convert button; the download and viewer links stay hidden (d-none) until a conversion has run. -->
  <div class="text-center mt-3">
    <button type="button" id="convertBtn" class="btn btn-success" disabled>Convert to TEI</button>
    <a id="downloadBtn" class="btn btn-primary d-none" download="combined-transcription.xml">Download TEI</a>
    <a href="viewer.html" class="btn btn-outline-secondary d-none" id="viewBtn">Open in Viewer</a>
  </div>
</main>
<!-- Page footer: logo, site links and copyright line. -->
<footer class="footer bg-body-tertiary text-center py-4">
  <div class="container">

    <!-- Logo centered -->
    <div class="mb-3">
      <a class="navbar-brand" href="index.html">
        <img src="images/MANO.png" alt="MANO home" width="80">
      </a>
    </div>

    <!-- Links centered in one line -->
    <div class="mb-3">
      <a class="footer-link mx-2" href="index.html">Home</a>
      <a class="footer-link mx-2" href="resources.html">Resources</a>
      <a class="footer-link mx-2" href="editor.html">Metadata Editor</a>
      <a class="footer-link mx-2" href="collection.html">Metadata Collection</a>
      <a class="footer-link mx-2" href="viewer.html">Transcription Viewer</a>
      <a class="footer-link mx-2" href="about.html">About</a>
    </div>

    <!-- Copyright centered; the angle brackets are escaped so the name is rendered as text. -->
    <div class="text-center mt-2">
      <span>© 2025 <span class="mano">&lt;MANO&gt;</span></span>
    </div>

  </div>
</footer>
| 136 | + <script> |
| 137 | + const dropzone = document.getElementById('dropzone'); |
| 138 | + const fileInput = document.getElementById('fileInput'); |
| 139 | + const fileList = document.getElementById('fileList'); |
| 140 | + const convertBtn = document.getElementById('convertBtn'); |
| 141 | + const downloadBtn = document.getElementById('downloadBtn'); |
| 142 | + const viewBtn = document.getElementById('viewBtn'); |
| 143 | + |
| 144 | + let selectedFiles = []; |
| 145 | + |
| 146 | + // Handle dropzone click |
| 147 | + dropzone.addEventListener('click', () => fileInput.click()); |
| 148 | + |
| 149 | + // Handle file input change |
| 150 | + fileInput.addEventListener('change', (e) => handleFiles(e.target.files)); |
| 151 | + |
| 152 | + // Drag & drop |
| 153 | + dropzone.addEventListener('dragover', (e) => { |
| 154 | + e.preventDefault(); |
| 155 | + dropzone.style.borderColor = '#007bff'; |
| 156 | + }); |
| 157 | + |
| 158 | + dropzone.addEventListener('dragleave', () => { |
| 159 | + dropzone.style.borderColor = '#ccc'; |
| 160 | + }); |
| 161 | + |
| 162 | + dropzone.addEventListener('drop', (e) => { |
| 163 | + e.preventDefault(); |
| 164 | + dropzone.style.borderColor = '#ccc'; |
| 165 | + handleFiles(e.dataTransfer.files); |
| 166 | + }); |
| 167 | + |
/**
 * Replace the current selection with the given files and refresh the UI.
 *
 * @param {FileList|File[]} files - Files picked through the input or dropped on the dropzone.
 */
function handleFiles(files) {
  selectedFiles = Array.from(files);
  renderFileList();
  convertBtn.disabled = selectedFiles.length === 0;

  // A download/viewer link produced for an earlier selection no longer matches
  // the files shown in the list, so hide both until the next conversion.
  downloadBtn.classList.add('d-none');
  viewBtn.classList.add('d-none');
}
// File-list rendering is done by the single renderFileList() declared further down in
// this script (the version with the move up/down buttons). A second, simpler declaration
// of the same name used to sit here; function declarations are hoisted and the last one
// wins, so that copy was never executed and has been removed.
// Main conversion logic: read every selected file, convert the set to one TEI
// document and expose it through the download link.
convertBtn.addEventListener('click', async () => {
  // Prevent a second click from starting a parallel conversion.
  convertBtn.disabled = true;

  try {
    // Promise.all keeps the results in the same order as selectedFiles.
    const fileContents = await Promise.all(
      selectedFiles.map(async (f) => ({ name: f.name, content: await f.text() }))
    );

    const tei = convertPageXMLtoTEI(fileContents);
    const blob = new Blob([tei], { type: 'application/xml' });

    // Release the object URL of a previous conversion so its Blob can be freed.
    if (downloadBtn.href.startsWith('blob:')) {
      URL.revokeObjectURL(downloadBtn.href);
    }

    downloadBtn.href = URL.createObjectURL(blob);
    downloadBtn.classList.remove('d-none');
    viewBtn.classList.remove('d-none');
  } catch (err) {
    // Report the failure instead of leaving an unhandled promise rejection.
    console.error('PAGE-XML to TEI conversion failed:', err);
    alert('The selected files could not be converted. See the browser console for details.');
  } finally {
    convertBtn.disabled = selectedFiles.length === 0;
  }
});
// Converter function
/**
 * Merge the text of several PAGE-XML documents into one TEI-XML document.
 *
 * Each input file becomes one page: a <pb/> followed by a <p> holding one
 * <lb/>-prefixed line per TextLine that has a Unicode text equivalent.
 *
 * @param {{name: string, content: string}[]} files - File names and their XML source, in page order.
 * @returns {string} The combined TEI document, including the XML declaration.
 */
function convertPageXMLtoTEI(files) {
  // Escape the characters that are markup-significant in XML text and in
  // double-quoted attribute values. textContent returns decoded text, so it has
  // to be re-escaped before it is written into the TEI string.
  const escapeXml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');

  const parser = new DOMParser();
  const teiPages = [];
  let pageNum = 1;
  // creator is only used as the marker for "metadata not captured yet".
  let creator = '', createdDate = '', lastChange = '';

  for (const file of files) {
    const xmlDoc = parser.parseFromString(file.content, 'application/xml');

    // DOMParser does not throw on malformed XML; it returns a document that
    // contains a <parsererror> element instead. Report it, then carry on with
    // whatever the parser produced.
    if (xmlDoc.getElementsByTagName('parsererror').length > 0) {
      console.warn(`"${file.name}" is not well-formed XML; its page may be empty or incomplete.`);
    }

    // Metadata: re-read for each file until one supplies a non-empty Creator.
    if (!creator) {
      creator = xmlDoc.querySelector('Metadata > Creator')?.textContent || '';
      createdDate = xmlDoc.querySelector('Metadata > Created')?.textContent || '';
      lastChange = xmlDoc.querySelector('Metadata > LastChange')?.textContent || '';
    }

    // Page info: fall back to a generated image name when none is recorded.
    const pageNode = xmlDoc.getElementsByTagName('Page')[0];
    const facs = pageNode?.getAttribute('imageFilename') || `page${pageNum}.jpg`;

    // Collect lines
    const lineOutput = [];
    let lineNum = 1;

    for (const line of xmlDoc.getElementsByTagName('TextLine')) {
      // Grab ALL TextEquiv nodes in this line
      const textEquivs = line.getElementsByTagName('TextEquiv');
      if (textEquivs.length === 0) continue;

      // The LAST TextEquiv is the full line text
      const lastEquiv = textEquivs[textEquivs.length - 1];
      const unicodeNode = lastEquiv.getElementsByTagName('Unicode')[0];
      if (!unicodeNode) continue;

      const fullLineText = unicodeNode.textContent.trim();

      // Add <lb> + full line
      lineOutput.push(`<lb n="${lineNum}"/>${escapeXml(fullLineText)}`);
      lineNum++;
    }

    teiPages.push(`
      <pb n="${pageNum}" facs="${escapeXml(facs)}"/>
      <p>
        ${lineOutput.join('\n')}
      </p>`);
    pageNum++;
  }

  // Build combined TEI; pages are separated by a real newline.
  return `<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>Combined Transcription</title>
      </titleStmt>
      <publicationStmt>
        <p>XML-TEI generated from PAGE-XML using the converter tool in
          <ref target="https://mano-project.github.io/">MANO</ref>.
        </p>
      </publicationStmt>
      <sourceDesc>
        <p>Source: Transkribus Export (Created: ${escapeXml(createdDate)} | Last Change: ${escapeXml(lastChange)})</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      ${teiPages.join('\n')}
    </body>
  </text>
</TEI>`;
}
| 284 | + //Adjust files position to be converted in the correct order |
| 285 | + function renderFileList() { |
| 286 | + fileList.innerHTML = ''; |
| 287 | + |
| 288 | + selectedFiles.forEach((f, index) => { |
| 289 | + const li = document.createElement('li'); |
| 290 | + li.className = 'list-group-item d-flex justify-content-between align-items-center'; |
| 291 | + li.dataset.index = index; |
| 292 | + li.innerHTML = ` |
| 293 | + <span>${f.name} (${Math.round(f.size/1024)} KB)</span> |
| 294 | + <div> |
| 295 | + <button class="btn btn-sm btn-outline-secondary move-up">▲</button> |
| 296 | + <button class="btn btn-sm btn-outline-secondary move-down">▼</button> |
| 297 | + </div> |
| 298 | + `; |
| 299 | + fileList.appendChild(li); |
| 300 | + }); |
| 301 | + |
| 302 | + // Attach up/down button functionality |
| 303 | + fileList.querySelectorAll('.move-up').forEach(btn => { |
| 304 | + btn.addEventListener('click', (e) => { |
| 305 | + const li = e.target.closest('li'); |
| 306 | + const index = parseInt(li.dataset.index); |
| 307 | + if (index > 0) { |
| 308 | + [selectedFiles[index-1], selectedFiles[index]] = [selectedFiles[index], selectedFiles[index-1]]; |
| 309 | + renderFileList(); |
| 310 | + } |
| 311 | + }); |
| 312 | + }); |
| 313 | + |
| 314 | + fileList.querySelectorAll('.move-down').forEach(btn => { |
| 315 | + btn.addEventListener('click', (e) => { |
| 316 | + const li = e.target.closest('li'); |
| 317 | + const index = parseInt(li.dataset.index); |
| 318 | + if (index < selectedFiles.length-1) { |
| 319 | + [selectedFiles[index+1], selectedFiles[index]] = [selectedFiles[index], selectedFiles[index+1]]; |
| 320 | + renderFileList(); |
| 321 | + } |
| 322 | + }); |
| 323 | + }); |
| 324 | + } |
| 325 | +</script> |
| 326 | +</body> |
| 327 | +</html> |
0 commit comments