Skip to content

Commit e0f5bc5

Browse files
committed
load converter
1 parent b4c1f71 commit e0f5bc5

File tree

1 file changed

+327
-0
lines changed

1 file changed

+327
-0
lines changed

converter.html

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<title>MANO - XML Converter</title>
5+
<meta name="keywords" content="MANO, Manuscripts Online, Converter">
6+
<meta name="description" content="">
7+
<meta charset="UTF-8"/>
8+
<meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no"/>
9+
<meta name="google" content="notranslate"/>
10+
<link rel="canonical" href="https://mano-project.github.io/converter.html"/>
11+
<link rel="icon" type="image/png" href="images/MANO.png">
12+
13+
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
14+
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
15+
16+
<link rel="stylesheet" type="text/css" href="css/style.css" media="screen"/>
17+
<style>
18+
html, body {
19+
height: 100%;
20+
margin: 0;
21+
display: flex;
22+
flex-direction: column;
23+
}
24+
25+
.container {
26+
flex: 1; /* Push footer down */
27+
}
28+
29+
footer.footer {
30+
margin-top: auto; /* Always stick footer at bottom */
31+
}
32+
33+
.dropzone {
34+
border: 2px dashed #ccc;
35+
padding: 2rem;
36+
text-align: center;
37+
border-radius: .5rem;
38+
color: #888;
39+
cursor: pointer;
40+
}
41+
.file-list { margin-top: 1rem; }
42+
</style>
43+
</head>
44+
<body>
45+
<nav class="navbar bg-body-tertiary ">
46+
<div class="container-fluid d-flex justify-content-between align-items-center position-relative py-2">
47+
48+
<!-- Invisible spacer to balance layout -->
49+
<div style="width: 80px;"></div>
50+
51+
<!-- Centered logo -->
52+
<div class="position-absolute start-50 translate-middle-x text-center">
53+
<a class="navbar-brand" href="index.html">
54+
<img src="images/MANO.png" alt="Logo" width="80" class="d-inline-block align-text-top">
55+
</a>
56+
</div>
57+
58+
<!-- Offcanvas toggle aligned right -->
59+
<button class="navbar-toggler" type="button" data-bs-toggle="offcanvas" data-bs-target="#offcanvasNavbar"
60+
aria-controls="offcanvasNavbar" aria-label="Toggle navigation">
61+
<span class="navbar-toggler-icon"></span>
62+
</button>
63+
64+
<div class="offcanvas offcanvas-end" tabindex="-1" id="offcanvasNavbar" aria-labelledby="offcanvasNavbarLabel">
65+
<div class="offcanvas-header">
66+
<h5 class="offcanvas-title" id="offcanvasNavbarLabel">&lt;MANO&gt;</h5>
67+
<button type="button" class="btn-close" data-bs-dismiss="offcanvas" aria-label="Close"></button>
68+
</div>
69+
<div class="offcanvas-body">
70+
<ul class="navbar-nav justify-content-end flex-grow-1 pe-3">
71+
<li class="nav-item"><a class="nav-link" href="index.html">Home</a></li>
72+
<li class="nav-item"><a class="nav-link" href="resources.html">Resources</a></li>
73+
<li class="nav-item"><a class="nav-link" href="editor.html">Metadata Editor</a></li>
74+
<li class="nav-item"><a class="nav-link" href="collection.html">Metadata Collection</a></li>
75+
<li class="nav-item"><a class="nav-link active" href="viewer.html">Transcription Viewer</a></li>
76+
<li class="nav-item"><a class="nav-link" href="about.html">About</a></li>
77+
</ul>
78+
</div>
79+
</div>
80+
</div>
81+
</nav>
82+
83+
<div class="container mt-4 mb-3">
84+
<div class="container">
85+
<button onclick="history.back()" class="btn btn-sm btn-outline-secondary">&larr; Back</button>
86+
</div>
87+
88+
<h1 class="page-title">Convert PAGE-XML into TEI-XML</h1>
89+
<p class="text-center">Upload one or more PAGE-XML files. The converter will merge them into a single TEI file.</p>
90+
91+
<!-- Upload area -->
92+
<div class="dropzone" id="dropzone">
93+
<p>Click or drag &amp; drop PAGE-XML files here</p>
94+
<input type="file" id="fileInput" accept=".xml" multiple hidden>
95+
</div>
96+
97+
<!-- File list -->
98+
<ul id="fileList" class="list-group file-list"></ul>
99+
100+
<!-- Convert button -->
101+
<div class="text-center mt-3">
102+
<button id="convertBtn" class="btn btn-success" disabled>Convert to TEI</button>
103+
<a id="downloadBtn" class="btn btn-primary d-none" download="combined-transcription.xml">Download TEI</a>
104+
<a href="viewer.html" class="btn btn-outline-secondary d-none" id="viewBtn">Open in Viewer</a>
105+
</div>
106+
</div>
107+
108+
<footer class="footer bg-body-tertiary text-center py-4">
109+
<div class="container">
110+
111+
<!-- Logo centered -->
112+
<div class="mb-3">
113+
<a class="navbar-brand" href="index.html">
114+
<img src="images/MANO.png" alt="Logo" width="80">
115+
</a>
116+
</div>
117+
118+
<!-- Links centered in one line -->
119+
<div class="mb-3">
120+
<a class="footer-link mx-2" href="index.html">Home</a>
121+
<a class="footer-link mx-2" href="resources.html">Resources</a>
122+
<a class="footer-link mx-2" href="editor.html">Metadata Editor</a>
123+
<a class="footer-link mx-2" href="collection.html">Metadata Collection</a>
124+
<a class="footer-link mx-2" href="viewer.html">Transcription Viewer</a>
125+
<a class="footer-link mx-2" href="about.html">About</a>
126+
</div>
127+
128+
<!-- Copyright centered -->
129+
<div class="text-center mt-2">
130+
<span>© 2025 <span class="mano">&lt;MANO&gt;</span></span>
131+
</div>
132+
133+
</div>
134+
</footer>
135+
136+
<script>
137+
// Cache the DOM nodes the converter UI works with.
const dropzone = document.getElementById('dropzone');
const fileInput = document.getElementById('fileInput');
const fileList = document.getElementById('fileList');
const convertBtn = document.getElementById('convertBtn');
const downloadBtn = document.getElementById('downloadBtn');
const viewBtn = document.getElementById('viewBtn');

// Files currently queued for conversion, in conversion order.
let selectedFiles = [];

// Clicking the drop zone forwards the click to the hidden file input.
dropzone.addEventListener('click', function openFilePicker() {
  fileInput.click();
});

// Files chosen through the file dialog become the current selection.
fileInput.addEventListener('change', function onFilesPicked(event) {
  handleFiles(event.target.files);
});

// Drag & drop: preventDefault on dragover so the drop event can fire,
// and tint the border while files hover over the zone.
dropzone.addEventListener('dragover', function onDragOver(event) {
  event.preventDefault();
  dropzone.style.borderColor = '#007bff';
});

// Restore the resting border colour once the pointer leaves.
dropzone.addEventListener('dragleave', function onDragLeave() {
  dropzone.style.borderColor = '#ccc';
});

// On drop, keep the browser from opening the file and take over the files.
dropzone.addEventListener('drop', function onDrop(event) {
  event.preventDefault();
  dropzone.style.borderColor = '#ccc';
  handleFiles(event.dataTransfer.files);
});
167+
168+
/**
 * Replace the current selection with the given files, redraw the list and
 * enable the convert button only when at least one file is selected.
 *
 * @param {ArrayLike<File>} files - files from the file input or a drop event.
 */
function handleFiles(files) {
  const picked = Array.from(files);
  selectedFiles = picked;
  renderFileList();
  convertBtn.disabled = picked.length === 0;
}
173+
174+
// NOTE(review): this script declares `renderFileList` a second time further
// down (the variant with move-up/move-down buttons). Function declarations
// are hoisted and the later one wins, so this plain version never runs.
/**
 * Redraw the file list as plain "<name> (<size> KB)" entries.
 */
function renderFileList() {
  fileList.innerHTML = '';
  for (const file of selectedFiles) {
    const item = document.createElement('li');
    const sizeKb = Math.round(file.size / 1024);
    item.className = 'list-group-item';
    item.textContent = `${file.name} (${sizeKb} KB)`;
    fileList.appendChild(item);
  }
}
183+
184+
// Main conversion logic: read every selected file, merge them into one TEI
// document and expose the result through the download button.
convertBtn.addEventListener('click', async () => {
  // Snapshot the queue and read all files; Promise.all keeps the results in
  // the same order as the selection, so page order is unaffected.
  const fileContents = await Promise.all(
    selectedFiles.map(async (f) => ({ name: f.name, content: await f.text() }))
  );

  const tei = convertPageXMLtoTEI(fileContents);
  const blob = new Blob([tei], { type: 'application/xml' });

  // Release the object URL left over from a previous conversion; otherwise
  // every click keeps another blob alive until the page is closed.
  const previousUrl = downloadBtn.getAttribute('href');
  if (previousUrl && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }

  downloadBtn.href = URL.createObjectURL(blob);

  // Reveal the follow-up actions now that a result exists.
  downloadBtn.classList.remove('d-none');
  viewBtn.classList.remove('d-none');
});
201+
202+
/**
 * Convert a batch of PAGE-XML documents into one combined TEI-XML string.
 *
 * Each input file becomes one page: a <pb/> carrying the page number and the
 * image file name, followed by a <p> holding one <lb/>-prefixed entry per
 * TextLine. The "Created" / "LastChange" metadata is taken from the first
 * file that provides a Metadata > Creator value.
 *
 * @param {{name: string, content: string}[]} files - PAGE-XML sources, in page order.
 * @returns {string} the combined TEI document, starting with the XML declaration.
 */
function convertPageXMLtoTEI(files) {
  // textContent/getAttribute return decoded text, so markup-significant
  // characters must be re-escaped before being written into XML again.
  const escapeXml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');

  const parser = new DOMParser();
  const teiPages = [];
  let creator = '', createdDate = '', lastChange = '';

  files.forEach((file, index) => {
    const pageNum = index + 1;
    const xmlDoc = parser.parseFromString(file.content, 'application/xml');

    // Metadata: keep looking until a file with a Creator entry is found.
    if (!creator) {
      creator = xmlDoc.querySelector('Metadata > Creator')?.textContent || '';
      createdDate = xmlDoc.querySelector('Metadata > Created')?.textContent || '';
      lastChange = xmlDoc.querySelector('Metadata > LastChange')?.textContent || '';
    }

    // Page info: fall back to a synthetic image name when none is recorded.
    const pageNode = xmlDoc.getElementsByTagName('Page')[0];
    const facs = pageNode?.getAttribute('imageFilename') || `page${pageNum}.jpg`;

    // Collect lines; lines without usable text are skipped and not numbered.
    const lineOutput = [];
    for (const line of xmlDoc.getElementsByTagName('TextLine')) {
      const textEquivs = line.getElementsByTagName('TextEquiv');
      if (textEquivs.length === 0) continue;

      // The LAST TextEquiv inside the line is treated as the full line text.
      const lastEquiv = textEquivs[textEquivs.length - 1];
      const unicodeNode = lastEquiv.getElementsByTagName('Unicode')[0];
      if (!unicodeNode) continue;

      const fullLineText = escapeXml(unicodeNode.textContent.trim());
      lineOutput.push(`<lb n="${lineOutput.length + 1}"/>${fullLineText}`);
    }

    teiPages.push(`
      <pb n="${pageNum}" facs="${escapeXml(facs)}"/>
      <p>
${lineOutput.join('\n')}
      </p>`);
  });

  // Build combined TEI. Pages are joined with a real newline ('\n'); a
  // doubled backslash here would emit the two characters "\n" into the file.
  return `<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>Combined Transcription</title>
      </titleStmt>
      <publicationStmt>
        <p>XML-TEI generated from PAGE-XML using the converter tool in
          <ref target="https://mano-project.github.io/">MANO</ref>.
        </p>
      </publicationStmt>
      <sourceDesc>
        <p>Source: Transkribus Export (Created: ${escapeXml(createdDate)} | Last Change: ${escapeXml(lastChange)})</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
${teiPages.join('\n')}
    </body>
  </text>
</TEI>`;
}
282+
283+
284+
/**
 * Redraw the file list with move-up / move-down controls so the user can
 * put the files into the order in which they should be converted.
 *
 * Entries are built with DOM APIs and textContent rather than innerHTML, so
 * a file name containing markup is shown as text and never parsed as HTML.
 */
function renderFileList() {
  fileList.innerHTML = '';

  // Swap two queue entries, then redraw so every row reflects its new index.
  const swap = (a, b) => {
    [selectedFiles[a], selectedFiles[b]] = [selectedFiles[b], selectedFiles[a]];
    renderFileList();
  };

  // Build one reorder button; the arrow glyph alone has no accessible name,
  // hence the explicit aria-label.
  const makeButton = (modifier, symbol, label, onClick) => {
    const btn = document.createElement('button');
    btn.type = 'button';
    btn.className = `btn btn-sm btn-outline-secondary ${modifier}`;
    btn.textContent = symbol;
    btn.setAttribute('aria-label', label);
    btn.addEventListener('click', onClick);
    return btn;
  };

  selectedFiles.forEach((f, index) => {
    const li = document.createElement('li');
    li.className = 'list-group-item d-flex justify-content-between align-items-center';
    li.dataset.index = index;

    const label = document.createElement('span');
    label.textContent = `${f.name} (${Math.round(f.size / 1024)} KB)`;

    const controls = document.createElement('div');
    controls.append(
      makeButton('move-up', '▲', `Move ${f.name} up`, () => {
        if (index > 0) swap(index - 1, index);
      }),
      ' ', // keeps the small inline gap between the two buttons
      makeButton('move-down', '▼', `Move ${f.name} down`, () => {
        if (index < selectedFiles.length - 1) swap(index, index + 1);
      })
    );

    li.append(label, controls);
    fileList.appendChild(li);
  });
}
325+
</script>
326+
</body>
327+
</html>

0 commit comments

Comments
 (0)