adsabs · kelockhart · Jul 3, 2025 · Jul 2, 2025 · Jul 3, 2025 · Jul 3, 2025
diff --git a/exportsrv/formatter/cslFormat.py b/exportsrv/formatter/cslFormat.py
@@ -11,6 +11,7 @@
 from exportsrv.formatter.format import Format
 from exportsrv.formatter.ads import adsFormatter, adsOrganizer, adsJournalFormat, adsOutputFormat
 from exportsrv.formatter.toLaTex import encode_laTex, encode_laTex_author, html_to_laTex, encode_latex_doi
+from exportsrv.utils import mathml_to_plaintext
 
 # This class accepts JSON and sends it to citeproc library to get reformated
 # We are supporting, as of end of 2024, 11 complete cls (formatting all the fields) and 20 syles that
@@ -146,6 +147,12 @@ def __update_data(self):
             for data in self.for_cls:
                 if len(data.get('DOI', '')) > 0:
                     data['DOI'] = data['DOI'].lstrip('doi:')
+        # remove MathML markup
+        elif (self.csl_style == 'ieee'):
+            for data in self.for_cls:
+                # guard against records without a title, mirroring the DOI check above
+                if len(data.get('title', '')) > 0:
+                    data['title'] = mathml_to_plaintext(data['title'])
 
 
     def __update_author_etal(self, author, the_rest, bibcode):

diff --git a/exportsrv/formatter/fieldedFormat.py b/exportsrv/formatter/fieldedFormat.py
@@ -9,7 +9,7 @@
 
 from exportsrv.formatter.format import Format
 from exportsrv.formatter.ads import adsOutputFormat
-from exportsrv.utils import get_eprint
+from exportsrv.utils import get_eprint, mathml_to_plaintext
 from exportsrv.formatter.strftime import strftime
 
 # This class accepts JSON object created by Solr and can reformats it
@@ -537,6 +537,9 @@ def __add_in(self, field, value):
         """
         if ((isinstance(value, str) or isinstance(value, bytes)) and (len(value) > 0)) or \
            (isinstance(value, int) and (value is not None)):
+            # only text can carry MathML markup; bytes/int values are passed through untouched
+            if isinstance(value, str):
+                value = mathml_to_plaintext(value)
             return field + ' ' + value + '\n'
         return ''
 

diff --git a/exportsrv/formatter/rssFormat.py b/exportsrv/formatter/rssFormat.py
@@ -7,6 +7,7 @@
 
 from exportsrv.formatter.format import Format
 from exportsrv.formatter.ads import adsOutputFormat
+from exportsrv.utils import mathml_to_plaintext
 
 class RSSFormat(Format):
 
@@ -37,7 +38,7 @@ def __get_author_title(self, a_doc):
         first_author = ''
         if 'author' in a_doc:
             first_author = a_doc['author'][0]
-        title = ''.join(a_doc.get('title', ''))
+        title = mathml_to_plaintext(''.join(a_doc.get('title', '')))
         if len(first_author) > 0 and len(title) > 0:
             return first_author + ': ' + title
         if len(first_author) > 0:
@@ -87,7 +88,7 @@ def __get_doc(self, index):
             elif (field == 'url'):
                 self.__add_in(item, fields[field], current_app.config.get('EXPORT_SERVICE_FROM_BBB_URL') + '/' + a_doc.get('bibcode', ''))
             elif (field == 'abstract'):
-                self.__add_in(item, fields[field], self.__format_line_wrapped(a_doc.get(field, '')))
+                self.__add_in(item, fields[field], self.__format_line_wrapped(mathml_to_plaintext(a_doc.get(field, ''))))
         return item
 
 

diff --git a/exportsrv/formatter/toLaTex.py b/exportsrv/formatter/toLaTex.py
@@ -3,6 +3,7 @@
 import re
 from collections import OrderedDict
 from exportsrv.formatter.latexencode import utf8tolatex
+from exportsrv.utils import mathml_to_latex
 
 # this module contains methods to encode for latex output
 
@@ -34,6 +35,9 @@ def encode_laTex(text):
     :return:
     """
     if (len(text) > 1):
+        # first remove/convert any mathML markup
+        text = mathml_to_latex(text)
+
         # if any greek letter macro map it here
         # convert something like \\Sigma\\ to \textbackslash{}Sigma\textbackslash{}
         # however needs to go through utf8tolatex so add placeholder to be replaced afterward
@@ -113,4 +117,5 @@ def html_to_laTex(text):
     """
     for key in REGEX_HTML_TAG.keys():
         text = key.sub(REGEX_HTML_TAG[key], text)
-    return text
+    return text
+
diff --git a/exportsrv/formatter/voTableFormat.py b/exportsrv/formatter/voTableFormat.py
@@ -7,6 +7,7 @@
 from exportsrv.formatter.format import Format
 from exportsrv.formatter.ads import adsOutputFormat
 from exportsrv.formatter.strftime import strftime
+from exportsrv.utils import mathml_to_plaintext
 
 class VOTableFormat(Format):
 
@@ -78,7 +79,7 @@ def __get_doc(self, index):
             if (field == 'bibcode'):
                 self.__add_in_table_data(item, a_doc.get(field, ''))
             elif (field == 'title'):
-                self.__add_in_table_data(item, ''.join(a_doc.get(field, '')))
+                self.__add_in_table_data(item, mathml_to_plaintext(''.join(a_doc.get(field, ''))))
             elif (field == 'author'):
                 self.__add_in_table_data(item, '; '.join(a_doc.get(field, '')))
             elif (field == 'pub_raw'):

diff --git a/exportsrv/formatter/xmlFormat.py b/exportsrv/formatter/xmlFormat.py
@@ -9,7 +9,7 @@
 
 from exportsrv.formatter.format import Format
 from exportsrv.formatter.ads import adsOutputFormat
-from exportsrv.utils import get_eprint
+from exportsrv.utils import get_eprint, mathml_to_plaintext
 from exportsrv.formatter.strftime import strftime
 
 # This class accepts JSON object created by Solr and can reformat it
@@ -481,7 +481,7 @@ def __get_doc_dublin_xml(self, index):
             if field in ['bibcode', 'copyright']:
                 self.__add_in(record, fields[field], a_doc.get(field, ''))
             elif (field == 'title'):
-                self.__add_in(record, fields[field], ''.join(a_doc.get(field, '')))
+                self.__add_in(record, fields[field], mathml_to_plaintext(''.join(a_doc.get(field, ''))))
             elif (field == 'author'):
                 self.__add_author_list(a_doc, record, fields[field])
             elif (field == 'pub_raw'):
@@ -493,7 +493,7 @@ def __get_doc_dublin_xml(self, index):
             elif (field == 'url'):
                 self.__add_in(record, fields[field], current_app.config.get('EXPORT_SERVICE_FROM_BBB_URL') + '/' + a_doc.get('bibcode', ''))
             elif (field == 'abstract'):
-                self.__add_in(record, fields[field], self.__format_line_wrapped(a_doc.get(field, '')))
+                self.__add_in(record, fields[field], self.__format_line_wrapped(mathml_to_plaintext(a_doc.get(field, ''))))
             elif (field == 'doi'):
                 self.__add_in(record, fields[field], self.__get_doi('; '.join(a_doc.get(field, ''))))
             elif (field == 'num_citations'):
@@ -525,7 +525,10 @@ def __get_doc_reference_xml(self, index, xml_export_format):
             if field in ['bibcode', 'pub', 'volume', 'copyright']:
                 self.__add_in(record, fields[field], a_doc.get(field, ''))
-            elif field in ['title', 'doi']:
-                self.__add_in(record, fields[field], ''.join(a_doc.get(field, '')))
+            elif (field == 'title'):
+                self.__add_in(record, fields[field], mathml_to_plaintext(''.join(a_doc.get(field, ''))))
+            elif (field == 'doi'):
+                # DOIs are identifiers, not marked-up text; emit them unchanged
+                self.__add_in(record, fields[field], ''.join(a_doc.get(field, '')))
             elif (field == 'author'):
                 self.__add_author_list(a_doc, record, fields[field])
             elif (field == 'aff'):
@@ -543,7 +546,7 @@ def __get_doc_reference_xml(self, index, xml_export_format):
             elif (field == 'num_citations'):
                 self.__add_in(record, fields[field], self.__get_citation(int(a_doc.get(field, 0)), xml_export_format))
             elif (field == 'abstract'):
-                self.__add_in(record, fields[field], self.__format_line_wrapped(a_doc.get(field, '')))
+                self.__add_in(record, fields[field], self.__format_line_wrapped(mathml_to_plaintext(a_doc.get(field, ''))))
             elif (field == 'link'):
                 self.__add_doc_links(a_doc, record)
             elif (field == 'eprintid'):
@@ -705,7 +708,7 @@ def __get_doc_jats_xml(self, index):
                     ET.SubElement(article_meta_section, fields[field], {"pub-id-type": "doi"}).text = '; '.join(a_doc.get(field, ''))
             elif (field == 'title'):
                 title = ET.SubElement(article_meta_section, fields[field])
-                ET.SubElement(title, 'article-title').text = '; '.join(a_doc.get(field, ''))
+                ET.SubElement(title, 'article-title').text = mathml_to_plaintext('; '.join(a_doc.get(field, '')))
             elif (field == 'author'):
                 # add `contrib-group` tag and call the function to add list of authors to this tag
                 self.__add_author_list_jats_xml(a_doc, ET.SubElement(article_meta_section, fields[field]))
@@ -719,7 +722,7 @@ def __get_doc_jats_xml(self, index):
                 ET.SubElement(article_meta_section, "permissions").text = ""
                 # add abstract tag, then paragraph tag around the abstract (required)
                 abstract = ET.SubElement(article_meta_section, fields[field])
-                self.__add_in(abstract, "p", a_doc.get(field, ''))
+                self.__add_in(abstract, "p", mathml_to_plaintext(a_doc.get(field, '')))
             elif field in ['page', 'page_range']:
                 self.__add_page(a_doc, article_meta_section, fields[field])