Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions exportsrv/formatter/cslFormat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from exportsrv.formatter.format import Format
from exportsrv.formatter.ads import adsFormatter, adsOrganizer, adsJournalFormat, adsOutputFormat
from exportsrv.formatter.toLaTex import encode_laTex, encode_laTex_author, html_to_laTex, encode_latex_doi
from exportsrv.utils import mathml_to_plaintext

# This class accepts JSON and sends it to the citeproc library to get reformatted
# We are supporting, as of end of 2024, 11 complete cls (formatting all the fields) and 20 styles that
Expand Down Expand Up @@ -146,6 +147,10 @@ def __update_data(self):
for data in self.for_cls:
if len(data.get('DOI', '')) > 0:
data['DOI'] = data['DOI'].lstrip('doi:')
# remove MathML markup
elif (self.csl_style == 'ieee'):
for data in self.for_cls:
data['title'] = mathml_to_plaintext(data['title'])


def __update_author_etal(self, author, the_rest, bibcode):
Expand Down
4 changes: 3 additions & 1 deletion exportsrv/formatter/fieldedFormat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from exportsrv.formatter.format import Format
from exportsrv.formatter.ads import adsOutputFormat
from exportsrv.utils import get_eprint
from exportsrv.utils import get_eprint, mathml_to_plaintext
from exportsrv.formatter.strftime import strftime

# This class accepts a JSON object created by Solr and can reformat it
Expand Down Expand Up @@ -134,6 +134,7 @@ def __format_line_wrapped(self, text):
return fill(text, width=72)



def __get_tags(self, export_format):
"""
convert from solr to each fielded types' tags
Expand Down Expand Up @@ -537,6 +538,7 @@ def __add_in(self, field, value):
"""
if ((isinstance(value, str) or isinstance(value, bytes)) and (len(value) > 0)) or \
(isinstance(value, int) and (value is not None)):
value = mathml_to_plaintext(value)
return field + ' ' + value + '\n'
return ''

Expand Down
5 changes: 3 additions & 2 deletions exportsrv/formatter/rssFormat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from exportsrv.formatter.format import Format
from exportsrv.formatter.ads import adsOutputFormat
from exportsrv.utils import mathml_to_plaintext

class RSSFormat(Format):

Expand Down Expand Up @@ -37,7 +38,7 @@ def __get_author_title(self, a_doc):
first_author = ''
if 'author' in a_doc:
first_author = a_doc['author'][0]
title = ''.join(a_doc.get('title', ''))
title = mathml_to_plaintext(''.join(a_doc.get('title', '')))
if len(first_author) > 0 and len(title) > 0:
return first_author + ': ' + title
if len(first_author) > 0:
Expand Down Expand Up @@ -87,7 +88,7 @@ def __get_doc(self, index):
elif (field == 'url'):
self.__add_in(item, fields[field], current_app.config.get('EXPORT_SERVICE_FROM_BBB_URL') + '/' + a_doc.get('bibcode', ''))
elif (field == 'abstract'):
self.__add_in(item, fields[field], self.__format_line_wrapped(a_doc.get(field, '')))
self.__add_in(item, fields[field], self.__format_line_wrapped(mathml_to_plaintext(a_doc.get(field, ''))))
return item


Expand Down
7 changes: 6 additions & 1 deletion exportsrv/formatter/toLaTex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import re
from collections import OrderedDict
from exportsrv.formatter.latexencode import utf8tolatex
from exportsrv.utils import mathml_to_latex

# this module contains methods to encode for latex output

Expand Down Expand Up @@ -34,6 +35,9 @@ def encode_laTex(text):
:return:
"""
if (len(text) > 1):
# first remove/convert any mathML markup
text = mathml_to_latex(text)

# if there is any Greek letter macro, map it here
# convert something like \\Sigma\\ to \textbackslash{}Sigma\textbackslash{}
# however needs to go through utf8tolatex so add placeholder to be replaced afterward
Expand Down Expand Up @@ -113,4 +117,5 @@ def html_to_laTex(text):
"""
for key in REGEX_HTML_TAG.keys():
text = key.sub(REGEX_HTML_TAG[key], text)
return text
return text

3 changes: 2 additions & 1 deletion exportsrv/formatter/voTableFormat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from exportsrv.formatter.format import Format
from exportsrv.formatter.ads import adsOutputFormat
from exportsrv.formatter.strftime import strftime
from exportsrv.utils import mathml_to_plaintext

class VOTableFormat(Format):

Expand Down Expand Up @@ -78,7 +79,7 @@ def __get_doc(self, index):
if (field == 'bibcode'):
self.__add_in_table_data(item, a_doc.get(field, ''))
elif (field == 'title'):
self.__add_in_table_data(item, ''.join(a_doc.get(field, '')))
self.__add_in_table_data(item, mathml_to_plaintext(''.join(a_doc.get(field, ''))))
elif (field == 'author'):
self.__add_in_table_data(item, '; '.join(a_doc.get(field, '')))
elif (field == 'pub_raw'):
Expand Down
14 changes: 7 additions & 7 deletions exportsrv/formatter/xmlFormat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from exportsrv.formatter.format import Format
from exportsrv.formatter.ads import adsOutputFormat
from exportsrv.utils import get_eprint
from exportsrv.utils import get_eprint, mathml_to_plaintext
from exportsrv.formatter.strftime import strftime

# This class accepts a JSON object created by Solr and can reformat it
Expand Down Expand Up @@ -481,7 +481,7 @@ def __get_doc_dublin_xml(self, index):
if field in ['bibcode', 'copyright']:
self.__add_in(record, fields[field], a_doc.get(field, ''))
elif (field == 'title'):
self.__add_in(record, fields[field], ''.join(a_doc.get(field, '')))
self.__add_in(record, fields[field], mathml_to_plaintext(''.join(a_doc.get(field, ''))))
elif (field == 'author'):
self.__add_author_list(a_doc, record, fields[field])
elif (field == 'pub_raw'):
Expand All @@ -493,7 +493,7 @@ def __get_doc_dublin_xml(self, index):
elif (field == 'url'):
self.__add_in(record, fields[field], current_app.config.get('EXPORT_SERVICE_FROM_BBB_URL') + '/' + a_doc.get('bibcode', ''))
elif (field == 'abstract'):
self.__add_in(record, fields[field], self.__format_line_wrapped(a_doc.get(field, '')))
self.__add_in(record, fields[field], self.__format_line_wrapped(mathml_to_plaintext(a_doc.get(field, ''))))
elif (field == 'doi'):
self.__add_in(record, fields[field], self.__get_doi('; '.join(a_doc.get(field, ''))))
elif (field == 'num_citations'):
Expand Down Expand Up @@ -525,7 +525,7 @@ def __get_doc_reference_xml(self, index, xml_export_format):
if field in ['bibcode', 'pub', 'volume', 'copyright']:
self.__add_in(record, fields[field], a_doc.get(field, ''))
elif field in ['title', 'doi']:
self.__add_in(record, fields[field], ''.join(a_doc.get(field, '')))
self.__add_in(record, fields[field], mathml_to_plaintext(''.join(a_doc.get(field, ''))))
elif (field == 'author'):
self.__add_author_list(a_doc, record, fields[field])
elif (field == 'aff'):
Expand All @@ -543,7 +543,7 @@ def __get_doc_reference_xml(self, index, xml_export_format):
elif (field == 'num_citations'):
self.__add_in(record, fields[field], self.__get_citation(int(a_doc.get(field, 0)), xml_export_format))
elif (field == 'abstract'):
self.__add_in(record, fields[field], self.__format_line_wrapped(a_doc.get(field, '')))
self.__add_in(record, fields[field], self.__format_line_wrapped(mathml_to_plaintext(a_doc.get(field, ''))))
elif (field == 'link'):
self.__add_doc_links(a_doc, record)
elif (field == 'eprintid'):
Expand Down Expand Up @@ -705,7 +705,7 @@ def __get_doc_jats_xml(self, index):
ET.SubElement(article_meta_section, fields[field], {"pub-id-type": "doi"}).text = '; '.join(a_doc.get(field, ''))
elif (field == 'title'):
title = ET.SubElement(article_meta_section, fields[field])
ET.SubElement(title, 'article-title').text = '; '.join(a_doc.get(field, ''))
ET.SubElement(title, 'article-title').text = mathml_to_plaintext('; '.join(a_doc.get(field, '')))
elif (field == 'author'):
# add `contrib-group` tag and call the function to add list of authors to this tag
self.__add_author_list_jats_xml(a_doc, ET.SubElement(article_meta_section, fields[field]))
Expand All @@ -719,7 +719,7 @@ def __get_doc_jats_xml(self, index):
ET.SubElement(article_meta_section, "permissions").text = ""
# add abstract tag, then paragraph tag around the abstract (required)
abstract = ET.SubElement(article_meta_section, fields[field])
self.__add_in(abstract, "p", a_doc.get(field, ''))
self.__add_in(abstract, "p", mathml_to_plaintext(a_doc.get(field, '')))
elif field in ['page', 'page_range']:
self.__add_page(a_doc, article_meta_section, fields[field])

Expand Down
Loading