diff --git a/grobid-core/src/main/java/org/grobid/core/data/Equation.java b/grobid-core/src/main/java/org/grobid/core/data/Equation.java
index 141660d848..753e3d5908 100644
--- a/grobid-core/src/main/java/org/grobid/core/data/Equation.java
+++ b/grobid-core/src/main/java/org/grobid/core/data/Equation.java
@@ -2,7 +2,9 @@
 
 import nu.xom.Attribute;
 import nu.xom.Element;
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Triple;
 import org.grobid.core.document.xml.XmlBuilderUtils;
 import org.grobid.core.engines.Engine;
 import org.grobid.core.engines.config.GrobidAnalysisConfig;
@@ -10,13 +12,17 @@
 import org.grobid.core.layout.LayoutToken;
 import org.grobid.core.utilities.BoundingBoxCalculator;
 import org.grobid.core.utilities.LayoutTokensUtil;
+import org.grobid.core.utilities.OffsetPosition;
 import org.grobid.core.utilities.counters.CntManager;
 import org.grobid.core.utilities.TextUtilities;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.SortedSet;
 
+import static org.grobid.core.document.TEIFormatter.*;
+
 /**
  * Class for representing an equation.
  *
@@ -56,9 +62,15 @@ public Element toTEIElement(GrobidAnalysisConfig config) {
 			XmlBuilderUtils.addCoords(formulaElement, LayoutTokensUtil.getCoordsStringForOneBox(getLayoutTokens()));
 		}
 
-		formulaElement.appendChild(LayoutTokensUtil.normalizeText(content.toString()).trim());
+        List<Triple<String, String, OffsetPosition>> stylesList = extractStylesList(getContentTokens(), Arrays.asList(TEI_STYLE_BOLD_NAME, TEI_STYLE_ITALIC_NAME))   ;
+
+        if (CollectionUtils.isNotEmpty(stylesList)) {
+            applyStyleList(formulaElement, getContent(), stylesList);
+        } else {
+            formulaElement.appendChild(LayoutTokensUtil.normalizeText(content.toString()).trim());
+        }
 
-		if ( (label != null) && (label.length()>0) ) {
+		if ( StringUtils.isNotEmpty(label) ) {
 			Element labelEl = XmlBuilderUtils.teiElement("label",
     	    		LayoutTokensUtil.normalizeText(label.toString()));
 			formulaElement.appendChild(labelEl);
@@ -79,6 +91,16 @@ public List<LayoutToken> getContentTokens() {
 		return contentTokens;
 	}
 
+    /** Appends {@code tokens} to the content tokens, creating the list on first use; null is ignored. */
+    public void addContentTokens(List<LayoutToken> tokens) {
+        if (tokens != null) {
+            if (contentTokens == null) {
+                contentTokens = new ArrayList<>();
+            }
+            contentTokens.addAll(tokens);
+        }
+    }
+
 	public List<LayoutToken> getLabelTokens() {
 		return labelTokens;
 	}
@@ -181,9 +203,9 @@ public void addLayoutTokens(List<LayoutToken> tokens) {
     	if (tokens == null)
     		return;
     	if (layoutTokens == null)
-    		layoutTokens = new ArrayList<LayoutToken>();
-    	for(LayoutToken token : tokens)
-	    	layoutTokens.add(token);
+    		layoutTokens = new ArrayList<>();
+
+        layoutTokens.addAll(tokens);
     }
 
     public List<BoundingBox> getCoordinates() {
diff --git a/grobid-core/src/main/java/org/grobid/core/data/Figure.java b/grobid-core/src/main/java/org/grobid/core/data/Figure.java
index e9417e9217..b4784a8e70 100644
--- a/grobid-core/src/main/java/org/grobid/core/data/Figure.java
+++ b/grobid-core/src/main/java/org/grobid/core/data/Figure.java
@@ -5,6 +5,7 @@
 import com.google.common.collect.Lists;
 import com.google.common.base.Joiner;
 
+import org.apache.commons.lang3.tuple.Triple;
 import org.grobid.core.GrobidModels;
 import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -17,12 +18,9 @@
 import org.grobid.core.layout.GraphicObjectType;
 import org.grobid.core.layout.LayoutToken;
 import org.grobid.core.layout.VectorGraphicBoxCalculator;
-import org.grobid.core.utilities.BoundingBoxCalculator;
-import org.grobid.core.utilities.LayoutTokensUtil;
-import org.grobid.core.utilities.TextUtilities;
+import org.grobid.core.utilities.*;
 import org.grobid.core.tokenization.TaggingTokenCluster;
 import org.grobid.core.tokenization.TaggingTokenClusteror;
-import org.grobid.core.utilities.KeyGen;
 import org.grobid.core.engines.label.TaggingLabels;
 import org.grobid.core.engines.label.TaggingLabel;
 import org.grobid.core.engines.citations.CalloutAnalyzer.MarkerType;
@@ -41,6 +39,8 @@
 import java.util.SortedSet;
 import java.util.Collections;
 
+import static org.grobid.core.document.TEIFormatter.applyStyleList;
+import static org.grobid.core.document.TEIFormatter.extractStylesList;
 import static org.grobid.core.document.xml.XmlBuilderUtils.teiElement;
 import static org.grobid.core.document.xml.XmlBuilderUtils.addXmlId;
 import static org.grobid.core.document.xml.XmlBuilderUtils.textNode;
@@ -388,7 +388,7 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form
 
             // if the segment has been parsed with the full text model we further extract the clusters
             // to get the bibliographical references
-            if ( (labeledCaption != null) && (labeledCaption.length() > 0) ) {
+            if (StringUtils.isNotEmpty(labeledCaption))  {
                 TaggingTokenClusteror clusteror = new TaggingTokenClusteror(GrobidModels.FULLTEXT, labeledCaption, captionLayoutTokens);
                 List<TaggingTokenCluster> clusters = clusteror.cluster();
                 
@@ -404,7 +404,9 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form
 
                     TaggingLabel clusterLabel = cluster.getTaggingLabel();
                     //String clusterContent = LayoutTokensUtil.normalizeText(cluster.concatTokens());
-                    String clusterContent = LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens());
+                    List<LayoutToken> dehyphenized = LayoutTokensUtil.dehyphenize(cluster.concatTokens());
+                    String text = LayoutTokensUtil.toText(dehyphenized).replace("\n", " ");
+
                     if (clusterLabel.equals(TaggingLabels.CITATION_MARKER)) {
                         try {
                             List<Node> refNodes = formatter.markReferencesTEILuceneBased(
@@ -422,7 +424,13 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form
                             LOGGER.warn("Problem when serializing TEI fragment for figure caption", e);
                         }
                     } else {
-                        desc.appendChild(textNode(clusterContent));
+                        List<Triple<String, String, OffsetPosition>> stylesList = extractStylesList(dehyphenized);
+
+                        if (CollectionUtils.isNotEmpty(stylesList)) {
+                            applyStyleList(desc, text, stylesList);
+                        } else {
+                            desc.appendChild(StringUtils.normalizeSpace(text));
+                        }
                     }
                 }
             } else {
diff --git a/grobid-core/src/main/java/org/grobid/core/data/Table.java b/grobid-core/src/main/java/org/grobid/core/data/Table.java
index 6356978837..abccb0bcd4 100644
--- a/grobid-core/src/main/java/org/grobid/core/data/Table.java
+++ b/grobid-core/src/main/java/org/grobid/core/data/Table.java
@@ -1,5 +1,7 @@
 package org.grobid.core.data;
 
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.tuple.Triple;
 import org.grobid.core.GrobidModels;
 import org.apache.commons.lang3.StringUtils;
 import org.grobid.core.data.table.Cell;
@@ -15,6 +17,7 @@
 import org.grobid.core.layout.LayoutToken;
 import org.grobid.core.utilities.BoundingBoxCalculator;
 import org.grobid.core.utilities.LayoutTokensUtil;
+import org.grobid.core.utilities.OffsetPosition;
 import org.grobid.core.utilities.counters.CntManager;
 import org.grobid.core.engines.counters.TableRejectionCounters;
 import org.grobid.core.tokenization.TaggingTokenCluster;
@@ -30,9 +33,9 @@
 import nu.xom.Attribute;
 import nu.xom.Element;
 import nu.xom.Node;
-import nu.xom.Text;
 
-import static org.grobid.core.document.xml.XmlBuilderUtils.teiElement;
+import static org.grobid.core.document.TEIFormatter.applyStyleList;
+import static org.grobid.core.document.TEIFormatter.extractStylesList;
 import static org.grobid.core.document.xml.XmlBuilderUtils.addXmlId;
 import static org.grobid.core.document.xml.XmlBuilderUtils.textNode;
 
@@ -119,7 +122,9 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form
 
                     TaggingLabel clusterLabel = cluster.getTaggingLabel();
                     //String clusterContent = LayoutTokensUtil.normalizeText(cluster.concatTokens());
-                    String clusterContent = LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens());
+                    List<LayoutToken> dehyphenized = LayoutTokensUtil.dehyphenize(cluster.concatTokens());
+                    String text = LayoutTokensUtil.toText(dehyphenized).replace("\n", " ");
+
                     if (clusterLabel.equals(TaggingLabels.CITATION_MARKER)) {
                         try {
                             List<Node> refNodes = formatter.markReferencesTEILuceneBased(
@@ -137,7 +142,13 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form
                             LOGGER.warn("Problem when serializing TEI fragment for table caption", e);
                         }
                     } else {
-                        desc.appendChild(textNode(clusterContent));
+                        List<Triple<String, String, OffsetPosition>> stylesList = extractStylesList(dehyphenized);
+
+                        if (CollectionUtils.isNotEmpty(stylesList)) {
+                            applyStyleList(desc, text, stylesList);
+                        } else {
+                            desc.appendChild(StringUtils.normalizeSpace(text));
+                        }
                     }
 
                     if (desc != null && config.isWithSentenceSegmentation()) {
diff --git a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java
index f66baaa0c0..a8df9f310b 100755
--- a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java
+++ b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java
@@ -3,16 +3,14 @@
 import com.google.common.base.Joiner;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Sets;
-
-import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.lang3.tuple.Pair;
-import org.apache.commons.lang3.StringUtils;
-
 import nu.xom.Attribute;
 import nu.xom.Element;
 import nu.xom.Node;
 import nu.xom.Text;
-
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.commons.lang3.tuple.Triple;
 import org.grobid.core.GrobidModels;
 import org.grobid.core.data.CopyrightsLicense.License;
 import org.grobid.core.data.CopyrightsLicense.CopyrightsOwner;
@@ -21,21 +19,19 @@
 import org.grobid.core.document.xml.XmlBuilderUtils;
 import org.grobid.core.engines.Engine;
 import org.grobid.core.engines.FullTextParser;
-import org.grobid.core.engines.label.SegmentationLabels;
+import org.grobid.core.engines.citations.CalloutAnalyzer.MarkerType;
 import org.grobid.core.engines.config.GrobidAnalysisConfig;
+import org.grobid.core.engines.label.SegmentationLabels;
 import org.grobid.core.engines.label.TaggingLabel;
 import org.grobid.core.engines.label.TaggingLabels;
 import org.grobid.core.exceptions.GrobidException;
 import org.grobid.core.lang.Language;
 import org.grobid.core.layout.*;
-import org.grobid.core.utilities.SentenceUtilities;
 import org.grobid.core.tokenization.TaggingTokenCluster;
 import org.grobid.core.tokenization.TaggingTokenClusteror;
 import org.grobid.core.utilities.*;
 import org.grobid.core.utilities.matching.EntityMatcherException;
 import org.grobid.core.utilities.matching.ReferenceMarkerMatcher;
-import org.grobid.core.engines.citations.CalloutAnalyzer.MarkerType;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -46,10 +42,8 @@
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
+import static org.grobid.core.document.xml.XmlBuilderUtils.*;
 
-import static org.grobid.core.document.xml.XmlBuilderUtils.teiElement;
-import static org.grobid.core.document.xml.XmlBuilderUtils.addXmlId;
-import static org.grobid.core.document.xml.XmlBuilderUtils.textNode;
 
 /**
  * Class for generating a TEI representation of a document.
@@ -58,6 +52,10 @@
 @SuppressWarnings("StringConcatenationInsideStringBuilderAppend")
 public class TEIFormatter {
     private static final Logger LOGGER = LoggerFactory.getLogger(TEIFormatter.class);
+    public static final String TEI_STYLE_ITALIC_NAME = "italic";
+    public static final String TEI_STYLE_BOLD_NAME = "bold"; // final: shared style-name constants must not be reassignable
+    public static final String TEI_STYLE_SUPERSCRIPT_NAME = "superscript";
+    public static final String TEI_STYLE_SUBSCRIPT_NAME = "subscript";
 
     private Document doc = null;
     private FullTextParser fullTextParser = null;
@@ -169,7 +167,33 @@ public StringBuilder toTEIHeader(BiblioItem biblio,
         }
 
         if (biblio.getTitle() != null) {
-            tei.append(TextUtilities.HTMLEncode(biblio.getTitle()));
+            // The title tokens are the input from which the style spans (<hi rend="...">) are extracted.
+            List<LayoutToken> layoutTokens = biblio.getLayoutTokens(TaggingLabels.HEADER_TITLE);
+            // Without title tokens there is nothing to style: fall through to the plain encoded title.
+            List<Triple<String, String, OffsetPosition>> stylesList =
+                CollectionUtils.isNotEmpty(layoutTokens) ? extractStylesList(layoutTokens) : null;
+            if (CollectionUtils.isNotEmpty(stylesList)) {
+                String text = LayoutTokensUtil.toText(layoutTokens).replace("\n", " ");
+                int lastPosition = 0;
+                for (Triple<String, String, OffsetPosition> style : stylesList) {
+                    OffsetPosition offsetStyle = style.getRight();
+                    // Unstyled text before this span; one boundary space is kept on each side if present.
+                    String subString = text.substring(lastPosition, offsetStyle.start);
+                    String prefixSpace = StringUtils.startsWith(subString, " ") ? " " : "";
+                    String suffixSpace = StringUtils.endsWith(subString, " ") ? " " : "";
+                    // Text goes into a raw string buffer, so it must be escaped like the plain title below.
+                    tei.append(prefixSpace).append(TextUtilities.HTMLEncode(StringUtils.normalizeSpace(subString))).append(suffixSpace);
+                    tei.append("<hi rend=\"").append(TextUtilities.HTMLEncode(style.getLeft())).append("\">")
+                        .append(TextUtilities.HTMLEncode(StringUtils.normalizeSpace(text.substring(offsetStyle.start, offsetStyle.end))))
+                        .append("</hi>");
+                    lastPosition = offsetStyle.end;
+                }
+                String subString = text.substring(lastPosition);
+                String prefixSpace = StringUtils.startsWith(subString, " ") ? " " : "";
+                tei.append(prefixSpace).append(TextUtilities.HTMLEncode(StringUtils.normalizeSpace(subString)));
+            } else {
+                tei.append(TextUtilities.HTMLEncode(biblio.getTitle()));
+            }
         }
 
         tei.append("</title>\n");
@@ -268,8 +292,8 @@ public StringBuilder toTEIHeader(BiblioItem biblio,
             // We introduce something more meaningful with TEI customization to encode copyrights information:
             // - @resp with value "publisher", "authors", "unknown", we add a comment to clarify that @resp
             //   should be interpreted as the copyrights owner
-            // - license related to copyrights exception is encoded via <licence>  
-            // (note: I have no clue what can mean "free" as status for a document - there are always some sort of 
+            // - license related to copyrights exception is encoded via <licence>
+            // (note: I have no clue what can mean "free" as status for a document - there are always some sort of
             // restrictions like moral rights even for public domain documents)
             if (copyrightsLicense != null) {
                 tei.append("\t\t\t\t<availability ");
@@ -306,7 +330,7 @@ public StringBuilder toTEIHeader(BiblioItem biblio,
 
                 tei.append(" status=\"unknown\">\n");
                 tei.append("\t\t\t\t\t<licence/>\n");
-                
+
                 if (defaultPublicationStatement != null) {
                     tei.append("\t\t\t\t\t<p>" +
                             TextUtilities.HTMLEncode(defaultPublicationStatement) + "</p>\n");
@@ -930,8 +954,8 @@ else if (biblio.getE_Year().length() == 4)
                                             bds,
                                             false,
                                             new LayoutTokenization(biblio.getLayoutTokens(TaggingLabels.HEADER_ABSTRACT)),
-                                            null, 
-                                            null, 
+                                            null,
+                                            null,
                                             null,
                                             null,
                                             markerTypes,
@@ -1075,7 +1099,7 @@ public StringBuilder toTEIBody(StringBuilder buffer,
     protected List<Note> getTeiNotes(Document doc) {
         // There are two types of structured notes currently supported, foot notes and margin notes.
         // We consider that head notes are always only presentation matter and are never references
-        // in a text body. 
+        // in a text body.
 
         SortedSet<DocumentPiece> documentNoteParts = doc.getDocumentPart(SegmentationLabels.FOOTNOTE);
         List<Note> notes = getTeiNotes(doc, documentNoteParts, Note.NoteType.FOOT);
@@ -1119,7 +1143,7 @@ protected List<Note> getTeiNotes(Document doc, SortedSet<DocumentPiece> document
             if (localNotes != null)
                 notes.addAll(localNotes);
         }
-        
+
         notes.stream()
             .forEach(n -> n.setText(TextUtilities.dehyphenize(n.getText())));
 
@@ -1175,13 +1199,13 @@ protected List<Note> makeNotes(List<LayoutToken> noteTokens, String footText, No
         Note localNote = null;
         if (currentNumber == -1)
             localNote = new Note(null, noteTokens, footText, noteType);
-        else 
+        else
             localNote = new Note(""+currentNumber, noteTokens, footText, noteType);
 
         notes.add(localNote);
 
         // add possible subsequent notes concatenated in the same note sequence (this is a common error,
-        // which is addressed here by heuristics, it may not be necessary in the future with a better 
+        // which is addressed here by heuristics, it may not be necessary in the future with a better
         // segmentation model using more footnotes training data)
         if (currentNumber != -1) {
             String nextLabel = " " + (currentNumber+1);
@@ -1191,7 +1215,7 @@ protected List<Note> makeNotes(List<LayoutToken> noteTokens, String footText, No
 
             int nextFootnoteLabelIndex = footText.indexOf(nextLabel);
             if (nextFootnoteLabelIndex != -1) {
-                // optionally we could restrict here to superscript numbers 
+                // optionally we could restrict here to superscript numbers
                 // review local note
                 localNote.setText(footText.substring(0, nextFootnoteLabelIndex));
                 int pos = 0;
@@ -1233,9 +1257,9 @@ private StringBuilder toTEINote(StringBuilder tei,
                                     List<MarkerType> markerTypes,
                                     GrobidAnalysisConfig config) throws Exception {
         // pattern is <note n="1" place="foot" xml:id="foot_1">
-        // or 
+        // or
         // pattern is <note n="1" place="margin" xml:id="margin_1">
-        
+
         // if no note label is found, no @n attribute but we generate a random xml:id (not be used currently)
 
         for (Note note : notes) {
@@ -1247,20 +1271,20 @@ private StringBuilder toTEINote(StringBuilder tei,
 
             addXmlId(desc, note.getIdentifier());
 
-            // this is a paragraph element for storing text content of the note, which is 
+            // this is a paragraph element for storing text content of the note, which is
             // better practice than just putting the text under the <note> element
             Element pNote = XmlBuilderUtils.teiElement("p");
             if (config.isGenerateTeiIds()) {
                 String pID = KeyGen.getKey().substring(0, 7);
                 addXmlId(pNote, "_" + pID);
             }
-            
+
             if (config.isGenerateTeiCoordinates("p")) {
                 String coords = LayoutTokensUtil.getCoordsString(note.getTokens());
                 desc.addAttribute(new Attribute("coords", coords));
             }
-            
-            // for labelling bibliographical references in notes 
+
+            // for labelling bibliographical references in notes
             List<LayoutToken> noteTokens = note.getTokens();
 
             String coords = null;
@@ -1349,7 +1373,7 @@ public StringBuilder processTEIDivSection(String xmlType,
         StringBuilder contentBuffer = new StringBuilder();
 
         contentBuffer = toTEITextPiece(contentBuffer, text, null, biblioData, false,
-                new LayoutTokenization(tokens), null, null, null, 
+                new LayoutTokenization(tokens), null, null, null,
             null, null, doc, config);
         String result = contentBuffer.toString();
         String[] resultAsArray = result.split("\n");
@@ -1433,17 +1457,25 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
             TaggingLabel clusterLabel = cluster.getTaggingLabel();
             Engine.getCntManager().i(clusterLabel);
             if (clusterLabel.equals(TaggingLabels.SECTION)) {
-                String clusterContent = LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens());
+                List<LayoutToken> dehyphenized = LayoutTokensUtil.dehyphenize(cluster.concatTokens());
+                String text = LayoutTokensUtil.toText(dehyphenized).replace("\n", " ");
+
                 curDiv = teiElement("div");
                 Element head = teiElement("head");
                 // section numbers
-                org.grobid.core.utilities.Pair<String, String> numb = getSectionNumber(clusterContent);
+                Pair<List<LayoutToken>, String> numb = getSectionNumber(dehyphenized);
                 if (numb != null) {
-                    head.addAttribute(new Attribute("n", numb.b));
-                    head.appendChild(numb.a);
-                } else {
-                    head.appendChild(clusterContent);
+                    head.addAttribute(new Attribute("n", numb.getRight()));
+                    dehyphenized = numb.getLeft();
+                    text = LayoutTokensUtil.toText(dehyphenized);
                 }
+                // Head styling stays disabled below; newlines become spaces so adjacent words are not glued together.
+//                List<Triple<String, String, OffsetPosition>> stylesList = extractStylesList(dehyphenized);
+//                if (CollectionUtils.isNotEmpty(stylesList)) {
+//                    applyStyleList(head, text, stylesList);
+//                } else {
+                    head.appendChild(StringUtils.normalizeSpace(text.replace("\n", " ")));
+//                }
 
                 if (config.isGenerateTeiIds()) {
                     String divID = KeyGen.getKey().substring(0, 7);
@@ -1451,10 +1483,7 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                 }
 
                 if (config.isGenerateTeiCoordinates("head") ) {
-                    String coords = LayoutTokensUtil.getCoordsString(cluster.concatTokens());
-                    if (coords != null) {
-                        head.addAttribute(new Attribute("coords", coords));
-                    }
+                    head.addAttribute(new Attribute("coords", LayoutTokensUtil.getCoordsString(cluster.concatTokens())));
                 }
 
                 curDiv.appendChild(head);
@@ -1463,13 +1492,13 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                     clusterLabel.equals(TaggingLabels.EQUATION_LABEL)) {
                 // get starting position of the cluster
                 int start = -1;
-                if ( (cluster.concatTokens() != null) && (cluster.concatTokens().size() > 0) ) {
+                if ( CollectionUtils.isNotEmpty(cluster.concatTokens()) ) {
                     start = cluster.concatTokens().get(0).getOffset();
                 }
                 // get the corresponding equation
                 if (start != -1) {
                     Equation theEquation = null;
-                    if (equations != null) {
+                    if (CollectionUtils.isNotEmpty(equations)) {
                         for(int i=0; i<equations.size(); i++) {
                             if (i < equationIndex) 
                                 continue;
@@ -1488,9 +1517,17 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                     }
                 }
             } else if (clusterLabel.equals(TaggingLabels.ITEM)) {
-                String clusterContent = LayoutTokensUtil.normalizeText(cluster.concatTokens());
-                //curDiv.appendChild(teiElement("item", clusterContent));
-                Element itemNode = teiElement("item", clusterContent);
+                String text = LayoutTokensUtil.toText(cluster.concatTokens()).replace("\n", " ");
+                Element itemNode = teiElement("item");
+
+                List<Triple<String, String, OffsetPosition>> stylesList = extractStylesList(cluster.concatTokens());
+
+                if (CollectionUtils.isNotEmpty(stylesList)) {
+                    applyStyleList(itemNode, text, stylesList);
+                } else {
+                    itemNode.appendChild(StringUtils.normalizeSpace(text));
+                }
+
                 if (!MARKER_LABELS.contains(lastClusterLabel) && (lastClusterLabel != TaggingLabels.ITEM)) {
                     curList = teiElement("list");
                     curDiv.appendChild(curList);
@@ -1509,17 +1546,20 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                 curDiv.appendChild(note);
             } else if (clusterLabel.equals(TaggingLabels.PARAGRAPH)) {
                 List<LayoutToken> clusterTokens = cluster.concatTokens();
-                int clusterPage = Iterables.getLast(clusterTokens).getPage();
+                List<LayoutToken> dehyphenized = LayoutTokensUtil.dehyphenize(clusterTokens);
+                int clusterPage = Iterables.getLast(dehyphenized).getPage();
 
                 List<Note> notesSamePage = null;
                 if (CollectionUtils.isNotEmpty(notes)) {
                     notesSamePage = notes.stream()
-                                .filter(f -> !f.isIgnored() && f.getPageNumber() == clusterPage)
-                                .collect(Collectors.toList());
+                        .filter(f -> !f.isIgnored() && f.getPageNumber() == clusterPage)
+                        .collect(Collectors.toList());
                 }
 
                 if (notesSamePage == null) {
-                    String clusterContent = LayoutTokensUtil.normalizeDehyphenizeText(clusterTokens);
+
+                    String text = LayoutTokensUtil.toText(dehyphenized).replace("\n", " ");
+
                     if (isNewParagraph(lastClusterLabel, curParagraph)) {
                         if (curParagraph != null && config.isWithSentenceSegmentation()) {
                             segmentIntoSentences(curParagraph, curParagraphTokens, config, doc.getLanguage());
@@ -1529,12 +1569,12 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                             String divID = KeyGen.getKey().substring(0, 7);
                             addXmlId(curParagraph, "_" + divID);
                         }
-                        
+
                         if (config.isGenerateTeiCoordinates("p")) {
                             String coords = LayoutTokensUtil.getCoordsString(clusterTokens);
                             curParagraph.addAttribute(new Attribute("coords", coords));
                         }
-                        
+
                         curDiv.appendChild(curParagraph);
                         curParagraphTokens = new ArrayList<>();
                     } else {
@@ -1545,8 +1585,15 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                             }
                         }
                     }
-                    curParagraph.appendChild(clusterContent);
-                    curParagraphTokens.addAll(clusterTokens);
+
+                    List<Triple<String, String, OffsetPosition>> stylesList = extractStylesList(dehyphenized);
+
+                    if (CollectionUtils.isNotEmpty(stylesList)) {
+                        applyStyleList(curParagraph, text, stylesList);
+                    } else {
+                        curParagraph.appendChild(StringUtils.normalizeSpace(text));
+                    }
+                    curParagraphTokens.addAll(cluster.concatTokens());
                 } else {
                     if (isNewParagraph(lastClusterLabel, curParagraph)) {
                         if (curParagraph != null && config.isWithSentenceSegmentation()) {
@@ -1562,15 +1609,15 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                             String coords = LayoutTokensUtil.getCoordsString(clusterTokens);
                             curParagraph.addAttribute(new Attribute("coords", coords));
                         }
-                        
+
                         curDiv.appendChild(curParagraph);
                         curParagraphTokens = new ArrayList<>();
                     }
 
                     // we need to cover several footnote callouts in the same paragraph segment
 
-                    // we also can't assume notes are sorted and will appear first in the text as the same order 
-                    // they are defined in the note areas - this might not always be the case in 
+                    // we also can't assume notes are sorted and will appear first in the text as the same order
+                    // they are defined in the note areas - this might not always be the case in
                     // ill-formed documents
 
                     // map the matched note labels to their corresponding note objects
@@ -1581,13 +1628,13 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                     List<Pair<String,OffsetPosition>> matchedLabelPosition = new ArrayList<>();
 
                     for (Note note : notesSamePage) {
-                        Optional<LayoutToken> matching = clusterTokens
+                        Optional<LayoutToken> matching = dehyphenized
                             .stream()
                             .filter(t -> t.getText().equals(note.getLabel()) && t.isSuperscript())
                             .findFirst();
 
                         if (matching.isPresent()) {
-                            int idx = clusterTokens.indexOf(matching.get());
+                            int idx = dehyphenized.indexOf(matching.get());
                             note.setIgnored(true);
                             OffsetPosition matchingPosition = new OffsetPosition();
                             matchingPosition.start = idx;
@@ -1611,8 +1658,8 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                         Note note = labels2Notes.get(matching.getLeft());
                         OffsetPosition matchingPosition = matching.getRight();
 
-                        List<LayoutToken> before = clusterTokens.subList(pos, matchingPosition.start);
-                        String clusterContentBefore = LayoutTokensUtil.normalizeDehyphenizeText(before);
+                        List<LayoutToken> before = dehyphenized.subList(pos, matchingPosition.start);
+                        String clusterContentBefore = LayoutTokensUtil.toText(before);
 
                         if (CollectionUtils.isNotEmpty(before) && before.get(0).getText().equals(" ")) {
                             curParagraph.appendChild(new Text(" "));
@@ -1625,10 +1672,18 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                                 curParagraph.addAttribute(new Attribute("coords", curParagraph.getAttributeValue("coords") + ";" + coords));
                             }
                         }
-                        
+
                         curParagraphTokens.addAll(before);
+                        List<Triple<String, String, OffsetPosition>> stylesList = extractStylesList(before);
 
-                        List<LayoutToken> calloutTokens = clusterTokens.subList(matchingPosition.start, matchingPosition.end);
+                        if (CollectionUtils.isNotEmpty(stylesList)) {
+                            applyStyleList(curParagraph, clusterContentBefore, stylesList);
+                        } else {
+                            curParagraph.appendChild(StringUtils.normalizeSpace(clusterContentBefore));
+                        }
+                        curParagraphTokens.addAll(cluster.concatTokens());
+
+                        List<LayoutToken> calloutTokens = dehyphenized.subList(matchingPosition.start, matchingPosition.end);
 
                         Element ref = teiElement("ref");
                         ref.addAttribute(new Attribute("type", "foot"));
@@ -1644,12 +1699,12 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                         ref.addAttribute(new Attribute("target", "#" + note.getIdentifier()));
                         curParagraph.appendChild(ref);
 
-                        pos = matchingPosition.end; 
+                        pos = matchingPosition.end;
                     }
 
                     // add last chunk of paragraph stuff (or whole paragraph if no note callout matching)
-                    List<LayoutToken> remaining = clusterTokens.subList(pos, clusterTokens.size());
-                    String remainingClusterContent = LayoutTokensUtil.normalizeDehyphenizeText(remaining);
+                    List<LayoutToken> remaining = dehyphenized.subList(pos, dehyphenized.size());
+                    String remainingClusterContent = LayoutTokensUtil.toText(remaining);
 
                     if (CollectionUtils.isNotEmpty(remaining) && remaining.get(0).getText().equals(" ")) {
                         curParagraph.appendChild(new Text(" "));
@@ -1664,6 +1719,16 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
 
                     curParagraph.appendChild(remainingClusterContent);
                     curParagraphTokens.addAll(remaining);
+                    List<Triple<String, String, OffsetPosition>> stylesList = extractStylesList(remaining);
+
+                    if (CollectionUtils.isNotEmpty(stylesList)) {
+                        applyStyleList(curParagraph, remainingClusterContent, stylesList);
+                    } else {
+                        curParagraph.appendChild(StringUtils.normalizeSpace(remainingClusterContent));
+                    }
+                    curParagraphTokens.addAll(cluster.concatTokens());
+
+
                 }
             } else if (MARKER_LABELS.contains(clusterLabel)) {
                 List<LayoutToken> refTokens = cluster.concatTokens();
@@ -1696,13 +1761,13 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                 } else {
                     throw new IllegalStateException("Unsupported marker type: " + clusterLabel);
                 }
-                
+
                 if (refNodes != null) {
                     boolean footNoteCallout = false;
 
                     if (refNodes.size() == 1 && (refNodes.get(0) instanceof Text)) {
-                        // filtered out superscript reference marker (based on the defined citationMarkerType) might 
-                        // be foot note callout - se we need in this particular case to try to match existing notes
+                        // filtered out superscript reference marker (based on the defined citationMarkerType) might
+                        // be footnote callout - so we need in this particular case to try to match existing notes
                         // similarly as within paragraph
                         if (citationMarkerType == null || citationMarkerType != MarkerType.SUPERSCRIPT_NUMBER) {
                             // is refTokens superscript?
@@ -1721,7 +1786,7 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                                         if (chunkRefString.trim().equals(note.getLabel())) {
                                             footNoteCallout = true;
                                             note.setIgnored(true);
-                                                   
+
                                             Element ref = teiElement("ref");
                                             ref.addAttribute(new Attribute("type", "foot"));
 
@@ -1743,16 +1808,16 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
                                     }
                                 }
                             }
-                        } 
+                        }
                     }
 
                     if (!footNoteCallout) {
                         for (Node n : refNodes) {
                             parent.appendChild(n);
                         }
-                    } 
+                    }
                 }
-                
+
                 if (curParagraph != null)
                     curParagraphTokens.addAll(cluster.concatTokens());
             } else if (clusterLabel.equals(TaggingLabels.FIGURE) || clusterLabel.equals(TaggingLabels.TABLE)) {
@@ -1827,6 +1892,43 @@ public StringBuilder toTEITextPiece(StringBuilder buffer,
         return buffer;
     }
 
+    /**
+     * Appends text to paragraphElem, wrapping each span of stylesList in a hi element whose rend attribute is the style name.
+     * This method modifies the input paragraphElem and returns it.
+     */
+    public static Element applyStyleList(Element paragraphElem, String text, List<Triple<String, String, OffsetPosition>> stylesList) {
+        // The offsets of stylesList index into text; substring() throws if a span starts before the
+        // end of the previous one or reaches beyond text. Every chunk is whitespace-normalised, but one
+        // boundary space is re-added around the plain chunks.
+        int cursor = 0;
+
+        for (Triple<String, String, OffsetPosition> styleEntry : stylesList) {
+            OffsetPosition span = styleEntry.getRight();
+
+            // plain text sitting between the previous styled span and this one
+            String plain = text.substring(cursor, span.start);
+            String leadingSpace = StringUtils.startsWith(plain, " ") ? " " : "";
+            // a chunk made of the leading space alone must not count that space twice
+            boolean hasTrailingSpace = plain.length() > leadingSpace.length() && StringUtils.endsWith(plain, " ");
+            String trailingSpace = hasTrailingSpace ? " " : "";
+            paragraphElem.appendChild(leadingSpace + StringUtils.normalizeSpace(plain.replace("\n", " ")) + trailingSpace);
+
+            Element hi = teiElement("hi");
+            hi.addAttribute(new Attribute("rend", styleEntry.getLeft()));
+            hi.appendChild(StringUtils.normalizeSpace(text.substring(span.start, span.end).replace("\n", " ")));
+            paragraphElem.appendChild(hi);
+            cursor = span.end;
+        }
+
+        // text after the last styled span; its leading space is kept only when something follows it
+        String tail = text.substring(cursor);
+        String tailNormalized = StringUtils.normalizeSpace(tail.replace("\n", " "));
+        boolean keepLeadingSpace = StringUtils.normalizeSpace(tail).length() > 0 && StringUtils.startsWith(tail, " ");
+        paragraphElem.appendChild((keepLeadingSpace ? " " : "") + tailNormalized);
+
+        return paragraphElem;
+    }
+
     public static boolean isNewParagraph(TaggingLabel lastClusterLabel, Element curParagraph) {
         return (!MARKER_LABELS.contains(lastClusterLabel) && lastClusterLabel != TaggingLabels.FIGURE
                 && lastClusterLabel != TaggingLabels.TABLE) || curParagraph == null;
@@ -1841,36 +1943,24 @@ public void segmentIntoSentences(Element curParagraph, List<LayoutToken> curPara
 
         // in xom, the following gives all the text under the element, for the whole subtree
         String text = curParagraph.getValue();
-        if (text == null || text.length() == 0)
+        if (StringUtils.isEmpty(text))
             return;
 
-        // identify ref nodes, ref spans and ref positions
-        Map<Integer,Node> mapRefNodes = new HashMap<>();
-        List<Integer> refPositions = new ArrayList<>();
-        List<OffsetPosition> forbiddenPositions = new ArrayList<>();
-        int pos = 0;
-        for(int i=0; i<curParagraph.getChildCount(); i++) {
-            Node theNode = curParagraph.getChild(i);
-            if (theNode instanceof Text) {
-                String chunk = theNode.getValue();
-                pos += chunk.length();
-            } else if (theNode instanceof Element) {
-                // for readability in another conditional
-                if (((Element) theNode).getLocalName().equals("ref")) {
-                    // map character offset of the node
-                    mapRefNodes.put(Integer.valueOf(pos), theNode);
-                    refPositions.add(Integer.valueOf(pos));
+        Map<Integer, Pair<Node, String>> rawMapRefNodes = identifyNestedNodes(curParagraph);
 
-                    String chunk = theNode.getValue();
-                    forbiddenPositions.add(new OffsetPosition(pos, pos+chunk.length()));
-                    pos += chunk.length();                    
-                }
-            }
-        }
+        List<OffsetPosition> forbiddenPositions = rawMapRefNodes.entrySet()
+            .stream()
+            .filter(entry -> ((Element) entry.getValue().getLeft()).getLocalName().equals("ref"))
+            .map(entry -> new OffsetPosition(entry.getKey(), entry.getValue().getRight().length() + entry.getKey()))
+            .collect(Collectors.toList());
 
-        List<OffsetPosition> theSentences = 
+        List<OffsetPosition> sentencesOffsetPosition =
             SentenceUtilities.getInstance().runSentenceDetection(text, forbiddenPositions, curParagraphTokens, new Language(lang));
-    
+
+        Map<Integer, Pair<Node, String>> mapRefNodes = splitMapNodesOverSentenceSplits(rawMapRefNodes, text, sentencesOffsetPosition);
+
+        List<Integer> refPositions = mapRefNodes.keySet().stream().sorted().collect(Collectors.toList());
+
         /*if (theSentences.size() == 0) {
             // this should normally not happen, but it happens (depending on sentence splitter, usually the text 
             // is just a punctuation)
@@ -1879,48 +1969,7 @@ public void segmentIntoSentences(Element curParagraph, List<LayoutToken> curPara
         }*/
 
         // segment the list of layout tokens according to the sentence segmentation if the coordinates are needed
-        List<List<LayoutToken>> segmentedParagraphTokens = new ArrayList<>();
-        List<LayoutToken> currentSentenceTokens = new ArrayList<>();
-        pos = 0;
-        
-        if (config.isGenerateTeiCoordinates("s")) {
-            
-            int currentSentenceIndex = 0;
-            String sentenceChunk = text.substring(theSentences.get(currentSentenceIndex).start, theSentences.get(currentSentenceIndex).end);
-
-            for(int i=0; i<curParagraphTokens.size(); i++) {
-                LayoutToken token = curParagraphTokens.get(i);
-                if (token.getText() == null || token.getText().length() == 0) 
-                    continue;
-                int newPos = sentenceChunk.indexOf(token.getText(), pos);
-                if ((newPos != -1) || SentenceUtilities.toSkipToken(token.getText())) {
-                    // just move on
-                    currentSentenceTokens.add(token);
-                    if (newPos != -1 && !SentenceUtilities.toSkipToken(token.getText()))
-                        pos = newPos;
-                } else {
-                    if (currentSentenceTokens.size() > 0) {
-                        segmentedParagraphTokens.add(currentSentenceTokens);
-                        currentSentenceIndex++;
-                        if (currentSentenceIndex >= theSentences.size()) {
-                            currentSentenceTokens = new ArrayList<>();
-                            break;
-                        }
-                        sentenceChunk = text.substring(theSentences.get(currentSentenceIndex).start, theSentences.get(currentSentenceIndex).end);
-                    }
-                    currentSentenceTokens = new ArrayList<>();
-                    currentSentenceTokens.add(token);
-                    pos = 0;
-                }
-                
-                if (currentSentenceIndex >= theSentences.size())
-                    break;
-            }
-            // last sentence
-            if (currentSentenceTokens.size() > 0) {
-                // check sentence index too ?
-                segmentedParagraphTokens.add(currentSentenceTokens);
-            }
+        List<List<LayoutToken>> segmentedParagraphTokens = segmentLayoutTokenLists(curParagraphTokens, text, sentencesOffsetPosition);
 
 /*if (segmentedParagraphTokens.size() != theSentences.size()) {
 System.out.println("ERROR, segmentedParagraphTokens size:" + segmentedParagraphTokens.size() + " vs theSentences size: " + theSentences.size());
@@ -1937,44 +1986,42 @@ public void segmentIntoSentences(Element curParagraph, List<LayoutToken> curPara
     k++;
 }
 }*/
-        }
+
 
         // update the xml paragraph element
         int currenChildIndex = 0;
-        pos = 0;
+        int pos = 0;
         int posInSentence = 0;
         int refIndex = 0;
-        for(int i=0; i<theSentences.size(); i++) {
-            pos = theSentences.get(i).start;
+        for(int i=0; i<sentencesOffsetPosition.size(); i++) {
+            pos = sentencesOffsetPosition.get(i).start;
             posInSentence = 0;
             Element sentenceElement = teiElement("s");
+
             if (config.isGenerateTeiIds()) {
                 String sID = KeyGen.getKey().substring(0, 7);
                 addXmlId(sentenceElement, "_" + sID);
             }
             if (config.isGenerateTeiCoordinates("s")) {
                 if (segmentedParagraphTokens.size()>=i+1) {
-                    currentSentenceTokens = segmentedParagraphTokens.get(i);
-                    String coords = LayoutTokensUtil.getCoordsString(currentSentenceTokens);
-                    if (coords != null) {
-                        sentenceElement.addAttribute(new Attribute("coords", coords));
-                    }
+                    List<LayoutToken> currentSentenceTokens = segmentedParagraphTokens.get(i);
+                    sentenceElement.addAttribute(new Attribute("coords", LayoutTokensUtil.getCoordsString(currentSentenceTokens)));
                 }
             }
-            
-            int sentenceLength = theSentences.get(i).end - pos;
+
+            int sentenceLength = sentencesOffsetPosition.get(i).end - pos;
             // check if we have a ref between pos and pos+sentenceLength
             for(int j=refIndex; j<refPositions.size(); j++) {
-                int refPos = refPositions.get(j).intValue();
-                if (refPos < pos+posInSentence) 
+                int refPos = refPositions.get(j);
+                if (refPos < pos+posInSentence)
                     continue;
 
                 if (refPos >= pos+posInSentence && refPos <= pos+sentenceLength) {
-                    Node valueNode = mapRefNodes.get(Integer.valueOf(refPos));
+                    Node valueNode = mapRefNodes.get(refPos).getLeft();
                     if (pos+posInSentence < refPos) {
-                        String local_text_chunk = text.substring(pos+posInSentence, refPos);
-                        local_text_chunk = XmlBuilderUtils.stripNonValidXMLCharacters(local_text_chunk);
-                        sentenceElement.appendChild(local_text_chunk);
+                        String localTextChunk = text.substring(pos+posInSentence, refPos);
+                        localTextChunk = XmlBuilderUtils.stripNonValidXMLCharacters(localTextChunk);
+                        sentenceElement.appendChild(localTextChunk);
                     }
                     valueNode.detach();
                     sentenceElement.appendChild(valueNode);
@@ -1986,10 +2033,10 @@ public void segmentIntoSentences(Element curParagraph, List<LayoutToken> curPara
                 }
             }
 
-            if (pos+posInSentence <= theSentences.get(i).end) {
-                String local_text_chunk = text.substring(pos+posInSentence, theSentences.get(i).end);
-                local_text_chunk = XmlBuilderUtils.stripNonValidXMLCharacters(local_text_chunk);
-                sentenceElement.appendChild(local_text_chunk);
+            if (pos + posInSentence <= sentencesOffsetPosition.get(i).end) {
+                String localTextChunk = text.substring(pos + posInSentence, sentencesOffsetPosition.get(i).end);
+                localTextChunk = XmlBuilderUtils.stripNonValidXMLCharacters(localTextChunk);
+                sentenceElement.appendChild(localTextChunk);
                 curParagraph.appendChild(sentenceElement);
             }
         }
@@ -2005,8 +2052,278 @@ public void segmentIntoSentences(Element curParagraph, List<LayoutToken> curPara
                 }
             }
         }
+    }
+
+    /**
+     * Rebuilds the offset-to-node map so that no non-ref node crosses a sentence boundary: a crossing node is cut into a prefix kept at its offset and a suffix keyed at the start of the following sentence (copies made by generateNewElement).
+     * Ref nodes are copied over as they are, since refs are not expected to be split over sentences. The result is a TreeMap keyed by offset in text.
+     */
+    protected Map<Integer, Pair<Node, String>> splitMapNodesOverSentenceSplits(Map<Integer, Pair<Node, String>> mapRefNodes, String text, List<OffsetPosition> sentencesOffsetPosition) {
+        Map<Integer, Pair<Node, String>> adjustedMap = new TreeMap<>();
+
+        StringBuilder textAccumulator = new StringBuilder();
+        List<Integer> refPositions = mapRefNodes.keySet().stream().sorted().collect(Collectors.toList());
+
+        int currentNodeIdx = 0;
+        int previousSentenceOffsetStart = 0;
+        int previousPosInSentence = 0;
+        for(int i=0; i<sentencesOffsetPosition.size(); i++) {
+            OffsetPosition offsetPosition = sentencesOffsetPosition.get(i);
+            int posInSentence = 0;
+            int sentenceOffsetStart = offsetPosition.start;
+            int sentenceOffsetEnd = offsetPosition.end;
+
+            if (previousSentenceOffsetStart + previousPosInSentence < sentenceOffsetStart) {
+                textAccumulator.append(text, previousSentenceOffsetStart + previousPosInSentence, sentenceOffsetStart);
+            }
+            for(int j=currentNodeIdx; j<refPositions.size(); j++) {
+                int refPos = refPositions.get(j);
+                Node currentNode = mapRefNodes.get(refPos).getLeft();
+                int currentNodeLength = currentNode.getValue().length();
+
+                if (((Element) currentNode).getLocalName().equals("ref")) {
+                    if (refPos > sentenceOffsetEnd) {
+                        currentNodeIdx = j;
+                        break;
+                    }
+                    adjustedMap.put(refPos, mapRefNodes.get(refPos));
+                    if (textAccumulator.length() < refPos) {
+                        textAccumulator.append(text, textAccumulator.length(), refPos);
+                    }
+                    textAccumulator.append(mapRefNodes.get(refPos).getRight());
+                    posInSentence = refPos + currentNodeLength - sentenceOffsetStart;
+                    continue;
+                }
+
+                // the node starts inside the not-yet-consumed part of this sentence
+                if (refPos >= sentenceOffsetStart+posInSentence && refPos < sentenceOffsetEnd) {
+
+                    // add the text preceding the node to the accumulator
+                    if (refPos > sentenceOffsetStart + posInSentence) {
+                        textAccumulator.append(text, sentenceOffsetStart + posInSentence, refPos);
+                        posInSentence = refPos - sentenceOffsetStart;
+                    }
+
+                    // the node ends before the sentence does: keep it unchanged
+                    if (sentenceOffsetStart + posInSentence + currentNodeLength < sentenceOffsetEnd) {
+                        adjustedMap.put(refPos, mapRefNodes.get(refPos));
+                        textAccumulator.append(mapRefNodes.get(refPos).getRight());
+                        posInSentence = refPos + currentNodeLength - sentenceOffsetStart;
+                    } else {
+                        // the node reaches the end of the sentence or runs past it: keep only the part that fits here and stop on this node
+                        int splitElementSize = sentenceOffsetEnd - refPos;
+
+                        String substringPrefix = currentNode.getValue().substring(0, splitElementSize);
+                        Element newElementPrefix = generateNewElement((Element) currentNode, substringPrefix);
+                        adjustedMap.put(refPos, Pair.of(newElementPrefix, substringPrefix));
+                        textAccumulator.append(substringPrefix);
+                        posInSentence = refPos + newElementPrefix.getValue().length() - sentenceOffsetStart;
+                        currentNodeIdx = j;
+                        break;
+                    }
+                } else if (refPos > sentenceOffsetEnd) {
+                    // the node starts after this sentence: add the rest of the sentence to the accumulator and move on to the next sentence
+                    String textChunk = text.substring(sentenceOffsetStart + posInSentence, sentenceOffsetEnd);
+                    textAccumulator.append(textChunk);
+                    posInSentence += textChunk.length();
+                    currentNodeIdx = j;
+                    break;
+                } else if (refPos < sentenceOffsetStart
+                    && textAccumulator.length() > refPos
+                    && textAccumulator.length() < refPos + currentNodeLength) {
+                    // the node starts before this sentence and the accumulator stops inside it: emit its remaining part at the start of this sentence
+
+                    String exceeded = textAccumulator.substring(0, refPos) + mapRefNodes.get(refPos).getLeft().getValue();
+
+                    if (exceeded.length() > sentenceOffsetEnd) {
+                        String previousNodeSuffix = exceeded.substring(sentenceOffsetStart, sentenceOffsetEnd);
+                        Element newElementSuffix = generateNewElement((Element) currentNode, previousNodeSuffix);
+                        adjustedMap.put(sentenceOffsetStart, Pair.of(newElementSuffix, previousNodeSuffix));
+                        if (textAccumulator.length() < sentenceOffsetStart) {
+                            textAccumulator.append(exceeded, textAccumulator.length(), sentenceOffsetStart);
+                        }
+                        textAccumulator.append(previousNodeSuffix);
+
+                        posInSentence = textAccumulator.length() - sentenceOffsetStart;
+                        currentNodeIdx = j;
+                        break;
+                    } else {
+                        // the remaining part of the node ends within this sentence
+                        String previousNodeSuffix = exceeded.substring(sentenceOffsetStart);
+                        Element newElementSuffix = generateNewElement((Element) currentNode, previousNodeSuffix);
+                        adjustedMap.put(sentenceOffsetStart, Pair.of(newElementSuffix, previousNodeSuffix));
+                        if (textAccumulator.length() < sentenceOffsetStart) {
+                            textAccumulator.append(exceeded, textAccumulator.length(), sentenceOffsetStart);
+                        }
+                        textAccumulator.append(previousNodeSuffix);
+                        posInSentence = textAccumulator.length() - sentenceOffsetStart;
+                    }
+                }
+            }
+            previousSentenceOffsetStart = sentenceOffsetStart;
+            previousPosInSentence = posInSentence;
+
+            if (sentenceOffsetStart + posInSentence < sentenceOffsetEnd) {
+                textAccumulator.append(text, sentenceOffsetStart + posInSentence, sentencesOffsetPosition.get(i).end);
+            }
+        }
+
+        return adjustedMap;
+    }
+
+    /** Creates a TEI element named like currentNode via teiElement(localName, value) and copies every attribute of currentNode onto it. */
+    private Element generateNewElement(Element currentNode, String value) {
+        Element copy = teiElement(currentNode.getLocalName(), value);
+        for (int idx = 0; idx < currentNode.getAttributeCount(); idx++) {
+            copy.addAttribute(new Attribute(currentNode.getAttribute(idx)));
+        }
+        return copy;
+    }
+
+    /**
+     * Maps the character offset of each direct ref and hi child of the paragraph to the child node and its text value.
+     * Offsets are counted over the values of the Text, ref and hi children only: any other child does not advance the position.
+     */
+    protected Map<Integer, Pair<Node, String>> identifyNestedNodes(Element curParagraph) {
+        Map<Integer, Pair<Node, String>> nodesByOffset = new HashMap<>();
+        int offset = 0;
+        int childCount = curParagraph.getChildCount();
+        for (int childIdx = 0; childIdx < childCount; childIdx++) {
+            Node child = curParagraph.getChild(childIdx);
+            if (child instanceof Text) {
+                // plain text only moves the position forward
+                offset += child.getValue().length();
+                continue;
+            }
+            if (!(child instanceof Element)) {
+                continue;
+            }
+
+            String localName = ((Element) child).getLocalName();
+            if (localName.equals("ref") || localName.equals("hi")) {
+                String chunk = child.getValue();
+                nodesByOffset.put(offset, Pair.of(child, chunk));
+                offset += chunk.length();
+            }
+        }
+
+        return nodesByOffset;
+    }
+
+    /**
+     * Splits the paragraph tokens into one token list per sentence, by searching each token text in the current sentence chunk
+     * and switching to the next sentence on a miss. Returns an empty list when no sentence was detected.
+     */
+    private List<List<LayoutToken>> segmentLayoutTokenLists(List<LayoutToken> curParagraphTokens, String text, List<OffsetPosition> sentencesOffsetPosition) {
+        List<List<LayoutToken>> segmentedParagraphTokens = new ArrayList<>();
+        if (CollectionUtils.isEmpty(sentencesOffsetPosition)) {
+            // sentence detection can come back empty (e.g. punctuation-only text): nothing to segment, and get(0) below would throw
+            return segmentedParagraphTokens;
+        }
+
+        List<LayoutToken> currentSentenceTokens = new ArrayList<>();
+        int pos = 0;
+        int currentSentenceIndex = 0;
+        String sentenceChunk = text.substring(sentencesOffsetPosition.get(currentSentenceIndex).start,
+            sentencesOffsetPosition.get(currentSentenceIndex).end);
+
+        for (LayoutToken token : curParagraphTokens) {
+            if (StringUtils.isEmpty(token.getText()))
+                continue;
+            int newPos = sentenceChunk.indexOf(token.getText(), pos);
+            if ((newPos != -1) || SentenceUtilities.toSkipToken(token.getText())) {
+                // token stays in the current sentence; only a real match of a non-skipped token moves the search position
+                currentSentenceTokens.add(token);
+                if (newPos != -1 && !SentenceUtilities.toSkipToken(token.getText()))
+                    pos = newPos;
+            } else {
+                if (currentSentenceTokens.size() > 0) {
+                    segmentedParagraphTokens.add(currentSentenceTokens);
+                    currentSentenceIndex++;
+                    if (currentSentenceIndex >= sentencesOffsetPosition.size()) {
+                        currentSentenceTokens = new ArrayList<>();
+                        break;
+                    }
+                    sentenceChunk = text.substring(sentencesOffsetPosition.get(currentSentenceIndex).start, sentencesOffsetPosition.get(currentSentenceIndex).end);
+                }
+                currentSentenceTokens = new ArrayList<>();
+                currentSentenceTokens.add(token);
+                pos = 0;
+            }
+        }
+        // flush the tokens collected for the last sentence
+        if (currentSentenceTokens.size() > 0) {
+            segmentedParagraphTokens.add(currentSentenceTokens);
+        }
+        return segmentedParagraphTokens;
+    }
+
+    public static List<Triple<String, String, OffsetPosition>> extractStylesList(List<LayoutToken> tokenList) { // convenience overload: delegates with an empty ignore list, so no style is filtered out
+        return extractStylesList(tokenList, new ArrayList<>());
+    }
 
-    }   
+
+    /**
+     * Extracts the styled spans from the list of tokens as triples of (style name, styled text, offsets in the concatenated token texts);
+     * consecutive tokens sharing a style are merged. Styles listed in ignoreStyles are skipped (e.g. to restrict only superscript/subscript when decorating formulas).
+     */
+    public static List<Triple<String, String, OffsetPosition>> extractStylesList(List<LayoutToken> tokenList, List<String> ignoreStyles) {
+        List<Triple<String, String, OffsetPosition>> styleList = new ArrayList<>();
+        String previousStyleName = "";
+        int consumedLength = 0;
+        StringBuilder value = new StringBuilder();
+
+        for (LayoutToken token : tokenList) {
+            int startOffset = consumedLength;
+            consumedLength += token.getText().length();
+            int endOffset = consumedLength;
+
+            if (token.getText().equals(" ") || token.getText().equals("\n")) {
+                // whitespace never opens or closes a span, it is only kept inside a span being built
+                if (value.length() > 0) {
+                    value.append(token.getText());
+                }
+                continue;
+            }
+
+            StringBuilder styleName = new StringBuilder();
+            if (token.isBold() && !ignoreStyles.contains(TEI_STYLE_BOLD_NAME)) {
+                styleName.append(TEI_STYLE_BOLD_NAME).append(" ");
+            }
+
+            if (token.isItalic() && !ignoreStyles.contains(TEI_STYLE_ITALIC_NAME)) {
+                styleName.append(TEI_STYLE_ITALIC_NAME).append(" ");
+            }
+
+            if (token.isSuperscript() && !ignoreStyles.contains(TEI_STYLE_SUPERSCRIPT_NAME)) {
+                styleName.append(TEI_STYLE_SUPERSCRIPT_NAME);
+            } else if (token.isSubscript() && !ignoreStyles.contains(TEI_STYLE_SUBSCRIPT_NAME)) {
+                styleName.append(TEI_STYLE_SUBSCRIPT_NAME);
+            }
+
+            String styleNameTrimmed = StringUtils.trim(styleName.toString());
+            if (StringUtils.isEmpty(styleNameTrimmed)) {
+                // unstyled token: closes any span in progress
+                previousStyleName = styleNameTrimmed;
+                value = new StringBuilder();
+                continue;
+            }
+
+            if (styleNameTrimmed.equals(previousStyleName)) {
+                // same style as the previous styled token: extend the last span up to this token
+                value.append(token.getText());
+                Triple<String, String, OffsetPosition> last = Iterables.getLast(styleList);
+                styleList.set(styleList.size()-1, Triple.of(last.getLeft(), value.toString(), new OffsetPosition(last.getRight().start, endOffset)));
+            } else {
+                // style changed: restart the span text so that it does not carry over the text of the previous span
+                value = new StringBuilder(token.getText());
+                styleList.add(Triple.of(styleNameTrimmed, value.toString(), new OffsetPosition(startOffset, endOffset)));
+            }
+            previousStyleName = styleNameTrimmed;
+        }
+
+        return styleList;
+    }
 
     /**
      * Return the graphic objects in a given interval position in the document.
@@ -2024,26 +2341,46 @@ private List<GraphicObject> getGraphicObject(List<GraphicObject> graphicObjects,
         return result;
     }
 
-    private org.grobid.core.utilities.Pair<String, String> getSectionNumber(String text) {
+    protected Pair<List<LayoutToken>, String> getSectionNumber(List<LayoutToken> tokens) {
+
+        String text = LayoutTokensUtil.toText(tokens);
+
         Matcher m1 = BasicStructureBuilder.headerNumbering1.matcher(text);
         Matcher m2 = BasicStructureBuilder.headerNumbering2.matcher(text);
         Matcher m3 = BasicStructureBuilder.headerNumbering3.matcher(text);
         Matcher m = null;
+        OffsetPosition position = null;
         String numb = null;
         if (m1.find()) {
             numb = m1.group(0);
+            position = new OffsetPosition(m1.start(), m1.end());
             m = m1;
         } else if (m2.find()) {
             numb = m2.group(0);
+            position = new OffsetPosition(m2.start(), m2.end());
             m = m2;
         } else if (m3.find()) {
             numb = m3.group(0);
+            position = new OffsetPosition(m3.start(), m3.end());
             m = m3;
         }
         if (numb != null) {
-            text = text.replace(numb, "").trim();
+            int lastPosition = 0;
+            StringBuilder acc = new StringBuilder();
+            List<LayoutToken> tokensWithoutSectionNumbers = new ArrayList<>();
+            for (int idx=0; idx < tokens.size(); idx++) {
+                if (!(lastPosition >= position.start && lastPosition < position.end )) {
+                    if (!(tokensWithoutSectionNumbers.size() == 0 && tokens.get(idx).getText().equals(" "))) {
+                        //adding a space at the beginning of the accumulator should be ignored
+                        tokensWithoutSectionNumbers.add(tokens.get(idx));
+                    }
+                }
+                acc.append(tokens.get(idx).getText());
+                lastPosition = acc.toString().length();
+            }
+
             numb = numb.replace(" ", "");
-            return new org.grobid.core.utilities.Pair<>(text, numb);
+            return Pair.of(tokensWithoutSectionNumbers, numb);
         } else {
             return null;
         }
@@ -2109,7 +2446,7 @@ public List<Node> markReferencesTEILuceneBased(List<LayoutToken> refTokens,
         if ( (refTokens == null) || (refTokens.size() == 0) ) 
             return null;
         String text = LayoutTokensUtil.toText(refTokens);
-        if (text == null || text.trim().length() == 0 || text.endsWith("</ref>") || text.startsWith("<ref") || markerMatcher == null)
+        if (StringUtils.isBlank(text) || text.endsWith("</ref>") || text.startsWith("<ref") || markerMatcher == null) // isBlank: whitespace-only text still returns early, as the former trim() check did
             return Collections.<Node>singletonList(new Text(text));
 
         boolean spaceEnd = false;
@@ -2181,11 +2518,11 @@ public List<Node> markReferencesTEILuceneBased(List<LayoutToken> refTokens,
     }
 
 
-    public List<Node> markReferencesFigureTEI(String refText, 
+    public List<Node> markReferencesFigureTEI(String refText,
                                             List<LayoutToken> allRefTokens,
                                             List<Figure> figures,
                                             boolean generateCoordinates) {
-        if (refText == null || 
+        if (refText == null ||
             refText.trim().isEmpty()) {
             return null;
         }
@@ -2212,7 +2549,7 @@ public List<Node> markReferencesFigureTEI(String refText,
         }
 
         if (labels == null || labels.size() <= 1) {
-            org.grobid.core.utilities.Pair<String, List<LayoutToken>> localLabel = 
+            org.grobid.core.utilities.Pair<String, List<LayoutToken>> localLabel =
                 new org.grobid.core.utilities.Pair(refText, allRefTokens);
             labels = new ArrayList<>();
             labels.add(localLabel);
@@ -2260,7 +2597,7 @@ public List<Node> markReferencesFigureTEI(String refText,
 
             String andWordString = null;
             if (text.endsWith("and") || text.endsWith("&")) {
-                // the AND_WORD_PATTERN case, we want to exclude the AND word from the tagged chunk                
+                // the AND_WORD_PATTERN case, we want to exclude the AND word from the tagged chunk
                 if (text.endsWith("and")) {
                     text = text.substring(0, text.length()-3);
                     andWordString = "and";
@@ -2309,7 +2646,7 @@ else if (text.endsWith("&")) {
     public List<Node> markReferencesTableTEI(String refText, List<LayoutToken> allRefTokens,
                                              List<Table> tables,
                                              boolean generateCoordinates) {
-        if (refText == null || 
+        if (refText == null ||
             refText.trim().isEmpty()) {
             return null;
         }
@@ -2336,7 +2673,7 @@ public List<Node> markReferencesTableTEI(String refText, List<LayoutToken> allRe
         }
 
         if (labels == null || labels.size() <= 1) {
-            org.grobid.core.utilities.Pair<String, List<LayoutToken>> localLabel = 
+            org.grobid.core.utilities.Pair<String, List<LayoutToken>> localLabel =
                 new org.grobid.core.utilities.Pair(refText, allRefTokens);
             labels = new ArrayList<>();
             labels.add(localLabel);
@@ -2384,7 +2721,7 @@ public List<Node> markReferencesTableTEI(String refText, List<LayoutToken> allRe
 
             String andWordString = null;
             if (text.endsWith("and") || text.endsWith("&")) {
-                // the AND_WORD_PATTERN case, we want to exclude the AND word from the tagged chunk                
+                // the AND_WORD_PATTERN case, we want to exclude the AND word from the tagged chunk
                 if (text.endsWith("and")) {
                     text = text.substring(0, text.length()-3);
                     andWordString = "and";
@@ -2422,7 +2759,7 @@ else if (text.endsWith("&")) {
             if (andWordString != null) {
                 nodes.add(new Text(andWordString));
             }
-            
+
             if (spaceEnd)
                 nodes.add(new Text(" "));
         }
diff --git a/grobid-core/src/main/java/org/grobid/core/document/xml/XmlBuilderUtils.java b/grobid-core/src/main/java/org/grobid/core/document/xml/XmlBuilderUtils.java
index 5d4850f94e..0c549078df 100644
--- a/grobid-core/src/main/java/org/grobid/core/document/xml/XmlBuilderUtils.java
+++ b/grobid-core/src/main/java/org/grobid/core/document/xml/XmlBuilderUtils.java
@@ -102,7 +102,7 @@ public static void main(String[] args) throws ParsingException, IOException {
     }
 
     public static String stripNonValidXMLCharacters(String in) {
-        StringBuffer out = new StringBuffer(); // Used to hold the output.
+        StringBuilder out = new StringBuilder(); // Used to hold the output.
         char current; // Used to reference the current character.
 
         if (in == null || ("".equals(in))) 
diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java
index 28eda7e693..c0e7201fef 100755
--- a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java
@@ -2382,7 +2382,9 @@ protected List<Equation> processEquations(String rese,
 			}
 
 			List<LayoutToken> tokenizationEquation = cluster.concatTokens();
-			String clusterContent = LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(cluster.concatTokens()));
+            //LF: I removed the normalisation to keep the content in sync with contentTokens.
+            //      The normalisation "StringUtils.normaliseSpaces()" is called anyway when building the XML
+			String clusterContent = LayoutTokensUtil.toText(cluster.concatTokens());
 
 			if (currentResult == null)
 				currentResult = new Equation();
@@ -2398,10 +2400,11 @@ protected List<Equation> processEquations(String rese,
 					currentResult = new Equation();
 				}
 	            currentResult.appendContent(clusterContent);
-            	currentResult.addLayoutTokens(cluster.concatTokens());
+            	currentResult.addLayoutTokens(tokenizationEquation);
+            	currentResult.addContentTokens(tokenizationEquation);
             } else if (clusterLabel.equals(TaggingLabels.EQUATION_LABEL)) {
                 currentResult.appendLabel(clusterContent);
-	            currentResult.addLayoutTokens(cluster.concatTokens());
+	            currentResult.addLayoutTokens(tokenizationEquation);
             }
 
 			lastLabel = clusterLabel;
diff --git a/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterTest.java b/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterTest.java
index 1e794f8765..fa3669156b 100644
--- a/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterTest.java
+++ b/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterTest.java
@@ -1,18 +1,34 @@
 package org.grobid.core.document;
 
+import nu.xom.Element;
+import nu.xom.Node;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.commons.lang3.tuple.Triple;
 import org.grobid.core.analyzers.GrobidAnalyzer;
 import org.grobid.core.data.Note;
+import org.grobid.core.document.xml.XmlBuilderUtils;
+import org.grobid.core.engines.config.GrobidAnalysisConfig;
+import org.grobid.core.lang.Language;
 import org.grobid.core.layout.LayoutToken;
 import org.grobid.core.utilities.GrobidProperties;
 import org.grobid.core.utilities.LayoutTokensUtil;
+import org.grobid.core.utilities.OffsetPosition;
+import org.grobid.core.utilities.SentenceUtilities;
 import org.junit.BeforeClass;
+import org.junit.Ignore;
 import org.junit.Test;
 
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
 
+import static org.grobid.core.document.TEIFormatter.*;
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasKey;
 import static org.hamcrest.Matchers.hasSize;
 import static org.junit.Assert.assertThat;
 
 public class TEIFormatterTest {
@@ -37,8 +53,8 @@ public void testMakeFootNote() throws Exception {
         assertThat(LayoutTokensUtil.toText(footnote.getTokens()), is("This is a footnote"));
         assertThat(footnote.getLabel(), is("1"));
     }
-    
-    
+
+
     @Test
     public void testMakeNotes() throws Exception {
         String text = "198 U.S. Const. art. I,  § §9 & 10. \n199 To be sure, there are revisionist arguments that the Ex Post Facto clause itself extends to retroactive civil laws too. See Eastern Enterprises v. Apfel, 524 U.S. 498, 538-39 (1998) (Thomas, J., concurring). And as with bills of attainder, in the wake of the Civil War the Supreme Court held that Ironclad  Oath requirements were ex post facto laws as well. Cummings, 71 U.S. at 326-332; Garland, 71 U.S.  at 377-368. But as discussed in the text, even these principles do not ensnare Section Three going  forward, on a non-ex-post-facto basis \n200 3 U.S. at 378-80 (arguments of counsel). \n201 Id. \n202 Id. at 382. See Baude & Sachs, Eleventh Amendment, supra note 9, at 626-627.   Electronic copy available at: https://ssrn.com/abstract=4532751";
@@ -64,4 +80,552 @@ public void testMakeNotes() throws Exception {
 
 
 
+    @Test
+    public void testSegmentIntoSentences_simpleText_ShouldSplitIntoSentencesAndAddSTag() throws Exception {
+        String text = "One sentence. Second sentence.";
+        // Plain paragraph: the whole text is appended as one text node and no token is styled.
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder().build();
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+        currentParagraph.appendChild(text);
+
+        new TEIFormatter(null, null)
+            .segmentIntoSentences(currentParagraph, currentParagraphTokens, config, "en");
+        // Expected: one <s> child per sentence, with no whitespace left between the two <s> elements.
+        assertThat(currentParagraph.toXML(), is("<p xmlns=\"http://www.tei-c.org/ns/1.0\"><s>One sentence.</s><s>Second sentence.</s></p>"));
+        assertThat(currentParagraph.getChildElements().size(), is(2));
+    }
+
+    @Test
+    public void testSegmentIntoSentences_Bold_ShouldSplitIntoSentencesAndAddSTag() throws Exception {
+        String text = "One sentence. Second sentence.";
+        // Style flags are set on tokens 0 and 2 only; the paragraph element itself holds plain text.
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder().build();
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+        currentParagraphTokens.get(0).setBold(true);
+        currentParagraphTokens.get(2).setBold(true);
+        currentParagraphTokens.get(2).setItalic(true);
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+        currentParagraph.appendChild(text);
+
+        new TEIFormatter(null, null)
+            .segmentIntoSentences(currentParagraph, currentParagraphTokens, config, "en");
+        // Expected: two <s> elements and no <hi> markup, despite the style flags on the tokens.
+        assertThat(currentParagraph.toXML(), is("<p xmlns=\"http://www.tei-c.org/ns/1.0\"><s>One sentence.</s><s>Second sentence.</s></p>"));
+        assertThat(currentParagraph.getChildElements().size(), is(2));
+    }
+
+    @Test
+    public void testSegmentIntoSentences_NoStyle_ShouldWork() throws Exception {
+        String text = "One sentence (Foppiano et al.). Second sentence (Lopez et al.). ";
+        // Tokens 0 and 2 carry style flags, but the paragraph below is built from plain text and <ref> nodes.
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder().build();
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+        currentParagraphTokens.get(0).setBold(true);
+        currentParagraphTokens.get(2).setBold(true);
+        currentParagraphTokens.get(2).setItalic(true);
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+        currentParagraph.appendChild("One sentence");
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Foppiano et al.)"));
+        currentParagraph.appendChild(". ");
+        currentParagraph.appendChild("Second sentence");
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Lopez et al.)"));
+        currentParagraph.appendChild(".");
+
+        new TEIFormatter(null, null)
+            .segmentIntoSentences(currentParagraph, currentParagraphTokens, config, "en");
+        // Expected: each <ref> stays inside its own sentence and no <hi> markup is produced.
+        assertThat(currentParagraph.toXML(),
+            is("<p xmlns=\"http://www.tei-c.org/ns/1.0\"><s>One sentence <ref>(Foppiano et al.)</ref>.</s><s>Second sentence <ref>(Lopez et al.)</ref>.</s></p>"));
+    }
+
+
+    @Test
+    public void testSegmentIntoSentences_Style_ShouldWork() throws Exception {
+        String text1_0 = "One sentence ";
+        String text1_1 = ". ";
+        String text2_0 = "Second sentence ";
+        String text2_1 = ".";
+        // The four fragments are the text runs that surround the two <ref> elements added below.
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder()
+            .withSentenceSegmentation(true)
+            .build();
+
+        List<LayoutToken> tokens = new ArrayList<>();
+        List<LayoutToken> currentParagraphTokens1_0 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_0);
+        tokens.addAll(currentParagraphTokens1_0);
+        List<LayoutToken> currentParagraphTokens1_1 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_1);
+        tokens.addAll(currentParagraphTokens1_1);
+        List<LayoutToken> currentParagraphTokens2_0 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text2_0);
+        tokens.addAll(currentParagraphTokens2_0);
+        List<LayoutToken> currentParagraphTokens2_1 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text2_1);
+        tokens.addAll(currentParagraphTokens2_1);
+        // Only the first fragment is styled: "One" is bold, "sentence" is bold and italic.
+        currentParagraphTokens1_0.get(0).setBold(true);
+        currentParagraphTokens1_0.get(2).setBold(true);
+        currentParagraphTokens1_0.get(2).setItalic(true);
+
+        List<Triple<String, String, OffsetPosition>> styles1_0 = extractStylesList(currentParagraphTokens1_0);
+        List<Triple<String, String, OffsetPosition>> styles1_1 = extractStylesList(currentParagraphTokens1_1);
+        List<Triple<String, String, OffsetPosition>> styles2_0 = extractStylesList(currentParagraphTokens2_0);
+        List<Triple<String, String, OffsetPosition>> styles2_1 = extractStylesList(currentParagraphTokens2_1);
+        // Build the paragraph fragment by fragment, interleaving styled text with the <ref> elements.
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+
+        applyStyleList(currentParagraph, text1_0, styles1_0);
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Foppiano et al.)"));
+        applyStyleList(currentParagraph, text1_1, styles1_1);
+        currentParagraph.appendChild(" ");
+        applyStyleList(currentParagraph, text2_0, styles2_0);
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Lopez et al.)"));
+        applyStyleList(currentParagraph, text2_1, styles2_1);
+
+        //Assuming these are injected correctly
+
+        new TEIFormatter(null, null).segmentIntoSentences(currentParagraph, tokens, config, "en");
+        // Expected: the <hi> and <ref> children survive the split and end up inside the matching <s>.
+        assertThat(currentParagraph.toXML(),
+            is("<p xmlns=\"http://www.tei-c.org/ns/1.0\"><s><hi rend=\"bold\">One</hi> <hi rend=\"bold italic\">sentence</hi> <ref>(Foppiano et al.)</ref>.</s><s>Second sentence <ref>(Lopez et al.)</ref>.</s></p>"));
+    }
+
+    @Test
+    public void testSegmentIntoSentences_StyleBetweenTwoSentences_ShouldWork() throws Exception {
+        String text1_0 = "One sentence";
+        String text1_1 = ". ";
+        String text2_0 = "Second sentence";
+        String text2_1 = ".";
+        // The four fragments are the text runs that surround the two <ref> elements added below.
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder()
+            .withSentenceSegmentation(true)
+            .build();
+
+        List<LayoutToken> tokens = new ArrayList<>();
+        List<LayoutToken> currentParagraphTokens1_0 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_0);
+        tokens.addAll(currentParagraphTokens1_0);
+        List<LayoutToken> currentParagraphTokens1_1 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_1);
+        tokens.addAll(currentParagraphTokens1_1);
+        List<LayoutToken> currentParagraphTokens2_0 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text2_0);
+        tokens.addAll(currentParagraphTokens2_0);
+        List<LayoutToken> currentParagraphTokens2_1 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text2_1);
+        tokens.addAll(currentParagraphTokens2_1);
+        // Italic flags are set on both sides of the sentence boundary, across fragment and <ref> breaks.
+        currentParagraphTokens1_0.get(0).setBold(true); //One
+        currentParagraphTokens1_0.get(2).setItalic(true); //sentence
+        currentParagraphTokens1_1.get(0).setItalic(true); //.
+        currentParagraphTokens2_0.get(0).setItalic(true); //Second
+        currentParagraphTokens2_0.get(2).setItalic(true); //sentence
+
+        List<Triple<String, String, OffsetPosition>> styles1_0 = extractStylesList(currentParagraphTokens1_0);
+        List<Triple<String, String, OffsetPosition>> styles1_1 = extractStylesList(currentParagraphTokens1_1);
+        List<Triple<String, String, OffsetPosition>> styles2_0 = extractStylesList(currentParagraphTokens2_0);
+        List<Triple<String, String, OffsetPosition>> styles2_1 = extractStylesList(currentParagraphTokens2_1);
+        // Build the paragraph fragment by fragment, interleaving styled text with the <ref> elements.
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+
+        applyStyleList(currentParagraph, text1_0, styles1_0);
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Foppiano et al.)"));
+        applyStyleList(currentParagraph, text1_1, styles1_1);
+        currentParagraph.appendChild(" ");
+        applyStyleList(currentParagraph, text2_0, styles2_0);
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Lopez et al.)"));
+        applyStyleList(currentParagraph, text2_1, styles2_1);
+
+        //Assuming these are injected correctly
+
+        new TEIFormatter(null, null).segmentIntoSentences(currentParagraph, tokens, config, "en");
+        // Expected: the italic "." closes the first <s>; "Second sentence" is one italic <hi> in the second.
+        assertThat(currentParagraph.toXML(),
+            is("<p xmlns=\"http://www.tei-c.org/ns/1.0\"><s><hi rend=\"bold\">One</hi> <hi rend=\"italic\">sentence</hi> <ref>(Foppiano et al.)</ref><hi rend=\"italic\">.</hi></s><s><hi rend=\"italic\">Second sentence</hi> <ref>(Lopez et al.)</ref>.</s></p>"));
+    }
+
+    @Test
+    public void testSegmentIntoSentences_StyleBetweenTwoSentences_oneRef_ShouldWork() throws Exception {
+        String text1_0 = "One sentence. Second sentence";
+        String text1_1 = ".";
+        // Here the sentence boundary lies inside the first fragment; the only <ref> precedes the last ".".
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder()
+            .withSentenceSegmentation(true)
+            .build();
+
+        List<LayoutToken> tokens = new ArrayList<>();
+        List<LayoutToken> currentParagraphTokens1_0 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_0);
+        tokens.addAll(currentParagraphTokens1_0);
+        List<LayoutToken> currentParagraphTokens1_1 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_1);
+        tokens.addAll(currentParagraphTokens1_1);
+        // Italic flags sit on both sides of the sentence boundary ("sentence", "." and "Second").
+        currentParagraphTokens1_0.get(0).setBold(true); //One
+        currentParagraphTokens1_0.get(2).setItalic(true); //sentence
+        currentParagraphTokens1_0.get(3).setItalic(true); //.
+        currentParagraphTokens1_0.get(5).setItalic(true); //Second
+
+        List<Triple<String, String, OffsetPosition>> styles1_0 = extractStylesList(currentParagraphTokens1_0);
+        List<Triple<String, String, OffsetPosition>> styles1_1 = extractStylesList(currentParagraphTokens1_1);
+
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+
+        applyStyleList(currentParagraph, text1_0, styles1_0);
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Lopez et al.)"));
+        applyStyleList(currentParagraph, text1_1, styles1_1);
+
+        new TEIFormatter(null, null).segmentIntoSentences(currentParagraph, tokens, config, "en");
+        // Expected: separate italic <hi> elements on each side of the boundary ("sentence." / "Second").
+        assertThat(currentParagraph.toXML(),
+            is("<p xmlns=\"http://www.tei-c.org/ns/1.0\"><s><hi rend=\"bold\">One</hi> <hi rend=\"italic\">sentence.</hi></s><s><hi rend=\"italic\">Second</hi> sentence <ref>(Lopez et al.)</ref>.</s></p>"));
+    }
+
+    @Test
+    public void testSegmentIntoSentences_StyleBetweenTwoSentencesWithoutRefs_ShouldWork() throws Exception {
+        String text = "One sentence. Second sentence.";
+        // Single text fragment and no <ref>: only style markup has to be distributed over the sentences.
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder()
+            .withSentenceSegmentation(true)
+            .build();
+
+        List<LayoutToken> tokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+
+        tokens.get(0).setBold(true); //One
+        tokens.get(2).setItalic(true); //sentence
+        tokens.get(3).setItalic(true); //.
+        tokens.get(5).setItalic(true); //Second
+        // Token 7 (the second "sentence") is left unstyled, matching the plain "sentence." expected below.
+
+        List<Triple<String, String, OffsetPosition>> styles = extractStylesList(tokens);
+
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+
+        applyStyleList(currentParagraph, text, styles);
+
+        //Assuming these are injected correctly
+        new TEIFormatter(null, null).segmentIntoSentences(currentParagraph, tokens, config, "en");
+        // Expected: separate italic <hi> elements on each side of the boundary ("sentence." / "Second").
+        assertThat(currentParagraph.toXML(),
+            is("<p xmlns=\"http://www.tei-c.org/ns/1.0\"><s><hi rend=\"bold\">One</hi> <hi rend=\"italic\">sentence.</hi></s><s><hi rend=\"italic\">Second</hi> sentence.</s></p>"));
+    }
+
+    @Test
+    public void testSplitMapNodesOverSentenceSplits_shouldAdjustNodes() {
+        TEIFormatter teiFormatter = new TEIFormatter(null, null);
+
+        String text1_0 = "One sentence. Second sentence";
+        String text1_1 = ".";
+
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder()
+            .withSentenceSegmentation(true)
+            .build();
+        // NOTE(review): config is never read in this test - candidate for removal.
+        List<LayoutToken> tokens = new ArrayList<>();
+        List<LayoutToken> currentParagraphTokens1_0 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_0);
+        tokens.addAll(currentParagraphTokens1_0);
+        List<LayoutToken> currentParagraphTokens1_1 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_1);
+        tokens.addAll(currentParagraphTokens1_1);
+        // Italic flags sit on both sides of the sentence boundary ("sentence", "." and "Second").
+        currentParagraphTokens1_0.get(0).setBold(true); //One
+        currentParagraphTokens1_0.get(2).setItalic(true); //sentence
+        currentParagraphTokens1_0.get(3).setItalic(true); //.
+        currentParagraphTokens1_0.get(5).setItalic(true); //Second
+
+        List<Triple<String, String, OffsetPosition>> styles1_0 = extractStylesList(currentParagraphTokens1_0);
+        List<Triple<String, String, OffsetPosition>> styles1_1 = extractStylesList(currentParagraphTokens1_1);
+
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+
+        applyStyleList(currentParagraph, text1_0, styles1_0);
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Lopez et al.)"));
+        applyStyleList(currentParagraph, text1_1, styles1_1);
+
+        String text = currentParagraph.getValue();
+        // Only <ref> nodes become forbidden spans, each built as [offset, offset + text length).
+        Map<Integer, Pair<Node, String>> nestedNodes = teiFormatter.identifyNestedNodes(currentParagraph);
+        List<OffsetPosition> forbiddenPositions = nestedNodes.entrySet()
+            .stream()
+            .filter(entry -> ((Element) entry.getValue().getLeft()).getLocalName().equals("ref"))
+            .map(entry -> new OffsetPosition(entry.getKey(), entry.getValue().getRight().length() + entry.getKey()))
+            .collect(Collectors.toList());
+
+        List<OffsetPosition> sentencesOffsetPosition =
+            SentenceUtilities.getInstance().runSentenceDetection(text, forbiddenPositions, tokens, new Language("en"));
+
+        Map<Integer, Pair<Node, String>> adjustedNestedNodes = teiFormatter.splitMapNodesOverSentenceSplits(nestedNodes, text, sentencesOffsetPosition);
+        // Expected entries: "One"@0, "sentence."@4, "Second"@14 and the <ref> text @30.
+        assertThat(adjustedNestedNodes.size(), is(4));
+        // NOTE(review): the next assertion relies on keySet() iterating in ascending offset order - confirm.
+        assertThat(new ArrayList<>(adjustedNestedNodes.keySet()), is(Arrays.asList(0, 4, 14, 30)));
+
+        assertThat(adjustedNestedNodes.get(0).getRight(), is("One"));
+        assertThat(adjustedNestedNodes.get(4).getRight(), is("sentence."));
+        assertThat(adjustedNestedNodes.get(14).getRight(), is("Second"));
+        assertThat(adjustedNestedNodes.get(30).getRight(), is("(Lopez et al.)"));
+    }
+
+    @Test
+    public void testSplitMapNodesOverThreeSentenceSplits_shouldAdjustNodes() {
+        TEIFormatter teiFormatter = new TEIFormatter(null, null);
+
+        String text1_0 = "One sentence. Second sentence. Third sentence";
+        String text1_1 = ".";
+
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder()
+            .withSentenceSegmentation(true)
+            .build();
+        // NOTE(review): config is never read in this test - candidate for removal.
+        List<LayoutToken> tokens = new ArrayList<>();
+        List<LayoutToken> currentParagraphTokens1_0 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_0);
+        tokens.addAll(currentParagraphTokens1_0);
+        List<LayoutToken> currentParagraphTokens1_1 = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text1_1);
+        tokens.addAll(currentParagraphTokens1_1);
+
+        currentParagraphTokens1_0.get(0).setBold(true); //One
+        currentParagraphTokens1_0.get(2).setItalic(true); //sentence
+        currentParagraphTokens1_0.get(3).setItalic(true); //.
+        currentParagraphTokens1_0.get(5).setItalic(true); //Second
+        currentParagraphTokens1_0.get(7).setItalic(true); //sentence
+        currentParagraphTokens1_0.get(8).setItalic(true); //.
+        currentParagraphTokens1_0.get(10).setItalic(true); //Third
+        // Token 12 (the third "sentence") is left unstyled, so the italic flags stop at "Third".
+
+        List<Triple<String, String, OffsetPosition>> styles1_0 = extractStylesList(currentParagraphTokens1_0);
+        List<Triple<String, String, OffsetPosition>> styles1_1 = extractStylesList(currentParagraphTokens1_1);
+
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+
+        applyStyleList(currentParagraph, text1_0, styles1_0);
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Lopez et al.)"));
+        applyStyleList(currentParagraph, text1_1, styles1_1);
+
+        String text = currentParagraph.getValue();
+        // Only <ref> nodes become forbidden spans, each built as [offset, offset + text length).
+        Map<Integer, Pair<Node, String>> nestedNodes = teiFormatter.identifyNestedNodes(currentParagraph);
+        List<OffsetPosition> forbiddenPositions = nestedNodes.entrySet()
+            .stream()
+            .filter(entry -> ((Element) entry.getValue().getLeft()).getLocalName().equals("ref"))
+            .map(entry -> new OffsetPosition(entry.getKey(), entry.getValue().getRight().length() + entry.getKey()))
+            .collect(Collectors.toList());
+
+        List<OffsetPosition> sentencesOffsetPosition =
+            SentenceUtilities.getInstance().runSentenceDetection(text, forbiddenPositions, tokens, new Language("en"));
+
+        Map<Integer, Pair<Node, String>> adjustedNestedNodes = teiFormatter.splitMapNodesOverSentenceSplits(nestedNodes, text, sentencesOffsetPosition);
+        // Expected entries: "One"@0, "sentence."@4, "Second sentence."@14, "Third"@31 and the <ref> text @46.
+        assertThat(adjustedNestedNodes.size(), is(5));
+        // NOTE(review): the next assertion relies on keySet() iterating in ascending offset order - confirm.
+        assertThat(new ArrayList<>(adjustedNestedNodes.keySet()), is(Arrays.asList(0, 4, 14, 31, 46)));
+
+        assertThat(adjustedNestedNodes.get(0).getRight(), is("One"));
+        assertThat(adjustedNestedNodes.get(4).getRight(), is("sentence."));
+        assertThat(adjustedNestedNodes.get(14).getRight(), is("Second sentence."));
+        assertThat(adjustedNestedNodes.get(31).getRight(), is("Third"));
+        assertThat(adjustedNestedNodes.get(46).getRight(), is("(Lopez et al.)"));
+    }
+
+    @Test
+    public void testIdentifyRefNotes() throws Exception {
+        Element currentParagraph = XmlBuilderUtils.teiElement("p");
+        currentParagraph.appendChild("One sentence");
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Foppiano et al.)"));
+        currentParagraph.appendChild(". ");
+        currentParagraph.appendChild("Second sentence");
+        currentParagraph.appendChild(" ");
+        currentParagraph.appendChild(XmlBuilderUtils.teiElement("ref", "(Lopez et al.)"));
+        currentParagraph.appendChild(".");
+
+        Map<Integer, Pair<Node, String>> integerPairMap = new TEIFormatter(null, null).identifyNestedNodes(currentParagraph);
+        // Keys are the <ref> start offsets (13 and 48); check them by key, not by keySet() iteration order.
+        assertThat(integerPairMap.keySet(), hasSize(2));
+        assertThat(integerPairMap, hasKey(13));
+        assertThat(integerPairMap.get(13).getRight(), is("(Foppiano et al.)"));
+
+        assertThat(integerPairMap, hasKey(48));
+        assertThat(integerPairMap.get(48).getRight(), is("(Lopez et al.)"));
+    }
+
+    @Test
+    public void testExtractStylesList_single_shouldWork() throws Exception {
+        String text = "The room temperature magnetic hysteresis loop for melt-spun ribbons of pure Nd 2 Fe 14 B is shown in Figure ";
+        GrobidAnalysisConfig config = GrobidAnalysisConfig.builder().build(); // NOTE(review): never read in this test
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+        // Tokens 26 and 30 are the "2" and the "14" of "Nd 2 Fe 14 B".
+        currentParagraphTokens.get(26).setSubscript(true);
+        currentParagraphTokens.get(30).setSubscript(true);
+
+        List<Triple<String, String, OffsetPosition>> pairs = extractStylesList(currentParagraphTokens);
+        // Expected: two separate subscript entries whose offsets are character positions within the text.
+        assertThat(pairs, hasSize(2));
+        assertThat(pairs.get(0).getLeft(), is("subscript"));
+        assertThat(pairs.get(0).getMiddle(), is("2"));
+        assertThat(pairs.get(0).getRight().start, is(79));
+        assertThat(pairs.get(0).getRight().end, is(80));
+
+        assertThat(pairs.get(1).getLeft(), is("subscript"));
+        assertThat(pairs.get(1).getMiddle(), is("14"));
+        assertThat(pairs.get(1).getRight().start, is(84));
+        assertThat(pairs.get(1).getRight().end, is(86));
+    }
+
+    /**
+     * Applies two non-overlapping single-style spans ("bold" and "italic") to a plain sentence
+     * and checks that each span is wrapped in a {@code <hi rend="...">} element while the
+     * surrounding text is kept as-is.
+     */
+    @Test
+    public void applyStyleList_simpleStyles_shouldWork() throws Exception {
+        String sentence = "This is bold and italic.";
+        List<Triple<String, String, OffsetPosition>> styleSpans = new ArrayList<>(Arrays.asList(
+            Triple.of("bold", "bold", new OffsetPosition(8, 12)),
+            Triple.of("italic", "italic", new OffsetPosition(17, 23))
+        ));
+        Element paragraph = XmlBuilderUtils.teiElement("p");
+
+        TEIFormatter.applyStyleList(paragraph, sentence, styleSpans);
+
+        String expectedXml = "<p xmlns=\"http://www.tei-c.org/ns/1.0\">This is " +
+            "<hi rend=\"bold\">bold</hi> and <hi rend=\"italic\">italic</hi>.</p>";
+        assertThat(paragraph.toXML(), is(expectedXml));
+    }
+
+    /**
+     * Applies four adjacent spans, two of which carry a combined style name
+     * ("bold subscript", "italic superscript"), and checks that each span becomes one
+     * {@code <hi>} element whose {@code rend} attribute holds the style name verbatim.
+     */
+    @Test
+    public void applyStyleList_complexStyles_shouldWork() throws Exception {
+        String sentence = "This is bold and italic.";
+        List<Triple<String, String, OffsetPosition>> styleSpans = new ArrayList<>(Arrays.asList(
+            Triple.of("subscript", "is", new OffsetPosition(5, 7)),
+            Triple.of("bold subscript", "bold", new OffsetPosition(8, 12)),
+            Triple.of("italic superscript", "and", new OffsetPosition(13, 16)),
+            Triple.of("italic", "italic", new OffsetPosition(17, 23))
+        ));
+        Element paragraph = XmlBuilderUtils.teiElement("p");
+
+        TEIFormatter.applyStyleList(paragraph, sentence, styleSpans);
+
+        String expectedXml = "<p xmlns=\"http://www.tei-c.org/ns/1.0\">This " +
+            "<hi rend=\"subscript\">is</hi> " +
+            "<hi rend=\"bold subscript\">bold</hi> " +
+            "<hi rend=\"italic superscript\">and</hi> " +
+            "<hi rend=\"italic\">italic</hi>.</p>";
+        assertThat(paragraph.toXML(), is(expectedXml));
+    }
+
+    /**
+     * Gives one token ("2") three styles at once and another token ("14") a single style, and
+     * checks that {@code extractStylesList} reports the first as the combined name
+     * "bold italic subscript" and the second as plain "subscript", each with its offsets.
+     */
+    @Test
+    public void testExtractStylesList_combined_shouldWork() throws Exception {
+        String text = "The room temperature magnetic hysteresis loop for melt-spun ribbons of pure Nd 2 Fe 14 B is shown in Figure ";
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+
+        // Token 26 ("2") carries subscript + bold + italic; token 30 ("14") only subscript.
+        currentParagraphTokens.get(26).setSubscript(true);
+        currentParagraphTokens.get(26).setBold(true);
+        currentParagraphTokens.get(26).setItalic(true);
+        currentParagraphTokens.get(30).setSubscript(true);
+
+        List<Triple<String, String, OffsetPosition>> pairs = extractStylesList(currentParagraphTokens);
+
+        assertThat(pairs, hasSize(2));
+        assertThat(pairs.get(0).getLeft(), is("bold italic subscript"));
+        assertThat(pairs.get(0).getMiddle(), is("2"));
+        assertThat(pairs.get(0).getRight().start, is(79));
+        assertThat(pairs.get(0).getRight().end, is(80));
+
+        assertThat(pairs.get(1).getLeft(), is("subscript"));
+        assertThat(pairs.get(1).getMiddle(), is("14"));
+        assertThat(pairs.get(1).getRight().start, is(84));
+        assertThat(pairs.get(1).getRight().end, is(86));
+    }
+
+    /**
+     * Marks four tokens as bold (indices 24, 26, 28 and 30), leaving the tokens between them
+     * unstyled, and checks that {@code extractStylesList} merges them into a single "bold"
+     * span covering "Nd 2 Fe 14" (offsets 76 to 86).
+     */
+    @Test
+    public void testExtractStylesList_continuousTokens_shouldWork() throws Exception {
+        String text = "The room temperature magnetic hysteresis loop for melt-spun ribbons of pure Nd 2 Fe 14 B is shown in Figure ";
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+
+        // Only the even-indexed tokens are flagged; the odd-indexed ones in between are
+        // presumably the separating spaces - they are not styled here, yet the expected
+        // result is one continuous span.
+        currentParagraphTokens.get(24).setBold(true);
+        currentParagraphTokens.get(26).setBold(true);
+        currentParagraphTokens.get(28).setBold(true);
+        currentParagraphTokens.get(30).setBold(true);
+
+        List<Triple<String, String, OffsetPosition>> pairs = extractStylesList(currentParagraphTokens);
+
+        assertThat(pairs, hasSize(1));
+        assertThat(pairs.get(0).getLeft(), is("bold"));
+        assertThat(pairs.get(0).getMiddle(), is("Nd 2 Fe 14"));
+        assertThat(pairs.get(0).getRight().start, is(76));
+        assertThat(pairs.get(0).getRight().end, is(86));
+    }
+
+    /**
+     * Same token styling as the combined-styles case, but calls the two-argument
+     * {@code extractStylesList} overload with a list containing the bold style name, and
+     * checks that bold is left out of the reported style ("italic subscript" instead of
+     * "bold italic subscript").
+     */
+    @Test
+    public void testExtractStylesList_ignoreBold_shouldWork() throws Exception {
+        String text = "The room temperature magnetic hysteresis loop for melt-spun ribbons of pure Nd 2 Fe 14 B is shown in Figure ";
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+
+        currentParagraphTokens.get(26).setSubscript(true);
+        currentParagraphTokens.get(26).setBold(true);
+        currentParagraphTokens.get(26).setItalic(true);
+        currentParagraphTokens.get(30).setSubscript(true);
+
+        // The second argument lists the style names to leave out of the result.
+        List<Triple<String, String, OffsetPosition>> pairs = extractStylesList(currentParagraphTokens, Arrays.asList(TEI_STYLE_BOLD_NAME));
+
+        assertThat(pairs, hasSize(2));
+        assertThat(pairs.get(0).getLeft(), is("italic subscript"));
+        assertThat(pairs.get(0).getMiddle(), is("2"));
+        assertThat(pairs.get(0).getRight().start, is(79));
+        assertThat(pairs.get(0).getRight().end, is(80));
+
+        assertThat(pairs.get(1).getLeft(), is("subscript"));
+        assertThat(pairs.get(1).getMiddle(), is("14"));
+        assertThat(pairs.get(1).getRight().start, is(84));
+        assertThat(pairs.get(1).getRight().end, is(86));
+    }
+
+    /**
+     * Checks the text (middle element) produced by {@code extractStylesList} for a bold span
+     * followed by an italic span.
+     *
+     * Disabled on purpose (see the {@code @Ignore} reason). {@code @Test} is declared alongside
+     * {@code @Ignore} so that the runner still discovers the method and reports it as ignored;
+     * without {@code @Test} the method is not treated as a test at all.
+     */
+    @Test
+    @Ignore("The middle is actually not used")
+    public void testExtractStylesList_checkProducedText_ShouldWork() throws Exception {
+        String text = "I. Introduction  1.1. Généralités et rappels  ";
+        List<LayoutToken> textTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+
+        // Tokens flagged bold; expected to yield "I. Introduction".
+        textTokens.get(0).setBold(true);
+        textTokens.get(1).setBold(true);
+        textTokens.get(3).setBold(true);
+
+        // Tokens flagged italic; expected to yield "1.1. Généralités et rappels".
+        textTokens.get(6).setItalic(true);
+        textTokens.get(7).setItalic(true);
+        textTokens.get(8).setItalic(true);
+        textTokens.get(9).setItalic(true);
+        textTokens.get(11).setItalic(true);
+        textTokens.get(13).setItalic(true);
+        textTokens.get(15).setItalic(true);
+
+        List<Triple<String, String, OffsetPosition>> pairs = extractStylesList(textTokens);
+
+        assertThat(pairs, hasSize(2));
+        assertThat(pairs.get(0).getLeft(), is("bold"));
+        assertThat(pairs.get(0).getMiddle(), is("I. Introduction"));
+        assertThat(pairs.get(1).getLeft(), is("italic"));
+        assertThat(pairs.get(1).getMiddle(), is("1.1. Généralités et rappels"));
+    }
+
+    /**
+     * Checks that {@code getSectionNumber} splits a section title into its leading number
+     * (right element, "3") and the remaining title tokens (left element, "Supercon 2"), when
+     * the number and the title are separated by a single space.
+     */
+    @Test
+    public void testGetSectionNumber_simple_ShouldWork() throws Exception {
+        String text = "3 Supercon 2";
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+
+        // Token 4 is presumably the trailing "2" (tokens: "3", " ", "Supercon", " ", "2");
+        // it is flagged as subscript and must stay part of the title.
+        currentParagraphTokens.get(4).setSubscript(true);
+        Pair<List<LayoutToken>, String> sectionNumber = new TEIFormatter(null, null)
+            .getSectionNumber(currentParagraphTokens);
+
+        String output = LayoutTokensUtil.toText(sectionNumber.getLeft());
+        assertThat(output, is("Supercon 2"));
+        assertThat(sectionNumber.getRight(), is("3"));
+    }
+
+    /**
+     * Checks that {@code getSectionNumber} returns the same split ("3" / "Supercon 2") when
+     * the number and the title are separated by several spaces, i.e. the extra whitespace
+     * does not end up in the returned title tokens.
+     */
+    @Test
+    public void testGetSectionNumber_doubleSpace_ShouldWork() throws Exception {
+        String text = "3   Supercon 2";
+        List<LayoutToken> currentParagraphTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+
+        // Token 6 is presumably the trailing "2" (each of the three spaces being its own
+        // token); it is flagged as subscript and must stay part of the title.
+        currentParagraphTokens.get(6).setSubscript(true);
+        Pair<List<LayoutToken>, String> sectionNumber = new TEIFormatter(null, null)
+            .getSectionNumber(currentParagraphTokens);
+
+        String output = LayoutTokensUtil.toText(sectionNumber.getLeft());
+        assertThat(output, is("Supercon 2"));
+        assertThat(sectionNumber.getRight(), is("3"));
+    }
+
 }
\ No newline at end of file