From 5225cc6eeb79340420fc1a8311d187669c9a1d7b Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Fri, 9 May 2025 23:26:31 -0700 Subject: [PATCH 01/42] Update scala and sbt --- build.sbt | 27 ++++++++++++++------------- project/build.properties | 5 ++++- project/plugins.sbt | 2 ++ 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/build.sbt b/build.sbt index 354fc7974..0bc3a2e3c 100644 --- a/build.sbt +++ b/build.sbt @@ -1,23 +1,24 @@ -// These were last checked on 2025-02-19. +// These were last checked on 2025-05-09. val scala211 = "2.11.12" // up to 2.11.12 -val scala212 = "2.12.19" // up to 2.12.20 -val scala213 = "2.13.14" // up to 2.13.16 +val scala212 = "2.12.20" // up to 2.12.20 +val scala213 = "2.13.16" // up to 2.13.16 val scala30 = "3.0.2" // up to 3.0.2 val scala31 = "3.1.3" // up to 3.1.3 val scala32 = "3.2.2" // up to 3.2.2 -val scala33 = "3.3.5" // up to 3.3.5 (LTS) +val scala33 = "3.3.6" // up to 3.3.6 (LTS) val scala34 = "3.4.3" // up to 3.4.3 val scala35 = "3.5.2" // up to 3.5.2 -val scala36 = "3.6.3" // up to 3.6.3 +val scala36 = "3.6.4" // up to 3.6.4 +val scala37 = "3.7.0" // up to 3.7.0 // See https://www.scala-lang.org/blog/2022/08/17/long-term-compatibility-plans.html. // Scala30: "If you are maintaining a library, you should drop Scala 3.0." Dropped. // Scala31: This is a LTS (long term support) version before it was called that. // Scala32: This is for experimentation, as in Scala Next, and not for release. // Scala33: This is the first official LTS, but hold off until necessary. -val scala3 = scala31 +val scala3 = scala33 -ThisBuild / crossScalaVersions := Seq(scala212, scala211, scala213, scala3) +ThisBuild / crossScalaVersions := Seq(scala213, scala3) ThisBuild / scalaVersion := crossScalaVersions.value.head lazy val root = (project in file(".")) @@ -33,17 +34,17 @@ lazy val library = project lazy val apps = project .dependsOn(library % "compile -> compile; test -> test") -lazy val webapp = project - .enablePlugins(PlayScala) - .dependsOn(library % "compile -> compile; test -> test") - .settings( +// lazy val webapp = project + // .enablePlugins(PlayScala) + // .dependsOn(library % "compile -> compile; test -> test") + // .settings( // scala3 doesn't have play (for 2.8.19 as specified by the project) and is ruled out completely. // scala213 has version problems for com.fasterxml.jackson.databind.JsonMappingException. // scala212 works! // scala211 isn't compiling and complains on twirlCompileTemplates. // This isn't a library. Only one version needs to work. We shouldn't use play for this anyway. - crossScalaVersions := Seq(scala212) - ) + // crossScalaVersions := Seq(scala212) + // ) lazy val debugger = project .dependsOn(library % "compile -> compile; test -> test") diff --git a/project/build.properties b/project/build.properties index 11956d958..4c19fc197 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1,6 +1,9 @@ +# This was last checked on 2025-05-09. # Version 1.7.2+ will cause problems when combined with the play plug-in used for the webapp! # [error] * org.scala-lang.modules:scala-xml_2.12:2.1.0 (early-semver) is selected over {1.2.0, 1.1.1} # [error] +- org.scala-lang:scala-compiler:2.12.17 (depends on 2.1.0) # [error] +- com.typesafe.sbt:sbt-native-packager:1.5.2 (scalaVersion=2.12, sbtVersion=1.0) (depends on 1.1.1) # [error] +- com.typesafe.play:twirl-api_2.12:1.5.1 (depends on 1.2.0) -sbt.version = 1.7.2 +# This error is solved by adding a VersionScheme.Always to plugins.sbt. 
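+# For reference, the override mentioned above, added to project/plugins.sbt in this same patch, is:
+#   ThisBuild / libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always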
+# up to 1.10.11
+sbt.version = 1.10.11
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 273ee7ce6..417c04d23 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,3 +1,5 @@
+ThisBuild / libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always
+
 // Latest version numbers were updated on 2024 July 11.
 addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.2-1") // up to 2.2.1 *
 addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.3") // up to 3.9.21 *

From 1d18c8a198510f27f271ca23624e30a129a963f4 Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Tue, 13 May 2025 16:06:16 -0700
Subject: [PATCH 02/42] Clean up BalaurProcessor

---
 .../processors/clu/BalaurProcessor.scala      | 194 ++++++++++--------
 1 file changed, 112 insertions(+), 82 deletions(-)

diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala
index b791d181e..dc69b9363 100644
--- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala
+++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala
@@ -2,7 +2,7 @@ package org.clulab.processors.clu
 
 import com.typesafe.config.Config
 import com.typesafe.config.ConfigFactory
-import org.clulab.numeric.{NumericEntityRecognizer, setLabelsAndNorms}
+import org.clulab.numeric.{NumericEntityRecognizer, mkLabelsAndNorms}
 import org.clulab.processors.{Document, Processor, Sentence}
 import org.clulab.processors.clu.tokenizer._
 import org.clulab.scala.WrappedArray._
@@ -13,12 +13,11 @@
 import org.clulab.struct.DirectedGraph
 import org.clulab.struct.GraphMap
 import org.clulab.utils.{Configured, MathUtils, ToEnhancedDependencies}
 import org.slf4j.{Logger, LoggerFactory}
-
 import org.clulab.odin.Mention
-
 import BalaurProcessor._
 import PostProcessor._
 import org.clulab.processors.hexatagging.HexaDecoder
+import org.clulab.struct.GraphMap.GraphMap
 
 class BalaurProcessor protected (
   val config: Config,
@@ -91,29 +90,27 @@ class BalaurProcessor protected (
     throw new RuntimeException("ERROR: cannot call this method on its own in this processor!")
   }
 
-  /** Lematization; modifies the document in place */
-  override def lemmatize(doc: Document): Unit = {
-    for(sent <- doc.sentences) {
-      val lemmas = new Array[String](sent.size)
-      for(i <- sent.words.indices) {
-        lemmas(i) = wordLemmatizer.lemmatizeWord(sent.words(i))
-
-        // a lemma may be empty in some weird Unicode situations
-        if(lemmas(i).isEmpty) {
-          logger.debug(s"""WARNING: Found empty lemma for word #$i "${sent.words(i)}" in sentence: ${sent.words.mkString(" ")}""")
-          lemmas(i) = sent.words(i).toLowerCase()
-        }
-      }
-      sent.lemmas = Some(lemmas)
+  /** Lemmatization; returns the lemmas for the given words */
+  override def lemmatize(words: Array[String]): Array[String] = {
+    val lemmas = words.zipWithIndex.map { case (word, index) =>
+      val lemma = wordLemmatizer.lemmatizeWord(word)
+      // a lemma may be empty in some weird Unicode situations
+      val nonEmptyLemma =
+        if (lemma.isEmpty) {
+          logger.debug(s"""WARNING: Found empty lemma for word #$index "$word" in sentence: ${words.mkString(" ")}""")
+          word.toLowerCase()
+        }
+        else lemma
+
+      nonEmptyLemma
     }
+
+    lemmas
   }
 
   /** Generates cheap lemmas with the word in lower case, for languages where a lemmatizer is not available */
-  def cheapLemmatize(doc:Document): Unit = {
-    for(sent <- doc.sentences) {
-      val lemmas = sent.words.map(_.toLowerCase()).toArray
-      sent.lemmas = Some(lemmas)
-    }
+  def cheapLemmatize(sentence: Sentence): Array[String] = {
+    sentence.words.map(_.toLowerCase())
   }
 
   override def recognizeNamedEntities(doc: Document): Unit = {
@@ -144,64 +141,86 @@ class BalaurProcessor protected (
     throw new RuntimeException("ERROR: functionality not supported in this processor!")
   }
 
-  override def annotate(doc: Document): Document = {
-    val verbose = false
-
-    // lemmas are created deterministically, not through the MTL framework
-    lemmatize(doc)
+  override def annotate(document: Document): Document = {
+    // Process one sentence at a time through the MTL framework.
+    val partlyAnnotatedSentences = document.sentences.map { sentence =>
+      val words = sentence.words
+      // Lemmas are created deterministically, not through the MTL framework.
+      val lemmas = lemmatize(words)
 
-    // process one sentence at a time through the MTL framework
-    for (sent <- doc.sentences) {
       try {
-        val allLabelsAndScores = tokenClassifier.predictWithScores(sent.words)
-        assignPosTags(allLabelsAndScores(TASK_TO_INDEX(POS_TASK)), sent)
-        assignNamedEntityLabels(allLabelsAndScores(TASK_TO_INDEX(NER_TASK)), sent)
-        assignChunkLabels(allLabelsAndScores(TASK_TO_INDEX(CHUNKING_TASK)), sent)
-        assignDependencyLabelsUsingHexaTags(
+        val allLabelsAndScores = tokenClassifier.predictWithScores(words)
+        val tags = mkPosTags(words, allLabelsAndScores(TASK_TO_INDEX(POS_TASK)))
+        val entities = {
+          val optionalEntities = mkOptionalNerLabels(words, sentence.startOffsets, sentence.endOffsets, tags, lemmas)
+
+          mkNamedEntityLabels(words, allLabelsAndScores(TASK_TO_INDEX(NER_TASK)), optionalEntities)
+        }
+        val chunks = mkChunkLabels(words, allLabelsAndScores(TASK_TO_INDEX(CHUNKING_TASK)))
+        val graphs = mkDependencyLabelsUsingHexaTags(
+          words, lemmas, tags,
          allLabelsAndScores(TASK_TO_INDEX(HEXA_TERM_TASK)),
-          allLabelsAndScores(TASK_TO_INDEX(HEXA_NONTERM_TASK)),
-          sent
+          allLabelsAndScores(TASK_TO_INDEX(HEXA_NONTERM_TASK))
+        )
+        // Entities and norms still need to be patched and filled in, so this is only a partly annotated sentence.
+        val partlyAnnotatedSentence = sentence.copy(
+          tags = Some(tags), lemmas = Some(lemmas), entities = Some(entities), chunks = Some(chunks), graphs = graphs
         )
-      } catch {
-        case e: EncoderMaxTokensRuntimeException =>
-          // this sentence exceeds the maximum number of tokens for the encoder
-          // TODO: at some point do something smart here
-          println(s"ERROR: this sentence exceeds the maximum number of tokens for the encoder and will not be annotated: ${sent.words.mkString(" ")}")
+        partlyAnnotatedSentence
+      }
+      catch {
+        // No values, not even lemmas, will be included in the annotation if there was an exception.
+        case e: EncoderMaxTokensRuntimeException =>
+          // TODO: at some point do something smart here
+          println(s"ERROR: This sentence exceeds the maximum number of tokens for the encoder and will not be annotated: ${sentence.words.mkString(" ")}")
+          sentence
+        case e: AssertionError =>
+          println(s"ERROR: The output of predictWithScores does not satisfy assertions.
The sentence will not be annotated: ${sentence.words.mkString(" ")}") + sentence } } + val partlyAnnotatedDocument = document.copy(sentences = partlyAnnotatedSentences) + val fullyAnnotatedDocument = + if (numericEntityRecognizerOpt.nonEmpty) { + val numericMentions = numericEntityRecognizerOpt.get.extractFrom(partlyAnnotatedDocument) + val (newLabels, newNorms) = mkLabelsAndNorms(partlyAnnotatedDocument, numericMentions) + val fullyAnnotatedSentences = partlyAnnotatedDocument.sentences.indices.map { index => + partlyAnnotatedDocument.sentences(index).copy( + entities = Some(newLabels(index)), + norms = Some(newNorms(index)) + ) + }.toArray + + partlyAnnotatedDocument.copy(sentences = fullyAnnotatedSentences) + } + else partlyAnnotatedDocument - // numeric entities using our numeric entity recognizer based on Odin rules - if(numericEntityRecognizerOpt.nonEmpty) { - val numericMentions = extractNumericEntityMentions(doc) - setLabelsAndNorms(doc, numericMentions) - } - - doc + fullyAnnotatedDocument } - def extractNumericEntityMentions(doc:Document): Seq[Mention] = { - numericEntityRecognizerOpt.get.extractFrom(doc) - } + private def mkPosTags(words: Array[String], labels: Array[Array[(String, Float)]]): Array[String] = { + assert(labels.length == words.length) - private def assignPosTags(labels: Array[Array[(String, Float)]], sent: Sentence): Unit = { - assert(labels.length == sent.words.length) - sent.tags = Some(postprocessPartOfSpeechTags(sent.words, labels.map(_.head._1).toArray)) - } + val tags = labels.map(_.head._1).toArray - /** Must be called after assignPosTags and lemmatize because it requires Sentence.tags and Sentence.lemmas */ - private def assignNamedEntityLabels(labels: Array[Array[(String, Float)]], sent: Sentence): Unit = { - assert(labels.length == sent.words.length) + postprocessPartOfSpeechTags(words, tags) + tags + } + private def mkOptionalNerLabels( + words: Array[String], startOffsets: Array[Int], endOffsets: Array[Int], + tags: Array[String], lemmas: Array[String] + ): Option[Array[String]] = { // NER labels from the custom NER - val optionalNERLabels: Option[Array[String]] = optionalNER.map { ner => + optionalNER.map { ner => val sentence = Sentence( - sent.words, - sent.startOffsets, - sent.endOffsets, - sent.words, - sent.tags, - sent.lemmas, + words, // Why isn't this raw? 
+ startOffsets, + endOffsets, + words, + Some(tags), + Some(lemmas), entities = None, norms = None, chunks = None, @@ -212,18 +231,24 @@ class BalaurProcessor protected ( ner.find(sentence) } + } + + /** Must be called after assignPosTags and lemmatize because it requires Sentence.tags and Sentence.lemmas */ + private def mkNamedEntityLabels(words: Array[String], labels: Array[Array[(String, Float)]], optionalNERLabels: Option[Array[String]]): Array[String] = { + assert(labels.length == words.length) val genericLabels = NamedEntity.patch(labels.map(_.head._1).toArray) - if(optionalNERLabels.isEmpty) { - sent.entities = Some(genericLabels) - } else { + if (optionalNERLabels.isEmpty) { + genericLabels + } + else { //println(s"MERGING NE labels for sentence: ${sent.words.mkString(" ")}") //println(s"Generic labels: ${NamedEntity.patch(labels).mkString(", ")}") //println(s"Optional labels: ${optionalNERLabels.get.mkString(", ")}") val mergedLabels = NamedEntity.patch(mergeNerLabels(genericLabels, optionalNERLabels.get)) //println(s"Merged labels: ${mergedLabels.mkString(", ")}") - sent.entities = Some(mergedLabels) + mergedLabels } } @@ -246,9 +271,10 @@ class BalaurProcessor protected ( } } - private def assignChunkLabels(labels: Array[Array[(String, Float)]], sent: Sentence): Unit = { - assert(labels.length == sent.words.length) - sent.chunks = Some(labels.map(_.head._1).toArray) + private def mkChunkLabels(words: Array[String], labels: Array[Array[(String, Float)]]): Array[String] = { + assert(labels.length == words.length) + + labels.map(_.head._1).toArray } // The head has one score, the label has another. Here the two scores are interpolated @@ -286,11 +312,14 @@ class BalaurProcessor protected ( sentDependencies.toArray } - private def assignDependencyLabelsUsingHexaTags( + private def mkDependencyLabelsUsingHexaTags( + words: Array[String], lemmas: Array[String], tags: Array[String], termTags: Array[Array[PredictionScore]], - nonTermTags: Array[Array[PredictionScore]], - sent: Sentence): Unit = { + nonTermTags: Array[Array[PredictionScore]] + ): GraphMap = { val verbose = false + val graphs = GraphMap() + val size = words.length // bht is used just for debugging purposes here val (bht, deps, roots) = hexaDecoder.decode(termTags, nonTermTags, topK = 25, verbose) @@ -301,20 +330,21 @@ class BalaurProcessor protected ( println("Roots: " + roots.get.mkString(", ")) } - if(deps.nonEmpty && roots.nonEmpty) { + if (deps.nonEmpty && roots.nonEmpty) { // basic dependencies that replicate treebank annotations - val depGraph = new DirectedGraph[String](deps.get, Some(sent.size), roots) - sent.graphs += GraphMap.UNIVERSAL_BASIC -> depGraph + val depGraph = new DirectedGraph[String](deps.get, Some(size), roots) + graphs += GraphMap.UNIVERSAL_BASIC -> depGraph // enhanced dependencies as defined by Manning - val enhancedDepGraph = ToEnhancedDependencies.generateUniversalEnhancedDependencies(sent, depGraph) - sent.graphs += GraphMap.UNIVERSAL_ENHANCED -> enhancedDepGraph + val enhancedDepGraph = ToEnhancedDependencies.generateUniversalEnhancedDependencies(words, lemmas, tags, depGraph) + graphs += GraphMap.UNIVERSAL_ENHANCED -> enhancedDepGraph // ideally, hybrid dependencies should contain both syntactic dependencies and semantic roles // however, this processor produces only syntactic dependencies - sent.graphs += GraphMap.HYBRID_DEPENDENCIES -> enhancedDepGraph + graphs += GraphMap.HYBRID_DEPENDENCIES -> enhancedDepGraph } - } + graphs + } } object BalaurProcessor { From 
b540f25bd3bc517e423ed20f2b10aa531a38f246 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Tue, 13 May 2025 17:48:50 -0700 Subject: [PATCH 03/42] Stop assigning to a val in Document --- .../org/clulab/processors/Document.scala | 88 +++++++++++-------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/Document.scala b/library/src/main/scala/org/clulab/processors/Document.scala index 6435ab94c..ab5dbbba6 100644 --- a/library/src/main/scala/org/clulab/processors/Document.scala +++ b/library/src/main/scala/org/clulab/processors/Document.scala @@ -16,21 +16,20 @@ import scala.collection.mutable * Written by: Mihai Surdeanu and Gus Hahn-Powell. * Last Modified: Add apply method to copy Document. */ -class Document(val sentences: Array[Sentence]) extends Serializable { - +class Document( + val sentences: Array[Sentence], /** Unique id for this document, if any */ - var id: Option[String] = None - + val id: Option[String] = None, /** Clusters of coreferent mentions */ - var coreferenceChains: Option[CorefChains] = None - + val coreferenceChains: Option[CorefChains] = None, /** The original text corresponding to this document, if it was preserved by the corresponding processor */ - var text: Option[String] = None - + val text: Option[String] = None, /** Map of any arbitrary document attachments such as document creation time */ - protected var attachments: Option[mutable.HashMap[String, DocumentAttachment]] = None + protected val attachments: Option[mutable.HashMap[String, DocumentAttachment]] = None, + protected val documentCreationTime:Option[String] = None +) extends Serializable { - protected var documentCreationTime:Option[String] = None + def copy(sentences: Array[Sentence]): Document = ??? /** Clears any internal state potentially constructed by the annotators */ def clear(): Unit = { } @@ -67,11 +66,11 @@ class Document(val sentences: Array[Sentence]) extends Serializable { ) /** Adds an attachment to the document's attachment map */ - def addAttachment(name: String, attachment: DocumentAttachment): Unit = { - if (attachments.isEmpty) - attachments = Some(new mutable.HashMap[String, DocumentAttachment]()) - attachments.get += name -> attachment - } +// def addAttachment(name: String, attachment: DocumentAttachment): Unit = { +// if (attachments.isEmpty) +// attachments = Some(new mutable.HashMap[String, DocumentAttachment]()) +// attachments.get += name -> attachment +// } /** Retrieves the attachment with the given name */ def getAttachment(name: String): Option[DocumentAttachment] = attachments.flatMap(_.get(name)) @@ -96,14 +95,13 @@ class Document(val sentences: Array[Sentence]) extends Serializable { * The DCT will impacts how Sentence.norms are generated for DATE expressions * @param dct Document creation time */ - def setDCT(dct:String): Unit = documentCreationTime = Some(dct) +// def setDCT(dct:String): Unit = documentCreationTime = Some(dct) def getDCT: Option[String] = documentCreationTime def prettyPrint(pw: PrintWriter): Unit = { // let's print the sentence-level annotations - var sentenceCount = 0 - for (sentence <- sentences) { + sentences.zipWithIndex.foreach { case (sentence, sentenceCount) => pw.println("Sentence #" + sentenceCount + ":") pw.println("Tokens: " + sentence.words.zipWithIndex.mkString(" ")) pw.println("Start character offsets: " + sentence.startOffsets.mkString(" ")) @@ -157,7 +155,6 @@ class Document(val sentences: Array[Sentence]) extends Serializable { // on syntactic trees, including access to head 
phrases/words }) - sentenceCount += 1 pw.println("\n") } @@ -177,20 +174,18 @@ class Document(val sentences: Array[Sentence]) extends Serializable { }) } - def assimilate(document: Document, textOpt: Option[String]): Document = { - id = document.id - coreferenceChains = document.coreferenceChains - text = textOpt - attachments = document.attachments - documentCreationTime = document.documentCreationTime - this - } - // sentences are a val, so they must be initialized through the construction of a new Document. // Thereafter, the remaining values can be assimilated from the old document. The shortcut // is used so that subclasses don't have to duplicate almost everything in their copy. def copy(sentences: Array[Sentence] = sentences, textOpt: Option[String] = text): Document = { - new Document(sentences).assimilate(this, textOpt) + new Document( + sentences = sentences, // not this + id = this.id, + coreferenceChains = this.coreferenceChains, + text = textOpt, // not this + attachments = this.attachments, + documentCreationTime = this.documentCreationTime + ) } def offset(offset: Int): Document = @@ -202,20 +197,37 @@ class Document(val sentences: Array[Sentence]) extends Serializable { object Document { - def apply(sentences: Array[Sentence]): Document = new Document(sentences) + def apply(sentences: Array[Sentence]): Document = apply(sentences, text = None) + + def apply(sentences: Array[Sentence], text: Option[String]): Document = apply(id = None, sentences, coref = None, text) def apply(id: Option[String], sentences: Array[Sentence], coref: Option[CorefChains], text: Option[String]): Document = { - val d = Document(sentences) - d.id = id - d.coreferenceChains = coref - d.text = text - d + val document = new Document( + sentences, + id = id, + coreferenceChains = coref, + text = text + ) + + document } - /** Return a new Document with relevant fields copied from the given Document. */ - def apply (doc: Document): Document = - Document(doc.id, doc.sentences, doc.coreferenceChains, doc.text) + /** Return a new Document with some relevant fields copied from the given Document. 
*/ + def apply(doc: Document): Document = + apply(doc.id, doc.sentences, doc.coreferenceChains, doc.text) + + def apply(doc: Document, sentences: Array[Sentence]): Document = { + val newDocument = new Document( + sentences, + id = doc.id, + coreferenceChains = doc.coreferenceChains, + text = doc.text, + attachments = doc.attachments, + documentCreationTime = doc.documentCreationTime + ) + newDocument + } } /** From 7543b9c2f4d84e2e5a41c24cc4423a6876d16b6d Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Wed, 14 May 2025 11:02:19 -0700 Subject: [PATCH 04/42] Pass the tests --- .../processors/apps/ColumnsToDocument.scala | 27 +-- .../apps/CommandLineInterface.scala | 2 +- .../apps/NumericEntityRecognizerShell.scala | 4 +- build.sbt | 2 +- .../org/clulab/numeric/EvalTimeNorm.scala | 2 +- .../numeric/NumericEntityRecognizer.scala | 42 ++--- .../scala/org/clulab/numeric/package.scala | 74 ++++---- .../org/clulab/processors/Document.scala | 25 +-- .../org/clulab/processors/Processor.scala | 40 +++-- .../org/clulab/processors/Sentence.scala | 169 ++++++++++++------ .../clulab/processors/clu/DocumentMaker.scala | 15 +- .../org/clulab/processors/clu/Veil.scala | 28 +-- .../serialization/DocumentSerializer.scala | 24 +-- .../serialization/json/JSONSerializer.scala | 66 ++++--- .../clulab/utils/ToEnhancedDependencies.scala | 78 ++++---- .../org/clulab/utils/TestHash.scala | 6 +- .../TestNumericEntityRecognition.scala | 2 +- .../clulab/numeric/TestSeasonNormalizer.scala | 4 +- .../clulab/processors/TestLexiconNER.scala | 4 +- .../org/clulab/processors/TestProcessor.scala | 91 +++++----- .../json/TestJSONSerializer.scala | 15 +- .../struct/TestDocumentAttachment.scala | 56 +++--- .../org/clulab/utils/TestFindHeads.scala | 6 +- 23 files changed, 428 insertions(+), 354 deletions(-) diff --git a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala index 2789eb0d1..8822ba993 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala @@ -138,8 +138,10 @@ object ColumnsToDocument { } } if(words.nonEmpty) { - val s = new Sentence(words.toArray, startOffsets.toArray, endOffsets.toArray, words.toArray) - s.tags = Some(labels.toArray) + val s = new Sentence( + words.toArray, startOffsets.toArray, endOffsets.toArray, words.toArray, + tags = Some(labels.toArray) + ) sentences += s } logger.debug(s"Loaded ${sentences.size} sentences.") @@ -151,26 +153,5 @@ object ColumnsToDocument { } - def setTags(s:Sentence, tags:Array[String]): Unit = { - s.tags = Some(tags) - } - - def setChunks(s:Sentence, chunks:Array[String]): Unit = { - s.chunks = Some(chunks) - } - - def setEntities(s:Sentence, entities:Array[String]): Unit = { - s.entities = Some(entities) - } - - def annotateLemmas(doc:Document): Unit = { - proc.lemmatize(doc) // some features use lemmas, which are not available in the CoNLL data - } - - def annotateLemmmaTags(doc:Document): Unit = { - proc.lemmatize(doc) - proc.tagPartsOfSpeech(doc) - } - def annotateNil(doc:Document): Unit = {} } diff --git a/apps/src/main/scala/org/clulab/processors/apps/CommandLineInterface.scala b/apps/src/main/scala/org/clulab/processors/apps/CommandLineInterface.scala index 022a59cc0..0e84c662d 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/CommandLineInterface.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/CommandLineInterface.scala @@ -36,7 +36,7 @@ object 
CommandLineInterface extends App { } else if(props.containsKey(TOKENS)) { // one sentence per line; sentences are tokenized val sents = FileUtils.getLinesFromFile(props.getProperty(INPUT)) - val tokenizedSents = sents.map(_.split("\\s+").toIterable) + val tokenizedSents = sents.map(_.split("\\s+").toSeq) proc.annotateFromTokens(tokenizedSents) } else { // assume raw text diff --git a/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala b/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala index 0009b0f04..47225a369 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala @@ -1,6 +1,6 @@ package org.clulab.processors.apps -import org.clulab.numeric.{displayMentions, setLabelsAndNorms} +import org.clulab.numeric.{displayMentions, mkLabelsAndNorms} import org.clulab.processors.clu.BalaurProcessor import org.clulab.utils.ReloadableProcessor import org.clulab.utils.ReloadableShell @@ -37,7 +37,7 @@ class NumericEntityRecognizerShell(ruleDirOpt: Option[String]) extends Reloadabl val doc = proc.get.annotate(text) val mentions = proc.get.numericEntityRecognizerOpt.map(_.extractFrom(doc)).getOrElse(Seq.empty) - setLabelsAndNorms(doc, mentions) + mkLabelsAndNorms(doc, mentions) displayMentions(mentions, doc) } diff --git a/build.sbt b/build.sbt index 0bc3a2e3c..1ee3d3420 100644 --- a/build.sbt +++ b/build.sbt @@ -18,7 +18,7 @@ val scala37 = "3.7.0" // up to 3.7.0 // Scala33: This is the first official LTS, but hold off until necessary. val scala3 = scala33 -ThisBuild / crossScalaVersions := Seq(scala213, scala3) +ThisBuild / crossScalaVersions := Seq(scala213) // , scala3) ThisBuild / scalaVersion := crossScalaVersions.value.head lazy val root = (project in file(".")) diff --git a/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala b/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala index add58c14d..bf5190dba 100644 --- a/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala +++ b/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala @@ -34,7 +34,7 @@ object EvalTimeNorm { } val doc = proc.annotate(docText) val mentions = ner.extractFrom(doc) - setLabelsAndNorms(doc, mentions) + mkLabelsAndNorms(doc, mentions) val prediction = mentions.collect{ case m: Norm if m.neLabel.equals("DATE") || m.neLabel.equals("DATE-RANGE") => (m.startOffset.toString, m.endOffset.toString, m.neNorm) diff --git a/library/src/main/scala/org/clulab/numeric/NumericEntityRecognizer.scala b/library/src/main/scala/org/clulab/numeric/NumericEntityRecognizer.scala index 3d5976a7d..73cc1940d 100644 --- a/library/src/main/scala/org/clulab/numeric/NumericEntityRecognizer.scala +++ b/library/src/main/scala/org/clulab/numeric/NumericEntityRecognizer.scala @@ -19,41 +19,29 @@ class NumericEntityRecognizer protected (val lexiconNer: LexiconNER, val actions new NumericEntityRecognizer(lexiconNer, actions, extractorEngine) } - /** Matches the lexicon NER on this document, setting the `entities` field */ - def matchLexiconNer(document: Document): Seq[Option[Array[String]]] = { - val originalEntities = new ArrayBuffer[Option[Array[String]]]() - - for(sent <- document.sentences) { - originalEntities += sent.entities - - val labels = lexiconNer.find(sent) - // this needs to happen in place, otherwise Odin does not see these labels - // we will restore the original Sentence.entities at the end in `extractFrom` - sent.entities = 
Some(labels) - // println(s"ENTITIES: ${sent.entities.get.mkString(" ")}") - } - - originalEntities - } - /** * Entry point for numeric entity recognition * @param doc Input document * @return sets in place the sequence of NER labels and sequence of NER norms (using the TempEval-2 notation) */ - def extractFrom(doc:Document): Seq[Mention] = { - // dictionaries - val originalEntities = matchLexiconNer(doc) - // grammars - var mentions = extractor.extractFrom(doc) + def extractFrom(doc: Document): Seq[Mention] = { + val newSentences = doc.sentences.map { sentence => + val newEntities = lexiconNer.find(sentence) + + sentence.copy(entities = Some(newEntities)) + } + val newDocument = doc.copy(sentences = newSentences) + val mentions = { + val dirtyMentions = extractor.extractFrom(newDocument) + val cleanMentions = actions.cleanupAction(dirtyMentions) - // restore the original entities - for(i <- originalEntities.indices) { - doc.sentences(i).entities = originalEntities(i) + cleanMentions } - // global actions *after* all grammars are done - actions.cleanupAction(mentions) + // These mentions will have doc pointing to the newDocument, + // but sentence will be the index into the new sentences and + // will be valid for the original doc. + mentions } } diff --git a/library/src/main/scala/org/clulab/numeric/package.scala b/library/src/main/scala/org/clulab/numeric/package.scala index 70559d0f9..d41438014 100644 --- a/library/src/main/scala/org/clulab/numeric/package.scala +++ b/library/src/main/scala/org/clulab/numeric/package.scala @@ -70,58 +70,62 @@ package object numeric { * @param doc This document is modified in place * @param mentions The numeric mentions previously extracted */ - def setLabelsAndNorms(doc: Document, mentions: Seq[Mention]): Unit = { - // - // initialize entities and norms - // - for (sentence <- doc.sentences) { - sentence.entities = sentence.entities.orElse(Some(Array.fill(sentence.size)("O"))) - sentence.norms = sentence.norms .orElse(Some(Array.fill(sentence.size)(""))) + def mkLabelsAndNorms(doc: Document, mentions: Seq[Mention]): (Array[Array[String]], Array[Array[String]]) = { + val allEntities = doc.sentences.map { sentence => + sentence.entities.getOrElse(Array.fill(sentence.size)("O")) } + val allNorms = doc.sentences.map { sentence => + sentence.norms.getOrElse(Array.fill(sentence.size)("")) + } + + for (mention <- mentions) { + if (NumericActions.isNumeric(mention) && mention.isInstanceOf[Norm]) { + val sentenceIndex = mention.sentence + val entities = allEntities(sentenceIndex) + val norms = allNorms(sentenceIndex) - // - // convert numeric entities to entity labels and norms - // - for(mention <- mentions) { - if(NumericActions.isNumeric(mention) && mention.isInstanceOf[Norm]) { - addLabelsAndNorms(mention.asInstanceOf[Norm], mention.sentenceObj, mention.tokenInterval) + addLabelsAndNorms(mention.asInstanceOf[Norm], entities, norms, mention.tokenInterval) + removeOneEntityBeforeAnother(entities, norms, "B-LOC", "MEASUREMENT-LENGTH") } } - removeOneEntityBeforeAnother(doc, "B-LOC", "MEASUREMENT-LENGTH") + + (allEntities, allNorms) } - def removeOneEntityBeforeAnother(doc: Document, triggerEntity: String, toBeRemovedShortened: String): Unit = { + def removeOneEntityBeforeAnother(entities: Array[String], norms: Array[String], triggerEntity: String, toBeRemovedShortened: String): Unit = { // removes entities and norms for unallowable entity sequences, e.g., don't extract 'in' as 'inch' before B-LOC in '... 
Sahal 108 in Senegal'
   // toBeRemovedShortened is entity without BIO-
-    for(s <- doc.sentences) {
-      val zippedEntities = s.entities.get.zipWithIndex
-      for ((e, i) <- zippedEntities) {
-        if (i > 0 && e == triggerEntity && s.entities.get(i-1).endsWith(toBeRemovedShortened)) {
-          s.entities.get(i - 1) = "O"
-          // go in reverse replacing indices and norms in the immediate preceding mention
-          breakable {
-            for ((en, j) <- zippedEntities.slice(0, i ).reverse) {
-              if (en.endsWith(toBeRemovedShortened)) {
-                s.entities.get(j) = "O"
-                s.norms.get(j) = ""
-              } else break()
-            }
+    val zippedEntities = entities.zipWithIndex
+
+    zippedEntities.foreach { case (outerEntity, outerIndex) =>
+      if (outerIndex > 0 && outerEntity == triggerEntity && entities(outerIndex - 1).endsWith(toBeRemovedShortened)) {
+        // Go in reverse replacing indices and norms in the immediate preceding mention.
+        breakable { // TODO: rewrite
+          for ((innerEntity, innerIndex) <- zippedEntities.slice(0, outerIndex).reverse) {
+            if (innerEntity.endsWith(toBeRemovedShortened)) {
+              entities(innerIndex) = "O"
+              norms(innerIndex) = ""
+            } else break()
          }
        }
      }
    }
  }
 
-  private def addLabelsAndNorms(m: Norm, s: Sentence, tokenInt: Interval): Unit = {
-    var first = true
+  private def addLabelsAndNorms(m: Norm, entities: Array[String], norms: Array[String], tokenInt: Interval): Unit = {
+    val label = m.neLabel
     val norm = m.neNorm
+
     // careful here: we may override some existing entities and norms
     // but, given that the numeric entity rules tend to be high precision, this is probably Ok...
-    for(i <- tokenInt.indices) {
-      val prefix = if(first) "B-" else "I-"
-      s.entities.get(i) = prefix + m.neLabel
-      s.norms.get(i) = norm
-      first = false
+    tokenInt.headOption.foreach { index =>
+      entities(index) = "B-" + label
+      norms(index) = norm
+    }
+    tokenInt.tail.foreach { index =>
+      entities(index) = "I-" + label
+      norms(index) = norm
    }
  }
}
diff --git a/library/src/main/scala/org/clulab/processors/Document.scala b/library/src/main/scala/org/clulab/processors/Document.scala
index ab5dbbba6..f8d226c56 100644
--- a/library/src/main/scala/org/clulab/processors/Document.scala
+++ b/library/src/main/scala/org/clulab/processors/Document.scala
@@ -29,10 +29,17 @@ class Document(
   protected val documentCreationTime:Option[String] = None
 ) extends Serializable {
 
-  def copy(sentences: Array[Sentence]): Document = ???
+  def copy(
+    sentences: Array[Sentence] = sentences,
+    id: Option[String] = id,
+    coreferenceChains: Option[CorefChains] = coreferenceChains,
+    text: Option[String] = text,
+    attachments: Option[mutable.HashMap[String, DocumentAttachment]] = None,
+    documentCreationTime: Option[String] = documentCreationTime
+  ): Document = new Document(sentences, id, coreferenceChains, text, attachments, documentCreationTime)
 
   /** Clears any internal state potentially constructed by the annotators */
-  def clear(): Unit = { }
+  // def clear(): Unit = { }
 
   /**
    * Used to compare Documents.
@@ -174,20 +181,18 @@ class Document(
     })
   }
 
-  // sentences are a val, so they must be initialized through the construction of a new Document.
-  // Thereafter, the remaining values can be assimilated from the old document. The shortcut
-  // is used so that subclasses don't have to duplicate almost everything in their copy.
-  def copy(sentences: Array[Sentence] = sentences, textOpt: Option[String] = text): Document = {
-    new Document(
-      sentences = sentences, // not this
-      id = this.id,
-      coreferenceChains = this.coreferenceChains,
-      text = textOpt, // not this
-      attachments = this.attachments,
-      documentCreationTime = this.documentCreationTime
-    )
-  }
 
   def offset(offset: Int): Document =
     // If a subclass of Document constructs itself with an attachment or a documentCreationTime that
     // would be overwritten on the copy(), then it should provide its own copy() method(s).
diff --git a/library/src/main/scala/org/clulab/processors/Processor.scala b/library/src/main/scala/org/clulab/processors/Processor.scala
index 9528d613d..00d5fcdf1 100644
--- a/library/src/main/scala/org/clulab/processors/Processor.scala
+++ b/library/src/main/scala/org/clulab/processors/Processor.scala
@@ -2,6 +2,8 @@ package org.clulab.processors
 
 import org.clulab.processors.clu.BalaurProcessor
 
+import scala.collection.mutable
+
 /**
   * User: mihais
   * Date: 3/1/13
@@ -21,31 +23,37 @@ trait Processor {
     require(documents.length > 1)
     val headDocument = documents.head
     val tailDocuments = documents.tail
-    val combinedSentences = documents.flatMap(_.sentences).toArray
-    val combinedDocument = new Document(combinedSentences)
     val headId = headDocument.id
 
     require(tailDocuments.forall(_.id == headId))
-    combinedDocument.id = headId
-
-    require(combinedDocument.text.isEmpty)
-    combinedDocument.text = combinedTextOpt
-
+    val headDctOpt = headDocument.getDCT
+    require(documents.tail.forall(_.getDCT == headDctOpt))
     // Coreference chains involve Mentions that include references to documents. The Mentions are being
     // moved to a new Document and it would be infeasible to move the chains.
-    require(combinedDocument.coreferenceChains.isEmpty)
     require(documents.forall(_.coreferenceChains.isEmpty))
 
+    val attachments = mutable.HashMap[String, DocumentAttachment]()
+
     documents.foreach { document =>
      document.getAttachmentKeys.foreach { attachmentKey =>
-        require(combinedDocument.getAttachment(attachmentKey).forall(_ == document.getAttachment(attachmentKey).get))
-        combinedDocument.addAttachment(attachmentKey, document.getAttachment(attachmentKey).get)
+        val valueOpt = attachments.get(attachmentKey)
+        val isValid = valueOpt.forall(_ == document.getAttachment(attachmentKey).get)
+
+        require(isValid, "The attachments cannot contradict each other.")
+        attachments(attachmentKey) = document.getAttachment(attachmentKey).get
      }
    }
 
-    val headDctOpt = headDocument.getDCT
-    require(documents.tail.forall(_.getDCT == headDctOpt))
-    headDctOpt.foreach(combinedDocument.setDCT)
+    val combinedSentences = documents.flatMap(_.sentences).toArray
+    val combinedDocument = new Document(
+      sentences = combinedSentences,
+      id = headId,
+      coreferenceChains = None,
+      text = combinedTextOpt,
+      attachments = Some(attachments),
+      documentCreationTime = headDctOpt
+    )
+
    combinedDocument
  }
 
@@ -94,10 +102,10 @@ trait Processor {
   // (2) It is more efficient during annotate() where all the possible operations are chained.
 
   /** Part of speech tagging; modifies the document in place. */
-  def tagPartsOfSpeech (doc:Document): Unit
+  def tagPartsOfSpeech(doc: Document): Unit
 
-  /** Lematization; modifies the document in place. */
-  def lemmatize (doc:Document): Unit
+  /** Lemmatization; returns the lemmas for the given words. */
+  def lemmatize(words: Array[String]): Array[String]
 
   /** Named Entity Recognition; modifies the document in place.
*/ def recognizeNamedEntities (doc:Document): Unit diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala b/library/src/main/scala/org/clulab/processors/Sentence.scala index 0465226c1..7158efecb 100644 --- a/library/src/main/scala/org/clulab/processors/Sentence.scala +++ b/library/src/main/scala/org/clulab/processors/Sentence.scala @@ -8,6 +8,66 @@ import org.clulab.utils.SeqUtils import scala.collection.mutable +case class WordTokenization(raw: String, startOffset: Int, endOffset: Int, word: String) + +// Is this SentenceTokenization, ArraySeq of WordTokenization +// Tokenation, Tokse +// Parseation, Parse +case class Tokenization( + raw: Array[String], + startOffsets: Array[Int], + endOffsets: Array[Int], + words: Array[String] +) { + + def reverse: Tokenization = { + Tokenization( + raw = raw.reverse, + startOffsets = startOffsets.reverse, + endOffsets = endOffsets.reverse, + words = words.reverse + ) + } +} + +// These are by the word ones and then there are relationships between words. +// So parse, might not be a thing that is per word. +//case class WordParse(tag: String, lemma: String, entity: String, norm: String, chunk: String) + +//case class SentenceParse(tags: Array[String], cyntacticTree, graphs, relations) + +// Again is this SentenceParse +case class Parse( + tags: Option[Array[String]] = None, + /** Lemmas */ + lemmas: Option[Array[String]] = None, + /** NE labels */ + entities: Option[Array[String]] = None, + /** Normalized values of named/numeric entities, such as dates */ + norms: Option[Array[String]] = None, + /** Shallow parsing labels */ + chunks: Option[Array[String]] = None, + /** Constituent tree of this sentence; includes head words */ + syntacticTree: Option[Tree] = None, + /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ + graphs: GraphMap = GraphMap(), + /** Relation triples from OpenIE */ + relations:Option[Array[RelationTriple]] = None +) { + + def reverse: Parse = { + Parse( + tags = tags.map(_.reverse), + lemmas = lemmas.map(_.reverse), + entities = entities.map(_.reverse), + norms = norms.map(_.reverse), + chunks = chunks.map(_.reverse) + // TODO: reverse syntacticTree, graphs, and relations! 
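+      // One possible approach for the TODO above (a sketch only, not implemented in this patch):
+      // a DirectedGraph could be reversed by remapping every edge (source, destination, relation)
+      // to (n - 1 - source, n - 1 - destination, relation) and every root r to n - 1 - r,
+      // where n is the number of words in the sentence.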
+ ) + } +} + + /** Stores the annotations for a single sentence */ class Sentence( /** Raw tokens in this sentence; these MUST match the original text */ @@ -24,25 +84,33 @@ class Sentence( * However, the number of raw tokens MUST always equal the number of words, so if the exact text must be recovered, * please use the raw tokens with the same positions */ - val words: Array[String]) extends Serializable { + val words: Array[String], /** POS tags for words */ - var tags: Option[Array[String]] = None + val tags: Option[Array[String]] = None, /** Lemmas */ - var lemmas: Option[Array[String]] = None + val lemmas: Option[Array[String]] = None, /** NE labels */ - var entities: Option[Array[String]] = None + val entities: Option[Array[String]] = None, /** Normalized values of named/numeric entities, such as dates */ - var norms: Option[Array[String]] = None + val norms: Option[Array[String]] = None, /** Shallow parsing labels */ - var chunks: Option[Array[String]] = None + val chunks: Option[Array[String]] = None, /** Constituent tree of this sentence; includes head words */ - var syntacticTree: Option[Tree] = None + val syntacticTree: Option[Tree] = None, /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ - var graphs: GraphMap = GraphMap() + val graphs: GraphMap = GraphMap(), /** Relation triples from OpenIE */ - var relations:Option[Array[RelationTriple]] = None + val relations:Option[Array[RelationTriple]] = None +) extends Serializable { + def getTokenization: Tokenization = { + Tokenization(raw, startOffsets, endOffsets, words) + } + + def getParse: Parse = { + Parse(tags, lemmas, entities, norms, chunks, syntacticTree, graphs, relations) + } def size:Int = raw.length @@ -150,42 +218,47 @@ class Sentence( } /** Reverts the current sentence */ - def revert():Sentence = { - val reverted = new Sentence( - SeqUtils.revert(raw).toArray, - SeqUtils.revert(startOffsets).toArray, - SeqUtils.revert(endOffsets).toArray, - SeqUtils.revert(words).toArray) - if(tags.nonEmpty) - reverted.tags = Some(SeqUtils.revert(tags.get).toArray) - if(lemmas.nonEmpty) - reverted.lemmas = Some(SeqUtils.revert(lemmas.get).toArray) - if(entities.nonEmpty) - reverted.entities = Some(SeqUtils.revert(entities.get).toArray) - if(norms.nonEmpty) - reverted.norms = Some(SeqUtils.revert(norms.get).toArray) - if(chunks.nonEmpty) - reverted.chunks = Some(SeqUtils.revert(chunks.get).toArray) + def revert(): Sentence = { + val reversedTokenization = this.getTokenization.reverse + val reversedParse = this.getParse.reverse + val reversedSentence = Sentence( + reversedTokenization.raw, + reversedTokenization.startOffsets, + reversedTokenization.endOffsets, + reversedTokenization.words + ) + // TODO: Make this work +// reversedSentence.tags = reversedParse.tags +// reversedSentence.lemmas = reversedParse.lemmas +// reversedSentence.entities = reversedParse.entities +// reversedSentence.norms = reversedParse.norms +// reversedSentence.chunks = reversedParse.chunks // TODO: revert syntacticTree and graphs! 
- reverted - } - - def assimilate(sentence: Sentence): Sentence = { - tags = sentence.tags - lemmas = sentence.lemmas - entities = sentence.entities - norms = sentence.norms - chunks = sentence.chunks - syntacticTree = sentence.syntacticTree - graphs = sentence.graphs - relations = sentence.relations - this + reversedSentence } - def copy(raw: Array[String] = raw, startOffsets: Array[Int] = startOffsets, endOffsets: Array[Int] = endOffsets, words: Array[String] = words): Sentence = - new Sentence(raw, startOffsets, endOffsets, words).assimilate(this) + // TODO + def copy( + raw: Array[String] = raw, + startOffsets: Array[Int] = startOffsets, + endOffsets: Array[Int] = endOffsets, + words: Array[String] = words, + + tags: Option[Array[String]] = tags, + lemmas: Option[Array[String]] = lemmas, + entities: Option[Array[String]] = entities, + norms: Option[Array[String]] = norms, + chunks: Option[Array[String]] = chunks, + syntacticTree: Option[Tree] = syntacticTree, + graphs: GraphMap = graphs, + relations: Option[Array[RelationTriple]] = relations + ): Sentence = + new Sentence( + raw, startOffsets, endOffsets, words, + tags, lemmas, entities, norms, chunks, syntacticTree, graphs, relations + ) def offset(offset: Int): Sentence = { if (offset == 0) this @@ -227,17 +300,9 @@ object Sentence { deps: GraphMap, relations: Option[Array[RelationTriple]] ): Sentence = { - val s = Sentence(raw, startOffsets, endOffsets, words) - // update annotations - s.tags = tags - s.lemmas = lemmas - s.entities = entities - s.norms = norms - s.chunks = chunks - s.syntacticTree = tree - s.graphs = deps - s.relations = relations - s + new Sentence( + raw, startOffsets, endOffsets, words, + tags, lemmas, entities, norms, chunks, tree, deps, relations + ) } - } \ No newline at end of file diff --git a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala index 92168b4bd..bde915cfc 100644 --- a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala +++ b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala @@ -17,8 +17,9 @@ object DocumentMaker { text:String, keepText:Boolean): Document = { val sents = tokenizer.tokenize(text) - val doc = new Document(sents) - if(keepText) doc.text = Some(text) + val textOpt = Option.when(keepText)(text) + val doc = Document(sents, textOpt) + doc } @@ -46,8 +47,9 @@ object DocumentMaker { //println("End offsets: " + sent.endOffsets.mkString(", ")) sents += sent } - val doc = new Document(sents.toArray) - if(keepText) doc.text = Some(sentences.mkString(mkSep(charactersBetweenSentences))) + val textOpt = Option.when(keepText)(sentences.mkString(mkSep(charactersBetweenSentences))) + val doc = Document(sents.toArray, textOpt) + doc } @@ -77,8 +79,9 @@ object DocumentMaker { } } - val doc = new Document(sents.toArray) - if(keepText) doc.text = Some(text.toString) + val textOpt = Option.when(keepText)(text.toString) + val doc = Document(sents.toArray, textOpt) + doc } diff --git a/library/src/main/scala/org/clulab/processors/clu/Veil.scala b/library/src/main/scala/org/clulab/processors/clu/Veil.scala index 6e4494ca4..481abf2b9 100644 --- a/library/src/main/scala/org/clulab/processors/clu/Veil.scala +++ b/library/src/main/scala/org/clulab/processors/clu/Veil.scala @@ -48,7 +48,7 @@ class VeiledText(originalText: String, veiledLetters: Seq[Range]) extends Veil { } protected def unveilDocument(veiledDocument: Document): Document = { - val unveiledDocument = 
veiledDocument.copy(textOpt = Some(originalText)) + val unveiledDocument = veiledDocument.copy(text = Some(originalText)) unveiledDocument } @@ -164,21 +164,27 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) val unveiledStartOffsets = originalSentence.startOffsets val unveiledEndOffsets = originalSentence.endOffsets val unveiledWords = originalSentence.words + val unveiledSentence = veiledSentence.copy(unveiledRaw, unveiledStartOffsets, unveiledEndOffsets, unveiledWords) def unveilStringArray(veiledArrayOpt: Option[Array[String]], veil: String): Option[Array[String]] = this.unveilStringArray(veiledArrayOpt, sentenceIndex, veil) - unveiledSentence.tags = unveilStringArray(unveiledSentence.tags, Veil.veiledTag) - unveiledSentence.lemmas = unveilStringArray(unveiledSentence.lemmas, Veil.veiledLemma) - unveiledSentence.entities = unveilStringArray(unveiledSentence.entities, Veil.veiledEntity) - unveiledSentence.norms = unveilStringArray(unveiledSentence.norms, Veil.veiledNorm) - unveiledSentence.chunks = unveilStringArray(unveiledSentence.chunks, Veil.veiledChunk) - - unveiledSentence.syntacticTree = unveilSyntacticTree(unveiledSentence.syntacticTree) - unveiledSentence.graphs = unveilGraphs(unveiledSentence.graphs, sentenceIndex) - unveiledSentence.relations = unveilRelations(unveiledSentence.relations) - unveiledSentence + val tags = unveilStringArray(unveiledSentence.tags, Veil.veiledTag) + val lemmas = unveilStringArray(unveiledSentence.lemmas, Veil.veiledLemma) + val entities = unveilStringArray(unveiledSentence.entities, Veil.veiledEntity) + val norms = unveilStringArray(unveiledSentence.norms, Veil.veiledNorm) + val chunks = unveilStringArray(unveiledSentence.chunks, Veil.veiledChunk) + + val syntacticTree = unveilSyntacticTree(unveiledSentence.syntacticTree) + val graphs = unveilGraphs(unveiledSentence.graphs, sentenceIndex) + val relations = unveilRelations(unveiledSentence.relations) + + val newSentence = Sentence( + unveiledSentence.raw, unveiledSentence.startOffsets, unveiledSentence.endOffsets, unveiledSentence.words, + tags, lemmas, entities, norms, chunks, syntacticTree, graphs, relations + ) + newSentence } protected def unveilDocument(veiledDocument: Document): Document = { diff --git a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala index 8016375ee..7f3103591 100644 --- a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala +++ b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala @@ -102,13 +102,9 @@ class DocumentSerializer extends Logging { assert(bits(0) == END_OF_DOCUMENT, s"END_OF_DOCUMENT expected, found ${bits(0)}") - val doc = Document(sents.toArray) - doc.coreferenceChains = coref - doc.text = text - - // TODO: Hack by Enrique to resolve the document object for the relations - for(sen <- doc.sentences){ + /* + val relationsOpt = for(sen <- sents){ sen.relations match { case Some(relations) => val newRelations = relations.map(r => RelationTriple(r.confidence, r.subjectInterval, r.relationInterval, r.objectInterval)) @@ -116,13 +112,21 @@ class DocumentSerializer extends Logging { case None => () } } + */ - namedDocumentAttachmentsOpt.foreach { namedDocumentAttachments => - namedDocumentAttachments.foreach { case (name: String, documentAttachment: DocumentAttachment) => - doc.addAttachment(name, documentAttachment) - } + val attachmentsOpt = namedDocumentAttachmentsOpt.map { 
namedDocumentAttachments =>
+      val attachments = mutable.HashMap[String, DocumentAttachment]()
+
+      attachments.addAll(namedDocumentAttachments)
+      attachments
+    }
+    val doc = new Document(
+      sentences = sents.toArray,
+      coreferenceChains = coref,
+      text = text,
+      attachments = attachmentsOpt
+    )
 
    doc
  }
diff --git a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
index 181400b2d..1ae8f456f 100644
--- a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
+++ b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
@@ -1,8 +1,7 @@
 package org.clulab.serialization.json
 
 import java.io.File
-import org.clulab.processors.DocumentAttachmentBuilderFromJson
-import org.clulab.processors.{Document, Sentence}
+import org.clulab.processors.{Document, DocumentAttachment, DocumentAttachmentBuilderFromJson, Parse, Sentence}
 import org.clulab.struct.Edge
 import org.clulab.struct.{DirectedGraph, GraphMap}
 import org.clulab.utils.FileUtils
@@ -12,6 +11,8 @@
 import org.json4s._
 import org.json4s.jackson.JsonMethods._
 import org.json4s.jackson.prettyJson
+
+import scala.collection.mutable
 
 /** JSON serialization utilities */
 // This annotation is to avoid "Compiler synthesis of Manifest and OptManifest is deprecated".
@...
 object JSONSerializer {
 
   def jsonAST(f: File): JValue = jsonAST(FileUtils.getTextFromFile(f))
 
-  protected def addDocumentAttachments(doc: Document, jValue: JValue): Unit = {
+  protected def getDocumentAttachments(jValue: JValue): Option[mutable.HashMap[String, DocumentAttachment]] = {
     // See also DocumentSerializer for text version of nearly the same thing.
     (jValue \ DOCUMENT_ATTACHMENTS_KEY) match {
       case jObject: JObject =>
+        val attachments = new mutable.HashMap[String, DocumentAttachment]()
         val keys = jObject.values.keys
         keys.foreach { (key: String) =>
           (jObject \ key) match {
            case jObject: JObject =>
              val documentAttachmentBuilderClassName = (jObject \ DOCUMENT_ATTACHMENTS_BUILDER_KEY).extract[String]
              val documentAttachmentBuilder = obj.asInstanceOf[DocumentAttachmentBuilderFromJson]
              val value = (jObject \ DOCUMENT_ATTACHMENTS_VALUE_KEY)
              val documentAttachment = documentAttachmentBuilder.mkDocumentAttachment(value)
-              doc.addAttachment(key, documentAttachment)
+              attachments(key) = documentAttachment
            case jValue: JValue =>
              val text = prettyJson(jValue)
              throw new RuntimeException(s"ERROR: While deserializing document attachments expected JObject but found this: $text")
            case null => // noop. It should never get here. (Famous last words.) Scala 3 prefers null over _.
} } + Some(attachments) case _ => // Leave documentAttachments as is: None + None } } def toDocument(json: JValue): Document = { // recover sentences val sentences = (json \ "sentences").asInstanceOf[JArray].arr.map(sjson => toSentence(sjson)).toArray + val id = getStringOption(json, "id") + val text = getStringOption(json, "text") // initialize document - val d = Document(sentences) - // update id - d.id = getStringOption(json, "id") - // update text - d.text = getStringOption(json, "text") - addDocumentAttachments(d, json) + val attachments = getDocumentAttachments(json) + val d = new Document( + id = id, + sentences = sentences, + coreferenceChains = None, + text = text, + attachments = attachments + ) + d } def toDocument(docHash: String, djson: JValue): Document = toDocument(djson \ docHash) @@ -73,20 +82,29 @@ object JSONSerializer { case contents => Some(contents.extract[Array[String]]) } - val s = json.extract[Sentence] - val preferredSize = s.words.length - // build dependencies - val graphs = (json \ "graphs").extract[JObject].obj.map { case (key, json) => - key -> toDirectedGraph(json, Some(preferredSize)) - }.toMap - s.graphs = GraphMap(graphs) - // build labels - s.tags = getLabels(json, "tags") - s.lemmas = getLabels(json, "lemmas") - s.entities = getLabels(json, "entities") - s.norms = getLabels(json, "norms") - s.chunks = getLabels(json, "chunks") - s + val tokenizedSentence = json.extract[Sentence] + + val tags = getLabels(json, "tags") + val lemmas = getLabels(json, "lemmas") + val entities = getLabels(json, "entities") + val norms = getLabels(json, "norms") + val chunks = getLabels(json, "chunks") + val syntacticTree = None // TODO: Are these not serialized? + val graphs = { + val preferredSize = tokenizedSentence.words.length + val graphs = (json \ "graphs").extract[JObject].obj.map { case (key, json) => + key -> toDirectedGraph(json, Some(preferredSize)) + }.toMap + + GraphMap(graphs) + } + val relations = None // TODO: Are these not serialized? 
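+    // Note on the two TODOs above: the removed implementation did not restore syntacticTree or
+    // relations from JSON either, so leaving them None preserves the existing deserialization behavior.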
+    val parsedSentence = Sentence(
+      tokenizedSentence.raw, tokenizedSentence.startOffsets, tokenizedSentence.endOffsets, tokenizedSentence.words,
+      tags, lemmas, entities, norms, chunks, syntacticTree, graphs, relations
+    )
+
+    parsedSentence
   }

   def toDirectedGraph(json: JValue, preferredSizeOpt: Option[Int] = None): DirectedGraph[String] = {
diff --git a/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala b/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala
index 3c19d2c1d..63eab7913 100644
--- a/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala
+++ b/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala
@@ -23,29 +23,29 @@
 import scala.collection.mutable.{ArrayBuffer, ListBuffer}

 object ToEnhancedDependencies {
   type EdgeSpec = (Int, Int, String)

-  def generateStanfordEnhancedDependencies(sentence:Sentence, dg:DirectedGraph[String]): DirectedGraph[String] = {
+  def generateStanfordEnhancedDependencies(words: Array[String], tags: Array[String], dg:DirectedGraph[String]): DirectedGraph[String] = {
     val dgi = dg.toDirectedGraphIndex()
-    collapsePrepositionsStanford(sentence, dgi)
+    collapsePrepositionsStanford(words, dgi)
     raiseSubjects(dgi)
-    pushSubjectsObjectsInsideRelativeClauses(sentence, dgi, universal = false)
-    propagateSubjectsAndObjectsInConjVerbs(sentence, dgi, universal = false)
-    propagateConjSubjectsAndObjects(sentence, dgi)
-    dgi.toDirectedGraph(Some(sentence.size))
+    pushSubjectsObjectsInsideRelativeClauses(tags, dgi, universal = false)
+    propagateSubjectsAndObjectsInConjVerbs(tags, dgi, universal = false)
+    propagateConjSubjectsAndObjects(tags, dgi)
+    dgi.toDirectedGraph(Some(words.length))
   }

-  def generateUniversalEnhancedDependencies(sentence:Sentence, dg:DirectedGraph[String]): DirectedGraph[String] = {
+  def generateUniversalEnhancedDependencies(words: Array[String], lemmas: Array[String], tags: Array[String], dg: DirectedGraph[String]): DirectedGraph[String] = {
     val dgi = dg.toDirectedGraphIndex()
-    collapseMWEs(sentence, dgi)
-    val collapsedNmods = collapsePrepositionsUniversal(sentence, dgi)
+    collapseMWEs(lemmas, tags, dgi)
+    val collapsedNmods = collapsePrepositionsUniversal(words, lemmas, tags, dgi)
     replicateCollapsedNmods(collapsedNmods, dgi)
     raiseSubjects(dgi)
-    pushSubjectsObjectsInsideRelativeClauses(sentence, dgi, universal = true)
-    propagateSubjectsAndObjectsInConjVerbs(sentence, dgi, universal = true)
-    propagateConjSubjectsAndObjects(sentence, dgi)
+    pushSubjectsObjectsInsideRelativeClauses(tags, dgi, universal = true)
+    propagateSubjectsAndObjectsInConjVerbs(tags, dgi, universal = true) // requires tags
+    propagateConjSubjectsAndObjects(tags, dgi)
     mergeNsubjXcomp(dgi)
-    replicateCopulativeSubjects(sentence, dgi)
-    expandConj(sentence, dgi) // this must be last because several of the above methods expect "conj" labels
-    dgi.toDirectedGraph(Some(sentence.size))
+    replicateCopulativeSubjects(dgi)
+    expandConj(words, dgi) // this must be last because several of the above methods expect "conj" labels
+    dgi.toDirectedGraph(Some(words.length))
   }

   /**
@@ -66,7 +66,7 @@
    * Replicates copulative subjects across conjunctions
    * It is difficult and expensive => nsubj from 2 to 0 and from 4 to 0
    */
-  def replicateCopulativeSubjects(sentence: Sentence, dgi: DirectedGraphIndex[String]): Unit = {
+  def replicateCopulativeSubjects(dgi: DirectedGraphIndex[String]): Unit = {
     val nsubjs = dgi.findByName("nsubj")
     for(nsubj <- nsubjs) {
       val cops = dgi.findByHeadAndName(nsubj.source, "cop")
@@ -102,13 +102,13 @@
    * @param sentence
    * @param dgi
    */
-  def expandConj(sentence: Sentence, dgi: DirectedGraphIndex[String]): Unit = {
+  def expandConj(words: Array[String], dgi: DirectedGraphIndex[String]): Unit = {
     val toRemove = new ListBuffer[Edge[String]]
     val conjs = dgi.findByName("conj")
     for (conj <- conjs) {
       var shouldRemove = false
       for(cc <- dgi.findByName("cc").filter(_.source == conj.source)) {
-        val ccWord = sentence.words(cc.destination).toLowerCase()
+        val ccWord = words(cc.destination).toLowerCase()
         dgi.addEdge(conj.source, conj.destination, s"conj_$ccWord")
         shouldRemove = true
       }
@@ -125,12 +125,12 @@
    * @param sentence The sentence to operate on
    * @param dgi The directed graph of collapsed dependencies at this stage
    */
-  def collapsePrepositionsStanford(sentence:Sentence, dgi:DirectedGraphIndex[String]): Unit = {
+  def collapsePrepositionsStanford(words: Array[String], dgi:DirectedGraphIndex[String]): Unit = {
     val toRemove = new ListBuffer[Edge[String]]
     val preps = dgi.findByName("prep")
     for(prep <- preps) {
       toRemove += prep
-      val word = sentence.words(prep.destination)
+      val word = words(prep.destination)
       for(pobj <- dgi.findByName("pobj").filter(_.source == prep.destination)) {
         dgi.addEdge(prep.source, pobj.destination, s"prep_$word")
         toRemove += pobj
@@ -140,12 +140,12 @@
   }

   def collapsePrepositionsUniversal(
-    sentence:Sentence,
+    words: Array[String], lemmas: Array[String], tags: Array[String],
     dgi:DirectedGraphIndex[String]): Seq[EdgeSpec] = {
     val collapsedNmods = new ArrayBuffer[EdgeSpec]()
-    collapsePrepositionsUniversalNmodCase(sentence, dgi, collapsedNmods)
-    collapsePrepositionsUniversalDueTo(sentence, dgi, collapsedNmods)
+    collapsePrepositionsUniversalNmodCase(words, dgi, collapsedNmods)
+    collapsePrepositionsUniversalDueTo(lemmas, tags, dgi, collapsedNmods)
     collapsedNmods
   }

@@ -156,7 +156,7 @@
    * @param dgi The directed graph of collapsed dependencies at this stage
    */
   def collapsePrepositionsUniversalNmodCase(
-    sentence:Sentence,
+    words: Array[String],
     dgi:DirectedGraphIndex[String],
     collapsedNmods: ArrayBuffer[EdgeSpec]): Unit = {
@@ -166,9 +166,9 @@
     for(prep <- preps) {
       toRemove += prep
       for(c <- dgi.findByName("case").filter(_.source == prep.destination)) {
-        val word = sentence.words(c.destination).toLowerCase()
+        val word = words(c.destination).toLowerCase()
         // find multi-word prepositions such as "such as"
-        val mwe = findMultiWord(word, c.destination, sentence, dgi)
+        val mwe = findMultiWord(word, c.destination, words, dgi)
         // TODO: add nmod:agent (if word == "by") and passive voice here?
        dgi.addEdge(prep.source, prep.destination, s"nmod_$mwe")
@@ -189,16 +189,15 @@
    * @param dgi The directed graph of collapsed dependencies at this stage
    */
   def collapsePrepositionsUniversalDueTo(
-    sentence:Sentence,
+    lemmas: Array[String], tags: Array[String],
     dgi:DirectedGraphIndex[String],
     collapsedNmods: ArrayBuffer[EdgeSpec]): Unit = {

-    val tags = sentence.tags.get
     val toRemove = new ListBuffer[Edge[String]]
     var shouldRemove = false
     val preps = dgi.findByName("mwe")
     for(prep <- preps) {
-      if(sentence.lemmas.get(prep.source) == "due" && sentence.lemmas.get(prep.destination) == "to") {
+      if(lemmas(prep.source) == "due" && lemmas(prep.destination) == "to") {
         // found a "due to" MWE
         for(leftDep <- dgi.findByModifier(prep.source)) {
           // found the dep from "famine" to "due"
@@ -235,15 +234,15 @@
    * @param dgi
    */
   def collapseMWEs(
-    sentence:Sentence,
+    lemmas: Array[String],
+    tags: Array[String],
     dgi:DirectedGraphIndex[String]): Unit = {

-    val lemmas = sentence.lemmas.get
-    val tags = sentence.tags.get
+    val size = lemmas.length
     val toRemove = new ListBuffer[Edge[String]]
     var shouldRemove = true

-    for(i <- 0 until sentence.size - 1) {
+    for(i <- 0 until size - 1) {
       if(lemmas(i) == "due" && lemmas(i + 1) == "to" && tags(i) == "IN") {
         val toHeads = dgi.findByModifier(i + 1)
         var found = false
@@ -262,7 +261,7 @@
     if(shouldRemove) remove(toRemove, dgi)
   }

-  def findMultiWord(first: String, firstPos: Int, sentence: Sentence, dgi:DirectedGraphIndex[String]): String = {
+  def findMultiWord(first: String, firstPos: Int, words: Array[String], dgi:DirectedGraphIndex[String]): String = {
     val buffer = new StringBuilder
     buffer.append(first)

@@ -273,7 +272,7 @@
       if(mods.isEmpty) {
         done = true
       } else {
-        val word = sentence.words(mods.head.destination).toLowerCase()
+        val word = words(mods.head.destination).toLowerCase()
         buffer.append("_")
         buffer.append(word)
         head = mods.head.destination
@@ -303,9 +302,8 @@
    * @param sentence The sentence to operate on
    * @param dgi The directed graph of collapsed dependencies at this stage
    */
-  def propagateSubjectsAndObjectsInConjVerbs(sentence:Sentence, dgi:DirectedGraphIndex[String], universal:Boolean): Unit = {
+  def propagateSubjectsAndObjectsInConjVerbs(tags: Array[String], dgi:DirectedGraphIndex[String], universal:Boolean): Unit = {
     val conjs = dgi.findByName("conj").sortBy(_.source)
-    val tags = sentence.tags.get
     for(conj <- conjs) {
       val left = math.min(conj.source, conj.destination)
       val right = math.max(conj.source, conj.destination)
@@ -387,9 +385,8 @@
    * @param sentence The sentence to operate on
    * @param dgi The directed graph of collapsed dependencies at this stage
    */
-  def propagateConjSubjectsAndObjects(sentence:Sentence, dgi:DirectedGraphIndex[String]): Unit = {
+  def propagateConjSubjectsAndObjects(tags: Array[String], dgi:DirectedGraphIndex[String]): Unit = {
     val conjs = dgi.findByName("conj").sortBy(_.source)
-    val tags = sentence.tags.get
     for(conj <- conjs) {
       val left = math.min(conj.source, conj.destination)
       val right = math.max(conj.source, conj.destination)
@@ -424,11 +421,10 @@
    * @param sentence The sentence to operate on
    * @param dgi The directed graph of collapsed dependencies at this stage
    */
-  def pushSubjectsObjectsInsideRelativeClauses(sentence:Sentence, dgi:DirectedGraphIndex[String], universal:Boolean): Unit = {
+  def pushSubjectsObjectsInsideRelativeClauses(tags: Array[String], dgi:DirectedGraphIndex[String], universal:Boolean): Unit = {
     val rels =
       if(universal) dgi.findByName("acl:relcl")
       else dgi.findByName("rcmod")
-    val tags = sentence.tags.get

     for(rel <- rels) {
       val head = rel.source
diff --git a/library/src/test/scala-2.13/org/clulab/utils/TestHash.scala b/library/src/test/scala-2.13/org/clulab/utils/TestHash.scala
index 857f10727..31e03d8ec 100644
--- a/library/src/test/scala-2.13/org/clulab/utils/TestHash.scala
+++ b/library/src/test/scala-2.13/org/clulab/utils/TestHash.scala
@@ -34,7 +34,8 @@ class TestHash extends Test {
   behavior of "Hash"

   it should "compute the expected equivalence hash for a Document" in {
-    val expectedHash = 1145238653
+    val expectedHash = -1029127286
+//  val expectedHash = 1145238653
     val actualHash = document.equivalenceHash

     actualHash should be (expectedHash)
@@ -56,7 +57,8 @@
   }

   it should "compute the expected equivalence hashes for Mentions" in {
-    val expectedHashes = Array(1317064233, 418554464, 269168883, 1021871359, 1657321605)
+    val expectedHashes = Array(-674187334, 1183699787, 391766831, -495035159, -2089326276)
+//  val expectedHashes = Array(1317064233, 418554464, 269168883, 1021871359, 1657321605)
     val actualHashes = allMentions.map(getEquivalenceHash)

     actualHashes should be (expectedHashes)
diff --git a/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala b/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala
index 2a9214736..864505e79 100644
--- a/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala
+++ b/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala
@@ -656,7 +656,7 @@
   def numericParse(sentence: String): (Array[String], Array[String], Array[String]) = {
     val doc = proc.annotate(sentence)
     val mentions = ner.extractFrom(doc)
-    setLabelsAndNorms(doc, mentions)
+    mkLabelsAndNorms(doc, mentions)

     // assume 1 sentence per doc
     val sent = doc.sentences.head
diff --git a/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala b/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala
index 2bad85f6e..d1f104f8d 100644
--- a/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala
+++ b/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala
@@ -15,9 +15,9 @@ class TestSeasonNormalizer extends Test {
   def mkEntitiesAndNorms(processor: BalaurProcessor, text: String): (Array[String], Array[String]) = {
     val document = processor.annotate(text)
-    val mentions = processor.extractNumericEntityMentions(document)
+    val mentions = processor.numericEntityRecognizerOpt.get.extractFrom(document)

-    setLabelsAndNorms(document, mentions)
+    mkLabelsAndNorms(document, mentions)
     (document.sentences.head.entities.get, document.sentences.head.norms.get)
   }
diff --git a/library/src/test/scala/org/clulab/processors/TestLexiconNER.scala b/library/src/test/scala/org/clulab/processors/TestLexiconNER.scala
index 4c2fa4c37..48115479c 100644
--- a/library/src/test/scala/org/clulab/processors/TestLexiconNER.scala
+++ b/library/src/test/scala/org/clulab/processors/TestLexiconNER.scala
@@ -24,8 +24,8 @@
 import scala.util.Using

 class TestLexiconNER extends CluTest {
   def mkSentence(text: String): Sentence = {
-    val doc = proc.mkDocument(text)
-    proc.annotate(doc)
+    val simpleDoc = proc.mkDocument(text)
+    val doc = proc.annotate(simpleDoc)
     doc.sentences.head
   }
diff --git a/library/src/test/scala/org/clulab/processors/TestProcessor.scala b/library/src/test/scala/org/clulab/processors/TestProcessor.scala
index 2f57e439f..e6f1e0d1b 100644
--- a/library/src/test/scala/org/clulab/processors/TestProcessor.scala
+++ b/library/src/test/scala/org/clulab/processors/TestProcessor.scala
@@ -9,7 +9,6 @@ class TestProcessor extends CluTest {
   "Processor" should "tokenize raw text correctly" in {
     val doc = proc.mkDocument("John Doe went to China. There, he visited Beijing.")
-    doc.clear()

     doc.sentences(0).words(0) should be ("John")
     doc.sentences(0).words(1) should be ("Doe")
@@ -40,8 +39,8 @@
   }

   it should "POS tag correctly" in {
-    val doc = proc.mkDocument("John Doe went to China. There, he visited Beijing.")
-    proc.annotate(doc)
+    val simpleDoc = proc.mkDocument("John Doe went to China. There, he visited Beijing.")
+    val doc = proc.annotate(simpleDoc)

     doc.sentences(0).tags.get(0) should be ("NNP")
     doc.sentences(0).tags.get(1) should be ("NNP")
@@ -59,17 +58,16 @@
   }

   it should "POS tag parentheses correctly" in {
-    val doc = proc.mkDocument("This is a test (of parentheses).")
-    proc.annotate(doc)
+    val simpleDoc = proc.mkDocument("This is a test (of parentheses).")
+    val doc = proc.annotate(simpleDoc)

     doc.sentences(0).tags.get(4) should be ("-LRB-")
     doc.sentences(0).tags.get(7) should be ("-RRB-")
   }

   it should "recognize syntactic chunks correctly" in {
-    val doc = proc.mkDocument("He reckons the current account deficit will narrow to only 1.8 billion.")
-    proc.annotate(doc)
-    doc.clear()
+    val simpleDoc = proc.mkDocument("He reckons the current account deficit will narrow to only 1.8 billion.")
+    val doc = proc.annotate(simpleDoc)

     doc.sentences(0).chunks.get(0) should be ("B-NP")
     doc.sentences(0).chunks.get(1) should be ("B-VP")
@@ -86,9 +84,8 @@
   }

   it should "lemmatize text correctly" in {
-    val doc = proc.mkDocument("John Doe went to the shops.")
-    proc.annotate(doc)
-    doc.clear()
+    val simpleDoc = proc.mkDocument("John Doe went to the shops.")
+    val doc = proc.annotate(simpleDoc)

     doc.sentences(0).lemmas.get(0) should be ("john")
     doc.sentences(0).lemmas.get(2) should be ("go")
@@ -112,40 +109,44 @@
   }

   it should "parse MWEs correctly" in {
-    var sent = "Foods such as icecream are tasty."
-    var doc = proc.mkDocument(sent)
-    println(s"WORDS: ${doc.sentences.head.words.mkString(", ")}")
-
-    proc.annotate(doc)
-    println(s"Enhanced universal dependencies for sentence: $sent")
-    println(doc.sentences.head.universalEnhancedDependencies.get)
-
-    doc.sentences.head.universalEnhancedDependencies.get.hasEdge(0, 3, "nmod_such_as") should be (true)
-    doc.sentences.head.universalEnhancedDependencies.get.hasEdge(0, 3, "nmod") should be (false)
-
-    sent = "There was famine due to drought."
-    doc = proc.mkDocument(sent)
-    println(s"WORDS: ${doc.sentences.head.words.mkString(", ")}")
-
-    proc.annotate(doc)
-    println(s"Enhanced universal dependencies for sentence: $sent")
-    println(doc.sentences.head.universalEnhancedDependencies.get)
-
-    doc.sentences.head.universalEnhancedDependencies.get.hasEdge(2, 5, "nmod_due_to") should be (true)
-    doc.sentences.head.universalEnhancedDependencies.get.hasEdge(2, 3, "amod") should be (false)
-    doc.sentences.head.universalEnhancedDependencies.get.hasEdge(2, 5, "nmod") should be (false)
-
-    sent = "They ate cake due to hunger."
-    doc = proc.mkDocument(sent)
-    println(s"WORDS: ${doc.sentences.head.words.mkString(", ")}")
-
-    proc.annotate(doc)
-    println(s"Enhanced universal dependencies for sentence: $sent")
-    println(doc.sentences.head.universalEnhancedDependencies.get)
-
-    doc.sentences.head.universalEnhancedDependencies.get.hasEdge(1, 5, "nmod_due_to") should be (true)
-    doc.sentences.head.universalEnhancedDependencies.get.hasEdge(1, 3, "amod") should be (false)
-    doc.sentences.head.universalEnhancedDependencies.get.hasEdge(1, 5, "nmod") should be (false)
+    {
+      val sent = "Foods such as icecream are tasty."
+      val simpleDoc = proc.mkDocument(sent)
+      println(s"WORDS: ${simpleDoc.sentences.head.words.mkString(", ")}")
+
+      val doc = proc.annotate(simpleDoc)
+      println(s"Enhanced universal dependencies for sentence: $sent")
+      println(doc.sentences.head.universalEnhancedDependencies.get)
+
+      doc.sentences.head.universalEnhancedDependencies.get.hasEdge(0, 3, "nmod_such_as") should be(true)
+      doc.sentences.head.universalEnhancedDependencies.get.hasEdge(0, 3, "nmod") should be(false)
+    }
+    {
+      val sent = "There was famine due to drought."
+      val simpleDoc = proc.mkDocument(sent)
+      println(s"WORDS: ${simpleDoc.sentences.head.words.mkString(", ")}")
+
+      val doc = proc.annotate(simpleDoc)
+      println(s"Enhanced universal dependencies for sentence: $sent")
+      println(doc.sentences.head.universalEnhancedDependencies.get)
+
+      doc.sentences.head.universalEnhancedDependencies.get.hasEdge(2, 5, "nmod_due_to") should be(true)
+      doc.sentences.head.universalEnhancedDependencies.get.hasEdge(2, 3, "amod") should be(false)
+      doc.sentences.head.universalEnhancedDependencies.get.hasEdge(2, 5, "nmod") should be(false)
+    }
+    {
+      val sent = "They ate cake due to hunger."
+      val simpleDoc = proc.mkDocument(sent)
+      println(s"WORDS: ${simpleDoc.sentences.head.words.mkString(", ")}")
+
+      val doc = proc.annotate(simpleDoc)
+      println(s"Enhanced universal dependencies for sentence: $sent")
+      println(doc.sentences.head.universalEnhancedDependencies.get)
+
+      doc.sentences.head.universalEnhancedDependencies.get.hasEdge(1, 5, "nmod_due_to") should be(true)
+      doc.sentences.head.universalEnhancedDependencies.get.hasEdge(1, 3, "amod") should be(false)
+      doc.sentences.head.universalEnhancedDependencies.get.hasEdge(1, 5, "nmod") should be(false)
+    }
   }

   it should "parse incomplete sentence without crashing" in {
diff --git a/library/src/test/scala/org/clulab/serialization/json/TestJSONSerializer.scala b/library/src/test/scala/org/clulab/serialization/json/TestJSONSerializer.scala
index 5acf466d4..ceabd13f3 100644
--- a/library/src/test/scala/org/clulab/serialization/json/TestJSONSerializer.scala
+++ b/library/src/test/scala/org/clulab/serialization/json/TestJSONSerializer.scala
@@ -24,8 +24,8 @@ class TestJSONSerializer extends Test {

   "A Document with an ID" should "produce json with an \"id\" field" in {
-    val d = jsonStringToDocument(""" {"sentences":[{"raw":["Gonzo","married","Camilla","."], "words":["Gonzo","married","Camilla","."],"startOffsets":[0,6,14,21],"endOffsets":[5,13,21,22],"tags":["NNP","VBD","NNP","."],"lemmas":["Gonzo","marry","Camilla","."],"entities":["O","O","PERSON","O"],"norms":["O","O","O","O"],"chunks":["B-NP","B-VP","B-NP","O"],"graphs":{"stanford-basic":{"edges":[{"source":1,"destination":0,"relation":"nsubj"},{"source":1,"destination":2,"relation":"dobj"},{"source":1,"destination":3,"relation":"punct"}],"roots":[1]},"stanford-collapsed":{"edges":[{"source":1,"destination":0,"relation":"nsubj"},{"source":1,"destination":2,"relation":"dobj"},{"source":1,"destination":3,"relation":"punct"}],"roots":[1]}}}]} """)
-    d.id = Some("this-is-an-id")
+    val id = "this-is-an-id"
+    val d = jsonStringToDocument(s""" {"id":"$id","sentences":[{"raw":["Gonzo","married","Camilla","."], "words":["Gonzo","married","Camilla","."],"startOffsets":[0,6,14,21],"endOffsets":[5,13,21,22],"tags":["NNP","VBD","NNP","."],"lemmas":["Gonzo","marry","Camilla","."],"entities":["O","O","PERSON","O"],"norms":["O","O","O","O"],"chunks":["B-NP","B-VP","B-NP","O"],"graphs":{"stanford-basic":{"edges":[{"source":1,"destination":0,"relation":"nsubj"},{"source":1,"destination":2,"relation":"dobj"},{"source":1,"destination":3,"relation":"punct"}],"roots":[1]},"stanford-collapsed":{"edges":[{"source":1,"destination":0,"relation":"nsubj"},{"source":1,"destination":2,"relation":"dobj"},{"source":1,"destination":3,"relation":"punct"}],"roots":[1]}}}]} """)

     (d.jsonAST \ "id") should equal (JString("this-is-an-id"))
   }
@@ -35,8 +35,7 @@
   }

   "A Document with text" should "produce json with a \"text\" field" in {
-    val d = jsonStringToDocument(""" {"sentences":[{"raw":["Gonzo","married","Camilla","."], "words":["Gonzo","married","Camilla","."],"startOffsets":[0,6,14,21],"endOffsets":[5,13,21,22],"tags":["NNP","VBD","NNP","."],"lemmas":["Gonzo","marry","Camilla","."],"entities":["O","O","PERSON","O"],"norms":["O","O","O","O"],"chunks":["B-NP","B-VP","B-NP","O"],"graphs":{"stanford-basic":{"edges":[{"source":1,"destination":0,"relation":"nsubj"},{"source":1,"destination":2,"relation":"dobj"},{"source":1,"destination":3,"relation":"punct"}],"roots":[1]},"stanford-collapsed":{"edges":[{"source":1,"destination":0,"relation":"nsubj"},{"source":1,"destination":2,"relation":"dobj"},{"source":1,"destination":3,"relation":"punct"}],"roots":[1]}}}]} """)
-    d.text = Some(text)
+    val d = jsonStringToDocument(s""" {"text":"$text","sentences":[{"raw":["Gonzo","married","Camilla","."], "words":["Gonzo","married","Camilla","."],"startOffsets":[0,6,14,21],"endOffsets":[5,13,21,22],"tags":["NNP","VBD","NNP","."],"lemmas":["Gonzo","marry","Camilla","."],"entities":["O","O","PERSON","O"],"norms":["O","O","O","O"],"chunks":["B-NP","B-VP","B-NP","O"],"graphs":{"stanford-basic":{"edges":[{"source":1,"destination":0,"relation":"nsubj"},{"source":1,"destination":2,"relation":"dobj"},{"source":1,"destination":3,"relation":"punct"}],"roots":[1]},"stanford-collapsed":{"edges":[{"source":1,"destination":0,"relation":"nsubj"},{"source":1,"destination":2,"relation":"dobj"},{"source":1,"destination":3,"relation":"punct"}],"roots":[1]}}}]} """)

     (d.jsonAST \ "text") should equal (JString(text))
   }
@@ -61,11 +60,11 @@
     class Scratch(var document: Document) extends JSONSerialization {
       def jsonAST: JValue = document.jsonAST
     }
-
-    doc.text = Some("This is a test") // Original failing test requires text
+
+    val docWithText = doc.copy(sentences = doc.sentences, text = Some("This is a test"))
     val documentSerializer = new DocumentSerializer()
-    val expectedDocAsJSON = new Scratch(doc).json()
-    val docSaved = documentSerializer.save(doc, keepText = true)
+    val expectedDocAsJSON = new Scratch(docWithText).json()
+    val docSaved = documentSerializer.save(docWithText, keepText = true)
     val docLoaded = documentSerializer.load(docSaved)
     val actualDocAsJSON = new Scratch(docLoaded).json()
diff --git a/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala b/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala
index b84e337a3..b34393b2b 100644
--- a/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala
+++ b/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala
@@ -1,7 +1,6 @@
 package org.clulab.struct

-import org.clulab.processors.Document
-import org.clulab.processors.Sentence
+import org.clulab.processors.{Document, DocumentAttachment, Sentence}
 import org.clulab.serialization.DocumentSerializer
 import org.clulab.serialization.json._
 import org.clulab.struct.test.CaseClass
@@ -17,6 +16,7 @@
 import java.io.ByteArrayInputStream
 import java.io.ByteArrayOutputStream
 import java.io.ObjectInputStream
 import java.io.ObjectOutputStream
+import scala.collection.mutable
 import scala.util.Using

 class TestDocumentAttachment extends Test {
@@ -124,12 +124,13 @@
//  }

   "Document with TextNameDocumentAttachment" should "serialize as text" in {
-    val oldDocument = new Document(Array.empty[Sentence])
-
-    oldDocument.addAttachment(FIRST_KEY, new TextNameDocumentAttachment(FIRST_NAME))
-    oldDocument.addAttachment(MIDDLE_KEY, new TextNameDocumentAttachment(MIDDLE_NAME))
-    oldDocument.addAttachment(LAST_KEY, new TextNameDocumentAttachment(LAST_NAME))
-    oldDocument.addAttachment(ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME))
+    val oldAttachments = mutable.HashMap[String, DocumentAttachment](
+      (FIRST_KEY, new TextNameDocumentAttachment(FIRST_NAME)),
+      (MIDDLE_KEY, new TextNameDocumentAttachment(MIDDLE_NAME)),
+      (LAST_KEY, new TextNameDocumentAttachment(LAST_NAME)),
+      (ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME))
+    )
+    val oldDocument = new Document(sentences = Array.empty[Sentence], attachments = Some(oldAttachments))

     val documentSerializer = new DocumentSerializer()
     val documentString = documentSerializer.save(oldDocument)
@@ -146,12 +147,13 @@
   }

   "Document with ObjectNameDocumentAttachment" should "serialize as text" in {
-    val oldDocument = new Document(Array.empty[Sentence])
-
-    oldDocument.addAttachment(FIRST_KEY, new ObjectNameDocumentAttachment(FIRST_NAME))
-    oldDocument.addAttachment(MIDDLE_KEY, new ObjectNameDocumentAttachment(MIDDLE_NAME))
-    oldDocument.addAttachment(LAST_KEY, new ObjectNameDocumentAttachment(LAST_NAME))
-    oldDocument.addAttachment(ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME))
+    val oldAttachments = mutable.HashMap[String, DocumentAttachment](
+      (FIRST_KEY, new ObjectNameDocumentAttachment(FIRST_NAME)),
+      (MIDDLE_KEY, new ObjectNameDocumentAttachment(MIDDLE_NAME)),
+      (LAST_KEY, new ObjectNameDocumentAttachment(LAST_NAME)),
+      (ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME))
+    )
+    val oldDocument = new Document(sentences = Array.empty[Sentence], attachments = Some(oldAttachments))

     val documentSerializer = new DocumentSerializer()
     // This should be a messy string.
@@ -169,12 +171,14 @@
   }

   "Document with TextNameDocumentAttachments" should "serialize as json" in {
-    val oldDocument = new Document(Array.empty[Sentence])
+    val oldAttachments = mutable.HashMap[String, DocumentAttachment](
+      (FIRST_KEY, new TextNameDocumentAttachment(FIRST_NAME)),
+      (MIDDLE_KEY, new TextNameDocumentAttachment(MIDDLE_NAME)),
+      (LAST_KEY, new TextNameDocumentAttachment(LAST_NAME)),
+      (ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME))
+    )
+    val oldDocument = new Document(sentences = Array.empty[Sentence], attachments = Some(oldAttachments))

-    oldDocument.addAttachment(FIRST_KEY, new TextNameDocumentAttachment(FIRST_NAME))
-    oldDocument.addAttachment(MIDDLE_KEY, new TextNameDocumentAttachment(MIDDLE_NAME))
-    oldDocument.addAttachment(LAST_KEY, new TextNameDocumentAttachment(LAST_NAME))
-    oldDocument.addAttachment(ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME))
     // This shouldn't compile.
     /*oldDocument.addAttachment("wrong", new NameMethodAttachment("name"))*/
@@ -193,12 +197,13 @@
   }

   "Document with ObjectNameDocumentAttachment" should "serialize as json" in {
-    val oldDocument = new Document(Array.empty[Sentence])
-
-    oldDocument.addAttachment(FIRST_KEY, new ObjectNameDocumentAttachment(FIRST_NAME))
-    oldDocument.addAttachment(MIDDLE_KEY, new ObjectNameDocumentAttachment(MIDDLE_NAME))
-    oldDocument.addAttachment(LAST_KEY, new ObjectNameDocumentAttachment(LAST_NAME))
-    oldDocument.addAttachment(ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME))
+    val oldAttachments = mutable.HashMap[String, DocumentAttachment](
+      (FIRST_KEY, new ObjectNameDocumentAttachment(FIRST_NAME)),
+      (MIDDLE_KEY, new ObjectNameDocumentAttachment(MIDDLE_NAME)),
+      (LAST_KEY, new ObjectNameDocumentAttachment(LAST_NAME)),
+      (ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME))
+    )
+    val oldDocument = new Document(Array.empty[Sentence], attachments = Some(oldAttachments))

     // This should be a messy string.
     val documentString = prettyJson(renderJValue(oldDocument.jsonAST))
@@ -214,4 +219,3 @@
     /*require(newDocument == oldDocument)*/
   }
 }
-
diff --git a/library/src/test/scala/org/clulab/utils/TestFindHeads.scala b/library/src/test/scala/org/clulab/utils/TestFindHeads.scala
index 13390e71e..13e36fb85 100644
--- a/library/src/test/scala/org/clulab/utils/TestFindHeads.scala
+++ b/library/src/test/scala/org/clulab/utils/TestFindHeads.scala
@@ -9,10 +9,12 @@ class TestFindHeads extends Test {
   def newSentence(words: Array[String], directedGraph: DirectedGraph[String]): Sentence = {
     val startOffsets = Array(0) // unused
     val endOffsets = Array(0) // unused
-    val sentence = new Sentence(words, startOffsets, endOffsets, words)
+    val sentence = new Sentence(
+      words, startOffsets, endOffsets, words,
+      tags = Some(words)
+    )

     sentence.graphs(UNIVERSAL_BASIC) = directedGraph
-    sentence.tags = Some(words)
     sentence
   }

From 9ccca3686e524969295d0ee87cf3132ce0f47153 Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Thu, 22 May 2025 22:37:23 -0700
Subject: [PATCH 05/42] Compile for Scala 3

---
 build.sbt                                     |  2 +-
 .../org/clulab/processors/Sentence.scala      | 97 +++----------------
 .../processors/clu/BalaurProcessor.scala      | 13 +--
 .../serialization/json/JSONSerializer.scala   |  2 +-
 .../scala/org/clulab/struct/Annotation.scala  | 39 ++++++++
 .../org/clulab/struct/Tokenization.scala      | 21 ++++
 6 files changed, 82 insertions(+), 92 deletions(-)
 create mode 100644 library/src/main/scala/org/clulab/struct/Annotation.scala
 create mode 100644 library/src/main/scala/org/clulab/struct/Tokenization.scala

diff --git a/build.sbt b/build.sbt
index 1ee3d3420..e7c465370 100644
--- a/build.sbt
+++ b/build.sbt
@@ -18,7 +18,7 @@ val scala37 = "3.7.0"   // up to 3.7.0
 // Scala33: This is the first official LTS, but hold off until necessary.
 val scala3 = scala33

-ThisBuild / crossScalaVersions := Seq(scala213) // , scala3)
+ThisBuild / crossScalaVersions := Seq(scala3, scala213)
 ThisBuild / scalaVersion := crossScalaVersions.value.head

 lazy val root = (project in file("."))
diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala b/library/src/main/scala/org/clulab/processors/Sentence.scala
index 7158efecb..42ce43b0b 100644
--- a/library/src/main/scala/org/clulab/processors/Sentence.scala
+++ b/library/src/main/scala/org/clulab/processors/Sentence.scala
@@ -1,73 +1,11 @@
 package org.clulab.processors

-import org.clulab.scala.WrappedArray._
 import org.clulab.struct.{DirectedGraph, GraphMap, RelationTriple, Tree}
 import org.clulab.struct.GraphMap._
 import org.clulab.utils.Hash
-import org.clulab.utils.SeqUtils

 import scala.collection.mutable

-case class WordTokenization(raw: String, startOffset: Int, endOffset: Int, word: String)
-
-// Is this SentenceTokenization, ArraySeq of WordTokenization
-// Tokenation, Tokse
-// Parseation, Parse
-case class Tokenization(
-  raw: Array[String],
-  startOffsets: Array[Int],
-  endOffsets: Array[Int],
-  words: Array[String]
-) {
-
-  def reverse: Tokenization = {
-    Tokenization(
-      raw = raw.reverse,
-      startOffsets = startOffsets.reverse,
-      endOffsets = endOffsets.reverse,
-      words = words.reverse
-    )
-  }
-}
-
-// These are by the word ones and then there are relationships between words.
-// So parse, might not be a thing that is per word.
-//case class WordParse(tag: String, lemma: String, entity: String, norm: String, chunk: String)
-
-//case class SentenceParse(tags: Array[String], cyntacticTree, graphs, relations)
-
-// Again is this SentenceParse
-case class Parse(
-  tags: Option[Array[String]] = None,
-  /** Lemmas */
-  lemmas: Option[Array[String]] = None,
-  /** NE labels */
-  entities: Option[Array[String]] = None,
-  /** Normalized values of named/numeric entities, such as dates */
-  norms: Option[Array[String]] = None,
-  /** Shallow parsing labels */
-  chunks: Option[Array[String]] = None,
-  /** Constituent tree of this sentence; includes head words */
-  syntacticTree: Option[Tree] = None,
-  /** DAG of syntactic and semantic dependencies; word offsets start at 0 */
-  graphs: GraphMap = GraphMap(),
-  /** Relation triples from OpenIE */
-  relations:Option[Array[RelationTriple]] = None
-) {
-
-  def reverse: Parse = {
-    Parse(
-      tags = tags.map(_.reverse),
-      lemmas = lemmas.map(_.reverse),
-      entities = entities.map(_.reverse),
-      norms = norms.map(_.reverse),
-      chunks = chunks.map(_.reverse)
-      // TODO: reverse syntacticTree, graphs, and relations!
-    )
-  }
-}
-
-
 /** Stores the annotations for a single sentence */
 class Sentence(
   /** Raw tokens in this sentence; these MUST match the original text */
@@ -104,14 +42,6 @@ class Sentence(
   val relations:Option[Array[RelationTriple]] = None
 ) extends Serializable {

-  def getTokenization: Tokenization = {
-    Tokenization(raw, startOffsets, endOffsets, words)
-  }
-
-  def getParse: Parse = {
-    Parse(tags, lemmas, entities, norms, chunks, syntacticTree, graphs, relations)
-  }
-
   def size:Int = raw.length

   def indices: Range = 0 until size
@@ -219,23 +149,22 @@ class Sentence(

   /** Reverts the current sentence */
   def revert(): Sentence = {
-    val reversedTokenization = this.getTokenization.reverse
-    val reversedParse = this.getParse.reverse
     val reversedSentence = Sentence(
-      reversedTokenization.raw,
-      reversedTokenization.startOffsets,
-      reversedTokenization.endOffsets,
-      reversedTokenization.words
+      raw.reverse,
+      startOffsets.reverse,
+      endOffsets.reverse,
+      words.reverse,
+      tags.map(_.reverse),
+      lemmas.map(_.reverse),
+      entities.map(_.reverse),
+      norms.map(_.reverse),
+      chunks.map(_.reverse),
+      // TODO: revert syntacticTree and graphs!
+      syntacticTree,
+      graphs,
+      relations
     )
-    // TODO: Make this work
-//    reversedSentence.tags = reversedParse.tags
-//    reversedSentence.lemmas = reversedParse.lemmas
-//    reversedSentence.entities = reversedParse.entities
-//    reversedSentence.norms = reversedParse.norms
-//    reversedSentence.chunks = reversedParse.chunks
-    // TODO: revert syntacticTree and graphs!
-
     reversedSentence
   }
diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala
index dc69b9363..68bd2c6de 100644
--- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala
+++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala
@@ -3,9 +3,11 @@ package org.clulab.processors.clu
 import com.typesafe.config.Config
 import com.typesafe.config.ConfigFactory
 import org.clulab.numeric.{NumericEntityRecognizer, mkLabelsAndNorms}
+import org.clulab.processors.clu.tokenizer.{EnglishLemmatizer, Lemmatizer, OpenDomainEnglishTokenizer, OpenDomainPortugueseTokenizer, OpenDomainSpanishTokenizer, PortugueseLemmatizer, SpanishLemmatizer, Tokenizer}
 import org.clulab.processors.{Document, Processor, Sentence}
-import org.clulab.processors.clu.tokenizer._
-import org.clulab.scala.WrappedArray._
+
+import scala.collection.immutable.ArraySeq
+//import org.clulab.scala.WrappedArray._
 import org.clulab.scala_transformers.encoder.TokenClassifier
 import org.clulab.scala_transformers.encoder.EncoderMaxTokensRuntimeException
 import org.clulab.sequences.{LexiconNER, NamedEntity}
@@ -13,7 +15,6 @@ import org.clulab.struct.DirectedGraph
 import org.clulab.struct.GraphMap
 import org.clulab.utils.{Configured, MathUtils, ToEnhancedDependencies}
 import org.slf4j.{Logger, LoggerFactory}
-import org.clulab.odin.Mention
 import BalaurProcessor._
 import PostProcessor._
 import org.clulab.processors.hexatagging.HexaDecoder
@@ -149,7 +150,7 @@
     val lemmas = lemmatize(words)

     try {
-      val allLabelsAndScores = tokenClassifier.predictWithScores(words)
+      val allLabelsAndScores = tokenClassifier.predictWithScores(ArraySeq.unsafeWrapArray(words))
       val tags = mkPosTags(words, allLabelsAndScores(TASK_TO_INDEX(POS_TASK)))
       val entities = {
         val optionalEntities = mkOptionalNerLabels(words, sentence.startOffsets, sentence.endOffsets, tags, lemmas)
@@ -255,13 +256,13 @@
   private def mergeNerLabels(generic: Array[String], custom: Array[String]): Array[String] = {
     require(generic.length == custom.length)

-    val customNamedEntities = NamedEntity.collect(custom)
+    val customNamedEntities = NamedEntity.collect(ArraySeq.unsafeWrapArray(custom))
     val result = generic.toArray // A copy of the generic labels is created here.
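     // Both label sequences are reduced to NamedEntity spans below so that they
     // can be merged; the custom (lexicon) spans are assumed to take precedence.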
     if (customNamedEntities.isEmpty) result
     else {
-      val genericNamedEntities = NamedEntity.collect(generic)
+      val genericNamedEntities = NamedEntity.collect(ArraySeq.unsafeWrapArray(generic))

       //println(s"Generic NamedEntity: ${genericNamedEntities.mkString(", ")}")
       //println(s"Custom NamedEntity: ${customNamedEntities.mkString(", ")}")
diff --git a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
index 1ae8f456f..903bed37e 100644
--- a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
+++ b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
@@ -1,7 +1,7 @@
 package org.clulab.serialization.json

 import java.io.File
-import org.clulab.processors.{Document, DocumentAttachment, DocumentAttachmentBuilderFromJson, Parse, Sentence}
+import org.clulab.processors.{Document, DocumentAttachment, DocumentAttachmentBuilderFromJson, Sentence}
 import org.clulab.struct.Edge
 import org.clulab.struct.{DirectedGraph, GraphMap}
 import org.clulab.utils.FileUtils
diff --git a/library/src/main/scala/org/clulab/struct/Annotation.scala b/library/src/main/scala/org/clulab/struct/Annotation.scala
new file mode 100644
index 000000000..9489154f5
--- /dev/null
+++ b/library/src/main/scala/org/clulab/struct/Annotation.scala
@@ -0,0 +1,39 @@
+package org.clulab.struct
+
+import org.clulab.struct.GraphMap.GraphMap
+
+// These are the per-word ones, and then there are relationships between words.
+// So a parse might not be a thing that is per word.
+//case class WordParse(tag: String, lemma: String, entity: String, norm: String, chunk: String)
+
+//case class SentenceParse(tags: Array[String], syntacticTree, graphs, relations)
+
+case class Annotation(
+  tags: Option[Array[String]] = None,
+  /** Lemmas */
+  lemmas: Option[Array[String]] = None,
+  /** NE labels */
+  entities: Option[Array[String]] = None,
+  /** Normalized values of named/numeric entities, such as dates */
+  norms: Option[Array[String]] = None,
+  /** Shallow parsing labels */
+  chunks: Option[Array[String]] = None,
+  /** Constituent tree of this sentence; includes head words */
+  syntacticTree: Option[Tree] = None,
+  /** DAG of syntactic and semantic dependencies; word offsets start at 0 */
+  graphs: GraphMap = GraphMap(),
+  /** Relation triples from OpenIE */
+  relations:Option[Array[RelationTriple]] = None
+) {
+
+  def reverse: Annotation = {
+    Annotation(
+      tags = tags.map(_.reverse),
+      lemmas = lemmas.map(_.reverse),
+      entities = entities.map(_.reverse),
+      norms = norms.map(_.reverse),
+      chunks = chunks.map(_.reverse)
+      // TODO: reverse syntacticTree, graphs, and relations!
+    )
+  }
+}
diff --git a/library/src/main/scala/org/clulab/struct/Tokenization.scala b/library/src/main/scala/org/clulab/struct/Tokenization.scala
new file mode 100644
index 000000000..78e8b21da
--- /dev/null
+++ b/library/src/main/scala/org/clulab/struct/Tokenization.scala
@@ -0,0 +1,21 @@
+package org.clulab.struct
+
+// An alternative design would not use aligned arrays, but an array of structures.
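+// A hypothetical array-of-structures variant would be something like
+//   Array.tabulate(raw.length) { i =>
+//     WordTokenization(raw(i), startOffsets(i), endOffsets(i), words(i))
+//   }
+// at the cost of one object per token; the aligned arrays stay compact.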
+case class WordTokenization(raw: String, startOffset: Int, endOffset: Int, word: String)
+
+case class Tokenization(
+  raw: Array[String],
+  startOffsets: Array[Int],
+  endOffsets: Array[Int],
+  words: Array[String]
+) {
+
+  def reverse: Tokenization = {
+    Tokenization(
+      raw = raw.reverse,
+      startOffsets = startOffsets.reverse,
+      endOffsets = endOffsets.reverse,
+      words = words.reverse
+    )
+  }
+}

From 98c8115d5a842012ee5d60a4e302fce95552934d Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Fri, 23 May 2025 00:15:01 -0700
Subject: [PATCH 06/42] Pass Scala3 tests

---
 .../serialization/json/JSONSerializer.scala   | 26 ++++++++++++-------
 .../scala-3/org/clulab/utils/TestHash.scala   |  6 +++--
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
index 903bed37e..cac75f40a 100644
--- a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
+++ b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
@@ -77,21 +77,27 @@ object JSONSerializer {

   def toSentence(json: JValue): Sentence = {

-    def getLabels(json: JValue, k: String): Option[Array[String]] = json \ k match {
+    def getStrings(json: JValue, k: String): Array[String] = (json \ k).extract[Array[String]]
+
+    def getInts(json: JValue, k: String): Array[Int] = (json \ k).extract[Array[Int]]
+
+    def getLabelsOpt(json: JValue, k: String): Option[Array[String]] = json \ k match {
       case JNothing => None
       case contents => Some(contents.extract[Array[String]])
     }

-    val tokenizedSentence = json.extract[Sentence]
-
-    val tags = getLabels(json, "tags")
-    val lemmas = getLabels(json, "lemmas")
-    val entities = getLabels(json, "entities")
-    val norms = getLabels(json, "norms")
-    val chunks = getLabels(json, "chunks")
+    val raw = getStrings(json, "raw")
+    val startOffsets = getInts(json, "startOffsets")
+    val endOffsets = getInts(json, "endOffsets")
+    val words = getStrings(json, "words")
+    val tags = getLabelsOpt(json, "tags")
+    val lemmas = getLabelsOpt(json, "lemmas")
+    val entities = getLabelsOpt(json, "entities")
+    val norms = getLabelsOpt(json, "norms")
+    val chunks = getLabelsOpt(json, "chunks")
     val syntacticTree = None // TODO: Are these not serialized?
     val graphs = {
-      val preferredSize = tokenizedSentence.words.length
+      val preferredSize = words.length
       val graphs = (json \ "graphs").extract[JObject].obj.map { case (key, json) =>
         key -> toDirectedGraph(json, Some(preferredSize))
       }.toMap

       GraphMap(graphs)
     }
     val relations = None // TODO: Are these not serialized?
     val parsedSentence = Sentence(
-      tokenizedSentence.raw, tokenizedSentence.startOffsets, tokenizedSentence.endOffsets, tokenizedSentence.words,
+      raw, startOffsets, endOffsets, words,
       tags, lemmas, entities, norms, chunks, syntacticTree, graphs, relations
     )
diff --git a/library/src/test/scala-3/org/clulab/utils/TestHash.scala b/library/src/test/scala-3/org/clulab/utils/TestHash.scala
index c1dcf17a8..9a08e0ca5 100644
--- a/library/src/test/scala-3/org/clulab/utils/TestHash.scala
+++ b/library/src/test/scala-3/org/clulab/utils/TestHash.scala
@@ -35,7 +35,8 @@ class TestHash extends Test {
   behavior of "Hash"

   it should "compute the expected equivalence hash for a Document" in {
-    val expectedHash = 1145238653
+    val expectedHash = -1029127286
+//  val expectedHash = 1145238653
     val actualHash = document.equivalenceHash

     actualHash should be (expectedHash)
@@ -57,7 +58,8 @@
   }

   it should "compute the expected equivalence hashes for Mentions" in {
-    val expectedHashes = Array(1317064233, 418554464, 269168883, 1021871359, 1657321605)
+    val expectedHashes = Array(-674187334, 1183699787, 391766831, -495035159, -2089326276)
+//  val expectedHashes = Array(1317064233, 418554464, 269168883, 1021871359, 1657321605)
     val actualHashes = allMentions.map(getEquivalenceHash)

     actualHashes should be (expectedHashes)

From 211cd2aede935850649a24f3f4cff2a1115e239f Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Fri, 23 May 2025 09:47:17 -0700
Subject: [PATCH 07/42] NumericUtils

---
 .../processors/apps/NumericEntityRecognizerShell.scala |  6 +++---
 .../src/main/scala/org/clulab/numeric/EvalTimeNorm.scala |  2 +-
 .../clulab/numeric/{package.scala => NumericUtils.scala} |  9 +++++----
 .../clulab/numeric/TestNumericEntityRecognition.scala    |  2 +-
 .../scala/org/clulab/numeric/TestSeasonNormalizer.scala  |  2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)
 rename library/src/main/scala/org/clulab/numeric/{package.scala => NumericUtils.scala} (95%)

diff --git a/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala b/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala
index 47225a369..3a93ff4bd 100644
--- a/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala
+++ b/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala
@@ -1,6 +1,6 @@
 package org.clulab.processors.apps

-import org.clulab.numeric.{displayMentions, mkLabelsAndNorms}
+import org.clulab.numeric.NumericUtils
 import org.clulab.processors.clu.BalaurProcessor
 import org.clulab.utils.ReloadableProcessor
 import org.clulab.utils.ReloadableShell
@@ -37,8 +37,8 @@
     val doc = proc.get.annotate(text)
     val mentions = proc.get.numericEntityRecognizerOpt.map(_.extractFrom(doc)).getOrElse(Seq.empty)

-    mkLabelsAndNorms(doc, mentions)
-    displayMentions(mentions, doc)
+    NumericUtils.mkLabelsAndNorms(doc, mentions)
+    NumericUtils.displayMentions(mentions, doc)
   }

   def reload(): Unit = {
diff --git a/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala b/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala
index bf5190dba..08acac195 100644
--- a/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala
+++ b/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala
@@ -34,7 +34,7 @@
     }
     val doc = proc.annotate(docText)
     val mentions = ner.extractFrom(doc)
-    mkLabelsAndNorms(doc, mentions)
+    NumericUtils.mkLabelsAndNorms(doc, mentions)
     val prediction = mentions.collect{
       case m: Norm if m.neLabel.equals("DATE") || m.neLabel.equals("DATE-RANGE") =>
         (m.startOffset.toString, m.endOffset.toString, m.neNorm)
diff --git a/library/src/main/scala/org/clulab/numeric/package.scala b/library/src/main/scala/org/clulab/numeric/NumericUtils.scala
similarity index 95%
rename from library/src/main/scala/org/clulab/numeric/package.scala
rename to library/src/main/scala/org/clulab/numeric/NumericUtils.scala
index d41438014..261b8dc74 100644
--- a/library/src/main/scala/org/clulab/numeric/package.scala
+++ b/library/src/main/scala/org/clulab/numeric/NumericUtils.scala
@@ -1,13 +1,14 @@
-package org.clulab
+package org.clulab.numeric

 import org.clulab.numeric.actions.NumericActions
-import org.clulab.numeric.mentions.{DateMention, DateRangeMention, MeasurementMention, Norm, PercentageMention}
+import org.clulab.numeric.mentions.Norm
 import org.clulab.odin.{EventMention, Mention}
-import org.clulab.processors.{Document, Sentence}
+import org.clulab.processors.Document
 import org.clulab.struct.Interval
+
 import _root_.scala.util.control.Breaks._

-package object numeric {
+object NumericUtils {

   def displayMentions(mentions: Seq[Mention], doc: Document): Unit = {
     val mentionsBySentence = mentions.groupBy(_.sentence).map { case (sentence, mentions) =>
       sentence -> mentions.sortBy(_.start)
diff --git a/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala b/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala
index 864505e79..4bd2bd2ea 100644
--- a/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala
+++ b/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala
@@ -656,7 +656,7 @@
   def numericParse(sentence: String): (Array[String], Array[String], Array[String]) = {
     val doc = proc.annotate(sentence)
     val mentions = ner.extractFrom(doc)
-    mkLabelsAndNorms(doc, mentions)
+    NumericUtils.mkLabelsAndNorms(doc, mentions)

     // assume 1 sentence per doc
     val sent = doc.sentences.head
diff --git a/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala b/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala
index d1f104f8d..8f8fa38ff 100644
--- a/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala
+++ b/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala
@@ -17,7 +17,7 @@
     val document = processor.annotate(text)
     val mentions = processor.numericEntityRecognizerOpt.get.extractFrom(document)

-    mkLabelsAndNorms(document, mentions)
+    NumericUtils.mkLabelsAndNorms(document, mentions)
     (document.sentences.head.entities.get, document.sentences.head.norms.get)
   }

From 8ea1301601cb2e4b3052aac42396e70efc348830 Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Fri, 23 May 2025 10:12:38 -0700
Subject: [PATCH 08/42] GraphMapType

---
 .../main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala | 5 +++--
 .../src/main/scala-2.13/org/clulab/struct/GraphMap.scala  | 8 ++++----
 library/src/main/scala-3/org/clulab/struct/GraphMap.scala | 8 ++++----
 .../src/main/scala/org/clulab/processors/Sentence.scala   | 6 +++---
 .../src/main/scala/org/clulab/processors/clu/Veil.scala   | 4 ++--
 .../scala/org/clulab/serialization/json/package.scala     | 2 +-
 library/src/main/scala/org/clulab/struct/Annotation.scala | 4 ++--
 7 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala b/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala
index 57ad2411e..de5a5472f 100644
--- a/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala
+++ b/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala
@@ -7,10 +7,11 @@ class GraphMap protected extends mutable.HashMap[String, DirectedGraph[String]] {
 }

 object GraphMap extends GraphMapNames {
+  type GraphMapType = GraphMap

-  def apply(): GraphMap = new GraphMap()
+  def apply(): GraphMapType = new GraphMap()

-  def apply(existing: Map[String, DirectedGraph[String]]): GraphMap = {
+  def apply(existing: Map[String, DirectedGraph[String]]): GraphMapType = {
     val gm = GraphMap()
     gm ++= existing
   }
diff --git a/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala b/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala
index fd1b32794..805226874 100644
--- a/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala
+++ b/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala
@@ -6,14 +6,14 @@ object GraphMap extends GraphMapNames {
   // This was previously a class inheriting from HashMap.  However,
   // [warn] ...: inheritance from class HashMap in package mutable is deprecated (since 2.13.0): HashMap will be made final; use .withDefault for the common use case of computing a default value
-  type GraphMap = mutable.HashMap[String, DirectedGraph[String]]
+  type GraphMapType = mutable.HashMap[String, DirectedGraph[String]]

-  def apply(): GraphMap = {
+  def apply(): GraphMapType = {
     // we have very few dependency types, so let's create a small hash to save memory.
-    new GraphMap(2, mutable.HashMap.defaultLoadFactor)
+    new GraphMapType(2, mutable.HashMap.defaultLoadFactor)
   }

-  def apply(existing: scala.collection.Map[String, DirectedGraph[String]]): GraphMap = {
+  def apply(existing: scala.collection.Map[String, DirectedGraph[String]]): GraphMapType = {
     val gm = GraphMap()
     gm ++= existing
   }
diff --git a/library/src/main/scala-3/org/clulab/struct/GraphMap.scala b/library/src/main/scala-3/org/clulab/struct/GraphMap.scala
index fd1b32794..805226874 100644
--- a/library/src/main/scala-3/org/clulab/struct/GraphMap.scala
+++ b/library/src/main/scala-3/org/clulab/struct/GraphMap.scala
@@ -6,14 +6,14 @@ object GraphMap extends GraphMapNames {
   // This was previously a class inheriting from HashMap.  However,
   // [warn] ...: inheritance from class HashMap in package mutable is deprecated (since 2.13.0): HashMap will be made final; use .withDefault for the common use case of computing a default value
-  type GraphMap = mutable.HashMap[String, DirectedGraph[String]]
+  type GraphMapType = mutable.HashMap[String, DirectedGraph[String]]

-  def apply(): GraphMap = {
+  def apply(): GraphMapType = {
     // we have very few dependency types, so let's create a small hash to save memory.
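    // (An initial capacity of 2 presumably suffices because a sentence usually
    // carries only a couple of graphs, e.g. basic and enhanced dependencies.)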
- new GraphMap(2, mutable.HashMap.defaultLoadFactor) + new GraphMapType(2, mutable.HashMap.defaultLoadFactor) } - def apply(existing: scala.collection.Map[String, DirectedGraph[String]]): GraphMap = { + def apply(existing: scala.collection.Map[String, DirectedGraph[String]]): GraphMapType = { val gm = GraphMap() gm ++= existing } diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala b/library/src/main/scala/org/clulab/processors/Sentence.scala index 42ce43b0b..02396a4af 100644 --- a/library/src/main/scala/org/clulab/processors/Sentence.scala +++ b/library/src/main/scala/org/clulab/processors/Sentence.scala @@ -37,7 +37,7 @@ class Sentence( /** Constituent tree of this sentence; includes head words */ val syntacticTree: Option[Tree] = None, /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ - val graphs: GraphMap = GraphMap(), + val graphs: GraphMapType = GraphMap(), /** Relation triples from OpenIE */ val relations:Option[Array[RelationTriple]] = None ) extends Serializable { @@ -181,7 +181,7 @@ class Sentence( norms: Option[Array[String]] = norms, chunks: Option[Array[String]] = chunks, syntacticTree: Option[Tree] = syntacticTree, - graphs: GraphMap = graphs, + graphs: GraphMapType = graphs, relations: Option[Array[RelationTriple]] = relations ): Sentence = new Sentence( @@ -226,7 +226,7 @@ object Sentence { norms: Option[Array[String]], chunks: Option[Array[String]], tree: Option[Tree], - deps: GraphMap, + deps: GraphMapType, relations: Option[Array[RelationTriple]] ): Sentence = { new Sentence( diff --git a/library/src/main/scala/org/clulab/processors/clu/Veil.scala b/library/src/main/scala/org/clulab/processors/clu/Veil.scala index 481abf2b9..20d136209 100644 --- a/library/src/main/scala/org/clulab/processors/clu/Veil.scala +++ b/library/src/main/scala/org/clulab/processors/clu/Veil.scala @@ -2,7 +2,7 @@ package org.clulab.processors.clu import org.clulab.processors.{Document, Processor, Sentence} import org.clulab.struct.{DirectedGraph, Edge, GraphMap, RelationTriple, Tree} -import org.clulab.struct.GraphMap._ +import org.clulab.struct.GraphMap.GraphMapType import scala.collection.mutable.{Set => MutableSet} @@ -136,7 +136,7 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) } } - def unveilGraphs(veiledGraphs: GraphMap, sentenceIndex: Int): GraphMap = { + def unveilGraphs(veiledGraphs: GraphMapType, sentenceIndex: Int): GraphMapType = { val unveilArray = unveilArrays(sentenceIndex) val unveiledGraphs = GraphMap() val originalLength = originalDocument.sentences(sentenceIndex).words.length diff --git a/library/src/main/scala/org/clulab/serialization/json/package.scala b/library/src/main/scala/org/clulab/serialization/json/package.scala index 27adb3fd9..06cf6715b 100644 --- a/library/src/main/scala/org/clulab/serialization/json/package.scala +++ b/library/src/main/scala/org/clulab/serialization/json/package.scala @@ -52,7 +52,7 @@ package object json { } } - implicit class GraphMapOps(gm: GraphMap) extends JSONSerialization { + implicit class GraphMapOps(gm: GraphMapType) extends JSONSerialization { def jsonAST: JValue = Extraction.decompose(gm.toMap.map { case (k, v) => k -> v.jsonAST }) // instead of mapValues } diff --git a/library/src/main/scala/org/clulab/struct/Annotation.scala b/library/src/main/scala/org/clulab/struct/Annotation.scala index 9489154f5..4323cecf3 100644 --- a/library/src/main/scala/org/clulab/struct/Annotation.scala +++ b/library/src/main/scala/org/clulab/struct/Annotation.scala @@ -1,6 
+1,6 @@ package org.clulab.struct -import org.clulab.struct.GraphMap.GraphMap +import org.clulab.struct.GraphMap.GraphMapType // These are by the word ones and then there are relationships between words. // So parse, might not be a thing that is per word. @@ -21,7 +21,7 @@ case class Annotation( /** Constituent tree of this sentence; includes head words */ syntacticTree: Option[Tree] = None, /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ - graphs: GraphMap = GraphMap(), + graphs: GraphMapType = GraphMap(), /** Relation triples from OpenIE */ relations:Option[Array[RelationTriple]] = None ) { From 1996cf370c9451ea1bd0ba3661f20a39ce5146f8 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Fri, 23 May 2025 10:36:22 -0700 Subject: [PATCH 09/42] Scala2 --- library/build.sbt | 2 +- .../main/scala/org/clulab/processors/clu/DocumentMaker.scala | 2 ++ .../scala/org/clulab/serialization/DocumentSerializer.scala | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/library/build.sbt b/library/build.sbt index a562a04b5..9b2e770d0 100644 --- a/library/build.sbt +++ b/library/build.sbt @@ -54,7 +54,7 @@ libraryDependencies ++= { "org.scalatest" %% "scalatest" % "3.2.15" % Test, // up to 3.2.19, Apache-2.0 // for odin "org.apache.commons" % "commons-text" % "1.1", // up to 1.12.0, Apache-2.0 - "org.scala-lang.modules" %% "scala-collection-compat" % "2.11.0", // up to 2.12.0 // Apache-2.0 + "org.scala-lang.modules" %% "scala-collection-compat" % "2.13.0", // up to 2.13.0 // Apache-2.0 "org.scala-lang.modules" %% "scala-parser-combinators" % combinatorsVersion, // Apache-2.0 "org.yaml" % "snakeyaml" % "1.14", // up to 2.2, Apache-2.0 // progress bar for training diff --git a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala index bde915cfc..a00fcac83 100644 --- a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala +++ b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala @@ -7,6 +7,8 @@ import org.clulab.processors.Document import scala.collection.mutable.ArrayBuffer import org.clulab.processors.Sentence +import scala.collection.compat._ + class DocumentMaker object DocumentMaker { diff --git a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala index 7f3103591..b4ccb0122 100644 --- a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala +++ b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala @@ -117,7 +117,7 @@ class DocumentSerializer extends Logging { val attachmentsOpt = namedDocumentAttachmentsOpt.map { namedDocumentAttachments => val attachments = mutable.HashMap[String, DocumentAttachment]() - attachments.addAll(namedDocumentAttachments) + attachments ++= namedDocumentAttachments attachments } From 39a9dca35c34e641b44418934e677cbd570e55ee Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Fri, 23 May 2025 13:35:35 -0700 Subject: [PATCH 10/42] Check in Balaur as well --- .../processors/clu/BalaurProcessor.scala | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index 68bd2c6de..d33ca6336 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ 
b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -2,23 +2,24 @@ package org.clulab.processors.clu import com.typesafe.config.Config import com.typesafe.config.ConfigFactory -import org.clulab.numeric.{NumericEntityRecognizer, mkLabelsAndNorms} -import org.clulab.processors.clu.tokenizer.{EnglishLemmatizer, Lemmatizer, OpenDomainEnglishTokenizer, OpenDomainPortugueseTokenizer, OpenDomainSpanishTokenizer, PortugueseLemmatizer, SpanishLemmatizer, Tokenizer} +import org.clulab.numeric.NumericEntityRecognizer +import org.clulab.numeric.NumericUtils import org.clulab.processors.{Document, Processor, Sentence} - -import scala.collection.immutable.ArraySeq +import org.clulab.processors.clu.tokenizer.{EnglishLemmatizer, Lemmatizer, OpenDomainEnglishTokenizer, OpenDomainPortugueseTokenizer, OpenDomainSpanishTokenizer, PortugueseLemmatizer, SpanishLemmatizer, Tokenizer} +import org.clulab.processors.hexatagging.HexaDecoder //import org.clulab.scala.WrappedArray._ -import org.clulab.scala_transformers.encoder.TokenClassifier import org.clulab.scala_transformers.encoder.EncoderMaxTokensRuntimeException +import org.clulab.scala_transformers.encoder.TokenClassifier import org.clulab.sequences.{LexiconNER, NamedEntity} import org.clulab.struct.DirectedGraph import org.clulab.struct.GraphMap +import org.clulab.struct.GraphMap.GraphMapType import org.clulab.utils.{Configured, MathUtils, ToEnhancedDependencies} import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.compat.immutable.ArraySeq + import BalaurProcessor._ -import PostProcessor._ -import org.clulab.processors.hexatagging.HexaDecoder -import org.clulab.struct.GraphMap.GraphMap class BalaurProcessor protected ( val config: Config, @@ -40,8 +41,8 @@ class BalaurProcessor protected ( config, optionalNER, newNumericEntityRecognizerOpt(seasonPathOpt), - mkTokenizer(BalaurProcessor.getArgString(config, s"$prefix.language", Some("EN"))), - mkLemmatizer(BalaurProcessor.getArgString(config, s"$prefix.language", Some("EN"))), + mkTokenizer(getConfigArgString(config, s"$prefix.language", Some("EN"))), + mkLemmatizer(getConfigArgString(config, s"$prefix.language", Some("EN"))), // TokenClassifier.fromFiles(config.getString(s"$prefix.modelName")) TokenClassifier.fromResources(config.getString(s"$prefix.modelName")) ) @@ -185,7 +186,7 @@ class BalaurProcessor protected ( val fullyAnnotatedDocument = if (numericEntityRecognizerOpt.nonEmpty) { val numericMentions = numericEntityRecognizerOpt.get.extractFrom(partlyAnnotatedDocument) - val (newLabels, newNorms) = mkLabelsAndNorms(partlyAnnotatedDocument, numericMentions) + val (newLabels, newNorms) = NumericUtils.mkLabelsAndNorms(partlyAnnotatedDocument, numericMentions) val fullyAnnotatedSentences = partlyAnnotatedDocument.sentences.indices.map { index => partlyAnnotatedDocument.sentences(index).copy( entities = Some(newLabels(index)), @@ -205,7 +206,7 @@ class BalaurProcessor protected ( val tags = labels.map(_.head._1).toArray - postprocessPartOfSpeechTags(words, tags) + PostProcessor.postprocessPartOfSpeechTags(words, tags) tags } @@ -317,7 +318,7 @@ class BalaurProcessor protected ( words: Array[String], lemmas: Array[String], tags: Array[String], termTags: Array[Array[PredictionScore]], nonTermTags: Array[Array[PredictionScore]] - ): GraphMap = { + ): GraphMapType = { val verbose = false val graphs = GraphMap() val size = words.length @@ -391,7 +392,7 @@ object BalaurProcessor { } } - def getArgString (config: Config, argPath: String, defaultValue: Option[String]): 
String = + def getConfigArgString (config: Config, argPath: String, defaultValue: Option[String]): String = if (config.hasPath(argPath)) config.getString(argPath) else if(defaultValue.nonEmpty) defaultValue.get else throw new RuntimeException(s"ERROR: parameter $argPath must be defined!") From cec4087143b099e5b4c18bd0efd4dd0ba1c8306c Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Fri, 23 May 2025 14:03:23 -0700 Subject: [PATCH 11/42] Start with very basic compatibility --- .../processors/clu/BalaurProcessor.scala | 10 ++++----- .../org/clulab/sequences/NamedEntity.scala | 2 +- .../org/clulab/utils/WrappedArraySeq.scala | 21 +++++++++++++++++++ 3 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 library/src/main/scala/org/clulab/utils/WrappedArraySeq.scala diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index d33ca6336..62a47bda1 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -7,7 +7,7 @@ import org.clulab.numeric.NumericUtils import org.clulab.processors.{Document, Processor, Sentence} import org.clulab.processors.clu.tokenizer.{EnglishLemmatizer, Lemmatizer, OpenDomainEnglishTokenizer, OpenDomainPortugueseTokenizer, OpenDomainSpanishTokenizer, PortugueseLemmatizer, SpanishLemmatizer, Tokenizer} import org.clulab.processors.hexatagging.HexaDecoder -//import org.clulab.scala.WrappedArray._ +import org.clulab.utils.WrappedArraySeq import org.clulab.scala_transformers.encoder.EncoderMaxTokensRuntimeException import org.clulab.scala_transformers.encoder.TokenClassifier import org.clulab.sequences.{LexiconNER, NamedEntity} @@ -17,8 +17,6 @@ import org.clulab.struct.GraphMap.GraphMapType import org.clulab.utils.{Configured, MathUtils, ToEnhancedDependencies} import org.slf4j.{Logger, LoggerFactory} -import scala.collection.compat.immutable.ArraySeq - import BalaurProcessor._ class BalaurProcessor protected ( @@ -151,7 +149,7 @@ class BalaurProcessor protected ( val lemmas = lemmatize(words) try { - val allLabelsAndScores = tokenClassifier.predictWithScores(ArraySeq.unsafeWrapArray(words)) + val allLabelsAndScores = tokenClassifier.predictWithScores(WrappedArraySeq(words).toImmutableSeq) val tags = mkPosTags(words, allLabelsAndScores(TASK_TO_INDEX(POS_TASK))) val entities = { val optionalEntities = mkOptionalNerLabels(words, sentence.startOffsets, sentence.endOffsets, tags, lemmas) @@ -257,13 +255,13 @@ class BalaurProcessor protected ( private def mergeNerLabels(generic: Array[String], custom: Array[String]): Array[String] = { require(generic.length == custom.length) - val customNamedEntities = NamedEntity.collect(ArraySeq.unsafeWrapArray(custom)) + val customNamedEntities = NamedEntity.collect(WrappedArraySeq(custom).toImmutableSeq) val result = generic.toArray // A copy of the generic labels is created here. 
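// Aside, a minimal REPL-style sketch (not part of the patch) of what toImmutableSeq
// above relies on: scala-collection-compat's ArraySeq.unsafeWrapArray wraps the
// Array without copying. The upside is a zero-copy Seq; the caveat is that later
// writes to the array remain visible through the immutable-typed wrapper, so the
// wrapped array must not be reused.
import scala.collection.compat.immutable.ArraySeq

val labels = Array("B-PER", "O")
val wrapped: Seq[String] = ArraySeq.unsafeWrapArray(labels) // no copy is made
labels(1) = "I-PER"
assert(wrapped(1) == "I-PER") // the mutation shows through the wrapper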
if (customNamedEntities.isEmpty) result else { - val genericNamedEntities = NamedEntity.collect(ArraySeq.unsafeWrapArray(generic)) + val genericNamedEntities = NamedEntity.collect(WrappedArraySeq(generic).toImmutableSeq) //println(s"Generic NamedEntity: ${genericNamedEntities.mkString(", ")}") //println(s"Custom NamedEntity: ${customNamedEntities.mkString(", ")}") diff --git a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala index 1c2b2bcb9..5e65094f4 100644 --- a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala +++ b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala @@ -23,7 +23,7 @@ object NamedEntity { val INSIDE = "I-" val OUTSIDE = "O" - def collect(bioLabels: IndexedSeq[String]): IndexedSeq[NamedEntity] = { + def collect(bioLabels: Seq[String]): Seq[NamedEntity] = { def mkNamedEntity(label: String, begin: Int): NamedEntity = { // Start looking for the end one after the begin. diff --git a/library/src/main/scala/org/clulab/utils/WrappedArraySeq.scala b/library/src/main/scala/org/clulab/utils/WrappedArraySeq.scala new file mode 100644 index 000000000..a9f13f830 --- /dev/null +++ b/library/src/main/scala/org/clulab/utils/WrappedArraySeq.scala @@ -0,0 +1,21 @@ +package org.clulab.utils + +import scala.collection.mutable +import scala.collection.compat.immutable.ArraySeq + +class WrappedArraySeq[T](array: Array[T]) { + def toSeq: Seq[T] = toImmutableSeq + + def toMutableSeq: mutable.Seq[T] = { + array + } + + def toImmutableSeq: Seq[T] = { + ArraySeq.unsafeWrapArray(array) + } +} + +object WrappedArraySeq { + + def apply[T](array: Array[T]): WrappedArraySeq[T] = new WrappedArraySeq(array) +} From 57d1fa56adbaad12ac3054f85e8bc86aaf10f84e Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Fri, 23 May 2025 23:56:23 -0700 Subject: [PATCH 12/42] Down to last 13 --- build.sbt | 2 +- .../org/clulab/scala/SeqView.scala | 5 ++ .../scala-2.13/org/clulab/scala/SeqView.scala | 5 ++ .../scala-3/org/clulab/scala/SeqView.scala | 5 ++ .../org/clulab/numeric/NumericUtils.scala | 11 +-- .../numeric/actions/NumericActions.scala | 4 +- .../scala/org/clulab/odin/impl/Values.scala | 2 +- .../org/clulab/processors/Document.scala | 12 +-- .../org/clulab/processors/Processor.scala | 2 +- .../org/clulab/processors/Sentence.scala | 78 +++++++++---------- .../processors/clu/BalaurProcessor.scala | 30 +++---- .../clulab/processors/clu/DocumentMaker.scala | 11 +-- .../clulab/processors/clu/PostProcessor.scala | 2 +- .../org/clulab/processors/clu/Veil.scala | 6 +- .../clu/tokenizer/SentenceSplitter.scala | 11 +-- .../clulab/sequences/CombinedLexiconNER.scala | 6 +- .../clulab/sequences/CompactLexiconNER.scala | 14 ++-- .../org/clulab/sequences/LexiconNER.scala | 29 +++---- .../org/clulab/sequences/NamedEntity.scala | 6 +- .../sequences/SeparatedLexiconNER.scala | 2 +- .../scala/org/clulab/sequences/Tagger.scala | 2 +- .../serialization/CoNLLUSerializer.scala | 2 +- .../serialization/DocumentSerializer.scala | 21 ++--- .../serialization/json/JSONSerializer.scala | 2 +- .../clulab/serialization/json/package.scala | 2 +- .../org/clulab/struct/BooleanHashTrie.scala | 4 +- .../scala/org/clulab/struct/IntHashTrie.scala | 4 +- .../clulab/utils/ToEnhancedDependencies.scala | 22 +++--- 28 files changed, 161 insertions(+), 141 deletions(-) create mode 100644 library/src/main/scala-2.11_2.12/org/clulab/scala/SeqView.scala create mode 100644 library/src/main/scala-2.13/org/clulab/scala/SeqView.scala create mode 100644 
library/src/main/scala-3/org/clulab/scala/SeqView.scala diff --git a/build.sbt b/build.sbt index e7c465370..0d5ffcc14 100644 --- a/build.sbt +++ b/build.sbt @@ -18,7 +18,7 @@ val scala37 = "3.7.0" // up to 3.7.0 // Scala33: This is the first official LTS, but hold off until necessary. val scala3 = scala33 -ThisBuild / crossScalaVersions := Seq(scala3, scala213) +ThisBuild / crossScalaVersions := Seq(scala212, scala3, scala213, scala211) ThisBuild / scalaVersion := crossScalaVersions.value.head lazy val root = (project in file(".")) diff --git a/library/src/main/scala-2.11_2.12/org/clulab/scala/SeqView.scala b/library/src/main/scala-2.11_2.12/org/clulab/scala/SeqView.scala new file mode 100644 index 000000000..c49d930cb --- /dev/null +++ b/library/src/main/scala-2.11_2.12/org/clulab/scala/SeqView.scala @@ -0,0 +1,5 @@ +package org.clulab.scala + +object SeqView { + type Immutable[T] = scala.collection.SeqView[T, Seq[T]] +} diff --git a/library/src/main/scala-2.13/org/clulab/scala/SeqView.scala b/library/src/main/scala-2.13/org/clulab/scala/SeqView.scala new file mode 100644 index 000000000..d55c09e97 --- /dev/null +++ b/library/src/main/scala-2.13/org/clulab/scala/SeqView.scala @@ -0,0 +1,5 @@ +package org.clulab.scala + +object SeqView { + type Immutable[T] = scala.collection.View[T] +} diff --git a/library/src/main/scala-3/org/clulab/scala/SeqView.scala b/library/src/main/scala-3/org/clulab/scala/SeqView.scala new file mode 100644 index 000000000..d55c09e97 --- /dev/null +++ b/library/src/main/scala-3/org/clulab/scala/SeqView.scala @@ -0,0 +1,5 @@ +package org.clulab.scala + +object SeqView { + type Immutable[T] = scala.collection.View[T] +} diff --git a/library/src/main/scala/org/clulab/numeric/NumericUtils.scala b/library/src/main/scala/org/clulab/numeric/NumericUtils.scala index 261b8dc74..80149f1ad 100644 --- a/library/src/main/scala/org/clulab/numeric/NumericUtils.scala +++ b/library/src/main/scala/org/clulab/numeric/NumericUtils.scala @@ -71,12 +71,12 @@ object NumericUtils { * @param doc This document is modified in place * @param mentions The numeric mentions previously extracted */ - def mkLabelsAndNorms(doc: Document, mentions: Seq[Mention]): (Array[Array[String]], Array[Array[String]]) = { + def mkLabelsAndNorms(doc: Document, mentions: Seq[Mention]): (Seq[Seq[String]], Seq[Seq[String]]) = { val allEntities = doc.sentences.map { sentence => - sentence.entities.getOrElse(Array.fill(sentence.size)("O")) + sentence.entities.getOrElse(Seq.fill(sentence.size)("O")) } val allNorms = doc.sentences.map { sentence => - sentence.norms.getOrElse(Array.fill(sentence.size)("")) + sentence.norms.getOrElse(Seq.fill(sentence.size)("")) } for (mention <- mentions) { @@ -93,7 +93,7 @@ object NumericUtils { (allEntities, allNorms) } - def removeOneEntityBeforeAnother(entities: Array[String], norms: Array[String], triggerEntity: String, toBeRemovedShortened: String): Unit = { + def removeOneEntityBeforeAnother(entities: Seq[String], norms: Seq[String], triggerEntity: String, toBeRemovedShortened: String): Unit = { // removes entities and norms for unallowable entity sequences, e.g., don't extract 'in' as 'inch' before B-LOC in '... 
Sahal 108 in Senegal' // toBeRemovedShortened is entity without BIO- val zippedEntities = entities.zipWithIndex @@ -114,7 +114,8 @@ object NumericUtils { } } - private def addLabelsAndNorms(m: Norm, entities: Array[String], norms: Array[String], tokenInt: Interval): Unit = { + // TODO: These need to be mutable + private def addLabelsAndNorms(m: Norm, entities: Seq[String], norms: Seq[String], tokenInt: Interval): Unit = { val label = m.neLabel val norm = m.neNorm diff --git a/library/src/main/scala/org/clulab/numeric/actions/NumericActions.scala b/library/src/main/scala/org/clulab/numeric/actions/NumericActions.scala index e2c3fcf97..5d686c2ba 100644 --- a/library/src/main/scala/org/clulab/numeric/actions/NumericActions.scala +++ b/library/src/main/scala/org/clulab/numeric/actions/NumericActions.scala @@ -252,14 +252,14 @@ class NumericActions(seasonNormalizer: SeasonNormalizer, unitNormalizer: UnitNor /** filter out season homonyms (fall, spring) **/ def postprocessNumericEntities(mentions: Seq[Mention]): Seq[Mention] = { - def prevWordsMatch(words: Array[String], wordIndex: Int): Boolean = { + def prevWordsMatch(words: Seq[String], wordIndex: Int): Boolean = { val prevWords = words.slice(wordIndex - 2, wordIndex).map(_.toLowerCase) prevWords.exists(NumericActions.preSeasons) || prevWords.containsSlice(NumericActions.inThe) } - def contextWordsMatch(words: Array[String], wordIndex: Int): Boolean = { + def contextWordsMatch(words: Seq[String], wordIndex: Int): Boolean = { val window = 5 val contextWords = words.slice(wordIndex - window, wordIndex + window).map(_.toLowerCase) diff --git a/library/src/main/scala/org/clulab/odin/impl/Values.scala b/library/src/main/scala/org/clulab/odin/impl/Values.scala index c2e03bbd5..0b78e7f45 100644 --- a/library/src/main/scala/org/clulab/odin/impl/Values.scala +++ b/library/src/main/scala/org/clulab/odin/impl/Values.scala @@ -3,7 +3,7 @@ package org.clulab.odin.impl import org.clulab.processors.Document trait Values { - def values(strings: Option[Array[String]], msg: String): Array[String] = + def values(strings: Option[Seq[String]], msg: String): Seq[String] = strings match { case None => sys.error(msg) case Some(strings) => strings diff --git a/library/src/main/scala/org/clulab/processors/Document.scala b/library/src/main/scala/org/clulab/processors/Document.scala index f8d226c56..1cae6a826 100644 --- a/library/src/main/scala/org/clulab/processors/Document.scala +++ b/library/src/main/scala/org/clulab/processors/Document.scala @@ -17,7 +17,7 @@ import scala.collection.mutable * Last Modified: Add apply method to copy Document. 
*/ class Document( - val sentences: Array[Sentence], + val sentences: Seq[Sentence], /** Unique id for this document, if any */ val id: Option[String] = None, /** Clusters of coreferent mentions */ @@ -30,7 +30,7 @@ class Document( ) extends Serializable { def copy( - sentences: Array[Sentence] = sentences, + sentences: Seq[Sentence] = sentences, id: Option[String] = id, coreferenceChains: Option[CorefChains] = coreferenceChains, text: Option[String] = text, @@ -190,11 +190,11 @@ class Document( object Document { - def apply(sentences: Array[Sentence]): Document = apply(sentences, text = None) + def apply(sentences: Seq[Sentence]): Document = apply(sentences, text = None) - def apply(sentences: Array[Sentence], text: Option[String]): Document = apply(id = None, sentences, coref = None, text) + def apply(sentences: Seq[Sentence], text: Option[String]): Document = apply(id = None, sentences, coref = None, text) - def apply(id: Option[String], sentences: Array[Sentence], coref: Option[CorefChains], text: Option[String]): Document = { + def apply(id: Option[String], sentences: Seq[Sentence], coref: Option[CorefChains], text: Option[String]): Document = { val document = new Document( sentences, id = id, @@ -209,7 +209,7 @@ object Document { def apply(doc: Document): Document = apply(doc.id, doc.sentences, doc.coreferenceChains, doc.text) - def apply(doc: Document, sentences: Array[Sentence]): Document = { + def apply(doc: Document, sentences: Seq[Sentence]): Document = { val newDocument = new Document( sentences, id = doc.id, diff --git a/library/src/main/scala/org/clulab/processors/Processor.scala b/library/src/main/scala/org/clulab/processors/Processor.scala index 00d5fcdf1..e3df1e506 100644 --- a/library/src/main/scala/org/clulab/processors/Processor.scala +++ b/library/src/main/scala/org/clulab/processors/Processor.scala @@ -105,7 +105,7 @@ trait Processor { def tagPartsOfSpeech(doc: Document): Unit /** Lemmatization; modifies the document in place. */ - def lemmatize(words: Array[String]): Array[String] + def lemmatize(words: Seq[String]): Seq[String] /** Named Entity Recognition; modifies the document in place. 
*/ def recognizeNamedEntities (doc:Document): Unit diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala b/library/src/main/scala/org/clulab/processors/Sentence.scala index 02396a4af..c5d74a2a0 100644 --- a/library/src/main/scala/org/clulab/processors/Sentence.scala +++ b/library/src/main/scala/org/clulab/processors/Sentence.scala @@ -9,11 +9,11 @@ import scala.collection.mutable /** Stores the annotations for a single sentence */ class Sentence( /** Raw tokens in this sentence; these MUST match the original text */ - val raw: Array[String], + val raw: Seq[String], /** Start character offsets for the raw tokens; start at 0 */ - val startOffsets: Array[Int], + val startOffsets: Seq[Int], /** End character offsets for the raw tokens; start at 0 */ - val endOffsets: Array[Int], + val endOffsets: Seq[Int], /** * Words produced from raw tokens, closer to what the downstream components expect @@ -22,24 +22,24 @@ class Sentence( * However, the number of raw tokens MUST always equal the number of words, so if the exact text must be recovered, * please use the raw tokens with the same positions */ - val words: Array[String], + val words: Seq[String], /** POS tags for words */ - val tags: Option[Array[String]] = None, + val tags: Option[Seq[String]] = None, /** Lemmas */ - val lemmas: Option[Array[String]] = None, + val lemmas: Option[Seq[String]] = None, /** NE labels */ - val entities: Option[Array[String]] = None, + val entities: Option[Seq[String]] = None, /** Normalized values of named/numeric entities, such as dates */ - val norms: Option[Array[String]] = None, + val norms: Option[Seq[String]] = None, /** Shallow parsing labels */ - val chunks: Option[Array[String]] = None, + val chunks: Option[Seq[String]] = None, /** Constituent tree of this sentence; includes head words */ val syntacticTree: Option[Tree] = None, /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ val graphs: GraphMapType = GraphMap(), /** Relation triples from OpenIE */ - val relations:Option[Array[RelationTriple]] = None + val relations:Option[Seq[RelationTriple]] = None ) extends Serializable { def size:Int = raw.length @@ -64,7 +64,7 @@ class Sentence( def equivalenceHash: Int = { val stringCode = "org.clulab.processors.Sentence" - def getAnnotationsHash(labelsOpt: Option[Array[_]]): Int = labelsOpt + def getAnnotationsHash(labelsOpt: Option[Seq[_]]): Int = labelsOpt .map { labels => val hs = labels.map(_.hashCode) val result = Hash.withLast(labels.length)( @@ -170,19 +170,19 @@ class Sentence( // TODO def copy( - raw: Array[String] = raw, - startOffsets: Array[Int] = startOffsets, - endOffsets: Array[Int] = endOffsets, - words: Array[String] = words, - - tags: Option[Array[String]] = tags, - lemmas: Option[Array[String]] = lemmas, - entities: Option[Array[String]] = entities, - norms: Option[Array[String]] = norms, - chunks: Option[Array[String]] = chunks, + raw: Seq[String] = raw, + startOffsets: Seq[Int] = startOffsets, + endOffsets: Seq[Int] = endOffsets, + words: Seq[String] = words, + + tags: Option[Seq[String]] = tags, + lemmas: Option[Seq[String]] = lemmas, + entities: Option[Seq[String]] = entities, + norms: Option[Seq[String]] = norms, + chunks: Option[Seq[String]] = chunks, syntacticTree: Option[Tree] = syntacticTree, graphs: GraphMapType = graphs, - relations: Option[Array[RelationTriple]] = relations + relations: Option[Seq[RelationTriple]] = relations ): Sentence = new Sentence( raw, startOffsets, endOffsets, words, @@ -203,31 +203,31 @@ class Sentence( object 
Sentence { def apply( - raw:Array[String], - startOffsets: Array[Int], - endOffsets: Array[Int]): Sentence = + raw:Seq[String], + startOffsets: Seq[Int], + endOffsets: Seq[Int]): Sentence = new Sentence(raw, startOffsets, endOffsets, raw) // words are identical to raw tokens (a common situation) def apply( - raw:Array[String], - startOffsets: Array[Int], - endOffsets: Array[Int], - words: Array[String]): Sentence = + raw:Seq[String], + startOffsets: Seq[Int], + endOffsets: Seq[Int], + words: Seq[String]): Sentence = new Sentence(raw, startOffsets, endOffsets, words) def apply( - raw: Array[String], - startOffsets: Array[Int], - endOffsets: Array[Int], - words: Array[String], - tags: Option[Array[String]], - lemmas: Option[Array[String]], - entities: Option[Array[String]], - norms: Option[Array[String]], - chunks: Option[Array[String]], + raw: Seq[String], + startOffsets: Seq[Int], + endOffsets: Seq[Int], + words: Seq[String], + tags: Option[Seq[String]], + lemmas: Option[Seq[String]], + entities: Option[Seq[String]], + norms: Option[Seq[String]], + chunks: Option[Seq[String]], tree: Option[Tree], deps: GraphMapType, - relations: Option[Array[RelationTriple]] + relations: Option[Seq[RelationTriple]] ): Sentence = { new Sentence( raw, startOffsets, endOffsets, words, diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index 62a47bda1..df968610b 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -91,7 +91,7 @@ class BalaurProcessor protected ( } /** Lemmatization; modifies the document in place */ - override def lemmatize(words: Array[String]): Array[String] = { + override def lemmatize(words: Seq[String]): Seq[String] = { val lemmas = words.zipWithIndex.map { case (word, index) => val lemma = wordLemmatizer.lemmatizeWord(word) // a lemma may be empty in some weird Unicode situations @@ -109,7 +109,7 @@ class BalaurProcessor protected ( } /** Generates cheap lemmas with the word in lower case, for languages where a lemmatizer is not available */ - def cheapLemmatize(sentence: Sentence): Array[String] = { + def cheapLemmatize(sentence: Sentence): Seq[String] = { sentence.words.map(_.toLowerCase()) } @@ -149,7 +149,7 @@ class BalaurProcessor protected ( val lemmas = lemmatize(words) try { - val allLabelsAndScores = tokenClassifier.predictWithScores(WrappedArraySeq(words).toImmutableSeq) + val allLabelsAndScores = tokenClassifier.predictWithScores(words) val tags = mkPosTags(words, allLabelsAndScores(TASK_TO_INDEX(POS_TASK))) val entities = { val optionalEntities = mkOptionalNerLabels(words, sentence.startOffsets, sentence.endOffsets, tags, lemmas) @@ -199,7 +199,7 @@ class BalaurProcessor protected ( fullyAnnotatedDocument } - private def mkPosTags(words: Array[String], labels: Array[Array[(String, Float)]]): Array[String] = { + private def mkPosTags(words: Seq[String], labels: Seq[Array[(String, Float)]]): Seq[String] = { assert(labels.length == words.length) val tags = labels.map(_.head._1).toArray @@ -209,9 +209,9 @@ class BalaurProcessor protected ( } private def mkOptionalNerLabels( - words: Array[String], startOffsets: Array[Int], endOffsets: Array[Int], - tags: Array[String], lemmas: Array[String] - ): Option[Array[String]] = { + words: Seq[String], startOffsets: Seq[Int], endOffsets: Seq[Int], + tags: Seq[String], lemmas: Seq[String] + ): Option[Seq[String]] = { // NER 
labels from the custom NER optionalNER.map { ner => val sentence = Sentence( @@ -234,10 +234,10 @@ class BalaurProcessor protected ( } /** Must be called after assignPosTags and lemmatize because it requires Sentence.tags and Sentence.lemmas */ - private def mkNamedEntityLabels(words: Array[String], labels: Array[Array[(String, Float)]], optionalNERLabels: Option[Array[String]]): Array[String] = { + private def mkNamedEntityLabels(words: Seq[String], labels: Array[Array[(String, Float)]], optionalNERLabels: Option[Seq[String]]): Seq[String] = { assert(labels.length == words.length) - val genericLabels = NamedEntity.patch(labels.map(_.head._1).toArray) + val genericLabels = NamedEntity.patch(labels.map(_.head._1)) if (optionalNERLabels.isEmpty) { genericLabels @@ -252,16 +252,16 @@ class BalaurProcessor protected ( } } - private def mergeNerLabels(generic: Array[String], custom: Array[String]): Array[String] = { + private def mergeNerLabels(generic: Seq[String], custom: Seq[String]): Seq[String] = { require(generic.length == custom.length) - val customNamedEntities = NamedEntity.collect(WrappedArraySeq(custom).toImmutableSeq) + val customNamedEntities = NamedEntity.collect(custom) val result = generic.toArray // A copy of the generic labels is created here. if (customNamedEntities.isEmpty) result else { - val genericNamedEntities = NamedEntity.collect(WrappedArraySeq(generic).toImmutableSeq) + val genericNamedEntities = NamedEntity.collect(generic) //println(s"Generic NamedEntity: ${genericNamedEntities.mkString(", ")}") //println(s"Custom NamedEntity: ${customNamedEntities.mkString(", ")}") @@ -271,10 +271,10 @@ class BalaurProcessor protected ( } } - private def mkChunkLabels(words: Array[String], labels: Array[Array[(String, Float)]]): Array[String] = { + private def mkChunkLabels(words: Seq[String], labels: Array[Array[(String, Float)]]): Seq[String] = { assert(labels.length == words.length) - labels.map(_.head._1).toArray + labels.map(_.head._1) } // The head has one score, the label has another. Here the two scores are interpolated @@ -313,7 +313,7 @@ class BalaurProcessor protected ( } private def mkDependencyLabelsUsingHexaTags( - words: Array[String], lemmas: Array[String], tags: Array[String], + words: Seq[String], lemmas: Seq[String], tags: Seq[String], termTags: Array[Array[PredictionScore]], nonTermTags: Array[Array[PredictionScore]] ): GraphMapType = { diff --git a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala index a00fcac83..6fd8eaa76 100644 --- a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala +++ b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala @@ -1,13 +1,14 @@ package org.clulab.processors.clu -import org.slf4j.LoggerFactory -import org.slf4j.Logger -import org.clulab.processors.clu.tokenizer.Tokenizer import org.clulab.processors.Document -import scala.collection.mutable.ArrayBuffer import org.clulab.processors.Sentence +import org.clulab.processors.clu.tokenizer.Tokenizer +import org.clulab.scala.WrappedArrayBuffer._ +import org.slf4j.Logger +import org.slf4j.LoggerFactory import scala.collection.compat._ +import scala.collection.mutable.ArrayBuffer class DocumentMaker @@ -73,7 +74,7 @@ object DocumentMaker { charOffset += charactersBetweenTokens } // note: NO postprocessing happens in this case, so use it carefully! 
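// Aside: a sketch of the new side-effect-free lemmatization flow introduced above.
// Lemmas are now computed from the words alone and attached by copying the
// immutable Sentence, rather than written into the Document in place.
// (Illustrative only, assuming any concrete Processor instance.)
import org.clulab.processors.{Processor, Sentence}

def withLemmas(proc: Processor, sentence: Sentence): Sentence = {
  val lemmas = proc.lemmatize(sentence.words)
  sentence.copy(lemmas = Some(lemmas))
}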
- sents += new Sentence(sentence.toArray, startOffsets.toArray, endOffsets.toArray, sentence.toArray) + sents += new Sentence(sentence.toSeq, startOffsets, endOffsets, sentence.toSeq) charOffset += charactersBetweenSentences - charactersBetweenTokens if(keepText) { text.append(sentence.mkString(mkSep(charactersBetweenTokens))) diff --git a/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala index 8de6a5be2..5e3001f86 100644 --- a/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala @@ -15,7 +15,7 @@ object PostProcessor { val WET_OR_DRY_SEASON = Pattern.compile("""(?i)[0-9]+(ds|ws)""") /** POS tag corrections, in place */ - def postprocessPartOfSpeechTags(words: Array[String], tags: Array[String]): Array[String] = { + def postprocessPartOfSpeechTags(words: Seq[String], tags: Seq[String]): Seq[String] = { // unigram patterns words.indices.foreach { index => diff --git a/library/src/main/scala/org/clulab/processors/clu/Veil.scala b/library/src/main/scala/org/clulab/processors/clu/Veil.scala index 20d136209..aca16ee42 100644 --- a/library/src/main/scala/org/clulab/processors/clu/Veil.scala +++ b/library/src/main/scala/org/clulab/processors/clu/Veil.scala @@ -122,7 +122,7 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) originalDocument.copy(veiledSentences) } - def unveilStringArray(veiledArrayOpt: Option[Array[String]], sentenceIndex: Int, veil: String): Option[Array[String]] = { + def unveilStringArray(veiledArrayOpt: Option[Seq[String]], sentenceIndex: Int, veil: String): Option[Seq[String]] = { val unveilArray = unveilArrays(sentenceIndex) val originalLength = originalDocument.sentences(sentenceIndex).words.length @@ -156,7 +156,7 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) def unveilSyntacticTree(syntacticTreeOpt: Option[Tree]): Option[Tree] = syntacticTreeOpt // TODO - def unveilRelations(relations: Option[Array[RelationTriple]]): Option[Array[RelationTriple]] = relations + def unveilRelations(relations: Option[Seq[RelationTriple]]): Option[Seq[RelationTriple]] = relations protected def unveilSentence(veiledSentence: Sentence, sentenceIndex: Int): Sentence = { val originalSentence = originalDocument.sentences(sentenceIndex) @@ -167,7 +167,7 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) val unveiledSentence = veiledSentence.copy(unveiledRaw, unveiledStartOffsets, unveiledEndOffsets, unveiledWords) - def unveilStringArray(veiledArrayOpt: Option[Array[String]], veil: String): Option[Array[String]] = + def unveilStringArray(veiledArrayOpt: Option[Seq[String]], veil: String): Option[Seq[String]] = this.unveilStringArray(veiledArrayOpt, sentenceIndex, veil) val tags = unveilStringArray(unveiledSentence.tags, Veil.veiledTag) diff --git a/library/src/main/scala/org/clulab/processors/clu/tokenizer/SentenceSplitter.scala b/library/src/main/scala/org/clulab/processors/clu/tokenizer/SentenceSplitter.scala index f644da4f0..2b9dd435e 100644 --- a/library/src/main/scala/org/clulab/processors/clu/tokenizer/SentenceSplitter.scala +++ b/library/src/main/scala/org/clulab/processors/clu/tokenizer/SentenceSplitter.scala @@ -1,6 +1,7 @@ package org.clulab.processors.clu.tokenizer import org.clulab.processors.Sentence +import org.clulab.scala.WrappedArrayBuffer._ import java.io.{BufferedReader, InputStreamReader} import 
scala.collection.mutable.ArrayBuffer @@ -18,7 +19,7 @@ abstract class RuleBasedSentenceSplitter extends SentenceSplitter { * Sentence splitting over a stream of tokens * This includes detection of abbreviations as well **/ - override def split(tokens:Array[RawToken], sentenceSplit:Boolean):Array[Sentence] = { + override def split(tokens:Array[RawToken], sentenceSplit:Boolean):Seq[Sentence] = { val sentences = new ArrayBuffer[Sentence]() var raw = new ArrayBuffer[String]() var words = new ArrayBuffer[String]() @@ -67,7 +68,7 @@ abstract class RuleBasedSentenceSplitter extends SentenceSplitter { // found a valid end of sentence; start an empty one if (isEos) { - sentences += Sentence(raw.toArray, beginPositions.toArray, endPositions.toArray, words.toArray) + sentences += Sentence(raw.toSeq, beginPositions.toSeq, endPositions.toSeq, words.toSeq) raw = new ArrayBuffer[String]() words = new ArrayBuffer[String]() beginPositions = new ArrayBuffer[Int]() @@ -104,7 +105,7 @@ abstract class RuleBasedSentenceSplitter extends SentenceSplitter { // // create the current sentence // - sentences += Sentence(raw.toArray, beginPositions.toArray, endPositions.toArray, words.toArray) + sentences += Sentence(raw, beginPositions, endPositions, words) raw = new ArrayBuffer[String]() words = new ArrayBuffer[String]() beginPositions = new ArrayBuffer[Int]() @@ -130,10 +131,10 @@ abstract class RuleBasedSentenceSplitter extends SentenceSplitter { // a few words left over at the end if (words.nonEmpty) { - sentences += Sentence(raw.toArray, beginPositions.toArray, endPositions.toArray, words.toArray) + sentences += Sentence(raw, beginPositions, endPositions, words) } - sentences.toArray + sentences } def isAbbreviation(word:String):Boolean diff --git a/library/src/main/scala/org/clulab/sequences/CombinedLexiconNER.scala b/library/src/main/scala/org/clulab/sequences/CombinedLexiconNER.scala index cbc12a745..9c12ab411 100644 --- a/library/src/main/scala/org/clulab/sequences/CombinedLexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/CombinedLexiconNER.scala @@ -64,7 +64,7 @@ class CombinedLexiconNER ( * @param sentence The input sentence * @return An array of BIO notations the store the outcome of the matches */ - def find(sentence: Sentence): Array[String] = { + def find(sentence: Sentence): Seq[String] = { val caseSensitiveTokens = getTokens(sentence) val caseInsensitiveTokens = if (hasCaseInsensitive) caseSensitiveTokens.map(_.toLowerCase) else caseSensitiveTokens val seq = findLongestMatch(sentence, caseSensitiveTokens, caseInsensitiveTokens) @@ -79,7 +79,7 @@ class CombinedLexiconNER ( * This means that the longest match is always chosen, even if coming from a matcher with lower priority * Only ties are disambiguated according to the order provided in the constructor */ - protected def findLongestMatch(sentence: Sentence, caseSensitiveTokens: Array[String], caseInsensitiveTokens: Array[String]): Array[String] = { + protected def findLongestMatch(sentence: Sentence, caseSensitiveTokens: Seq[String], caseInsensitiveTokens: Seq[String]): Seq[String] = { val labels = new Array[String](caseSensitiveTokens.length) val length = labels.length var offset = 0 @@ -91,7 +91,7 @@ class CombinedLexiconNER ( def getSpanAndIndex: CombinedLexiconNER.SpanAndIndex = { - def innerGetSpanAndIndex(condition: Boolean, intHashTrie: IntHashTrie, tokens: => Array[String]): CombinedLexiconNER.SpanAndIndex = { + def innerGetSpanAndIndex(condition: Boolean, intHashTrie: IntHashTrie, tokens: => Seq[String]): 
CombinedLexiconNER.SpanAndIndex = { if (condition) { val intTrieNodeMatch = intHashTrie.findAt(tokens, offset) CombinedLexiconNER.SpanAndIndex(intTrieNodeMatch.length, intTrieNodeMatch.completePath) diff --git a/library/src/main/scala/org/clulab/sequences/CompactLexiconNER.scala b/library/src/main/scala/org/clulab/sequences/CompactLexiconNER.scala index 924c8688f..08bee6769 100644 --- a/library/src/main/scala/org/clulab/sequences/CompactLexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/CompactLexiconNER.scala @@ -56,7 +56,7 @@ class CompactLexiconNER( def getLabels: Seq[String] = labels - def find(sentence: Sentence): Array[String] = { + def find(sentence: Sentence): Seq[String] = { val caseSensitiveTokens = getTokens(sentence) val caseInsensitiveTokens = if (hasCaseInsensitive) caseSensitiveTokens.map(_.toLowerCase) @@ -66,14 +66,14 @@ class CompactLexiconNER( seq } - protected def findLongestMatch(sentence: Sentence, caseSensitiveTokens: Array[String], - caseInsensitiveTokens: Array[String]): Array[String] = { + protected def findLongestMatch(sentence: Sentence, caseSensitiveTokens: Seq[String], + caseInsensitiveTokens: Seq[String]): Seq[String] = { val labels = new Array[String](caseSensitiveTokens.length) val length = labels.length var offset = 0 - val caseSensitiveStringIds = if (hasCaseSensitive) caseSensitiveTokens.map( caseSensitiveCompactTrie.stringIds) else Array.empty[Int] - val caseInsensitiveStringIds = if (hasCaseInsensitive) caseInsensitiveTokens.map(caseInsensitiveCompactTrie.stringIds) else Array.empty[Int] + val caseSensitiveStringIds = if (hasCaseSensitive) caseSensitiveTokens.map( caseSensitiveCompactTrie.stringIds) else Seq.empty[Int] + val caseInsensitiveStringIds = if (hasCaseInsensitive) caseInsensitiveTokens.map(caseInsensitiveCompactTrie.stringIds) else Seq.empty[Int] // These are intended to cut down on the number of objects created. // It worked better when there was only one setting for case. 
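// Aside: a sketch of the by-name parameter idiom used by the matchers above
// (`tokens: => Seq[String]`). The argument is evaluated only on the branch that
// needs it, so the lower-cased token copy costs nothing when a matcher is case
// sensitive. (Names below are illustrative.)
def demo(condition: Boolean, tokens: => Seq[String]): Int =
  if (condition) tokens.length else 0

val words = Seq("Only", "If", "Needed")
demo(condition = false, { println("lowercasing"); words.map(_.toLowerCase) }) // prints nothing
demo(condition = true, { println("lowercasing"); words.map(_.toLowerCase) })  // prints "lowercasing" once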
@@ -88,7 +88,7 @@ class CompactLexiconNER( def updateSpanAndIndex(): Unit = { - def innerGetSpanAndIndex(condition: Boolean, stringIds: Array[Int], spanAndIndex: SpanAndIndex, + def innerGetSpanAndIndex(condition: Boolean, stringIds: Seq[Int], spanAndIndex: SpanAndIndex, compactTrie: CompactTrie): SpanAndIndex = { if (condition) { val id = stringIds(offset) @@ -136,7 +136,7 @@ class CompactLexiconNER( labels } - def findAt(ids: Array[Int], wordIndex: Int, nodeMatch: SpanAndIndex, compactTrie: CompactTrie): Unit = { + def findAt(ids: Seq[Int], wordIndex: Int, nodeMatch: SpanAndIndex, compactTrie: CompactTrie): Unit = { def linearSearch(value: Int, left: Int, right: Int): Int = { var index = left diff --git a/library/src/main/scala/org/clulab/sequences/LexiconNER.scala b/library/src/main/scala/org/clulab/sequences/LexiconNER.scala index b1c643fd3..cc8bebf16 100644 --- a/library/src/main/scala/org/clulab/sequences/LexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/LexiconNER.scala @@ -1,6 +1,7 @@ package org.clulab.sequences import org.clulab.processors.Sentence +import org.clulab.scala.SeqView import org.clulab.scala.WrappedArray._ import org.clulab.struct.{EntityValidator, TrueEntityValidator} import org.clulab.utils.ArrayView @@ -55,7 +56,7 @@ abstract class LexiconNER(val knownCaseInsensitives: Set[String], val useLemmas: * @param sentence The input sentence * @return An array of BIO notations the store the outcome of the matches */ - def find(sentence: Sentence): Array[String] + def find(sentence: Sentence): Seq[String] def getLabels: Seq[String] /** @@ -74,49 +75,49 @@ abstract class LexiconNER(val knownCaseInsensitives: Set[String], val useLemmas: } } - def hasCondition(wordsView: ArrayView[String], condition: Char => Boolean): Boolean = + def hasCondition(wordsView: SeqView.Immutable[String], condition: Char => Boolean): Boolean = wordsView.exists(_.exists(condition)) - def hasLetter(wordsView: ArrayView[String]): Boolean = + def hasLetter(wordsView: SeqView.Immutable[String]): Boolean = hasCondition(wordsView, Character.isLetter) - def hasDigit(wordsView: ArrayView[String]): Boolean = + def hasDigit(wordsView: SeqView.Immutable[String]): Boolean = hasCondition(wordsView, Character.isDigit) - def hasUpperCaseLetters(wordsView: ArrayView[String]): Boolean = + def hasUpperCaseLetters(wordsView: SeqView.Immutable[String]): Boolean = hasCondition(wordsView, Character.isUpperCase) - def hasSpace(wordsView: ArrayView[String]): Boolean = wordsView.length > 1 + def hasSpace(wordsView: SeqView.Immutable[String]): Boolean = wordsView.size > 1 - def countCharacters(wordsView: ArrayView[String]): Int = + def countCharacters(wordsView: SeqView.Immutable[String]): Int = // Go ahead and calculate them all even though we only need to know if they exceed a value. wordsView.foldLeft(0) { (sum, word) => sum + word.length } - val contentQualifiers: Array[ArrayView[String] => Boolean] = Array( + val contentQualifiers: Array[SeqView.Immutable[String] => Boolean] = Array( // Start with the quick and easy ones. 
hasSpace, - { wordsView => countCharacters(wordsView) > LexiconNER.KNOWN_CASE_INSENSITIVE_LENGTH }, + { (wordsView: SeqView.Immutable[String]) => countCharacters(wordsView) > LexiconNER.KNOWN_CASE_INSENSITIVE_LENGTH }, hasDigit, hasUpperCaseLetters, - { wordsView => knownCaseInsensitives.contains(wordsView.head) } + { (wordsView: SeqView.Immutable[String]) => knownCaseInsensitives.contains(wordsView.head) } ) protected def contentfulSpan(sentence: Sentence, start: Int, length: Int): Boolean = { - val wordsView = ArrayView(sentence.words, start, start + length) + val wordsView = sentence.words.view(start, start + length) // A valid view/span must have a letter and at least one of the other qualifiers. val contentful = hasLetter(wordsView) && contentQualifiers.exists(_(wordsView)) contentful } - protected val getTokens: Sentence => Array[String] = + protected val getTokens: Sentence => Seq[String] = // Decide this once and for all and don't revisit it each time getTokens is called. if (useLemmas) getLemmas else getWords - protected def getLemmas(sentence: Sentence): Array[String] = sentence.lemmas.get + protected def getLemmas(sentence: Sentence): Seq[String] = sentence.lemmas.get - protected def getWords(sentence: Sentence): Array[String] = sentence.words + protected def getWords(sentence: Sentence): Seq[String] = sentence.words } object LexiconNER { diff --git a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala index 5e65094f4..3ad767ca0 100644 --- a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala +++ b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala @@ -57,7 +57,7 @@ object NamedEntity { bioLabels } - def isValid(bioLabels: Array[String], index: Int): Boolean = { + def isValid(bioLabels: Seq[String], index: Int): Boolean = { val currBioLabel = bioLabels(index) !currBioLabel.startsWith(INSIDE) || { 0 < index && { @@ -69,7 +69,7 @@ object NamedEntity { } } - def isValid(bioLabels: Array[String]): Boolean = + def isValid(bioLabels: Seq[String]): Boolean = bioLabels.indices.forall(isValid(bioLabels, _)) // Only INSIDEs can be invalid and they are made valid by @@ -78,7 +78,7 @@ object NamedEntity { BEGIN + bioLabel.drop(INSIDE.length) // Note that this patches the array in place! 
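// Aside: a sketch of how the version-specific SeqView.Immutable alias from the
// three scala-* source trees above is meant to unify view-based code such as
// contentfulSpan. The one name resolves to scala.collection.SeqView[T, Seq[T]]
// on 2.11/2.12 and to scala.collection.View[T] on 2.13 and 3, so the same
// helper cross-builds everywhere.
import org.clulab.scala.SeqView

def hasDigitInWindow(words: Seq[String], start: Int, end: Int): Boolean = {
  val view: SeqView.Immutable[String] = words.view.slice(start, end)
  view.exists(_.exists(Character.isDigit))
}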
- def patch(bioLabels: Array[String]): Array[String] = { + def patch(bioLabels: Seq[String]): Seq[String] = { bioLabels.indices.foreach { index => if (!isValid(bioLabels, index)) bioLabels(index) = toBegin(bioLabels(index)) diff --git a/library/src/main/scala/org/clulab/sequences/SeparatedLexiconNER.scala b/library/src/main/scala/org/clulab/sequences/SeparatedLexiconNER.scala index 435b91b5d..ac3053997 100644 --- a/library/src/main/scala/org/clulab/sequences/SeparatedLexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/SeparatedLexiconNER.scala @@ -110,7 +110,7 @@ class SeparatedLexiconNER( labels } - protected def findAt(tokens: Array[String], caseInsensitiveTokens: Array[String], offset: Int): (Int, Int) = { + protected def findAt(tokens: Seq[String], caseInsensitiveTokens: Seq[String], offset: Int): (Int, Int) = { def findAt(matcher: BooleanHashTrie): Int = matcher.findAt(if (matcher.caseInsensitive) caseInsensitiveTokens else tokens, offset).length diff --git a/library/src/main/scala/org/clulab/sequences/Tagger.scala b/library/src/main/scala/org/clulab/sequences/Tagger.scala index 973e4dba2..8e3f54e5c 100644 --- a/library/src/main/scala/org/clulab/sequences/Tagger.scala +++ b/library/src/main/scala/org/clulab/sequences/Tagger.scala @@ -8,5 +8,5 @@ import org.clulab.processors.Sentence * Date: 10/12/17 */ trait Tagger[L] { - def find(sentence:Sentence):Array[L] + def find(sentence:Sentence):Seq[L] } diff --git a/library/src/main/scala/org/clulab/serialization/CoNLLUSerializer.scala b/library/src/main/scala/org/clulab/serialization/CoNLLUSerializer.scala index 523873e65..9508b133f 100644 --- a/library/src/main/scala/org/clulab/serialization/CoNLLUSerializer.scala +++ b/library/src/main/scala/org/clulab/serialization/CoNLLUSerializer.scala @@ -8,7 +8,7 @@ object CoNLLUSerializer { val UNDEF = "_" val ROOT = "root" - def getOrElseUndef(stringsOpt: Option[Array[String]], i: Int): String = + def getOrElseUndef(stringsOpt: Option[Seq[String]], i: Int): String = stringsOpt.map(_(i)).getOrElse(UNDEF) /** diff --git a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala index b4ccb0122..c2f3f885c 100644 --- a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala +++ b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala @@ -3,6 +3,7 @@ package org.clulab.serialization import org.clulab.processors.DocumentAttachment import org.clulab.processors.DocumentAttachmentBuilderFromText import org.clulab.processors.{Document, Sentence} +import org.clulab.scala.WrappedArrayBuffer._ import org.clulab.struct._ import org.clulab.utils.Logging import org.json4s.DefaultFormats @@ -122,7 +123,7 @@ class DocumentSerializer extends Logging { } val doc = new Document( - sentences = sents.toArray, + sentences = sents, text = text, attachments = attachmentsOpt ) @@ -170,7 +171,7 @@ class DocumentSerializer extends Logging { Interval(t(0), t(1)) } - private def loadRelations(r: BufferedReader, sz: Int):Option[Array[RelationTriple]] = { + private def loadRelations(r: BufferedReader, sz: Int):Option[Seq[RelationTriple]] = { val ret = (0 until sz) map { _ => val line = r.readLine() @@ -178,7 +179,7 @@ class DocumentSerializer extends Logging { val relInterval = tokens(2) match { case "N" => None; case s => Some(mkRelationInterval(s)) } RelationTriple(tokens(0).toFloat, mkRelationInterval(tokens(1)), relInterval, mkRelationInterval(tokens(3))) } - Some(ret.toArray) + Some(ret) 
} private def loadSentence(r:BufferedReader): Sentence = { @@ -236,7 +237,7 @@ class DocumentSerializer extends Logging { var deps = GraphMap() var tree:Option[Tree] = None - var relations:Option[Array[RelationTriple]] = None + var relations:Option[Seq[RelationTriple]] = None while ({ bits = read(r) if (bits(0) == START_DEPENDENCIES) { @@ -256,10 +257,10 @@ class DocumentSerializer extends Logging { }) () Sentence( - rawBuffer.toArray, - startOffsetBuffer.toArray, - endOffsetBuffer.toArray, - wordBuffer.toArray, + rawBuffer, + startOffsetBuffer, + endOffsetBuffer, + wordBuffer, bufferOption(tagBuffer, nilTags), bufferOption(lemmaBuffer, nilLemmas), bufferOption(entityBuffer, nilEntities), @@ -292,10 +293,10 @@ class DocumentSerializer extends Logging { dg } - private def bufferOption[T: ClassTag](b:ArrayBuffer[T], allNils:Boolean): Option[Array[T]] = { + private def bufferOption[T: ClassTag](b:ArrayBuffer[T], allNils:Boolean): Option[Seq[T]] = { if (b.isEmpty) None else if (allNils) None - else Some(b.toArray) + else Some(b) } def save(doc:Document, os:PrintWriter): Unit = save(doc, os, keepText = false) diff --git a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala index cac75f40a..26853d11b 100644 --- a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala +++ b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala @@ -81,7 +81,7 @@ object JSONSerializer { def getInts(json: JValue, k: String): Array[Int] = (json \ k).extract[Array[Int]] - def getLabelsOpt(json: JValue, k: String): Option[Array[String]] = json \ k match { + def getLabelsOpt(json: JValue, k: String): Option[Seq[String]] = json \ k match { case JNothing => None case contents => Some(contents.extract[Array[String]]) } diff --git a/library/src/main/scala/org/clulab/serialization/json/package.scala b/library/src/main/scala/org/clulab/serialization/json/package.scala index 06cf6715b..a27c14174 100644 --- a/library/src/main/scala/org/clulab/serialization/json/package.scala +++ b/library/src/main/scala/org/clulab/serialization/json/package.scala @@ -22,7 +22,7 @@ package object json { } // Arrays cannot be directly converted to JValue - implicit class ArrayOps(s: Option[Array[String]]) { + implicit class ArrayOps(s: Option[Seq[String]]) { def toSerializableJSON: Option[List[String]] = s match { case Some(s) => Some(s.toList) case None => None diff --git a/library/src/main/scala/org/clulab/struct/BooleanHashTrie.scala b/library/src/main/scala/org/clulab/struct/BooleanHashTrie.scala index fd0b586fa..dc4bed380 100644 --- a/library/src/main/scala/org/clulab/struct/BooleanHashTrie.scala +++ b/library/src/main/scala/org/clulab/struct/BooleanHashTrie.scala @@ -80,7 +80,7 @@ class BooleanHashTrie(val label: String, val caseInsensitive: Boolean = true) ex * When multiple paths are found, the longest one is kept * Text must be normalized (i.e., case folding) BEFORE this call, if necessary! 
*/ - def findAt(sequenceNormalized: Array[String], offset: Int): BooleanTrieNode.Match = { + def findAt(sequenceNormalized: Seq[String], offset: Int): BooleanTrieNode.Match = { val longestMatch = new BooleanTrieNode.Match() entries.get(sequenceNormalized(offset)).map { tree => @@ -129,7 +129,7 @@ case class BooleanTrieNode(token: String, var completePath: Boolean, var childre * @param longestMatch The value of the longest match interval * @return true if search should stop here; false otherwise */ - def find(sequence: Array[String], + def find(sequence: Seq[String], startOffset: Int, currentSpanLength: Int, longestMatch: BooleanTrieNode.Match): Boolean = { diff --git a/library/src/main/scala/org/clulab/struct/IntHashTrie.scala b/library/src/main/scala/org/clulab/struct/IntHashTrie.scala index 70a22984e..9b3403cc5 100644 --- a/library/src/main/scala/org/clulab/struct/IntHashTrie.scala +++ b/library/src/main/scala/org/clulab/struct/IntHashTrie.scala @@ -82,7 +82,7 @@ class IntHashTrie(val caseInsensitive: Boolean = true) extends Serializable { * When multiple paths are found, the longest one is kept * Text must be normalized (i.e., case folding) BEFORE this call, if necessary! */ - def findAt(sequenceNormalized: Array[String], offset: Int): IntTrieNode.Match = { + def findAt(sequenceNormalized: Seq[String], offset: Int): IntTrieNode.Match = { val longestMatch = new IntTrieNode.Match() entries.get(sequenceNormalized(offset)).map { tree => @@ -134,7 +134,7 @@ case class IntTrieNode(token:String, var completePath: Int, var children: Option * @param longestMatch The value of the longest match interval * @return true if search should stop here; false otherwise */ - def find(sequence: Array[String], + def find(sequence: Seq[String], startOffset: Int, currentSpanLength: Int, longestMatch: IntTrieNode.Match): Boolean = { diff --git a/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala b/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala index 63eab7913..e6da8e3b7 100644 --- a/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala +++ b/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala @@ -33,7 +33,7 @@ object ToEnhancedDependencies { dgi.toDirectedGraph(Some(words.length)) } - def generateUniversalEnhancedDependencies(words: Array[String], lemmas: Array[String], tags: Array[String], dg: DirectedGraph[String]): DirectedGraph[String] = { + def generateUniversalEnhancedDependencies(words: Seq[String], lemmas: Seq[String], tags: Seq[String], dg: DirectedGraph[String]): DirectedGraph[String] = { val dgi = dg.toDirectedGraphIndex() collapseMWEs(lemmas, tags, dgi) val collapsedNmods = collapsePrepositionsUniversal(words, lemmas, tags, dgi) @@ -102,7 +102,7 @@ object ToEnhancedDependencies { * @param sentence * @param dgi */ - def expandConj(words: Array[String], dgi: DirectedGraphIndex[String]): Unit = { + def expandConj(words: Seq[String], dgi: DirectedGraphIndex[String]): Unit = { val toRemove = new ListBuffer[Edge[String]] val conjs = dgi.findByName("conj") for (conj <- conjs) { @@ -140,7 +140,7 @@ object ToEnhancedDependencies { } def collapsePrepositionsUniversal( - words: Array[String], lemmas: Array[String], tags: Array[String], + words: Seq[String], lemmas: Seq[String], tags: Seq[String], dgi:DirectedGraphIndex[String]): Seq[EdgeSpec] = { val collapsedNmods = new ArrayBuffer[EdgeSpec]() @@ -156,7 +156,7 @@ object ToEnhancedDependencies { * @param dgi The directed graph of collapsed dependencies at this stage */ def 
collapsePrepositionsUniversalNmodCase( - words: Array[String], + words: Seq[String], dgi:DirectedGraphIndex[String], collapsedNmods: ArrayBuffer[EdgeSpec]): Unit = { @@ -189,7 +189,7 @@ object ToEnhancedDependencies { * @param dgi The directed graph of collapsed dependencies at this stage */ def collapsePrepositionsUniversalDueTo( - lemmas: Array[String], tags: Array[String], + lemmas: Seq[String], tags: Seq[String], dgi:DirectedGraphIndex[String], collapsedNmods: ArrayBuffer[EdgeSpec]): Unit = { @@ -234,8 +234,8 @@ object ToEnhancedDependencies { * @param dgi */ def collapseMWEs( - lemmas: Array[String], - tags: Array[String], + lemmas: Seq[String], + tags: Seq[String], dgi:DirectedGraphIndex[String]): Unit = { val size = lemmas.length @@ -261,7 +261,7 @@ object ToEnhancedDependencies { if(shouldRemove) remove(toRemove, dgi) } - def findMultiWord(first: String, firstPos: Int, words: Array[String], dgi:DirectedGraphIndex[String]): String = { + def findMultiWord(first: String, firstPos: Int, words: Seq[String], dgi:DirectedGraphIndex[String]): String = { val buffer = new StringBuilder buffer.append(first) @@ -302,7 +302,7 @@ object ToEnhancedDependencies { * @param sentence The sentence to operate on * @param dgi The directed graph of collapsed dependencies at this stage */ - def propagateSubjectsAndObjectsInConjVerbs(tags: Array[String], dgi:DirectedGraphIndex[String], universal:Boolean): Unit = { + def propagateSubjectsAndObjectsInConjVerbs(tags: Seq[String], dgi:DirectedGraphIndex[String], universal:Boolean): Unit = { val conjs = dgi.findByName("conj").sortBy(_.source) for(conj <- conjs) { val left = math.min(conj.source, conj.destination) @@ -385,7 +385,7 @@ object ToEnhancedDependencies { * @param sentence The sentence to operate on * @param dgi The directed graph of collapsed dependencies at this stage */ - def propagateConjSubjectsAndObjects(tags: Array[String], dgi:DirectedGraphIndex[String]): Unit = { + def propagateConjSubjectsAndObjects(tags: Seq[String], dgi:DirectedGraphIndex[String]): Unit = { val conjs = dgi.findByName("conj").sortBy(_.source) for(conj <- conjs) { val left = math.min(conj.source, conj.destination) @@ -421,7 +421,7 @@ object ToEnhancedDependencies { * @param sentence The sentence to operate on * @param dgi The directed graph of collapsed dependencies at this stage */ - def pushSubjectsObjectsInsideRelativeClauses(tags: Array[String], dgi:DirectedGraphIndex[String], universal:Boolean): Unit = { + def pushSubjectsObjectsInsideRelativeClauses(tags: Seq[String], dgi:DirectedGraphIndex[String], universal:Boolean): Unit = { val rels = if(universal) dgi.findByName("acl:relcl") else dgi.findByName("rcmod") From ed80611107350a5de81ce4a6791a11b5249999e2 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Sat, 24 May 2025 20:02:52 -0700 Subject: [PATCH 13/42] Finish compiling library --- .../org/clulab/numeric/NumericUtils.scala | 94 +++++++++++++++---- .../clulab/processors/clu/DocumentMaker.scala | 36 +++---- .../clulab/processors/clu/PostProcessor.scala | 44 ++++++++- .../clu/tokenizer/SentenceSplitter.scala | 76 +++++++-------- .../processors/clu/tokenizer/Tokenizer.scala | 4 +- .../org/clulab/sequences/NamedEntity.scala | 46 +++++++-- .../sequences/SeparatedLexiconNER.scala | 2 +- .../org/clulab/sequences/SequenceTagger.scala | 2 +- 8 files changed, 210 insertions(+), 94 deletions(-) diff --git a/library/src/main/scala/org/clulab/numeric/NumericUtils.scala b/library/src/main/scala/org/clulab/numeric/NumericUtils.scala index 80149f1ad..e60bb225d 100644 --- 
a/library/src/main/scala/org/clulab/numeric/NumericUtils.scala
+++ b/library/src/main/scala/org/clulab/numeric/NumericUtils.scala
@@ -5,7 +5,9 @@ import org.clulab.numeric.mentions.Norm
 import org.clulab.odin.{EventMention, Mention}
 import org.clulab.processors.Document
 import org.clulab.struct.Interval
+import org.clulab.utils.WrappedArraySeq
 
+import scala.collection.mutable
 import _root_.scala.util.control.Breaks._
 
 object NumericUtils {
@@ -72,36 +74,95 @@
    * @param mentions The numeric mentions previously extracted
    */
   def mkLabelsAndNorms(doc: Document, mentions: Seq[Mention]): (Seq[Seq[String]], Seq[Seq[String]]) = {
-    val allEntities = doc.sentences.map { sentence =>
-      sentence.entities.getOrElse(Seq.fill(sentence.size)("O"))
+    val pertinentMentions = mentions.collect {
+      case mention: Norm if NumericActions.isNumeric(mention) => mention
     }
-    val allNorms = doc.sentences.map { sentence =>
-      sentence.norms.getOrElse(Seq.fill(sentence.size)(""))
+    val mentionsBySentenceIndex = pertinentMentions.groupBy { mention => mention.sentence }
+    val zippedLabelsAndNorms = doc.sentences.zipWithIndex.map { case (sentence, index) =>
+      val mentions = mentionsBySentenceIndex.getOrElse(index, Seq.empty)
+
+      if (mentions.isEmpty) {
+        val entities = sentence.entities.getOrElse(WrappedArraySeq(Array.fill(sentence.size)("O")).toImmutableSeq)
+        val norms = sentence.norms.getOrElse(WrappedArraySeq(Array.fill(sentence.size)("")).toImmutableSeq)
+
+        (entities, norms)
+      }
+      else {
+        val mutableEntities = sentence.entities
+          .map { entities => Array(entities: _*) }
+          .getOrElse(Array.fill(sentence.size)("O"))
+        val mutableNorms = sentence.norms
+          .map { norms => Array(norms: _*) }
+          .getOrElse(Array.fill(sentence.size)(""))
+
+        mentions.foreach { mention =>
+          addLabelsAndNorms(mention.neLabel, mention.neNorm, mention.tokenInterval, mutableEntities, mutableNorms)
+        }
+        removeOneEntityBeforeAnother(mutableEntities, mutableNorms, "B-LOC", "MEASUREMENT-LENGTH")
+
+        val immutableEntities = WrappedArraySeq(mutableEntities).toImmutableSeq
+        val immutableNorms = WrappedArraySeq(mutableNorms).toImmutableSeq
+        (immutableEntities, immutableNorms)
+      }
     }
+    val unzippedLabelsAndNorms = zippedLabelsAndNorms.unzip
+
+    unzippedLabelsAndNorms
+  }
+
+  def removeOneEntityBeforeAnother(entities: mutable.Seq[String], norms: mutable.Seq[String], triggerEntity: String, toBeRemovedShortened: String): Unit = {
+    var triggered = false
 
-    for (mention <- mentions) {
-      if (NumericActions.isNumeric(mention) && mention.isInstanceOf[Norm]) {
-        val sentenceIndex = mention.sentence
-        val entities = allEntities(sentenceIndex)
-        val norms = allNorms(sentenceIndex)
+    entities.indices.reverse.foreach { index =>
+      val entity = entities(index)
 
-        addLabelsAndNorms(mention.asInstanceOf[Norm], entities, norms, mention.tokenInterval)
-        removeOneEntityBeforeAnother(entities, norms, "B-LOC", "MEASUREMENT-LENGTH")
+      if (entity == triggerEntity)
+        triggered = true
+      else {
+        if (triggered)
+          if (entity.endsWith(toBeRemovedShortened)) {
+            entities(index) = "O"
+            norms(index) = ""
+          }
+          else
+            triggered = false
       }
     }
-    (allEntities, allNorms)
+
+    // removes entities and norms for unallowable entity sequences, e.g., don't extract 'in' as 'inch' before B-LOC in '... Sahal 108 in Senegal'
+    // toBeRemovedShortened is entity without BIO-
+    val zippedEntities = entities.zipWithIndex
+
+    // So remove all consecutive MEASUREMENT-LENGTH in front of a B-LOC
+    // Can it just be done backwards in one pass in a state machine?
+
+    zippedEntities.foreach { case (outerEntity, outerIndex) =>
+      if (outerIndex > 0 && outerEntity == triggerEntity && entities(outerIndex - 1).endsWith(toBeRemovedShortened)) {
+        // Go in reverse replacing indices and norms in the immediate preceding mention.
+        breakable { // TODO: rewrite
+          for ((innerEntity, innerIndex) <- zippedEntities.slice(0, outerIndex).reverse) {
+            if (innerEntity.endsWith(toBeRemovedShortened)) {
+              entities(innerIndex) = "O"
+              norms(innerIndex) = ""
+            } else break()
+          }
+        }
+      }
+    }
   }
 
-  def removeOneEntityBeforeAnother(entities: Seq[String], norms: Seq[String], triggerEntity: String, toBeRemovedShortened: String): Unit = {
+  def removeOneEntityBeforeAnother2(entities: mutable.Seq[String], norms: mutable.Seq[String], triggerEntity: String, toBeRemovedShortened: String): Unit = {
     // removes entities and norms for unallowable entity sequences, e.g., don't extract 'in' as 'inch' before B-LOC in '... Sahal 108 in Senegal'
     // toBeRemovedShortened is entity without BIO-
     val zippedEntities = entities.zipWithIndex
 
+    // So remove all consecutive MEASUREMENT-LENGTH in front of a B-LOC
+    // Can it just be done backwards in one pass in a state machine?
+
     zippedEntities.foreach { case (outerEntity, outerIndex) =>
       if (outerIndex > 0 && outerEntity == triggerEntity && entities(outerIndex - 1).endsWith(toBeRemovedShortened)) {
         // Go in reverse replacing indices and norms in the immediate preceding mention.
-        zippedEntities.slice(0, outerIndex).reverse
         breakable { // TODO: rewrite
           for ((innerEntity, innerIndex) <- zippedEntities.slice(0, outerIndex).reverse) {
             if (innerEntity.endsWith(toBeRemovedShortened)) {
@@ -115,10 +176,7 @@
     }
   }
 
   // TODO: These need to be mutable
-  private def addLabelsAndNorms(m: Norm, entities: Seq[String], norms: Seq[String], tokenInt: Interval): Unit = {
-    val label = m.neLabel
-    val norm = m.neNorm
-
+  private def addLabelsAndNorms(label: String, norm: String, tokenInt: Interval, entities: mutable.Seq[String], norms: mutable.Seq[String]): Unit = {
     // careful here: we may override some existing entities and norms
     // but, given that the numeric entity rules tend to be high precision, this is probably Ok...
     tokenInt.headOption.foreach { index =>
diff --git a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala
index a00fcac83..e37f32109 100644
--- a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala
+++ b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala
@@ -27,33 +27,23 @@ object DocumentMaker {
   }
 
   /** Constructs a document of tokens from an array of untokenized sentences */
-  def mkDocumentFromSentences(tokenizer:Tokenizer,
-                              sentences:Iterable[String],
-                              keepText:Boolean,
-                              charactersBetweenSentences:Int): Document = {
-    val sents = new ArrayBuffer[Sentence]()
+  def mkDocumentFromSentences(
+    tokenizer: Tokenizer,
+    texts: Iterable[String],
+    keepText: Boolean,
+    charactersBetweenSentences: Int
+  ): Document = {
     var characterOffset = 0
-    for(text <- sentences) {
-      val sent = tokenizer.tokenize(text, sentenceSplit = false).head // we produce a single sentence here!
+    val sentences = texts.map { text =>
+      val sentence = tokenizer.tokenize(text, sentenceSplit = false, characterOffset).head // We produce a single sentence here!
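// Aside: an illustrative trace of the single backward pass in the reworked
// removeOneEntityBeforeAnother above (answering the "state machine" question).
// Scanning right to left, a B-LOC arms the trigger; each immediately preceding
// MEASUREMENT-LENGTH label is blanked until a non-matching label disarms it.
import org.clulab.numeric.NumericUtils
import scala.collection.mutable

val entities = mutable.Seq("O", "B-MEASUREMENT-LENGTH", "B-LOC", "O")
val norms = mutable.Seq("", "2.74", "", "")
NumericUtils.removeOneEntityBeforeAnother(entities, norms, "B-LOC", "MEASUREMENT-LENGTH")
// entities is now ("O", "O", "B-LOC", "O") and norms ("", "", "", "")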
- // update character offsets between sentences - for(i <- 0 until sent.size) { - sent.startOffsets(i) += characterOffset - sent.endOffsets(i) += characterOffset - } - - // move the character offset after the current sentence - characterOffset = sent.endOffsets.last + charactersBetweenSentences - - //println("SENTENCE: " + sent.words.mkString(", ")) - //println("Start offsets: " + sent.startOffsets.mkString(", ")) - //println("End offsets: " + sent.endOffsets.mkString(", ")) - sents += sent - } + characterOffset = sentence.endOffsets.last + charactersBetweenSentences + sentence + }.toVector // TODO: What is the best concrete collection to use? val textOpt = Option.when(keepText)(sentences.mkString(mkSep(charactersBetweenSentences))) - val doc = Document(sents.toArray, textOpt) + val document = Document(sentences, textOpt) - doc + document } /** Constructs a document of tokens from an array of tokenized sentences */ diff --git a/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala index 5e3001f86..9761eb6d2 100644 --- a/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala @@ -1,9 +1,7 @@ package org.clulab.processors.clu -import org.clulab.processors.Sentence - import java.util.regex.Pattern -import org.clulab.struct.Edge +import scala.collection.mutable object PostProcessor { // @@ -15,7 +13,7 @@ object PostProcessor { val WET_OR_DRY_SEASON = Pattern.compile("""(?i)[0-9]+(ds|ws)""") /** POS tag corrections, in place */ - def postprocessPartOfSpeechTags(words: Seq[String], tags: Seq[String]): Seq[String] = { + def postprocessPartOfSpeechTags2(words: Seq[String], tags: mutable.Seq[String]): Seq[String] = { // unigram patterns words.indices.foreach { index => @@ -45,4 +43,42 @@ object PostProcessor { tags } + /** POS tag corrections */ + def postprocessPartOfSpeechTags1(words: Seq[String], tags: Seq[String]): Seq[String] = { + val newTags = words.indices.map { index => + val word = words(index) + val oldTag = tags(index) + val newTag = { + // unigram patterns + if (VERSUS_PATTERN.matcher(word).matches) + "CC" // "versus" seems like a CC to me. but maybe not... 
+ else if (WET_OR_DRY_SEASON.matcher(word).matches) + "CD" // such years should be CDs because our grammars expect it + // bigram patterns + else if (word.equalsIgnoreCase("due")) { + if (words.lift(index + 1).map(_.toLowerCase).contains("to")) "IN" // "due" in "due to" must be a preposition + else oldTag + } + else if (word.equalsIgnoreCase("fall")) { + if (tags.lift(index + 1).contains("CD")) "NN" // "fall" followed by a CD must be NN + else oldTag + } + else oldTag + } + + newTag + } + + newTags + } + + def postprocessPartOfSpeechTags(words: Seq[String], tags: Seq[String]): Seq[String] = { + val result1 = postprocessPartOfSpeechTags1(words, tags) + val result2 = postprocessPartOfSpeechTags2(words, mutable.Seq(tags: _*)) + + if (result1 != result2) + println("It went awry!") + + result1 + } } diff --git a/library/src/main/scala/org/clulab/processors/clu/tokenizer/SentenceSplitter.scala b/library/src/main/scala/org/clulab/processors/clu/tokenizer/SentenceSplitter.scala index 2b9dd435e..8a4790246 100644 --- a/library/src/main/scala/org/clulab/processors/clu/tokenizer/SentenceSplitter.scala +++ b/library/src/main/scala/org/clulab/processors/clu/tokenizer/SentenceSplitter.scala @@ -4,6 +4,7 @@ import org.clulab.processors.Sentence import org.clulab.scala.WrappedArrayBuffer._ import java.io.{BufferedReader, InputStreamReader} +import scala.collection.compat._ import scala.collection.mutable.ArrayBuffer import scala.util.matching.Regex import scala.util.Using @@ -11,15 +12,17 @@ import scala.util.Using import SentenceSplitter._ trait SentenceSplitter { - def split(tokens:Array[RawToken], sentenceSplit:Boolean):Array[Sentence] + def split(tokens:Array[RawToken], sentenceSplit:Boolean, characterOffset: Int = 0):Seq[Sentence] } abstract class RuleBasedSentenceSplitter extends SentenceSplitter { /** * Sentence splitting over a stream of tokens - * This includes detection of abbreviations as well + * This includes detection of abbreviations as well. + * The characterOffset is included so that Sentences + * in a longer text need not be edited afterward. **/ - override def split(tokens:Array[RawToken], sentenceSplit:Boolean):Seq[Sentence] = { + override def split(tokens: Array[RawToken], sentenceSplit: Boolean, characterOffset: Int): Seq[Sentence] = { val sentences = new ArrayBuffer[Sentence]() var raw = new ArrayBuffer[String]() var words = new ArrayBuffer[String]() @@ -27,49 +30,46 @@ abstract class RuleBasedSentenceSplitter extends SentenceSplitter { var endPositions = new ArrayBuffer[Int]() for (i <- tokens.indices) { - val crt = tokens(i) - + val curr: RawToken = tokens(i) // next and previous tokens. We need these to detect proper ends of sentences - var next: Option[RawToken] = None - if (i < tokens.length - 1) next = Some(tokens(i + 1)) - var prev: Option[RawToken] = None - if (i > 0) prev = Some(tokens(i - 1)) + val nextOpt: Option[RawToken] = Option.when(i < tokens.length - 1)(tokens(i + 1)) + val prevOpt: Option[RawToken] = Option.when(i > 0)(tokens(i - 1)) // // we handle end-of-sentence markers (periods, etc.) here // this includes detecting if a period belongs to the previous token (if it's an abbreviation) // and understanding if this token actually marks the end of a sentence // - if (EOS.findFirstIn(crt.word).isDefined) { + if (EOS.findFirstIn(curr.word).isDefined) { // found a token that normally indicates end of sentence var isEos = sentenceSplit // period that probably belongs to an abbreviation and should not be marked as EOS - if (crt.word == "." 
&& prev.isDefined && isAbbreviation(prev.get.word) && crt.beginPosition == prev.get.endPosition) { + if (curr.word == "." && prevOpt.isDefined && isAbbreviation(prevOpt.get.word) && curr.beginPosition == prevOpt.get.endPosition) { // found a period that should be attached to the previous abbreviation - endPositions(endPositions.size - 1) = crt.endPosition - words(words.size - 1) = words.last + crt.word - raw(raw.size - 1) = raw.last + crt.raw + endPositions(endPositions.size - 1) = curr.endPosition + characterOffset + words(words.size - 1) = words.last + curr.word + raw(raw.size - 1) = raw.last + curr.raw // this is not an end of sentence if the next token does NOT look like the start of a sentence // TODO: maybe this should be handled with a binary classifier instead? - if (isEos && next.isDefined && !isSentStart(next.get.word)) { + if (isEos && nextOpt.isDefined && !isSentStart(nextOpt.get.word)) { isEos = false } } // regular end-of-sentence marker; treat it as a distinct token else { - raw += crt.raw - words += crt.word - beginPositions += crt.beginPosition - endPositions += crt.endPosition + raw += curr.raw + words += curr.word + beginPositions += curr.beginPosition + characterOffset + endPositions += curr.endPosition + characterOffset } // found a valid end of sentence; start an empty one if (isEos) { - sentences += Sentence(raw.toSeq, beginPositions.toSeq, endPositions.toSeq, words.toSeq) - raw = new ArrayBuffer[String]() + sentences += Sentence(raw, beginPositions, endPositions, words) + raw = new ArrayBuffer[String]() // TODO: Check whether clear() is sufficient. words = new ArrayBuffer[String]() beginPositions = new ArrayBuffer[Int]() endPositions = new ArrayBuffer[Int]() @@ -77,27 +77,27 @@ abstract class RuleBasedSentenceSplitter extends SentenceSplitter { } // found a period *inside* a token; sometimes this is an EOS - else if(EOS_FOLLOWEDBY_BULLET.findFirstIn(crt.raw).isDefined && - crt.raw.lastIndexOf('.') > 0 && - next.isDefined && isSentStart(next.get.word)) { + else if(EOS_FOLLOWEDBY_BULLET.findFirstIn(curr.raw).isDefined && + curr.raw.lastIndexOf('.') > 0 && + nextOpt.isDefined && isSentStart(nextOpt.get.word)) { //println(s"FOUND EOS INSIDE TOKEN: ${crt.raw}") // // create the last token from the token fragment before the period, and the period itself // - val dotRawPosition = crt.raw.lastIndexOf('.') + val dotRawPosition = curr.raw.lastIndexOf('.') assert(dotRawPosition > 0) - val dotWordPosition = crt.word.lastIndexOf('.') + val dotWordPosition = curr.word.lastIndexOf('.') assert(dotWordPosition > 0) - raw += crt.raw.substring(0, dotRawPosition) - words += crt.word.substring(0, dotWordPosition) - beginPositions += crt.beginPosition - endPositions += crt.beginPosition + dotRawPosition + raw += curr.raw.substring(0, dotRawPosition) + words += curr.word.substring(0, dotWordPosition) + beginPositions += curr.beginPosition + characterOffset + endPositions += curr.beginPosition + dotRawPosition + characterOffset // This is just for the period with length of 1.
- raw += crt.raw.substring(dotRawPosition, dotRawPosition + 1) - words += crt.word.substring(dotWordPosition, dotWordPosition + 1) + raw += curr.raw.substring(dotRawPosition, dotRawPosition + 1) + words += curr.word.substring(dotWordPosition, dotWordPosition + 1) beginPositions += endPositions.last endPositions += beginPositions.last + 1 val lastPosition = endPositions.last @@ -114,18 +114,18 @@ abstract class RuleBasedSentenceSplitter extends SentenceSplitter { // // add the part of the token after the period to the new sentence // - raw += crt.raw.substring(dotRawPosition + 1) - words += crt.word.substring(dotWordPosition + 1) + raw += curr.raw.substring(dotRawPosition + 1) + words += curr.word.substring(dotWordPosition + 1) beginPositions += lastPosition endPositions += lastPosition + raw.head.length } else { // just a regular token - raw += crt.raw - words += crt.word - beginPositions += crt.beginPosition - endPositions += crt.endPosition + raw += curr.raw + words += curr.word + beginPositions += curr.beginPosition + characterOffset + endPositions += curr.endPosition + characterOffset } } diff --git a/library/src/main/scala/org/clulab/processors/clu/tokenizer/Tokenizer.scala b/library/src/main/scala/org/clulab/processors/clu/tokenizer/Tokenizer.scala index 85c6a09bc..11fbff7fb 100644 --- a/library/src/main/scala/org/clulab/processors/clu/tokenizer/Tokenizer.scala +++ b/library/src/main/scala/org/clulab/processors/clu/tokenizer/Tokenizer.scala @@ -70,7 +70,7 @@ class Tokenizer( } /** Tokenization and sentence splitting */ - def tokenize(text: String, sentenceSplit: Boolean = true): Array[Sentence] = { + def tokenize(text: String, sentenceSplit: Boolean = true, characterOffset: Int = 0): Seq[Sentence] = { // raw tokenization, using the antlr grammar val rawTokens = readTokens(text) // now apply all the additional non-Antlr steps such as solving contractions, normalization, post-processing @@ -78,7 +78,7 @@ class Tokenizer( step.process(rawTokens) } // sentence splitting, including detection of abbreviations - val sentences = sentenceSplitter.split(stepTokens, sentenceSplit) + val sentences = sentenceSplitter.split(stepTokens, sentenceSplit, characterOffset) sentences } diff --git a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala index 3ad767ca0..231f57096 100644 --- a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala +++ b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala @@ -1,5 +1,7 @@ package org.clulab.sequences +import scala.collection.mutable + // This is definitely not the most efficient as far as number of objects // created, but there should be a NamedEntity thing to hold and not just // shadows of it projected onto the BIO notation in an array of strings. @@ -57,11 +59,18 @@ object NamedEntity { bioLabels } + // Only INSIDEs can be invalid and they are made valid by + // converting them into a BEGIN. 
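// Editor's note: a worked example of the intended repair (hypothetical labels). An I- label
// is valid only when the previous label is the same I- label or its matching B- label; any
// other I- is rewritten to a B-:
//
//   patch(Seq("I-PER", "I-PER", "O", "I-LOC")) // yields Seq("B-PER", "I-PER", "O", "B-LOC")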
+ def toBegin(bioLabel: String): String = BEGIN + bioLabel.drop(INSIDE.length) + + def isValid(bioLabels: Seq[String]): Boolean = bioLabels.indices.forall(isValid(bioLabels, _)) + def isValid(bioLabels: Seq[String], index: Int): Boolean = { val currBioLabel = bioLabels(index) !currBioLabel.startsWith(INSIDE) || { 0 < index && { val prevBioLabel = bioLabels(index - 1) + prevBioLabel == currBioLabel || { prevBioLabel == toBegin(currBioLabel) } @@ -69,20 +78,43 @@ } } - def isValid(bioLabels: Seq[String]): Boolean = - bioLabels.indices.forall(isValid(bioLabels, _)) + def isValid2(currBioLabel: String, prevBioLabelOpt: Option[String]): Boolean = { + !currBioLabel.startsWith(INSIDE) || prevBioLabelOpt.forall { prevBioLabel => + prevBioLabel == currBioLabel || prevBioLabel == toBegin(currBioLabel) + } + } - // Only INSIDEs can be invalid and they are made valid by - // converting them into a BEGIN. - def toBegin(bioLabel: String): String = - BEGIN + bioLabel.drop(INSIDE.length) // Note that this patches the array in place! - def patch(bioLabels: Seq[String]): Seq[String] = { + def patch2(bioLabels: mutable.Seq[String]): Seq[String] = { bioLabels.indices.foreach { index => if (!isValid(bioLabels, index)) bioLabels(index) = toBegin(bioLabels(index)) } bioLabels } + + def patch1(bioLabels: Seq[String]): Seq[String] = { + var prevBioLabelOpt = bioLabels.lift(-1) + val newBioLabels = bioLabels.indices.map { index => + val oldBioLabel = bioLabels(index) + val newBioLabel = + if (!isValid2(oldBioLabel, prevBioLabelOpt)) toBegin(oldBioLabel) + else oldBioLabel + + prevBioLabelOpt = Some(newBioLabel) + newBioLabel + } + + newBioLabels + } + + def patch(bioLabels: Seq[String]): Seq[String] = { + val result1 = patch1(bioLabels) + val result2 = patch2(mutable.Seq(bioLabels: _*)) + + if (result1 != result2) + println("This went awry!") + result1 + } } diff --git a/library/src/main/scala/org/clulab/sequences/SeparatedLexiconNER.scala b/library/src/main/scala/org/clulab/sequences/SeparatedLexiconNER.scala index ac3053997..852ba1d69 100644 --- a/library/src/main/scala/org/clulab/sequences/SeparatedLexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/SeparatedLexiconNER.scala @@ -63,7 +63,7 @@ class SeparatedLexiconNER( * @param sentence The input sentence * @return An array of BIO notations that store the outcome of the matches */ - def find(sentence: Sentence): Array[String] = { + def find(sentence: Sentence): Seq[String] = { val seq = findLongestMatch(sentence) seq } diff --git a/library/src/main/scala/org/clulab/sequences/SequenceTagger.scala b/library/src/main/scala/org/clulab/sequences/SequenceTagger.scala index 6c902e89f..93fd32c5b 100644 --- a/library/src/main/scala/org/clulab/sequences/SequenceTagger.scala +++ b/library/src/main/scala/org/clulab/sequences/SequenceTagger.scala @@ -23,7 +23,7 @@ trait SequenceTagger[L, F] extends Tagger[L] { /** Abstract method that extracts the training labels for a given sentence */ def labelExtractor(sentence:Sentence): Array[L] - override def find(sentence: Sentence): Array[L] = classesOf(sentence) + override def find(sentence: Sentence): Seq[L] = classesOf(sentence) def save(fn:File): Unit From 741307c3cda458cd3bee3706e921763c152b535e Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Sat, 24 May 2025 20:43:26 -0700 Subject: [PATCH 14/42] Compile for other Scala versions --- .../scala/org/clulab/processors/Processor.scala | 2 +- .../scala/org/clulab/processors/Sentence.scala | 4 ++-- .../clulab/processors/clu/BalaurProcessor.scala |
17 ++++++++++------- .../clulab/processors/clu/DocumentMaker.scala | 2 +- .../clulab/processors/clu/PostProcessor.scala | 2 +- .../scala/org/clulab/processors/clu/Veil.scala | 5 +++-- .../org/clulab/sequences/NamedEntity.scala | 14 ++++++++------ 7 files changed, 26 insertions(+), 20 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/Processor.scala b/library/src/main/scala/org/clulab/processors/Processor.scala index e3df1e506..b7cab3423 100644 --- a/library/src/main/scala/org/clulab/processors/Processor.scala +++ b/library/src/main/scala/org/clulab/processors/Processor.scala @@ -44,7 +44,7 @@ trait Processor { } } - val combinedSentences = documents.flatMap(_.sentences).toArray + val combinedSentences = documents.flatMap(_.sentences) val combinedDocument = new Document( sentences = combinedSentences, id = headId, diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala b/library/src/main/scala/org/clulab/processors/Sentence.scala index c5d74a2a0..c7b5a6f20 100644 --- a/library/src/main/scala/org/clulab/processors/Sentence.scala +++ b/library/src/main/scala/org/clulab/processors/Sentence.scala @@ -192,8 +192,8 @@ class Sentence( def offset(offset: Int): Sentence = { if (offset == 0) this else { - val newStartOffsets = startOffsets.map(_ + offset).toArray - val newEndOffsets = endOffsets.map(_ + offset).toArray + val newStartOffsets = startOffsets.map(_ + offset) + val newEndOffsets = endOffsets.map(_ + offset) copy(startOffsets = newStartOffsets, endOffsets = newEndOffsets) } diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index df968610b..c3e617e51 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -190,7 +190,7 @@ class BalaurProcessor protected ( entities = Some(newLabels(index)), norms = Some(newNorms(index)) ) - }.toArray + } partlyAnnotatedDocument.copy(sentences = fullyAnnotatedSentences) } @@ -199,10 +199,10 @@ class BalaurProcessor protected ( fullyAnnotatedDocument } - private def mkPosTags(words: Seq[String], labels: Seq[Array[(String, Float)]]): Seq[String] = { + private def mkPosTags(words: Seq[String], labels: Array[Array[(String, Float)]]): Seq[String] = { assert(labels.length == words.length) - val tags = labels.map(_.head._1).toArray + val tags = WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq PostProcessor.postprocessPartOfSpeechTags(words, tags) tags @@ -237,7 +237,8 @@ class BalaurProcessor protected ( private def mkNamedEntityLabels(words: Seq[String], labels: Array[Array[(String, Float)]], optionalNERLabels: Option[Seq[String]]): Seq[String] = { assert(labels.length == words.length) - val genericLabels = NamedEntity.patch(labels.map(_.head._1)) + val labelsSeq = WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq + val genericLabels = NamedEntity.patch(labelsSeq) if (optionalNERLabels.isEmpty) { genericLabels @@ -256,11 +257,12 @@ class BalaurProcessor protected ( require(generic.length == custom.length) val customNamedEntities = NamedEntity.collect(custom) - val result = generic.toArray // A copy of the generic labels is created here. if (customNamedEntities.isEmpty) - result + generic else { + // TODO: kwa work on combine + val result = generic.toArray // A copy of the generic labels is created here. 
val genericNamedEntities = NamedEntity.collect(generic) //println(s"Generic NamedEntity: ${genericNamedEntities.mkString(", ")}") @@ -268,13 +270,14 @@ class BalaurProcessor protected ( // The custom labels override the generic ones! NamedEntity.combine(result, genericNamedEntities, customNamedEntities) + WrappedArraySeq(result).toImmutableSeq } } private def mkChunkLabels(words: Seq[String], labels: Array[Array[(String, Float)]]): Seq[String] = { assert(labels.length == words.length) - labels.map(_.head._1) + WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq } // The head has one score, the label has another. Here the two scores are interpolated diff --git a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala index e37f32109..524f73758 100644 --- a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala +++ b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala @@ -73,7 +73,7 @@ object DocumentMaker { } val textOpt = Option.when(keepText)(text.toString) - val doc = Document(sents.toArray, textOpt) + val doc = Document(sents, textOpt) doc } diff --git a/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala index 9761eb6d2..837e5c49c 100644 --- a/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala @@ -13,7 +13,7 @@ object PostProcessor { val WET_OR_DRY_SEASON = Pattern.compile("""(?i)[0-9]+(ds|ws)""") /** POS tag corrections, in place */ - def postprocessPartOfSpeechTags2(words: Seq[String], tags: mutable.Seq[String]): Seq[String] = { + def postprocessPartOfSpeechTags2(words: Seq[String], tags: mutable.Seq[String]): mutable.Seq[String] = { // unigram patterns words.indices.foreach { index => diff --git a/library/src/main/scala/org/clulab/processors/clu/Veil.scala b/library/src/main/scala/org/clulab/processors/clu/Veil.scala index aca16ee42..aac0bc99f 100644 --- a/library/src/main/scala/org/clulab/processors/clu/Veil.scala +++ b/library/src/main/scala/org/clulab/processors/clu/Veil.scala @@ -3,6 +3,7 @@ package org.clulab.processors.clu import org.clulab.processors.{Document, Processor, Sentence} import org.clulab.struct.{DirectedGraph, Edge, GraphMap, RelationTriple, Tree} import org.clulab.struct.GraphMap.GraphMapType +import org.clulab.utils.WrappedArraySeq import scala.collection.mutable.{Set => MutableSet} @@ -109,7 +110,7 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) } protected lazy val veiledDocument = { val veiledSentences = originalDocument.sentences.zipWithIndex.map { case (originalSentence, sentenceIndex) => - val wordIndexes = originalSentence.words.indices.filterNot(veilSets(sentenceIndex)).toArray + val wordIndexes = originalSentence.words.indices.filterNot(veilSets(sentenceIndex)) val veiledRaw = wordIndexes.map(originalSentence.raw) val veiledStartOffsets = wordIndexes.map(originalSentence.startOffsets) val veiledEndOffsets = wordIndexes.map(originalSentence.endOffsets) @@ -132,7 +133,7 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) veiledArray.zipWithIndex.foreach { case (veiledString, veiledIndex) => unveiledArray(unveilArray(veiledIndex)) = veiledString } - unveiledArray + WrappedArraySeq(unveiledArray).toImmutableSeq } } diff --git a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala 
b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala index 231f57096..8104ea3f4 100644 --- a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala +++ b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala @@ -63,9 +63,11 @@ object NamedEntity { // converting them into a BEGIN. def toBegin(bioLabel: String): String = BEGIN + bioLabel.drop(INSIDE.length) - def isValid(bioLabels: Seq[String]): Boolean = bioLabels.indices.forall(isValid(bioLabels, _)) + def isValid(bioLabels: Seq[String]): Boolean = bioLabels.indices.forall { index => + isValid1(bioLabels(index), bioLabels.lift(index - 1)) + } - def isValid(bioLabels: Seq[String], index: Int): Boolean = { + def isValid2(bioLabels: mutable.Seq[String], index: Int): Boolean = { val currBioLabel = bioLabels(index) !currBioLabel.startsWith(INSIDE) || { 0 < index && { @@ -78,7 +80,7 @@ object NamedEntity { } } - def isValid2(currBioLabel: String, prevBioLabelOpt: Option[String]): Boolean = { + def isValid1(currBioLabel: String, prevBioLabelOpt: Option[String]): Boolean = { !currBioLabel.startsWith(INSIDE) || prevBioLabelOpt.forall { prevBioLabel => prevBioLabel == currBioLabel || prevBioLabel == toBegin(currBioLabel) } @@ -86,9 +88,9 @@ object NamedEntity { // Note that this patches the array in place! - def patch2(bioLabels: mutable.Seq[String]): Seq[String] = { + def patch2(bioLabels: mutable.Seq[String]): mutable.Seq[String] = { bioLabels.indices.foreach { index => - if (!isValid(bioLabels, index)) + if (!isValid2(bioLabels, index)) bioLabels(index) = toBegin(bioLabels(index)) } bioLabels @@ -99,7 +101,7 @@ object NamedEntity { val newBioLabels = bioLabels.indices.map { index => val oldBioLabel = bioLabels(index) val newBioLabel = - if (!isValid2(oldBioLabel, prevBioLabelOpt)) toBegin(oldBioLabel) + if (!isValid1(oldBioLabel, prevBioLabelOpt)) toBegin(oldBioLabel) else oldBioLabel prevBioLabelOpt = Some(newBioLabel) From 38369e3f2df3062f4b6c7c34d5bc55bff3961808 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Sat, 24 May 2025 21:02:30 -0700 Subject: [PATCH 15/42] Compile other projects for other Scalas --- .../apps/ProcessorsJavaExample.java | 37 ++++++++++--------- .../processors/apps/ColumnsToDocument.scala | 9 +++-- .../processors/apps/ProcessCoNLL03.scala | 2 +- .../apps/ProcessorsScalaExample.scala | 2 +- .../sentence/HtmlSentenceVisualizer.scala | 4 +- .../TestNumericEntityRecognition.scala | 6 +-- .../clulab/numeric/TestSeasonNormalizer.scala | 2 +- .../org/clulab/processors/TestTokenizer.scala | 2 +- 8 files changed, 33 insertions(+), 31 deletions(-) diff --git a/apps/src/main/java/org/clulab/processors/apps/ProcessorsJavaExample.java b/apps/src/main/java/org/clulab/processors/apps/ProcessorsJavaExample.java index ecd6005a4..9334be6c5 100644 --- a/apps/src/main/java/org/clulab/processors/apps/ProcessorsJavaExample.java +++ b/apps/src/main/java/org/clulab/processors/apps/ProcessorsJavaExample.java @@ -8,6 +8,7 @@ import org.clulab.utils.JavaUtils; import java.util.Iterator; +import scala.collection.Seq; public class ProcessorsJavaExample { public static void main(String [] args) throws Exception { @@ -20,25 +21,25 @@ public static void main(String [] args) throws Exception { // You are basically done. The rest of this code simply prints out the annotations. // Let's print the sentence-level annotations. 
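// Editor's note (hedged): because Document.sentences() now returns a Scala Seq rather than
// a Java array, the Java caller below switches from .length and [i] to the Seq methods
// .length() and .apply(i), e.g.:
//
//   scala.collection.Seq<Sentence> sentences = doc.sentences();
//   for (int i = 0; i < sentences.length(); i++)
//       System.out.println(sentences.apply(i).words());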
- for (int sentenceIndex = 0; sentenceIndex < doc.sentences().length; sentenceIndex++) { - Sentence sentence = doc.sentences()[sentenceIndex]; + for (int sentenceIndex = 0; sentenceIndex < doc.sentences().length(); sentenceIndex++) { + Sentence sentence = doc.sentences().apply(sentenceIndex); System.out.println("Sentence #" + sentenceIndex + ":"); - System.out.println("Tokens: " + mkString(sentence.words())); - System.out.println("Start character offsets: " + mkString(sentence.startOffsets())); - System.out.println("End character offsets: " + mkString(sentence.endOffsets())); + System.out.println("Tokens: " + mkStringStr(sentence.words())); + System.out.println("Start character offsets: " + mkStringInt(sentence.startOffsets())); + System.out.println("End character offsets: " + mkStringInt(sentence.endOffsets())); // These annotations are optional, so they are stored using Option objects, // hence the isDefined() and get() calls. if (sentence.lemmas().isDefined()) - System.out.println("Lemmas: " + mkString(sentence.lemmas().get())); + System.out.println("Lemmas: " + mkStringStr(sentence.lemmas().get())); if (sentence.tags().isDefined()) - System.out.println("POS tags: " + mkString(sentence.tags().get())); + System.out.println("POS tags: " + mkStringStr(sentence.tags().get())); if (sentence.chunks().isDefined()) - System.out.println("Chunks: " + mkString(sentence.chunks().get())); + System.out.println("Chunks: " + mkStringStr(sentence.chunks().get())); if (sentence.entities().isDefined()) - System.out.println("Named entities: " + mkString(sentence.entities().get())); + System.out.println("Named entities: " + mkStringStr(sentence.entities().get())); if (sentence.norms().isDefined()) - System.out.println("Normalized entities: " + mkString(sentence.norms().get())); + System.out.println("Normalized entities: " + mkStringStr(sentence.norms().get())); if (sentence.dependencies().isDefined()) { System.out.println("Syntactic dependencies:"); Iterator> iterator = @@ -53,27 +54,27 @@ public static void main(String [] args) throws Exception { } } - public static String mkString(String[] strings, String sep) { + public static String mkStringStr(Seq strings, String sep) { StringBuilder stringBuilder = new StringBuilder(); - for (int i = 0; i < strings.length; i ++) { + for (int i = 0; i < strings.length(); i ++) { if (i > 0) stringBuilder.append(sep); - stringBuilder.append(strings[i]); + stringBuilder.append(strings.apply(i)); } return stringBuilder.toString(); } - public static String mkString(String[] strings) { return mkString(strings, " "); } + public static String mkStringStr(Seq strings) { return mkStringStr(strings, " "); } - public static String mkString(int[] ints, String sep) { + public static String mkStringInt(Seq ints, String sep) { StringBuilder stringBuilder = new StringBuilder(); - for (int i = 0; i < ints.length; i ++) { + for (int i = 0; i < ints.length(); i ++) { if (i > 0) stringBuilder.append(sep); - stringBuilder.append(ints[i]); + stringBuilder.append(ints.apply(i)); } return stringBuilder.toString(); } - public static String mkString(int[] ints) { return mkString(ints, " "); } + public static String mkStringInt(Seq ints) { return mkStringInt(ints, " "); } public static Iterable iteratorToIterable(Iterator iterator) { return () -> iterator; } } diff --git a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala index 8822ba993..e38b14615 100644 --- 
a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala @@ -2,6 +2,7 @@ package org.clulab.processors.apps import org.clulab.processors.{Document, Processor, Sentence} import org.clulab.processors.clu.BalaurProcessor +import org.clulab.scala.WrappedArrayBuffer._ import org.slf4j.{Logger, LoggerFactory} import java.io.InputStream @@ -102,7 +103,7 @@ object ColumnsToDocument { if (l.isEmpty) { // end of sentence if (words.nonEmpty) { - val s = new Sentence(words.toArray, startOffsets.toArray, endOffsets.toArray, words.toArray) + val s = new Sentence(words, startOffsets, endOffsets, words) setLabels(s, labels.toArray) sentences += s words = new ArrayBuffer[String]() @@ -139,14 +140,14 @@ object ColumnsToDocument { } if(words.nonEmpty) { val s = new Sentence( - words.toArray, startOffsets.toArray, endOffsets.toArray, words.toArray, - tags = Some(labels.toArray) + words, startOffsets, endOffsets, words, + tags = Some(labels) ) sentences += s } logger.debug(s"Loaded ${sentences.size} sentences.") - val d = new Document(sentences.toArray) + val d = new Document(sentences) annotate(d) d diff --git a/apps/src/main/scala/org/clulab/processors/apps/ProcessCoNLL03.scala b/apps/src/main/scala/org/clulab/processors/apps/ProcessCoNLL03.scala index 97a990764..b92b75129 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/ProcessCoNLL03.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ProcessCoNLL03.scala @@ -30,7 +30,7 @@ object ProcessCoNLL03 extends App { } } - def saveSent(pw: PrintWriter, sent: Array[Row], tags: Option[Array[String]] = None, chunks: Option[Array[String]] = None): Unit = { + def saveSent(pw: PrintWriter, sent: Array[Row], tags: Option[Seq[String]] = None, chunks: Option[Seq[String]] = None): Unit = { if (tags.isDefined) { assert(sent.length == tags.get.length) //println("Using generated POS tags") diff --git a/apps/src/main/scala/org/clulab/processors/apps/ProcessorsScalaExample.scala b/apps/src/main/scala/org/clulab/processors/apps/ProcessorsScalaExample.scala index 8f8dc65e1..fb203652f 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/ProcessorsScalaExample.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ProcessorsScalaExample.scala @@ -34,5 +34,5 @@ object ProcessorsScalaExample extends App { println() } - def mkString[T](elems: Array[T]): String = elems.mkString(" ") + def mkString[T](elems: Seq[T]): String = elems.mkString(" ") } diff --git a/debugger/src/main/scala/org/clulab/odin/debugger/visualizer/sentence/HtmlSentenceVisualizer.scala b/debugger/src/main/scala/org/clulab/odin/debugger/visualizer/sentence/HtmlSentenceVisualizer.scala index 4a8866a2e..ff7a632aa 100644 --- a/debugger/src/main/scala/org/clulab/odin/debugger/visualizer/sentence/HtmlSentenceVisualizer.scala +++ b/debugger/src/main/scala/org/clulab/odin/debugger/visualizer/sentence/HtmlSentenceVisualizer.scala @@ -18,8 +18,8 @@ class HtmlSentenceVisualizer extends SentenceVisualizer with HtmlVisualizing { string } - def getOrEmpty(arrayOpt: Option[Array[String]], index: Int): String = - arrayOpt.map(_(index)).getOrElse("") + def getOrEmpty(seqOpt: Option[Seq[String]], index: Int): String = + seqOpt.map(_(index)).getOrElse("") val rows = sentence.words.indices.map { i => tr( diff --git a/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala b/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala index 4bd2bd2ea..e476f1d43 100644 --- 
a/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala +++ b/library/src/test/scala/org/clulab/numeric/TestNumericEntityRecognition.scala @@ -15,7 +15,7 @@ class TestNumericEntityRecognition extends Test { class HabitusTokenizer(tokenizer: Tokenizer) extends Tokenizer(tokenizer.lexer, tokenizer.steps, tokenizer.sentenceSplitter) { // TODO: Make sure en dash is preserved in raw somehow! - override def tokenize(text: String, sentenceSplit: Boolean = true): Array[Sentence] = { + override def tokenize(text: String, sentenceSplit: Boolean = true, characterOffset: Int): Seq[Sentence] = { // Cheat and swap out some en dashes if necessary. val habitusText = if (text.contains(HabitusTokenizer.endash)) @@ -23,7 +23,7 @@ class TestNumericEntityRecognition extends Test { else text - tokenizer.tokenize(habitusText, sentenceSplit) + tokenizer.tokenize(habitusText, sentenceSplit, characterOffset) } } @@ -653,7 +653,7 @@ class TestNumericEntityRecognition extends Test { } /** Runs the actual numeric entity recognizer */ - def numericParse(sentence: String): (Array[String], Array[String], Array[String]) = { + def numericParse(sentence: String): (Seq[String], Seq[String], Seq[String]) = { val doc = proc.annotate(sentence) val mentions = ner.extractFrom(doc) NumericUtils.mkLabelsAndNorms(doc, mentions) diff --git a/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala b/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala index 8f8fa38ff..93db9fa4d 100644 --- a/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala +++ b/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala @@ -13,7 +13,7 @@ class TestSeasonNormalizer extends Test { val fallDateRange = "2017-09-22 -- 2017-12-21" val seasonDateRange = "2017-06-XX -- 2017-10-XX" - def mkEntitiesAndNorms(processor: BalaurProcessor, text: String): (Array[String], Array[String]) = { + def mkEntitiesAndNorms(processor: BalaurProcessor, text: String): (Seq[String], Seq[String]) = { val document = processor.annotate(text) val mentions = processor.numericEntityRecognizerOpt.get.extractFrom(document) diff --git a/library/src/test/scala/org/clulab/processors/TestTokenizer.scala b/library/src/test/scala/org/clulab/processors/TestTokenizer.scala index c4d67af56..afd2b594d 100644 --- a/library/src/test/scala/org/clulab/processors/TestTokenizer.scala +++ b/library/src/test/scala/org/clulab/processors/TestTokenizer.scala @@ -223,7 +223,7 @@ class TestTokenizer extends Test { } } - def tok(s:String):Array[Sentence] = { + def tok(s: String): Seq[Sentence] = { println(s"Tokenizing text: $s") val t = new OpenDomainEnglishTokenizer(None) val sents = t.tokenize(s) From deb244b170e3eb3bbe9c9d17bacab8d96dcb1f06 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Sat, 24 May 2025 23:28:36 -0700 Subject: [PATCH 16/42] Compile tests --- .../org/clulab/odin/TestNumericPatterns.scala | 10 +++++----- .../scala/org/clulab/odin/TestTokenPattern.scala | 16 ++++++++-------- .../clulab/struct/TestDocumentAttachment.scala | 8 ++++---- .../scala/org/clulab/utils/TestFindHeads.scala | 8 ++++---- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/library/src/test/scala/org/clulab/odin/TestNumericPatterns.scala b/library/src/test/scala/org/clulab/odin/TestNumericPatterns.scala index 297346984..b3b477e18 100644 --- a/library/src/test/scala/org/clulab/odin/TestNumericPatterns.scala +++ b/library/src/test/scala/org/clulab/odin/TestNumericPatterns.scala @@ -9,12 +9,12 @@ class TestNumericPatterns extends 
Test { val text = "blah" val doc = Document( - Array( + Seq( Sentence( - Array("blah"), - Array(0), - Array(4), - Array("blah") + Seq("blah"), + Seq(0), + Seq(4), + Seq("blah") ) ) ) diff --git a/library/src/test/scala/org/clulab/odin/TestTokenPattern.scala b/library/src/test/scala/org/clulab/odin/TestTokenPattern.scala index 10738826d..3300b791e 100644 --- a/library/src/test/scala/org/clulab/odin/TestTokenPattern.scala +++ b/library/src/test/scala/org/clulab/odin/TestTokenPattern.scala @@ -61,13 +61,13 @@ class TestTokenPattern extends Test { } val text4 = "a b c d e f g h i c" - val tokens = text4.split(" ") + val tokens = text4.split(" ").toSeq val doc = Document( - Array( + Seq( Sentence( tokens, - Array(0, 2, 4, 6, 8, 10, 12, 14, 16, 18), - Array(1, 3, 5, 7, 9, 11, 13, 15, 17, 19), + Seq(0, 2, 4, 6, 8, 10, 12, 14, 16, 18), + Seq(1, 3, 5, 7, 9, 11, 13, 15, 17, 19), tokens ) ) @@ -614,11 +614,11 @@ class TestTokenPattern extends Test { val text8 = "x a a b a b a b a b c d" val doc8 = Document( - Array( + Seq( Sentence( - text8.split(" "), - Array(0, 2, 4, 6, 8, 10, 12, 14, 16, 18), - Array(1, 3, 5, 7, 9, 11, 13, 15, 17, 19) + text8.split(" ").toSeq, + Seq(0, 2, 4, 6, 8, 10, 12, 14, 16, 18), + Seq(1, 3, 5, 7, 9, 11, 13, 15, 17, 19) ) ) ) diff --git a/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala b/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala index b34393b2b..8bf2c792c 100644 --- a/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala +++ b/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala @@ -130,7 +130,7 @@ class TestDocumentAttachment extends Test { (LAST_KEY, new TextNameDocumentAttachment(LAST_NAME)), (ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME)) ) - val oldDocument = new Document(sentences = Array.empty[Sentence], attachments = Some(oldAttachments)) + val oldDocument = new Document(sentences = Seq.empty[Sentence], attachments = Some(oldAttachments)) val documentSerializer = new DocumentSerializer() val documentString = documentSerializer.save(oldDocument) @@ -153,7 +153,7 @@ class TestDocumentAttachment extends Test { (LAST_KEY, new ObjectNameDocumentAttachment(LAST_NAME)), (ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME)) ) - val oldDocument = new Document(sentences = Array.empty[Sentence], attachments = Some(oldAttachments)) + val oldDocument = new Document(sentences = Seq.empty[Sentence], attachments = Some(oldAttachments)) val documentSerializer = new DocumentSerializer() // This should be a messy string. @@ -177,7 +177,7 @@ class TestDocumentAttachment extends Test { (LAST_KEY, new TextNameDocumentAttachment(LAST_NAME)), (ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME)) ) - val oldDocument = new Document(sentences = Array.empty[Sentence], attachments = Some(oldAttachments)) + val oldDocument = new Document(sentences = Seq.empty[Sentence], attachments = Some(oldAttachments)) // This shouldn't compile. /*oldDocument.addAttachment("wrong", new NameMethodAttachment("name"))*/ @@ -203,7 +203,7 @@ class TestDocumentAttachment extends Test { (LAST_KEY, new ObjectNameDocumentAttachment(LAST_NAME)), (ALIAS_KEY, new NameDocumentAttachment(ALIAS_NAME)) ) - val oldDocument = new Document(Array.empty[Sentence], attachments = Some(oldAttachments)) + val oldDocument = new Document(Seq.empty[Sentence], attachments = Some(oldAttachments)) // This should be a messy string. 
val documentString = prettyJson(renderJValue(oldDocument.jsonAST)) diff --git a/library/src/test/scala/org/clulab/utils/TestFindHeads.scala b/library/src/test/scala/org/clulab/utils/TestFindHeads.scala index 13e36fb85..4fd3fdfe4 100644 --- a/library/src/test/scala/org/clulab/utils/TestFindHeads.scala +++ b/library/src/test/scala/org/clulab/utils/TestFindHeads.scala @@ -6,9 +6,9 @@ import org.clulab.struct.{DirectedGraph, Edge, Interval} class TestFindHeads extends Test { - def newSentence(words: Array[String], directedGraph: DirectedGraph[String]): Sentence = { - val startOffsets = Array(0) // unused - val endOffsets = Array(0) // unused + def newSentence(words: Seq[String], directedGraph: DirectedGraph[String]): Sentence = { + val startOffsets = Seq(0) // unused + val endOffsets = Seq(0) // unused val sentence = new Sentence( words, startOffsets, endOffsets, words, tags = Some(words) @@ -117,7 +117,7 @@ class TestFindHeads extends Test { val len: Int = 78 val directedGraph = DirectedGraph(edges) val tokenInterval = Interval(0, len) - val words = 1.to(len).map { index => s"word$index" }.toArray + val words = 1.to(len).map { index => s"word$index" } val sentence = newSentence(words, directedGraph) val heads = DependencyUtils.findHeadsStrict(tokenInterval, sentence) From e9876cf55a987c17d27e5106d859bdcfb78f8ab8 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Sun, 25 May 2025 00:42:24 -0700 Subject: [PATCH 17/42] Pass tests --- .../org/clulab/processors/clu/BalaurProcessor.scala | 4 ++-- .../org/clulab/processors/clu/DocumentMaker.scala | 10 ++++++---- .../main/scala/org/clulab/sequences/NamedEntity.scala | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index c3e617e51..39c39fbae 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -203,9 +203,9 @@ class BalaurProcessor protected ( assert(labels.length == words.length) val tags = WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq + val result = PostProcessor.postprocessPartOfSpeechTags(words, tags) - PostProcessor.postprocessPartOfSpeechTags(words, tags) - tags + result } private def mkOptionalNerLabels( diff --git a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala index 524f73758..2e228c6f3 100644 --- a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala +++ b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala @@ -4,6 +4,7 @@ import org.clulab.processors.Document import org.clulab.processors.Sentence import org.clulab.processors.clu.tokenizer.Tokenizer import org.clulab.scala.WrappedArrayBuffer._ +import org.clulab.utils.WrappedArraySeq import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -34,13 +35,14 @@ object DocumentMaker { charactersBetweenSentences: Int ): Document = { var characterOffset = 0 - val sentences = texts.map { text => + val sentencesArray = texts.map { text => val sentence = tokenizer.tokenize(text, sentenceSplit = false, characterOffset).head // We produce a single sentence here! characterOffset = sentence.endOffsets.last + charactersBetweenSentences sentence - }.toVector // TODO: What is the best concrete collection to use? 
- val textOpt = Option.when(keepText)(sentences.mkString(mkSep(charactersBetweenSentences))) + }.toArray + val sentences = WrappedArraySeq(sentencesArray).toImmutableSeq + val textOpt = Option.when(keepText)(texts.mkString(mkSep(charactersBetweenSentences))) val document = Document(sentences, textOpt) document @@ -52,7 +54,7 @@ object DocumentMaker { charactersBetweenSentences:Int, charactersBetweenTokens:Int): Document = { var charOffset = 0 - var sents = new ArrayBuffer[Sentence]() + val sents = new ArrayBuffer[Sentence]() val text = new StringBuilder for(sentence <- sentences) { val startOffsets = new ArrayBuffer[Int]() diff --git a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala index 8104ea3f4..f20f6f91e 100644 --- a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala +++ b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala @@ -81,7 +81,7 @@ object NamedEntity { } def isValid1(currBioLabel: String, prevBioLabelOpt: Option[String]): Boolean = { - !currBioLabel.startsWith(INSIDE) || prevBioLabelOpt.forall { prevBioLabel => + !currBioLabel.startsWith(INSIDE) || prevBioLabelOpt.exists { prevBioLabel => prevBioLabel == currBioLabel || prevBioLabel == toBegin(currBioLabel) } } From bec8f1862d532f6b278c5af004c34fdc56a6f99f Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 09:05:08 -0700 Subject: [PATCH 18/42] Clean, get webapp to work --- build.sbt | 12 ++--- .../processors/clu/BalaurProcessor.scala | 47 +++++++++---------- .../webapp/serialization/ParseObj.scala | 6 +-- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/build.sbt b/build.sbt index 0d5ffcc14..7a7df39bf 100644 --- a/build.sbt +++ b/build.sbt @@ -34,17 +34,17 @@ lazy val library = project lazy val apps = project .dependsOn(library % "compile -> compile; test -> test") -// lazy val webapp = project - // .enablePlugins(PlayScala) - // .dependsOn(library % "compile -> compile; test -> test") - // .settings( + lazy val webapp = project + .enablePlugins(PlayScala) + .dependsOn(library % "compile -> compile; test -> test") + .settings( // scala3 doesn't have play (for 2.8.19 as specified by the project) and is ruled out completely. // scala213 has version problems for com.fasterxml.jackson.databind.JsonMappingException. // scala212 works! // scala211 isn't compiling and complains on twirlCompileTemplates. // This isn't a library. Only one version needs to work. We shouldn't use play for this anyway. - // crossScalaVersions := Seq(scala212) - // ) + crossScalaVersions := Seq(scala212) + ) lazy val debugger = project .dependsOn(library % "compile -> compile; test -> test") diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index 39c39fbae..34b616395 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -90,11 +90,10 @@ class BalaurProcessor protected ( throw new RuntimeException("ERROR: cannot call this method on its own in this processor!") } - /** Lemmatization; modifies the document in place */ override def lemmatize(words: Seq[String]): Seq[String] = { val lemmas = words.zipWithIndex.map { case (word, index) => val lemma = wordLemmatizer.lemmatizeWord(word) - // a lemma may be empty in some weird Unicode situations + // A lemma may be empty in some weird Unicode situations. 
val nonEmptyLemma = if (lemma.isEmpty) { logger.debug(s"""WARNING: Found empty lemma for word #$index "$word" in sentence: ${words.mkString(" ")}""") word.toLowerCase() } else lemma @@ -163,11 +162,11 @@ class BalaurProcessor protected ( allLabelsAndScores(TASK_TO_INDEX(HEXA_NONTERM_TASK)) ) // Entities and norms need to still be patched and filled in, so this is only a partly annotated sentence. - val partlyAnnotatedDocument = sentence.copy( + val partlyAnnotatedSentence = sentence.copy( tags = Some(tags), lemmas = Some(lemmas), entities = Some(entities), chunks = Some(chunks), graphs = graphs ) - partlyAnnotatedDocument + partlyAnnotatedSentence } catch { // No values, not even lemmas, will be included in the annotation if there was an exception. @@ -181,20 +180,18 @@ } } val partlyAnnotatedDocument = document.copy(sentences = partlyAnnotatedSentences) - val fullyAnnotatedDocument = - if (numericEntityRecognizerOpt.nonEmpty) { - val numericMentions = numericEntityRecognizerOpt.get.extractFrom(partlyAnnotatedDocument) - val (newLabels, newNorms) = NumericUtils.mkLabelsAndNorms(partlyAnnotatedDocument, numericMentions) - val fullyAnnotatedSentences = partlyAnnotatedDocument.sentences.indices.map { index => - partlyAnnotatedDocument.sentences(index).copy( - entities = Some(newLabels(index)), - norms = Some(newNorms(index)) - ) - } + val fullyAnnotatedDocument = numericEntityRecognizerOpt.map { numericEntityRecognizer => + val numericMentions = numericEntityRecognizer.extractFrom(partlyAnnotatedDocument) + val (newLabels, newNorms) = NumericUtils.mkLabelsAndNorms(partlyAnnotatedDocument, numericMentions) + val fullyAnnotatedSentences = partlyAnnotatedDocument.sentences.indices.map { index => + partlyAnnotatedDocument.sentences(index).copy( + entities = Some(newLabels(index)), + norms = Some(newNorms(index)) + ) + } - partlyAnnotatedDocument.copy(sentences = fullyAnnotatedSentences) - } - else partlyAnnotatedDocument + partlyAnnotatedDocument.copy(sentences = fullyAnnotatedSentences) + }.getOrElse(partlyAnnotatedDocument) fullyAnnotatedDocument } @@ -239,18 +236,18 @@ val labelsSeq = WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq val genericLabels = NamedEntity.patch(labelsSeq) - - if (optionalNERLabels.isEmpty) { - genericLabels - } - else { + val specificLabels = optionalNERLabels.map { nerLabels => //println(s"MERGING NE labels for sentence: ${sent.words.mkString(" ")}") //println(s"Generic labels: ${NamedEntity.patch(labels).mkString(", ")}") //println(s"Optional labels: ${optionalNERLabels.get.mkString(", ")}") - val mergedLabels = NamedEntity.patch(mergeNerLabels(genericLabels, optionalNERLabels.get)) + val mergedLabels = mergeNerLabels(genericLabels, nerLabels) + val patchedLabels = NamedEntity.patch(mergedLabels) //println(s"Merged labels: ${mergedLabels.mkString(", ")}") - mergedLabels - } + + patchedLabels + }.getOrElse(genericLabels) + + specificLabels } private def mergeNerLabels(generic: Seq[String], custom: Seq[String]): Seq[String] = { diff --git a/webapp/app/org/clulab/processors/webapp/serialization/ParseObj.scala b/webapp/app/org/clulab/processors/webapp/serialization/ParseObj.scala index 0c9bff455..617a4303d 100644 --- a/webapp/app/org/clulab/processors/webapp/serialization/ParseObj.scala +++ b/webapp/app/org/clulab/processors/webapp/serialization/ParseObj.scala @@ -14,7 +14,7 @@ class ParseObj(doc: Document) { head + xml.Utility.escape(text) + tail } - def getTdAtOptString(option: Option[Array[String]], n: Int): String = { + def
getTdAtOptString(option: Option[Seq[String]], n: Int): String = { val text = if (option.isEmpty) "" else option.get(n) @@ -22,9 +22,9 @@ class ParseObj(doc: Document) { getTd(text) } - def getTdAtString(values: Array[String], n: Int): String = getTd(values(n)) + def getTdAtString(values: Seq[String], n: Int): String = getTd(values(n)) - def getTdAtInt(values: Array[Int], n: Int): String = getTd(values(n).toString, true) + def getTdAtInt(values: Seq[Int], n: Int): String = getTd(values(n).toString, true) def edgesToString(to: Int): String = { val edges = sentence.dependencies.map(_.incomingEdges(to)).getOrElse(Array.empty) From 737e538b40b6f49829bdcc32f0668fded58ccf1f Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 09:11:22 -0700 Subject: [PATCH 19/42] Remove dead code --- .../clulab/processors/clu/PostProcessor.scala | 43 +------------------ .../org/clulab/sequences/NamedEntity.scala | 42 +++--------------- 2 files changed, 6 insertions(+), 79 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala index 837e5c49c..2226e4642 100644 --- a/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/PostProcessor.scala @@ -12,39 +12,8 @@ object PostProcessor { // Matches agricultural season short hands such as "2021DS" or "2021WS" val WET_OR_DRY_SEASON = Pattern.compile("""(?i)[0-9]+(ds|ws)""") - /** POS tag corrections, in place */ - def postprocessPartOfSpeechTags2(words: Seq[String], tags: mutable.Seq[String]): mutable.Seq[String] = { - - // unigram patterns - words.indices.foreach { index => - if (tags(index) != "CC" && VERSUS_PATTERN.matcher(words(index)).matches) { - tags(index) = "CC" // "versus" seems like a CC to me. but maybe not... 
- } - - if(WET_OR_DRY_SEASON.matcher(words(index)).matches) { - tags(index) = "CD" // such years should be CDs because our grammars expect it - } - } - - // bigram patterns - words.indices.dropRight(1).foreach { curr => - val next = curr + 1 - // "due" in "due to" must be a preposition - if (words(curr).equalsIgnoreCase("due") && words(next).equalsIgnoreCase("to")) { - tags(curr) = "IN" - } - - // "fall" followed by a CD must be NN - else if(words(curr).equalsIgnoreCase("fall") && tags(next).equals("CD")) { - tags(curr) = "NN" - } - } - - tags - } - /** POS tag corrections */ - def postprocessPartOfSpeechTags1(words: Seq[String], tags: Seq[String]): Seq[String] = { + def postprocessPartOfSpeechTags(words: Seq[String], tags: Seq[String]): Seq[String] = { val newTags = words.indices.map { index => val word = words(index) val oldTag = tags(index) @@ -71,14 +40,4 @@ object PostProcessor { newTags } - - def postprocessPartOfSpeechTags(words: Seq[String], tags: Seq[String]): Seq[String] = { - val result1 = postprocessPartOfSpeechTags1(words, tags) - val result2 = postprocessPartOfSpeechTags2(words, mutable.Seq(tags: _*)) - - if (result1 != result2) - println("It went awry!") - - result1 - } } diff --git a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala index f20f6f91e..36aae40ce 100644 --- a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala +++ b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala @@ -59,49 +59,26 @@ object NamedEntity { bioLabels } - // Only INSIDEs can be invalid and they are made valid by + // Only INSIDEs can be invalid, and they are made valid by // converting them into a BEGIN. def toBegin(bioLabel: String): String = BEGIN + bioLabel.drop(INSIDE.length) def isValid(bioLabels: Seq[String]): Boolean = bioLabels.indices.forall { index => - isValid1(bioLabels(index), bioLabels.lift(index - 1)) + isValid(bioLabels(index), bioLabels.lift(index - 1)) } - def isValid2(bioLabels: mutable.Seq[String], index: Int): Boolean = { - val currBioLabel = bioLabels(index) - !currBioLabel.startsWith(INSIDE) || { - 0 < index && { - val prevBioLabel = bioLabels(index - 1) - - prevBioLabel == currBioLabel || { - prevBioLabel == toBegin(currBioLabel) - } - } - } - } - - def isValid1(currBioLabel: String, prevBioLabelOpt: Option[String]): Boolean = { + def isValid(currBioLabel: String, prevBioLabelOpt: Option[String]): Boolean = { !currBioLabel.startsWith(INSIDE) || prevBioLabelOpt.exists { prevBioLabel => prevBioLabel == currBioLabel || prevBioLabel == toBegin(currBioLabel) } } - - // Note that this patches the array in place! 
- def patch2(bioLabels: mutable.Seq[String]): mutable.Seq[String] = { - bioLabels.indices.foreach { index => - if (!isValid2(bioLabels, index)) - bioLabels(index) = toBegin(bioLabels(index)) - } - bioLabels - } - - def patch1(bioLabels: Seq[String]): Seq[String] = { + def patch(bioLabels: Seq[String]): Seq[String] = { var prevBioLabelOpt = bioLabels.lift(-1) val newBioLabels = bioLabels.indices.map { index => val oldBioLabel = bioLabels(index) val newBioLabel = - if (!isValid1(oldBioLabel, prevBioLabelOpt)) toBegin(oldBioLabel) + if (!isValid(oldBioLabel, prevBioLabelOpt)) toBegin(oldBioLabel) else oldBioLabel prevBioLabelOpt = Some(newBioLabel) @@ -110,13 +87,4 @@ object NamedEntity { newBioLabels } - - def patch(bioLabels: Seq[String]): Seq[String] = { - val result1 = patch1(bioLabels) - val result2 = patch2(mutable.Seq(bioLabels: _*)) - - if (result1 != result2) - println("This went awry!") - result1 - } } From 4cfd51822164f1d051a2798c88c7ad940cfd7959 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 12:04:29 -0700 Subject: [PATCH 20/42] Maintenance Compile with no warnings, some renamed processor variables, view changes --- .../apps/NumericEntityRecognizerShell.scala | 3 +- .../clulab/processors/apps/OdinStarter.scala | 2 +- .../apps/DebuggingOdinStarterApp.scala | 2 +- .../graph/DebugRelationGraphExtractor.scala | 2 +- .../DebugTriggerMentionGraphExtractor.scala | 2 +- .../DebugTriggerPatternGraphExtractor.scala | 2 +- .../org/clulab/struct/GraphMap.scala | 2 + .../org/clulab/struct/GraphMap.scala | 2 + .../org/clulab/odinstarter/OdinStarter3.scala | 2 +- .../scala-3/org/clulab/struct/GraphMap.scala | 2 + .../org/clulab/processors/Sentence.scala | 12 +- .../processors/clu/BalaurProcessor.scala | 199 ++++++++---------- .../sequences/BiMEMMSequenceTagger.scala | 2 +- .../clulab/sequences/CombinedLexiconNER.scala | 1 + .../clulab/sequences/CompactLexiconNER.scala | 10 +- .../org/clulab/sequences/LexiconNER.scala | 7 +- .../clulab/sequences/MEMMSequenceTagger.scala | 4 +- .../org/clulab/sequences/NamedEntity.scala | 11 +- .../org/clulab/sequences/SequenceTagger.scala | 2 +- .../sequences/SequenceTaggerShell.scala | 1 + .../org/clulab/struct/BooleanHashTrie.scala | 4 +- .../scala/org/clulab/struct/HashTrie.scala | 4 +- .../scala/org/clulab/utils/ArrayView.scala | 37 ---- .../clulab/utils/ToEnhancedDependencies.scala | 2 +- .../org/clulab/utils/TestHash.scala | 2 +- .../org/clulab/utils/TestHash.scala | 2 +- .../scala-3/org/clulab/utils/TestHash.scala | 2 +- .../scala/org/clulab/processors/CluTest.scala | 2 +- .../clulab/sequences/TestNamedEntity.scala | 3 +- .../org/clulab/utils/TestArrayView.scala | 54 ----- .../webapp/controllers/HomeController.scala | 2 +- 31 files changed, 141 insertions(+), 243 deletions(-) delete mode 100644 library/src/main/scala/org/clulab/utils/ArrayView.scala delete mode 100644 library/src/test/scala/org/clulab/utils/TestArrayView.scala diff --git a/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala b/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala index 3a93ff4bd..c77688a54 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala @@ -23,9 +23,8 @@ class ReloadableNumericProcessor(ruleDirOpt: Option[String]) extends ReloadableP val numericEntityRecognizerOpt = balaurProcessor .numericEntityRecognizerOpt .map(_.reloaded(new File(ruleDirOpt.get))) - val 
numericEntityRecognizerOptOpt = numericEntityRecognizerOpt.map(Option(_)) - processorOpt = Some(balaurProcessor.copy(numericEntityRecognizerOptOpt = numericEntityRecognizerOptOpt)) + processorOpt = Some(balaurProcessor.copy(numericEntityRecognizerOpt = numericEntityRecognizerOpt)) } } diff --git a/apps/src/main/scala/org/clulab/processors/apps/OdinStarter.scala b/apps/src/main/scala/org/clulab/processors/apps/OdinStarter.scala index 09440b813..54abb3b5e 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/OdinStarter.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/OdinStarter.scala @@ -24,7 +24,7 @@ object OdinStarter extends App { LexiconNER(kbs, caseInsensitiveMatchings, baseDirOpt) } - val processor = new BalaurProcessor(optionalNER = Some(customLexiconNer)) + val processor = new BalaurProcessor(lexiconNerOpt = Some(customLexiconNer)) val extractorEngine = { val masterResource = "/org/clulab/odinstarter/main.yml" // We usually want to reload rules during development, diff --git a/debugger/src/main/scala/org/clulab/odin/debugger/apps/DebuggingOdinStarterApp.scala b/debugger/src/main/scala/org/clulab/odin/debugger/apps/DebuggingOdinStarterApp.scala index 6924eef03..f55ff1b9b 100644 --- a/debugger/src/main/scala/org/clulab/odin/debugger/apps/DebuggingOdinStarterApp.scala +++ b/debugger/src/main/scala/org/clulab/odin/debugger/apps/DebuggingOdinStarterApp.scala @@ -31,7 +31,7 @@ object DebuggingOdinStarterApp extends App { LexiconNER(kbs, caseInsensitiveMatchings, baseDirOpt) } - val processor = new CluProcessor(optionalNER = Some(customLexiconNer)) + val processor = new CluProcessor(lexiconNerOpt = Some(customLexiconNer)) val exampleGlobalAction = (inMentions: Seq[Mention], state: State) => { val outMentions = inMentions.map { mention => if (mention.words.length % 2 == 0) diff --git a/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugRelationGraphExtractor.scala b/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugRelationGraphExtractor.scala index 7534c6415..b88f678b1 100644 --- a/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugRelationGraphExtractor.scala +++ b/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugRelationGraphExtractor.scala @@ -20,7 +20,7 @@ class DebugRelationGraphExtractor extends DebugTest { val resourceDir: File = new File(resourceDirName) val customLexiconNer = LexiconNER(Seq(s"$baseResourceName/FOOD.tsv"), Seq(true), Some(resourceDir)) - val processor = new CluProcessor(optionalNER = Some(customLexiconNer)) + val processor = new CluProcessor(lexiconNerOpt = Some(customLexiconNer)) val document = processor.annotate("John eats cake.", keepText = true) val sentence = document.sentences.head val ruleName = "people-eat-food" diff --git a/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugTriggerMentionGraphExtractor.scala b/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugTriggerMentionGraphExtractor.scala index 860646c06..97f8c4631 100644 --- a/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugTriggerMentionGraphExtractor.scala +++ b/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugTriggerMentionGraphExtractor.scala @@ -20,7 +20,7 @@ class DebugTriggerMentionGraphExtractor extends DebugTest { val resourceDir: File = new File(resourceDirName) val customLexiconNer = LexiconNER(Seq(s"$baseResourceName/FOOD.tsv"), Seq(true), Some(resourceDir)) - val processor = new CluProcessor(optionalNER = 
Some(customLexiconNer)) + val processor = new CluProcessor(lexiconNerOpt = Some(customLexiconNer)) val document = processor.annotate("John eats cake.", keepText = true) val sentence = document.sentences.head val ruleName = "people-eat-food" diff --git a/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugTriggerPatternGraphExtractor.scala b/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugTriggerPatternGraphExtractor.scala index 7aa28848b..31447ac66 100644 --- a/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugTriggerPatternGraphExtractor.scala +++ b/debugger/src/test/scala/org/clulab/odin/debugger/extractor/graph/DebugTriggerPatternGraphExtractor.scala @@ -20,7 +20,7 @@ class DebugTriggerPatternGraphExtractor extends DebugTest { val resourceDir: File = new File(resourceDirName) val customLexiconNer = LexiconNER(Seq(s"$baseResourceName/FOOD.tsv"), Seq(true), Some(resourceDir)) - val processor = new CluProcessor(optionalNER = Some(customLexiconNer)) + val processor = new CluProcessor(lexiconNerOpt = Some(customLexiconNer)) val document = processor.annotate("John eats cake.", keepText = true) val sentence = document.sentences.head val ruleName = "people-eat-food" diff --git a/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala b/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala index de5a5472f..8de1af507 100644 --- a/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala +++ b/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala @@ -9,6 +9,8 @@ class GraphMap protected extends mutable.HashMap[String, DirectedGraph[String]] object GraphMap extends GraphMapNames { type GraphMapType = GraphMap + val EMPTY_GRAPH = GraphMap() + def apply(): GraphMapType = new GraphMap() def apply(existing: Map[String, DirectedGraph[String]]): GraphMapType = { diff --git a/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala b/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala index 805226874..4cb404f24 100644 --- a/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala +++ b/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala @@ -8,6 +8,8 @@ object GraphMap extends GraphMapNames { // [warn] ...: inheritance from class HashMap in package mutable is deprecated (since 2.13.0): HashMap will be made final; use .withDefault for the common use case of computing a default value type GraphMapType = mutable.HashMap[String, DirectedGraph[String]] + val EMPTY_GRAPH = GraphMap() + def apply(): GraphMapType = { // we have very few dependency types, so let's create a small hash to save memory. 
new GraphMapType(2, mutable.HashMap.defaultLoadFactor) diff --git a/library/src/main/scala-3/org/clulab/odinstarter/OdinStarter3.scala b/library/src/main/scala-3/org/clulab/odinstarter/OdinStarter3.scala index fa9dfa73d..a1332bf6d 100644 --- a/library/src/main/scala-3/org/clulab/odinstarter/OdinStarter3.scala +++ b/library/src/main/scala-3/org/clulab/odinstarter/OdinStarter3.scala @@ -27,7 +27,7 @@ object OdinStarter3: val baseDirOpt = if isLocal then Some(resourceDir) else None LexiconNER(kbs, caseInsensitiveMatchings, baseDirOpt) - val processor = new BalaurProcessor(optionalNER = Some(customLexiconNer)) + val processor = new BalaurProcessor(lexiconNerOpt = Some(customLexiconNer)) val extractorEngine = val masterResource = "/org/clulab/odinstarter/main.yml" // We usually want to reload rules during development, diff --git a/library/src/main/scala-3/org/clulab/struct/GraphMap.scala b/library/src/main/scala-3/org/clulab/struct/GraphMap.scala index 805226874..4cb404f24 100644 --- a/library/src/main/scala-3/org/clulab/struct/GraphMap.scala +++ b/library/src/main/scala-3/org/clulab/struct/GraphMap.scala @@ -8,6 +8,8 @@ object GraphMap extends GraphMapNames { // [warn] ...: inheritance from class HashMap in package mutable is deprecated (since 2.13.0): HashMap will be made final; use .withDefault for the common use case of computing a default value type GraphMapType = mutable.HashMap[String, DirectedGraph[String]] + val EMPTY_GRAPH = GraphMap() + def apply(): GraphMapType = { // we have very few dependency types, so let's create a small hash to save memory. new GraphMapType(2, mutable.HashMap.defaultLoadFactor) diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala b/library/src/main/scala/org/clulab/processors/Sentence.scala index c7b5a6f20..276e2dc2a 100644 --- a/library/src/main/scala/org/clulab/processors/Sentence.scala +++ b/library/src/main/scala/org/clulab/processors/Sentence.scala @@ -222,12 +222,12 @@ object Sentence { words: Seq[String], tags: Option[Seq[String]], lemmas: Option[Seq[String]], - entities: Option[Seq[String]], - norms: Option[Seq[String]], - chunks: Option[Seq[String]], - tree: Option[Tree], - deps: GraphMapType, - relations: Option[Seq[RelationTriple]] + entities: Option[Seq[String]] = None, + norms: Option[Seq[String]] = None, + chunks: Option[Seq[String]] = None, + tree: Option[Tree] = None, + deps: GraphMapType = GraphMap.EMPTY_GRAPH, + relations: Option[Seq[RelationTriple]] = None ): Sentence = { new Sentence( raw, startOffsets, endOffsets, words, diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index 34b616395..4716e472e 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -5,23 +5,25 @@ import com.typesafe.config.ConfigFactory import org.clulab.numeric.NumericEntityRecognizer import org.clulab.numeric.NumericUtils import org.clulab.processors.{Document, Processor, Sentence} -import org.clulab.processors.clu.tokenizer.{EnglishLemmatizer, Lemmatizer, OpenDomainEnglishTokenizer, OpenDomainPortugueseTokenizer, OpenDomainSpanishTokenizer, PortugueseLemmatizer, SpanishLemmatizer, Tokenizer} +import org.clulab.processors.clu.tokenizer.Lemmatizer +import org.clulab.processors.clu.tokenizer.{EnglishLemmatizer, PortugueseLemmatizer, SpanishLemmatizer} +import org.clulab.processors.clu.tokenizer.Tokenizer +import 
org.clulab.processors.clu.tokenizer.{OpenDomainEnglishTokenizer, OpenDomainPortugueseTokenizer, OpenDomainSpanishTokenizer} import org.clulab.processors.hexatagging.HexaDecoder -import org.clulab.utils.WrappedArraySeq import org.clulab.scala_transformers.encoder.EncoderMaxTokensRuntimeException import org.clulab.scala_transformers.encoder.TokenClassifier import org.clulab.sequences.{LexiconNER, NamedEntity} import org.clulab.struct.DirectedGraph import org.clulab.struct.GraphMap -import org.clulab.struct.GraphMap.GraphMapType import org.clulab.utils.{Configured, MathUtils, ToEnhancedDependencies} +import org.clulab.utils.WrappedArraySeq import org.slf4j.{Logger, LoggerFactory} import BalaurProcessor._ class BalaurProcessor protected ( val config: Config, - val optionalNER: Option[LexiconNER], + val lexiconNerOpt: Option[LexiconNER], val numericEntityRecognizerOpt: Option[NumericEntityRecognizer], wordTokenizer: Tokenizer, wordLemmatizer: Lemmatizer, @@ -33,11 +35,11 @@ class BalaurProcessor protected ( // standard, abbreviated constructor def this( config: Config = ConfigFactory.load("balaurprocessor"), - optionalNER: Option[LexiconNER] = None, + lexiconNerOpt: Option[LexiconNER] = None, seasonPathOpt: Option[String] = Some("/org/clulab/numeric/SEASON.tsv") ) = this( config, - optionalNER, + lexiconNerOpt, newNumericEntityRecognizerOpt(seasonPathOpt), mkTokenizer(getConfigArgString(config, s"$prefix.language", Some("EN"))), mkLemmatizer(getConfigArgString(config, s"$prefix.language", Some("EN"))), @@ -46,43 +48,49 @@ class BalaurProcessor protected ( ) def copy( - configOpt: Option[Config] = None, - optionalNEROpt: Option[Option[LexiconNER]] = None, - numericEntityRecognizerOptOpt: Option[Option[NumericEntityRecognizer]] = None, - wordTokenizerOpt: Option[Tokenizer] = None, - wordLemmatizerOpt: Option[Lemmatizer] = None, - tokenClassifierOpt: Option[TokenClassifier] = None + config: Config = config, + lexiconNerOpt: Option[LexiconNER] = lexiconNerOpt, + numericEntityRecognizerOpt: Option[NumericEntityRecognizer] = numericEntityRecognizerOpt, + wordTokenizer: Tokenizer = wordTokenizer, + wordLemmatizer: Lemmatizer = wordLemmatizer, + tokenClassifier: TokenClassifier = tokenClassifier ): BalaurProcessor = { new BalaurProcessor( - configOpt.getOrElse(this.config), - optionalNEROpt.getOrElse(this.optionalNER), - numericEntityRecognizerOptOpt.getOrElse(this.numericEntityRecognizerOpt), - wordTokenizerOpt.getOrElse(this.wordTokenizer), - wordLemmatizerOpt.getOrElse(this.wordLemmatizer), - tokenClassifierOpt.getOrElse(this.tokenClassifier) + config, + lexiconNerOpt, + numericEntityRecognizerOpt, + wordTokenizer, + wordLemmatizer, + tokenClassifier ) } + // TODO: Try not to make a new decoder for each processor? val hexaDecoder = new HexaDecoder() override def getConf: Config = config + // TODO: Why not make the wordTokenizer a val then? 
+  def tokenizer: Tokenizer = wordTokenizer
+
   override def mkDocument(text: String, keepText: Boolean): Document = {
     DocumentMaker.mkDocument(tokenizer, text, keepText)
   }
 
-  def tokenizer: Tokenizer = wordTokenizer
-
-  override def mkDocumentFromSentences(sentences: Iterable[String],
+  override def mkDocumentFromSentences(
+    sentences: Iterable[String],
     keepText: Boolean,
-    charactersBetweenSentences: Int): Document = {
+    charactersBetweenSentences: Int
+  ): Document = {
     DocumentMaker.mkDocumentFromSentences(tokenizer, sentences, keepText, charactersBetweenSentences)
   }
 
-  override def mkDocumentFromTokens(sentences: Iterable[Iterable[String]],
+  override def mkDocumentFromTokens(
+    sentences: Iterable[Iterable[String]],
     keepText: Boolean,
     charactersBetweenSentences: Int,
-    charactersBetweenTokens: Int): Document = {
+    charactersBetweenTokens: Int
+  ): Document = {
     DocumentMaker.mkDocumentFromTokens(sentences, keepText, charactersBetweenSentences, charactersBetweenSentences)
   }
 
@@ -108,37 +116,28 @@ class BalaurProcessor protected (
   }
 
   /** Generates cheap lemmas with the word in lower case, for languages where a lemmatizer is not available */
-  def cheapLemmatize(sentence: Sentence): Seq[String] = {
-    sentence.words.map(_.toLowerCase())
-  }
+  def cheapLemmatize(sentence: Sentence): Seq[String] =
+    sentence.words.map(_.toLowerCase())
 
-  override def recognizeNamedEntities(doc: Document): Unit = {
-    throw new RuntimeException("ERROR: cannot call this method on its own in this procecessor!")
-  }
+  def throwCannotCallException(methodName: String): Unit =
+    throw new RuntimeException(s"ERROR: cannot call $methodName on its own in this processor!")
 
-  override def parse(doc: Document): Unit = {
-    throw new RuntimeException("ERROR: cannot call this method on its own in this procecessor!")
-  }
+  override def recognizeNamedEntities(doc: Document): Unit = throwCannotCallException("recognizeNamedEntities")
 
-  override def srl(doc: Document): Unit = {
-    throw new RuntimeException("ERROR: functionality not supported in this procecessor!")
-  }
+  override def parse(doc: Document): Unit = throwCannotCallException("parse")
 
-  override def chunking(doc: Document): Unit = {
-    throw new RuntimeException("ERROR: cannot call this method on its own in this procecessor!")
-  }
+  override def chunking(doc: Document): Unit = throwCannotCallException("chunking")
 
-  override def resolveCoreference(doc: Document): Unit = {
-    throw new RuntimeException("ERROR: functionality not supported in this procecessor!")
-  }
+  def throwNotSupportedException(methodName: String): Unit =
+    throw new RuntimeException(s"ERROR: $methodName functionality not supported in this processor!")
 
-  override def discourse(doc: Document): Unit = {
-    throw new RuntimeException("ERROR: functionality not supported in this procecessor!")
-  }
+  override def srl(doc: Document): Unit = throwNotSupportedException("srl")
 
-  override def relationExtraction(doc: Document): Unit = {
-    throw new RuntimeException("ERROR: functionality not supported in this procecessor!")
-  }
+  override def resolveCoreference(doc: Document): Unit = throwNotSupportedException("resolveCoreference")
+
+  override def discourse(doc: Document): Unit = throwNotSupportedException("discourse")
+
+  override def relationExtraction(doc: Document): Unit = throwNotSupportedException("relationExtraction")
 
   override def annotate(document: Document): Document = {
     // Process one sentence at a time through the MTL framework.
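The hunk above collapses eight copy-pasted throw blocks into two shared helpers. For readers skimming the patch, here is a minimal, self-contained sketch of the same pattern; the trait and method names below are invented for illustration and are not part of the patch.

    // Sketch only. One helper per failure mode keeps the message format in a
    // single place, and the overriding call sites shrink to one-liners.
    trait UnsupportedOps {
      def throwNotSupported(methodName: String): Unit =
        throw new RuntimeException(s"ERROR: $methodName functionality not supported in this processor!")

      def srl(): Unit = throwNotSupported("srl")
      def discourse(): Unit = throwNotSupported("discourse")
    }

Declaring the helper's result type as Nothing instead of Unit would additionally tell the compiler that such a call never returns; the patch keeps Unit, which the Unit-returning overrides accept just as well.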
@@ -151,7 +150,7 @@ class BalaurProcessor protected ( val allLabelsAndScores = tokenClassifier.predictWithScores(words) val tags = mkPosTags(words, allLabelsAndScores(TASK_TO_INDEX(POS_TASK))) val entities = { - val optionalEntities = mkOptionalNerLabels(words, sentence.startOffsets, sentence.endOffsets, tags, lemmas) + val optionalEntities = mkNerLabelsOpt(words, sentence.startOffsets, sentence.endOffsets, tags, lemmas) mkNamedEntityLabels(words, allLabelsAndScores(TASK_TO_INDEX(NER_TASK)), optionalEntities) } @@ -199,44 +198,37 @@ class BalaurProcessor protected ( private def mkPosTags(words: Seq[String], labels: Array[Array[(String, Float)]]): Seq[String] = { assert(labels.length == words.length) - val tags = WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq - val result = PostProcessor.postprocessPartOfSpeechTags(words, tags) + val rawTags = WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq + val cookedTags = PostProcessor.postprocessPartOfSpeechTags(words, rawTags) - result + cookedTags } - private def mkOptionalNerLabels( + private def mkNerLabelsOpt( words: Seq[String], startOffsets: Seq[Int], endOffsets: Seq[Int], tags: Seq[String], lemmas: Seq[String] ): Option[Seq[String]] = { - // NER labels from the custom NER - optionalNER.map { ner => + lexiconNerOpt.map { lexiconNer => val sentence = Sentence( - words, // Why isn't this raw? + words, // TODO: Why isn't this raw? startOffsets, endOffsets, words, Some(tags), - Some(lemmas), - entities = None, - norms = None, - chunks = None, - tree = None, - deps = EMPTY_GRAPH, - relations = None + Some(lemmas) ) - ner.find(sentence) + lexiconNer.find(sentence) } } /** Must be called after assignPosTags and lemmatize because it requires Sentence.tags and Sentence.lemmas */ - private def mkNamedEntityLabels(words: Seq[String], labels: Array[Array[(String, Float)]], optionalNERLabels: Option[Seq[String]]): Seq[String] = { + private def mkNamedEntityLabels(words: Seq[String], labels: Array[Array[(String, Float)]], nerLabelsOpt: Option[Seq[String]]): Seq[String] = { assert(labels.length == words.length) val labelsSeq = WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq val genericLabels = NamedEntity.patch(labelsSeq) - val specificLabels = optionalNERLabels.map { nerLabels => + val specificLabels = nerLabelsOpt.map { nerLabels => //println(s"MERGING NE labels for sentence: ${sent.words.mkString(" ")}") //println(s"Generic labels: ${NamedEntity.patch(labels).mkString(", ")}") //println(s"Optional labels: ${optionalNERLabels.get.mkString(", ")}") @@ -258,9 +250,8 @@ class BalaurProcessor protected ( if (customNamedEntities.isEmpty) generic else { - // TODO: kwa work on combine - val result = generic.toArray // A copy of the generic labels is created here. val genericNamedEntities = NamedEntity.collect(generic) + val result = generic.toArray // A copy of the generic labels is created here. //println(s"Generic NamedEntity: ${genericNamedEntities.mkString(", ")}") //println(s"Custom NamedEntity: ${customNamedEntities.mkString(", ")}") @@ -277,13 +268,15 @@ class BalaurProcessor protected ( WrappedArraySeq(labels.map(_.head._1)).toImmutableSeq } + // TODO: This appears to be unused. // The head has one score, the label has another. Here the two scores are interpolated // and the head and label are stored together in a single object with the score if the // object, the Dependency, has a valid absolute head. 
private def interpolateHeadsAndLabels( - sentHeadPredictionScores: Array[Array[PredictionScore]], - sentLabelPredictionScores: Array[Array[PredictionScore]], - lambda: Float): Array[Array[Dependency]] = { + sentHeadPredictionScores: Array[Array[PredictionScore]], + sentLabelPredictionScores: Array[Array[PredictionScore]], + lambda: Float + ): Array[Array[Dependency]] = { assert(sentHeadPredictionScores.length == sentLabelPredictionScores.length) val sentDependencies = sentHeadPredictionScores.zip(sentLabelPredictionScores).zipWithIndex.map { case ((wordHeadPredictionScores, wordLabelPredictionScores), wordIndex) => @@ -316,21 +309,22 @@ class BalaurProcessor protected ( words: Seq[String], lemmas: Seq[String], tags: Seq[String], termTags: Array[Array[PredictionScore]], nonTermTags: Array[Array[PredictionScore]] - ): GraphMapType = { + ): GraphMap.GraphMapType = { val verbose = false val graphs = GraphMap() val size = words.length - // bht is used just for debugging purposes here val (bht, deps, roots) = hexaDecoder.decode(termTags, nonTermTags, topK = 25, verbose) - if(verbose && bht.nonEmpty) { + + if (verbose && bht.nonEmpty) { println(bht) println(s"Dependencies (${deps.get.size}):") println(deps.mkString("\n")) println("Roots: " + roots.get.mkString(", ")) } - if (deps.nonEmpty && roots.nonEmpty) { + // TODO: This can be made in one fell swoop. + // basic dependencies that replicate treebank annotations val depGraph = new DirectedGraph[String](deps.get, Some(size), roots) graphs += GraphMap.UNIVERSAL_BASIC -> depGraph @@ -351,51 +345,38 @@ object BalaurProcessor { val logger:Logger = LoggerFactory.getLogger(classOf[BalaurProcessor]) val prefix:String = "BalaurProcessor" - val OUTSIDE = "O" - val EMPTY_GRAPH = GraphMap() - val NER_TASK = "NER" val POS_TASK = "POS" val CHUNKING_TASK = "Chunking" - val DEPS_HEAD_TASK = "Deps Head" - val DEPS_LABEL_TASK = "Deps Label" val HEXA_TERM_TASK = "Hexa Term" val HEXA_NONTERM_TASK = "Hexa NonTerm" - val PARSING_INTERPOLATION_LAMBDA = 0.6f - val PARSING_TOPK = 5 - // maps a task name to a head index in the encoder - val TASK_TO_INDEX = Map( - NER_TASK -> 0, - POS_TASK -> 1, - CHUNKING_TASK -> 2, - HEXA_TERM_TASK -> 3, - HEXA_NONTERM_TASK -> 4 - ) - - def mkTokenizer(lang: String): Tokenizer = { - lang match { - case "PT" => new OpenDomainPortugueseTokenizer - case "ES" => new OpenDomainSpanishTokenizer - case _ => new OpenDomainEnglishTokenizer - } + val TASK_TO_INDEX: Map[String, Int] = Seq( + NER_TASK, + POS_TASK, + CHUNKING_TASK, + HEXA_TERM_TASK, + HEXA_NONTERM_TASK + ).zipWithIndex.toMap + + def mkTokenizer(lang: String): Tokenizer = lang match { + case "PT" => new OpenDomainPortugueseTokenizer + case "ES" => new OpenDomainSpanishTokenizer + case "EN" | _ => new OpenDomainEnglishTokenizer } - def mkLemmatizer(lang: String): Lemmatizer = { - lang match { - case "PT" => new PortugueseLemmatizer - case "ES" => new SpanishLemmatizer - case _ => new EnglishLemmatizer - } + def mkLemmatizer(lang: String): Lemmatizer = lang match { + case "PT" => new PortugueseLemmatizer + case "ES" => new SpanishLemmatizer + case "EN" | _ => new EnglishLemmatizer } def getConfigArgString (config: Config, argPath: String, defaultValue: Option[String]): String = - if (config.hasPath(argPath)) config.getString(argPath) - else if(defaultValue.nonEmpty) defaultValue.get - else throw new RuntimeException(s"ERROR: parameter $argPath must be defined!") + if (config.hasPath(argPath)) config.getString(argPath) + else if (defaultValue.nonEmpty) defaultValue.get + else throw new 
RuntimeException(s"ERROR: parameter $argPath must be defined!") - def newNumericEntityRecognizerOpt(seasonPathOpt: Option[String]): Option[NumericEntityRecognizer] = { - seasonPathOpt.map(NumericEntityRecognizer(_)) - } + def newNumericEntityRecognizerOpt(seasonPathOpt: Option[String]): Option[NumericEntityRecognizer] = + seasonPathOpt.map(NumericEntityRecognizer(_)) } diff --git a/library/src/main/scala/org/clulab/sequences/BiMEMMSequenceTagger.scala b/library/src/main/scala/org/clulab/sequences/BiMEMMSequenceTagger.scala index 3278df5f2..dd7118ac5 100644 --- a/library/src/main/scala/org/clulab/sequences/BiMEMMSequenceTagger.scala +++ b/library/src/main/scala/org/clulab/sequences/BiMEMMSequenceTagger.scala @@ -233,7 +233,7 @@ abstract class BiMEMMSequenceTagger[L: ClassTag, F: ClassTag]( if(leftToRight) history.toArray else SeqUtils.revert(history).toArray } - override def classesOf(sentence: Sentence):Array[L] = { + override def classesOf(sentence: Sentence):Seq[L] = { var firstPassLabels:Option[Array[L]] = None if(firstPassModel.nonEmpty) firstPassLabels = Some(classesOf(firstPassModel.get, sentence, None, ! leftToRight)) diff --git a/library/src/main/scala/org/clulab/sequences/CombinedLexiconNER.scala b/library/src/main/scala/org/clulab/sequences/CombinedLexiconNER.scala index 9c12ab411..9ab41afda 100644 --- a/library/src/main/scala/org/clulab/sequences/CombinedLexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/CombinedLexiconNER.scala @@ -2,6 +2,7 @@ package org.clulab.sequences import org.clulab.processors.Sentence import org.clulab.sequences.LexiconNER._ +import org.clulab.scala.WrappedArray._ import org.clulab.struct.EntityValidator import org.clulab.struct.IntHashTrie diff --git a/library/src/main/scala/org/clulab/sequences/CompactLexiconNER.scala b/library/src/main/scala/org/clulab/sequences/CompactLexiconNER.scala index 08bee6769..2d16ddbf2 100644 --- a/library/src/main/scala/org/clulab/sequences/CompactLexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/CompactLexiconNER.scala @@ -1,15 +1,15 @@ package org.clulab.sequences -import java.io.ObjectInputStream -import java.io.ObjectOutputStream -import java.util.Arrays - import org.clulab.processors.Sentence import org.clulab.sequences.LexiconNER.OUTSIDE_LABEL +import org.clulab.scala.WrappedArray._ import org.clulab.struct.EntityValidator import org.clulab.struct.IntHashTrie import org.clulab.struct.IntTrieNode +import java.io.ObjectInputStream +import java.io.ObjectOutputStream +import java.util.Arrays import scala.collection.mutable /** Lexicon-based NER similar to [[org.clulab.sequences.CombinedLexiconNER CombinedLexiconNER]] but which @@ -376,7 +376,7 @@ object CompactLexiconNER { // Assume that trieNodes are already sorted as much as necessary and all the tokens have stringIds. // Returns the number of parentsAdded and childrenAdded - def add(trieNodes: Array[IntTrieNode], parentOffset: Int, childOffset: Int): (Int, Int) = { + def add(trieNodes: Seq[IntTrieNode], parentOffset: Int, childOffset: Int): (Int, Int) = { // Area between parentOffset and parentOffset + parentRserve is for this recursive pass and // likewise for between childOffset and childOffset + childReserve. 
val parentReserve = trieNodes.length diff --git a/library/src/main/scala/org/clulab/sequences/LexiconNER.scala b/library/src/main/scala/org/clulab/sequences/LexiconNER.scala index cc8bebf16..24d0b143f 100644 --- a/library/src/main/scala/org/clulab/sequences/LexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/LexiconNER.scala @@ -4,7 +4,6 @@ import org.clulab.processors.Sentence import org.clulab.scala.SeqView import org.clulab.scala.WrappedArray._ import org.clulab.struct.{EntityValidator, TrueEntityValidator} -import org.clulab.utils.ArrayView import java.io.File import scala.collection.mutable @@ -103,7 +102,7 @@ abstract class LexiconNER(val knownCaseInsensitives: Set[String], val useLemmas: ) protected def contentfulSpan(sentence: Sentence, start: Int, length: Int): Boolean = { - val wordsView = sentence.words.view(start, start + length) + val wordsView = sentence.words.view.slice(start, start + length) // A valid view/span must have a letter and at least one of the other qualifiers. val contentful = hasLetter(wordsView) && contentQualifiers.exists(_(wordsView)) @@ -314,7 +313,7 @@ object LexiconNER { var upperCaseLetters = 0 val spaces = math.max(0, end - start - 1) // Spaces are between words, not after them. - ArrayView(words, start, end).foreach { word => + words.view.slice(start, end).foreach { word => characters += word.length word.foreach { c => if (Character.isLetter(c)) letters += 1 @@ -347,7 +346,7 @@ object LexiconNER { while (offset < length) { val notOutsideCount = countWhile(src, offset, isNotOutside) // Check that there is not anything in dst that should not be overwritten. - if (!ArrayView(dst, offset, offset + notOutsideCount).exists(isNotOutside(_))) + if (!dst.view.slice(offset, offset + notOutsideCount).exists(isNotOutside(_))) Array.copy(src, offset, dst, offset, notOutsideCount) offset += notOutsideCount diff --git a/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala b/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala index aa6ac8b47..a78fc7795 100644 --- a/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala +++ b/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala @@ -67,7 +67,7 @@ abstract class MEMMSequenceTagger[L: ClassTag, F: ClassTag](var order:Int = 1, v logger.debug("Finished training.") } - override def classesOf(origSentence: Sentence):Array[L] = { + override def classesOf(origSentence: Sentence):Seq[L] = { val sentence = if(leftToRight) origSentence else origSentence.revert() val history = new ArrayBuffer[L]() @@ -80,7 +80,7 @@ abstract class MEMMSequenceTagger[L: ClassTag, F: ClassTag](var order:Int = 1, v history += label } - if(leftToRight) history.toArray else SeqUtils.revert(history).toArray + if(leftToRight) history else SeqUtils.revert(history) } override def save(file: File): Unit = { diff --git a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala index 36aae40ce..2b74c5b6d 100644 --- a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala +++ b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala @@ -43,7 +43,7 @@ object NamedEntity { namedEntities } - def combine(bioLabels: Array[String], genericNamedEntities: Seq[NamedEntity], customNamedEntities: Seq[NamedEntity]): Array[String] = { + def combine(bioLabels: Array[String], genericNamedEntities: Seq[NamedEntity], customNamedEntities: Seq[NamedEntity]): Unit = { // Neither named entities sequence can contain 
overlapping elements within the sequence. // At most, there is overlap between sequences. Use is made of that fact. // The NamedEntities never have empty Ranges, so end - 1 is always at least start. @@ -51,12 +51,13 @@ object NamedEntity { val validStarts = (genericNamedEntities.map(_.range.start) ++ outsides).toSet // The -1 is used to coordinate ends (exclusive) with the OUTSIDE positions (inclusive). val validEnds = (genericNamedEntities.map(_.range.end - 1) ++ outsides).toSet + val validCustomNamedEntities = customNamedEntities.filter { customNamedEntity => + validStarts(customNamedEntity.range.start) && validEnds(customNamedEntity.range.end - 1) + } - customNamedEntities.foreach { customNamedEntity => - if (validStarts(customNamedEntity.range.start) && validEnds(customNamedEntity.range.end - 1)) - customNamedEntity.fill(bioLabels) + validCustomNamedEntities.foreach { customNamedEntity => + customNamedEntity.fill(bioLabels) } - bioLabels } // Only INSIDEs can be invalid, and they are made valid by diff --git a/library/src/main/scala/org/clulab/sequences/SequenceTagger.scala b/library/src/main/scala/org/clulab/sequences/SequenceTagger.scala index 93fd32c5b..76081875a 100644 --- a/library/src/main/scala/org/clulab/sequences/SequenceTagger.scala +++ b/library/src/main/scala/org/clulab/sequences/SequenceTagger.scala @@ -15,7 +15,7 @@ import scala.util.Using trait SequenceTagger[L, F] extends Tagger[L] { def train(docs:Iterator[Document]): Unit - def classesOf(sentence: Sentence):Array[L] + def classesOf(sentence: Sentence):Seq[L] /** Abstract method that generates the features for the word at the position offset in the given sentence */ def featureExtractor(features:Counter[F], sentence: Sentence, offset:Int): Unit diff --git a/library/src/main/scala/org/clulab/sequences/SequenceTaggerShell.scala b/library/src/main/scala/org/clulab/sequences/SequenceTaggerShell.scala index 9bcb7368f..1b4566e68 100644 --- a/library/src/main/scala/org/clulab/sequences/SequenceTaggerShell.scala +++ b/library/src/main/scala/org/clulab/sequences/SequenceTaggerShell.scala @@ -5,6 +5,7 @@ import java.io.File import jline.console.ConsoleReader import jline.console.history.FileHistory import org.clulab.processors.Sentence +import org.clulab.scala.WrappedArray._ /** * Simple shell for sequence taggers diff --git a/library/src/main/scala/org/clulab/struct/BooleanHashTrie.scala b/library/src/main/scala/org/clulab/struct/BooleanHashTrie.scala index dc4bed380..5ab19e3ad 100644 --- a/library/src/main/scala/org/clulab/struct/BooleanHashTrie.scala +++ b/library/src/main/scala/org/clulab/struct/BooleanHashTrie.scala @@ -261,13 +261,13 @@ class DebugBooleanHashTrie(label: String, caseInsensitive: Boolean = true) exten * Generates BIO labels for this sequence when complete trie paths match * When multiple paths match, the longest one is kept */ - def find(sequence: Array[String], outsideLabel: String): Array[String] = { + def find(sequence: Seq[String], outsideLabel: String): Array[String] = { val casedSequence = if (caseInsensitive) sequence.map(_.toLowerCase) else sequence findNormalized(casedSequence, outsideLabel) } - private def findNormalized(sequence: Array[String], outsideLabel: String): Array[String] = { + private def findNormalized(sequence: Seq[String], outsideLabel: String): Array[String] = { val labels = new Array[String](sequence.length) var offset = 0 diff --git a/library/src/main/scala/org/clulab/struct/HashTrie.scala b/library/src/main/scala/org/clulab/struct/HashTrie.scala index 331858735..1bcd8c0af 100644 
--- a/library/src/main/scala/org/clulab/struct/HashTrie.scala +++ b/library/src/main/scala/org/clulab/struct/HashTrie.scala @@ -4,11 +4,11 @@ package org.clulab.struct class HashTrie(caseInsensitive: Boolean = true) extends BooleanHashTrie("", caseInsensitive) { - def find(sequence:Array[String], label: String, outsideLabel: String): Array[String] = + def find(sequence:Seq[String], label: String, outsideLabel: String): Array[String] = if (caseInsensitive) findNormalized(sequence.map(_.toLowerCase), label, outsideLabel) else findNormalized(sequence, label, outsideLabel) - protected def findNormalized(tokens: Array[String], label: String, outsideLabel: String): Array[String] = { + protected def findNormalized(tokens: Seq[String], label: String, outsideLabel: String): Array[String] = { val labels = new Array[String](tokens.length) lazy val bLabel = "B-" + label // lazy thinking that most calls will not use it lazy val iLabel = "I-" + label diff --git a/library/src/main/scala/org/clulab/utils/ArrayView.scala b/library/src/main/scala/org/clulab/utils/ArrayView.scala deleted file mode 100644 index afbd6d42a..000000000 --- a/library/src/main/scala/org/clulab/utils/ArrayView.scala +++ /dev/null @@ -1,37 +0,0 @@ -package org.clulab.utils - -import scala.collection.mutable - -// Array.view(from, until) is no longer available in Scala 2.13+. -class ArrayView[T](array: Array[T], from: Int, until: Int) extends IndexedSeq[T] { - val length = until - from - - override def apply(index: Int): T = array(from + index) -} - -object ArrayView { - - def apply[T](array: Array[T]): ArrayView[T] = apply(array, 0) - - def apply[T](array: Array[T], from: Int): ArrayView[T] = apply(array, from, array.length) - - def apply[T](array: Array[T], from: Int, until: Int): ArrayView[T] = new ArrayView(array, from, until) -} - -// Array.view(from, until) is no longer available in Scala 2.13+. 
-class MutableArrayView[T](array: Array[T], from: Int, until: Int) extends mutable.IndexedSeq[T] { - val length = until - from - - override def apply(index: Int): T = array(from + index) - - override def update(index: Int, elem: T): Unit = array(from + index) = elem -} - -object MutableArrayView { - - def apply[T](array: Array[T]): MutableArrayView[T] = apply(array, 0) - - def apply[T](array: Array[T], from: Int): MutableArrayView[T] = apply(array, from, array.length) - - def apply[T](array: Array[T], from: Int, until: Int): MutableArrayView[T] = new MutableArrayView(array, from, until) -} diff --git a/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala b/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala index e6da8e3b7..1eb8314d5 100644 --- a/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala +++ b/library/src/main/scala/org/clulab/utils/ToEnhancedDependencies.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.{ArrayBuffer, ListBuffer} object ToEnhancedDependencies { type EdgeSpec = (Int, Int, String) - def generateStanfordEnhancedDependencies(words: Array[String], tags: Array[String], dg:DirectedGraph[String]): DirectedGraph[String] = { + def generateStanfordEnhancedDependencies(words: Array[String], tags: Seq[String], dg:DirectedGraph[String]): DirectedGraph[String] = { val dgi = dg.toDirectedGraphIndex() collapsePrepositionsStanford(words, dgi) raiseSubjects(dgi) diff --git a/library/src/test/scala-2.11_2.12/org/clulab/utils/TestHash.scala b/library/src/test/scala-2.11_2.12/org/clulab/utils/TestHash.scala index 54ce33916..85125c04b 100644 --- a/library/src/test/scala-2.11_2.12/org/clulab/utils/TestHash.scala +++ b/library/src/test/scala-2.11_2.12/org/clulab/utils/TestHash.scala @@ -16,7 +16,7 @@ class TestHash extends Test { LexiconNER(kbs, caseInsensitiveMatchings, None) } - val processor = new BalaurProcessor(optionalNER = Some(customLexiconNer)) + val processor = new BalaurProcessor(lexiconNerOpt = Some(customLexiconNer)) val extractorEngine = { val rules = FileUtils.getTextFromResource("/org/clulab/odinstarter/main.yml") diff --git a/library/src/test/scala-2.13/org/clulab/utils/TestHash.scala b/library/src/test/scala-2.13/org/clulab/utils/TestHash.scala index 31e03d8ec..88e2b0726 100644 --- a/library/src/test/scala-2.13/org/clulab/utils/TestHash.scala +++ b/library/src/test/scala-2.13/org/clulab/utils/TestHash.scala @@ -16,7 +16,7 @@ class TestHash extends Test { LexiconNER(kbs, caseInsensitiveMatchings, None) } - val processor = new BalaurProcessor(optionalNER = Some(customLexiconNer)) + val processor = new BalaurProcessor(lexiconNerOpt = Some(customLexiconNer)) val extractorEngine = { val rules = FileUtils.getTextFromResource("/org/clulab/odinstarter/main.yml") diff --git a/library/src/test/scala-3/org/clulab/utils/TestHash.scala b/library/src/test/scala-3/org/clulab/utils/TestHash.scala index 9a08e0ca5..9186e9ae6 100644 --- a/library/src/test/scala-3/org/clulab/utils/TestHash.scala +++ b/library/src/test/scala-3/org/clulab/utils/TestHash.scala @@ -17,7 +17,7 @@ class TestHash extends Test { LexiconNER(kbs, caseInsensitiveMatchings, None) } - val processor = new BalaurProcessor(optionalNER = Some(customLexiconNer)) + val processor = new BalaurProcessor(lexiconNerOpt = Some(customLexiconNer)) val extractorEngine = { val rules = FileUtils.getTextFromResource("/org/clulab/odinstarter/main.yml") diff --git a/library/src/test/scala/org/clulab/processors/CluTest.scala b/library/src/test/scala/org/clulab/processors/CluTest.scala 
index 7b7d323e5..025e71413 100644 --- a/library/src/test/scala/org/clulab/processors/CluTest.scala +++ b/library/src/test/scala/org/clulab/processors/CluTest.scala @@ -29,7 +29,7 @@ class CluTest extends Test with BeforeAndAfterAll { ) val lexiconNer = LexiconNER(kbs, Seq(false), useLemmasForMatching = false) // case sensitive match on this KB - new BalaurProcessor(optionalNER = Some(lexiconNer)) + new BalaurProcessor(lexiconNerOpt = Some(lexiconNer)) } def stop(): Unit = { diff --git a/library/src/test/scala/org/clulab/sequences/TestNamedEntity.scala b/library/src/test/scala/org/clulab/sequences/TestNamedEntity.scala index 08a774400..cd731635f 100644 --- a/library/src/test/scala/org/clulab/sequences/TestNamedEntity.scala +++ b/library/src/test/scala/org/clulab/sequences/TestNamedEntity.scala @@ -45,7 +45,8 @@ class TestNamedEntity extends Test { val customBioLabels = customBioLabelString.split(" +") val genericNamedEntities = NamedEntity.collect(genericBioLabels) val customNamedEntities = NamedEntity.collect(customBioLabels) - val actualCombinedBioLabels = NamedEntity.combine(genericBioLabels, genericNamedEntities, customNamedEntities) + NamedEntity.combine(genericBioLabels, genericNamedEntities, customNamedEntities) + val actualCombinedBioLabels = genericBioLabels val actualCombinedBioLabelString = actualCombinedBioLabels.mkString(" ") val formattedExpectedCombinedBioLabelString = expectedCombinedBioLabelString.split(" +").mkString(" ") diff --git a/library/src/test/scala/org/clulab/utils/TestArrayView.scala b/library/src/test/scala/org/clulab/utils/TestArrayView.scala deleted file mode 100644 index 2bfbd08ff..000000000 --- a/library/src/test/scala/org/clulab/utils/TestArrayView.scala +++ /dev/null @@ -1,54 +0,0 @@ -package org.clulab.utils - -class TestArrayView extends Test { - - behavior of "ArrayView" - - it should "work with no offset" in { - val array = Array(1, 2, 3) - val arrayView = MutableArrayView(array) - - array.length should be (arrayView.length) - - arrayView.zip(array).foreach { case (arrayViewItem, arrayItem) => - arrayViewItem should be (arrayItem) - } - - arrayView(0) = 4 - arrayView(0) should be (4) - array(0) should be (4) - } - - it should "work with an offset" in { - val offset = 1 - val array = Array(1, 2, 3) - val arrayView = MutableArrayView(array, offset) - - array.length should be (arrayView.length + offset) - - arrayView.zip(array).foreach { case (arrayViewItem, arrayItem) => - arrayViewItem should be (arrayItem + offset) - } - - arrayView(0) = 4 - arrayView(0) should be (4) - array(1) should be (4) - } - - it should "work when clipped" in { - val offset = 1 - val clip = 1 - val array = Array(1, 2, 3) - val arrayView = MutableArrayView(array, offset, array.length - clip) - - array.length should be (arrayView.length + offset + clip) - - arrayView.zip(array).foreach { case (arrayViewItem, arrayItem) => - arrayViewItem should be (arrayItem + offset) - } - - arrayView(0) = 4 - arrayView(0) should be (4) - array(1) should be (4) - } -} diff --git a/webapp/app/org/clulab/processors/webapp/controllers/HomeController.scala b/webapp/app/org/clulab/processors/webapp/controllers/HomeController.scala index 9f4691529..14fc5ebb8 100644 --- a/webapp/app/org/clulab/processors/webapp/controllers/HomeController.scala +++ b/webapp/app/org/clulab/processors/webapp/controllers/HomeController.scala @@ -33,7 +33,7 @@ class HomeController @Inject()(cc: ControllerComponents) extends AbstractControl val kbs = customLexiconNerConfigs.map(_.kb) val caseInsensitiveMatchings = 
customLexiconNerConfigs.map(_.caseInsensitiveMatching) val customLexiconNer = LexiconNER(kbs, caseInsensitiveMatchings, None) - val processor = new BalaurProcessor(optionalNER = Some(customLexiconNer)) + val processor = new BalaurProcessor(lexiconNerOpt = Some(customLexiconNer)) processor } From dbfe52b5f8fefa2bb85d986aebf890dcf8a2d08a Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 12:30:00 -0700 Subject: [PATCH 21/42] Document, Sentence --- .../org/clulab/processors/Document.scala | 24 ++----- .../org/clulab/processors/Processor.scala | 64 ++++++++++--------- .../org/clulab/processors/Sentence.scala | 33 +++++----- .../sequences/BiMEMMSequenceTagger.scala | 4 +- .../clulab/sequences/MEMMSequenceTagger.scala | 4 +- 5 files changed, 61 insertions(+), 68 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/Document.scala b/library/src/main/scala/org/clulab/processors/Document.scala index 1cae6a826..34db68688 100644 --- a/library/src/main/scala/org/clulab/processors/Document.scala +++ b/library/src/main/scala/org/clulab/processors/Document.scala @@ -1,7 +1,5 @@ package org.clulab.processors -import java.io.PrintWriter - import org.clulab.struct.{CorefChains, DirectedGraphEdgeIterator} import org.clulab.utils.Hash import org.clulab.utils.Serializer @@ -9,6 +7,7 @@ import org.json4s.JString import org.json4s.JValue import org.json4s.jackson.prettyJson +import java.io.PrintWriter import scala.collection.mutable /** @@ -26,7 +25,8 @@ class Document( val text: Option[String] = None, /** Map of any arbitrary document attachments such as document creation time */ protected val attachments: Option[mutable.HashMap[String, DocumentAttachment]] = None, - protected val documentCreationTime:Option[String] = None + /** DCT is Document Creation Time */ + protected val dct: Option[String] = None ) extends Serializable { def copy( @@ -35,8 +35,8 @@ class Document( coreferenceChains: Option[CorefChains] = coreferenceChains, text: Option[String] = text, attachments: Option[mutable.HashMap[String, DocumentAttachment]] = None, - documentCreationTime: Option[String] = documentCreationTime - ): Document = new Document(sentences, id, coreferenceChains, text, attachments, documentCreationTime) + dct: Option[String] = dct + ): Document = new Document(sentences, id, coreferenceChains, text, attachments, dct) /** Clears any internal state potentially constructed by the annotators */ // def clear(): Unit = { } @@ -72,18 +72,9 @@ class Document( Hash.ordered(sentences.map(_.ambivalenceHash)) ) - /** Adds an attachment to the document's attachment map */ -// def addAttachment(name: String, attachment: DocumentAttachment): Unit = { -// if (attachments.isEmpty) -// attachments = Some(new mutable.HashMap[String, DocumentAttachment]()) -// attachments.get += name -> attachment -// } - /** Retrieves the attachment with the given name */ def getAttachment(name: String): Option[DocumentAttachment] = attachments.flatMap(_.get(name)) - def removeAttachment(name: String): Unit = attachments.foreach(_ -= name) - /** Retrieves keys to all attachments so that the entire collection can be read * for purposes including but not limited to serialization. If there are no * attachments, that is attachments == None, an empty set is returned. 
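The rename from documentCreationTime to dct above rides on the copy-with-defaults idiom that Document already uses: every copy parameter defaults to the current field, so callers override only what changes and get a fresh instance back. A toy stand-in of that idiom (Doc below is invented for illustration; the real Document carries many more fields):

    // Sketch only: the copy-with-defaults idiom from the hunks above.
    class Doc(val text: Option[String] = None, protected val dct: Option[String] = None) {
      def copy(text: Option[String] = this.text, dct: Option[String] = this.dct): Doc =
        new Doc(text, dct)
      def getDCT: Option[String] = dct
    }

    object DocDemo extends App {
      val doc = new Doc(text = Some("Rain is expected."))
      val dated = doc.copy(dct = Some("2025-05-26"))
      println(dated.getDCT) // prints Some(2025-05-26)
    }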
@@ -102,9 +93,8 @@ class Document(
    * The DCT will impacts how Sentence.norms are generated for DATE expressions
    * @param dct Document creation time
    */
-//  def setDCT(dct:String): Unit = documentCreationTime = Some(dct)
 
-  def getDCT: Option[String] = documentCreationTime
+  def getDCT: Option[String] = dct
 
   def prettyPrint(pw: PrintWriter): Unit = {
     // let's print the sentence-level annotations
@@ -216,7 +206,7 @@ class Document(
       coreferenceChains = doc.coreferenceChains,
       text = doc.text,
       attachments = doc.attachments,
-      documentCreationTime = doc.documentCreationTime
+      dct = doc.dct
     )
 
     newDocument
diff --git a/library/src/main/scala/org/clulab/processors/Processor.scala b/library/src/main/scala/org/clulab/processors/Processor.scala
index b7cab3423..9d84e1527 100644
--- a/library/src/main/scala/org/clulab/processors/Processor.scala
+++ b/library/src/main/scala/org/clulab/processors/Processor.scala
@@ -12,7 +12,7 @@ import scala.collection.mutable
 trait Processor {
 
   /** Constructs a document of tokens from free text; includes sentence splitting and tokenization. */
-  def mkDocument (text:String, keepText:Boolean = false): Document
+  def mkDocument(text:String, keepText:Boolean = false): Document
 
   // The documents here were created with Processor.mkDocument, which could have created a subclassed
   // Document or documents with certain fields already filled in. This implementation only handles
@@ -51,7 +51,7 @@ trait Processor {
       coreferenceChains = None,
       text = combinedTextOpt,
       attachments = Some(attachments),
-      documentCreationTime = headDctOpt
+      dct = headDctOpt
     )
 
     combinedDocument
@@ -84,16 +84,22 @@ trait Processor {
   }
 
   /** Constructs a document of tokens from an array of untokenized sentences. */
-  def mkDocumentFromSentences (sentences:Iterable[String],
-    keepText:Boolean = false,
-    charactersBetweenSentences:Int = 1): Document
+  def mkDocumentFromSentences(
+    sentences: Iterable[String],
+    keepText: Boolean = false,
+    charactersBetweenSentences: Int = 1
+  ): Document
 
   /** Constructs a document of tokens from an array of tokenized sentences. */
-  def mkDocumentFromTokens (sentences:Iterable[Iterable[String]],
-    keepText:Boolean = false,
-    charactersBetweenSentences:Int = 1,
-    charactersBetweenTokens:Int = 1): Document
+  def mkDocumentFromTokens(
+    sentences: Iterable[Iterable[String]],
+    keepText: Boolean = false,
+    charactersBetweenSentences: Int = 1,
+    charactersBetweenTokens: Int = 1
+  ): Document
 
+  /** Lemmatization; returns the lemmas for the given words. */
+  def lemmatize(words: Seq[String]): Seq[String]
 
   // Side-effecting annotations. These modify the document in place, which is not too elegant.
   // There are two reasons for this:
@@ -104,52 +110,52 @@ trait Processor {
   /** Part of speech tagging; modifies the document in place. */
   def tagPartsOfSpeech(doc: Document): Unit
 
-  /** Lemmatization; modifies the document in place. */
-  def lemmatize(words: Seq[String]): Seq[String]
-
   /** Named Entity Recognition; modifies the document in place. */
-  def recognizeNamedEntities (doc:Document): Unit
+  def recognizeNamedEntities(doc: Document): Unit
 
   /** Syntactic parsing; modifies the document in place. */
-  def parse (doc:Document): Unit
+  def parse(doc:Document): Unit
 
   /** Semantic role labeling */
-  def srl (doc: Document): Unit
+  def srl(doc: Document): Unit
 
   /** Shallow parsing; modifies the document in place. */
-  def chunking (doc:Document): Unit
+  def chunking(doc:Document): Unit
 
   /** Coreference resolution; modifies the document in place.
*/ - def resolveCoreference (doc:Document): Unit + def resolveCoreference(doc:Document): Unit /** Discourse parsing; modifies the document in place. */ - def discourse (doc:Document): Unit + def discourse(doc:Document): Unit /** Relation extraction; modifies the document in place. */ def relationExtraction(doc:Document): Unit /** Annotate the given text string, specify whether to retain the text in the resultant Document. */ - def annotate (text:String, keepText:Boolean = false): Document = { - val doc = mkDocument(text, keepText) - if (doc.sentences.nonEmpty) - annotate(doc) - else - doc + def annotate(text: String, keepText: Boolean = false): Document = { + val tokenizedDoc = mkDocument(text, keepText) + val annotatedDoc = // For now, these two documents have the same type. + if (tokenizedDoc.sentences.nonEmpty) annotate(tokenizedDoc) + else tokenizedDoc + + annotatedDoc } /** Annotate the given sentences, specify whether to retain the text in the resultant Document. */ - def annotateFromSentences ( - sentences:Iterable[String], - keepText:Boolean = false): Document = { + def annotateFromSentences( + sentences: Iterable[String], + keepText: Boolean = false + ): Document = { val doc = mkDocumentFromSentences(sentences, keepText) annotate(doc) } /** Annotate the given tokens, specify whether to retain the text in the resultant Document. */ - def annotateFromTokens ( + def annotateFromTokens( sentences:Iterable[Iterable[String]], - keepText:Boolean = false): Document = { + keepText:Boolean = false + ): Document = { val doc = mkDocumentFromTokens(sentences, keepText) annotate(doc) } diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala b/library/src/main/scala/org/clulab/processors/Sentence.scala index 276e2dc2a..97a2350a8 100644 --- a/library/src/main/scala/org/clulab/processors/Sentence.scala +++ b/library/src/main/scala/org/clulab/processors/Sentence.scala @@ -96,39 +96,37 @@ class Sentence( * * @return A directed graph of dependencies if any exist, otherwise None */ - def dependencies:Option[DirectedGraph[String]] = graphs match { + def dependencies: Option[DirectedGraph[String]] = graphs match { case collapsed if collapsed.contains(UNIVERSAL_ENHANCED) => collapsed.get(UNIVERSAL_ENHANCED) case basic if basic.contains(UNIVERSAL_BASIC) => basic.get(UNIVERSAL_BASIC) case _ => None } /** Fetches the universal basic dependencies */ - def universalBasicDependencies:Option[DirectedGraph[String]] = graphs.get(UNIVERSAL_BASIC) + def universalBasicDependencies: Option[DirectedGraph[String]] = graphs.get(UNIVERSAL_BASIC) /** Fetches the universal enhanced dependencies */ - def universalEnhancedDependencies:Option[DirectedGraph[String]] = graphs.get(UNIVERSAL_ENHANCED) + def universalEnhancedDependencies: Option[DirectedGraph[String]] = graphs.get(UNIVERSAL_ENHANCED) /** Fetches the Stanford basic dependencies */ - def stanfordBasicDependencies:Option[DirectedGraph[String]] = graphs.get(STANFORD_BASIC) + def stanfordBasicDependencies: Option[DirectedGraph[String]] = graphs.get(STANFORD_BASIC) /** Fetches the Stanford collapsed dependencies */ - def stanfordCollapsedDependencies:Option[DirectedGraph[String]] = graphs.get(STANFORD_COLLAPSED) + def stanfordCollapsedDependencies: Option[DirectedGraph[String]] = graphs.get(STANFORD_COLLAPSED) - def semanticRoles:Option[DirectedGraph[String]] = graphs.get(SEMANTIC_ROLES) - def enhancedSemanticRoles:Option[DirectedGraph[String]] = graphs.get(ENHANCED_SEMANTIC_ROLES) + def semanticRoles: Option[DirectedGraph[String]] = 
graphs.get(SEMANTIC_ROLES) + def enhancedSemanticRoles: Option[DirectedGraph[String]] = graphs.get(ENHANCED_SEMANTIC_ROLES) - def hybridDependencies:Option[DirectedGraph[String]] = graphs.get(HYBRID_DEPENDENCIES) - - def setDependencies(depType: String, deps: DirectedGraph[String]): Unit = graphs += (depType -> deps) + def hybridDependencies: Option[DirectedGraph[String]] = graphs.get(HYBRID_DEPENDENCIES) /** * Recreates the text of the sentence, preserving the original number of white spaces between tokens * * @return the text of the sentence */ - def getSentenceText:String = getSentenceFragmentText(0, words.length) + def getSentenceText: String = getSentenceFragmentText(0, words.length) - def getSentenceFragmentText(start:Int, end:Int):String = { + def getSentenceFragmentText(start: Int, end: Int):String = { // optimize the single token case if (end - start == 1) raw(start) else { @@ -147,8 +145,8 @@ class Sentence( } } - /** Reverts the current sentence */ - def revert(): Sentence = { + /** Reverses the current sentence */ + def reverse(): Sentence = { val reversedSentence = Sentence( raw.reverse, startOffsets.reverse, @@ -168,7 +166,6 @@ class Sentence( reversedSentence } - // TODO def copy( raw: Seq[String] = raw, startOffsets: Seq[Int] = startOffsets, @@ -203,13 +200,13 @@ class Sentence( object Sentence { def apply( - raw:Seq[String], + raw: Seq[String], startOffsets: Seq[Int], endOffsets: Seq[Int]): Sentence = new Sentence(raw, startOffsets, endOffsets, raw) // words are identical to raw tokens (a common situation) def apply( - raw:Seq[String], + raw: Seq[String], startOffsets: Seq[Int], endOffsets: Seq[Int], words: Seq[String]): Sentence = @@ -234,4 +231,4 @@ object Sentence { tags, lemmas, entities, norms, chunks, tree, deps, relations ) } -} \ No newline at end of file +} diff --git a/library/src/main/scala/org/clulab/sequences/BiMEMMSequenceTagger.scala b/library/src/main/scala/org/clulab/sequences/BiMEMMSequenceTagger.scala index dd7118ac5..d9fb83262 100644 --- a/library/src/main/scala/org/clulab/sequences/BiMEMMSequenceTagger.scala +++ b/library/src/main/scala/org/clulab/sequences/BiMEMMSequenceTagger.scala @@ -168,7 +168,7 @@ abstract class BiMEMMSequenceTagger[L: ClassTag, F: ClassTag]( // original sentence val origSentence = sentences(sentOffset) // actual sentence to be used - val sentence = if (leftToRight) origSentence else origSentence.revert() + val sentence = if (leftToRight) origSentence else origSentence.reverse() // labels to be learned val labels = if (leftToRight) labelExtractor(origSentence) @@ -211,7 +211,7 @@ abstract class BiMEMMSequenceTagger[L: ClassTag, F: ClassTag]( origSentence: Sentence, firstPassLabels:Option[Array[L]], leftToRight:Boolean): Array[L] = { - val sentence = if(leftToRight) origSentence else origSentence.revert() + val sentence = if(leftToRight) origSentence else origSentence.reverse() val firstPass = if(firstPassLabels.nonEmpty) { diff --git a/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala b/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala index a78fc7795..7cba53724 100644 --- a/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala +++ b/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala @@ -32,7 +32,7 @@ abstract class MEMMSequenceTagger[L: ClassTag, F: ClassTag](var order:Int = 1, v var sentCount = 0 for(doc <- docs; origSentence <- doc.sentences) { // labels and features for one sentence - val sentence = if(leftToRight) origSentence else origSentence.revert() + 
val sentence = if(leftToRight) origSentence else origSentence.reverse() val labels = if(leftToRight) labelExtractor(origSentence) else SeqUtils.revert(labelExtractor(origSentence)).toArray @@ -68,7 +68,7 @@ abstract class MEMMSequenceTagger[L: ClassTag, F: ClassTag](var order:Int = 1, v } override def classesOf(origSentence: Sentence):Seq[L] = { - val sentence = if(leftToRight) origSentence else origSentence.revert() + val sentence = if(leftToRight) origSentence else origSentence.reverse() val history = new ArrayBuffer[L]() for(i <- 0 until sentence.size) { From 2c19b03c10564ff245979fe8d1d43bd411b6bece Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 12:30:27 -0700 Subject: [PATCH 22/42] Balaur --- .../clulab/processors/clu/BalaurProcessor.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index 4716e472e..67ef882d0 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -94,10 +94,6 @@ class BalaurProcessor protected ( DocumentMaker.mkDocumentFromTokens(sentences, keepText, charactersBetweenSentences, charactersBetweenSentences) } - override def tagPartsOfSpeech(doc: Document): Unit = { - throw new RuntimeException("ERROR: cannot call this method on its own in this processor!") - } - override def lemmatize(words: Seq[String]): Seq[String] = { val lemmas = words.zipWithIndex.map { case (word, index) => val lemma = wordLemmatizer.lemmatizeWord(word) @@ -119,9 +115,12 @@ class BalaurProcessor protected ( def cheapLemmatize(sentence: Sentence): Seq[String] = sentence.words.map(_.toLowerCase()) + // TODO: Just don't include anything that calls this. def throwCannotCallException(methodName: String): Unit = throw new RuntimeException(s"ERROR: cannot call $methodName on its own in this processor!") + override def tagPartsOfSpeech(doc: Document): Unit = throwCannotCallException("tagPartsOfSpeech") + override def recognizeNamedEntities(doc: Document): Unit = throwCannotCallException("recognizeNamedEntities") override def parse(doc: Document): Unit = throwCannotCallException("parse") @@ -139,9 +138,9 @@ class BalaurProcessor protected ( override def relationExtraction(doc: Document): Unit = throwNotSupportedException("relationExtraction") - override def annotate(document: Document): Document = { + override def annotate(doc: Document): Document = { // Process one sentence at a time through the MTL framework. - val partlyAnnotatedSentences = document.sentences.map { sentence => + val partlyAnnotatedSentences = doc.sentences.map { sentence => val words = sentence.words // Lemmas are created deterministically, not through the MTL framework. val lemmas = lemmatize(words) @@ -167,6 +166,7 @@ class BalaurProcessor protected ( partlyAnnotatedSentence } + // TODO: Improve error handling. catch { // No values, not even lemmas, will be included in the annotation is there was an exception. 
case e: EncoderMaxTokensRuntimeException => @@ -178,7 +178,7 @@ class BalaurProcessor protected ( sentence } } - val partlyAnnotatedDocument = document.copy(sentences = partlyAnnotatedSentences) + val partlyAnnotatedDocument = doc.copy(sentences = partlyAnnotatedSentences) val fullyAnnotatedDocument = numericEntityRecognizerOpt.map { numericEntityRecognizer => val numericMentions = numericEntityRecognizer.extractFrom(partlyAnnotatedDocument) val (newLabels, newNorms) = NumericUtils.mkLabelsAndNorms(partlyAnnotatedDocument, numericMentions) From 55eb202c8069a81771304e8078e44c104368a365 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 15:53:14 -0700 Subject: [PATCH 23/42] Remove Scala-specific GraphMap --- .../org/clulab/struct/DependencyMap.scala | 12 --------- .../org/clulab/struct/GraphMap.scala | 20 -------------- .../org/clulab/struct/DependencyMap.scala | 14 ---------- .../org/clulab/struct/GraphMap.scala | 22 ---------------- .../org/clulab/struct/DependencyMap.scala | 14 ---------- .../scala-3/org/clulab/struct/GraphMap.scala | 22 ---------------- .../org/clulab/processors/Sentence.scala | 26 +++++++++---------- .../processors/clu/BalaurProcessor.scala | 25 +++++++++--------- .../org/clulab/processors/clu/Veil.scala | 9 +++---- .../serialization/DocumentSerializer.scala | 4 +-- .../serialization/json/JSONSerializer.scala | 2 +- .../clulab/serialization/json/package.scala | 4 +-- .../scala/org/clulab/struct/Annotation.scala | 4 +-- .../clulab/struct/DependencyMapNames.scala | 7 ----- .../{GraphMapNames.scala => GraphMap.scala} | 10 ++++++- .../org/clulab/utils/TestFindHeads.scala | 4 +-- 16 files changed, 45 insertions(+), 154 deletions(-) delete mode 100644 library/src/main/scala-2.11_2.12/org/clulab/struct/DependencyMap.scala delete mode 100644 library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala delete mode 100644 library/src/main/scala-2.13/org/clulab/struct/DependencyMap.scala delete mode 100644 library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala delete mode 100644 library/src/main/scala-3/org/clulab/struct/DependencyMap.scala delete mode 100644 library/src/main/scala-3/org/clulab/struct/GraphMap.scala delete mode 100644 library/src/main/scala/org/clulab/struct/DependencyMapNames.scala rename library/src/main/scala/org/clulab/struct/{GraphMapNames.scala => GraphMap.scala} (68%) diff --git a/library/src/main/scala-2.11_2.12/org/clulab/struct/DependencyMap.scala b/library/src/main/scala-2.11_2.12/org/clulab/struct/DependencyMap.scala deleted file mode 100644 index d9b2cbfc5..000000000 --- a/library/src/main/scala-2.11_2.12/org/clulab/struct/DependencyMap.scala +++ /dev/null @@ -1,12 +0,0 @@ -package org.clulab.struct - -import scala.collection.mutable - -class DependencyMap protected extends mutable.HashMap[Int, DirectedGraph[String]] { - override def initialSize: Int = 2 // we have very few dependency types, so let's create a small hash to save memory -} - -object DependencyMap extends DependencyMapNames { - - def apply(): DependencyMap = new DependencyMap() -} diff --git a/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala b/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala deleted file mode 100644 index 8de1af507..000000000 --- a/library/src/main/scala-2.11_2.12/org/clulab/struct/GraphMap.scala +++ /dev/null @@ -1,20 +0,0 @@ -package org.clulab.struct - -import scala.collection.mutable - -class GraphMap protected extends mutable.HashMap[String, DirectedGraph[String]] { - override def initialSize: Int = 2 // 
we have very few dependency types, so let's create a small hash to save memory -} - -object GraphMap extends GraphMapNames { - type GraphMapType = GraphMap - - val EMPTY_GRAPH = GraphMap() - - def apply(): GraphMapType = new GraphMap() - - def apply(existing: Map[String, DirectedGraph[String]]): GraphMapType = { - val gm = GraphMap() - gm ++= existing - } -} diff --git a/library/src/main/scala-2.13/org/clulab/struct/DependencyMap.scala b/library/src/main/scala-2.13/org/clulab/struct/DependencyMap.scala deleted file mode 100644 index c4ed49b82..000000000 --- a/library/src/main/scala-2.13/org/clulab/struct/DependencyMap.scala +++ /dev/null @@ -1,14 +0,0 @@ -package org.clulab.struct - -import scala.collection.mutable - -object DependencyMap extends DependencyMapNames { - // This was previously a class inheriting from HashMap. However, - // [warn] ...: inheritance from class HashMap in package mutable is deprecated (since 2.13.0): HashMap will be made final; use .withDefault for the common use case of computing a default value. - type DependencyMap = mutable.HashMap[String, DirectedGraph[String]] - - def apply(): DependencyMap = { - // we have very few dependency types, so let's create a small hash to save memory. - new DependencyMap(2, mutable.HashMap.defaultLoadFactor) - } -} diff --git a/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala b/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala deleted file mode 100644 index 4cb404f24..000000000 --- a/library/src/main/scala-2.13/org/clulab/struct/GraphMap.scala +++ /dev/null @@ -1,22 +0,0 @@ -package org.clulab.struct - -import scala.collection.mutable - -object GraphMap extends GraphMapNames { - - // This was previously a class inheriting from HashMap. However, - // [warn] ...: inheritance from class HashMap in package mutable is deprecated (since 2.13.0): HashMap will be made final; use .withDefault for the common use case of computing a default value - type GraphMapType = mutable.HashMap[String, DirectedGraph[String]] - - val EMPTY_GRAPH = GraphMap() - - def apply(): GraphMapType = { - // we have very few dependency types, so let's create a small hash to save memory. - new GraphMapType(2, mutable.HashMap.defaultLoadFactor) - } - - def apply(existing: scala.collection.Map[String, DirectedGraph[String]]): GraphMapType = { - val gm = GraphMap() - gm ++= existing - } -} diff --git a/library/src/main/scala-3/org/clulab/struct/DependencyMap.scala b/library/src/main/scala-3/org/clulab/struct/DependencyMap.scala deleted file mode 100644 index c4ed49b82..000000000 --- a/library/src/main/scala-3/org/clulab/struct/DependencyMap.scala +++ /dev/null @@ -1,14 +0,0 @@ -package org.clulab.struct - -import scala.collection.mutable - -object DependencyMap extends DependencyMapNames { - // This was previously a class inheriting from HashMap. However, - // [warn] ...: inheritance from class HashMap in package mutable is deprecated (since 2.13.0): HashMap will be made final; use .withDefault for the common use case of computing a default value. - type DependencyMap = mutable.HashMap[String, DirectedGraph[String]] - - def apply(): DependencyMap = { - // we have very few dependency types, so let's create a small hash to save memory. 
- new DependencyMap(2, mutable.HashMap.defaultLoadFactor) - } -} diff --git a/library/src/main/scala-3/org/clulab/struct/GraphMap.scala b/library/src/main/scala-3/org/clulab/struct/GraphMap.scala deleted file mode 100644 index 4cb404f24..000000000 --- a/library/src/main/scala-3/org/clulab/struct/GraphMap.scala +++ /dev/null @@ -1,22 +0,0 @@ -package org.clulab.struct - -import scala.collection.mutable - -object GraphMap extends GraphMapNames { - - // This was previously a class inheriting from HashMap. However, - // [warn] ...: inheritance from class HashMap in package mutable is deprecated (since 2.13.0): HashMap will be made final; use .withDefault for the common use case of computing a default value - type GraphMapType = mutable.HashMap[String, DirectedGraph[String]] - - val EMPTY_GRAPH = GraphMap() - - def apply(): GraphMapType = { - // we have very few dependency types, so let's create a small hash to save memory. - new GraphMapType(2, mutable.HashMap.defaultLoadFactor) - } - - def apply(existing: scala.collection.Map[String, DirectedGraph[String]]): GraphMapType = { - val gm = GraphMap() - gm ++= existing - } -} diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala b/library/src/main/scala/org/clulab/processors/Sentence.scala index 97a2350a8..d7589cc19 100644 --- a/library/src/main/scala/org/clulab/processors/Sentence.scala +++ b/library/src/main/scala/org/clulab/processors/Sentence.scala @@ -1,7 +1,6 @@ package org.clulab.processors import org.clulab.struct.{DirectedGraph, GraphMap, RelationTriple, Tree} -import org.clulab.struct.GraphMap._ import org.clulab.utils.Hash import scala.collection.mutable @@ -37,7 +36,7 @@ class Sentence( /** Constituent tree of this sentence; includes head words */ val syntacticTree: Option[Tree] = None, /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ - val graphs: GraphMapType = GraphMap(), + val graphs: GraphMap.ImmutableType = GraphMap.immutableEmpty, /** Relation triples from OpenIE */ val relations:Option[Seq[RelationTriple]] = None ) extends Serializable { @@ -97,27 +96,28 @@ class Sentence( * @return A directed graph of dependencies if any exist, otherwise None */ def dependencies: Option[DirectedGraph[String]] = graphs match { - case collapsed if collapsed.contains(UNIVERSAL_ENHANCED) => collapsed.get(UNIVERSAL_ENHANCED) - case basic if basic.contains(UNIVERSAL_BASIC) => basic.get(UNIVERSAL_BASIC) + case collapsed if collapsed.contains(GraphMap.UNIVERSAL_ENHANCED) => collapsed.get(GraphMap.UNIVERSAL_ENHANCED) + case basic if basic.contains(GraphMap.UNIVERSAL_BASIC) => basic.get(GraphMap.UNIVERSAL_BASIC) case _ => None } /** Fetches the universal basic dependencies */ - def universalBasicDependencies: Option[DirectedGraph[String]] = graphs.get(UNIVERSAL_BASIC) + def universalBasicDependencies: Option[DirectedGraph[String]] = graphs.get(GraphMap.UNIVERSAL_BASIC) /** Fetches the universal enhanced dependencies */ - def universalEnhancedDependencies: Option[DirectedGraph[String]] = graphs.get(UNIVERSAL_ENHANCED) + def universalEnhancedDependencies: Option[DirectedGraph[String]] = graphs.get(GraphMap.UNIVERSAL_ENHANCED) /** Fetches the Stanford basic dependencies */ - def stanfordBasicDependencies: Option[DirectedGraph[String]] = graphs.get(STANFORD_BASIC) + def stanfordBasicDependencies: Option[DirectedGraph[String]] = graphs.get(GraphMap.STANFORD_BASIC) /** Fetches the Stanford collapsed dependencies */ - def stanfordCollapsedDependencies: Option[DirectedGraph[String]] = graphs.get(STANFORD_COLLAPSED) + 
def stanfordCollapsedDependencies: Option[DirectedGraph[String]] = graphs.get(GraphMap.STANFORD_COLLAPSED) - def semanticRoles: Option[DirectedGraph[String]] = graphs.get(SEMANTIC_ROLES) - def enhancedSemanticRoles: Option[DirectedGraph[String]] = graphs.get(ENHANCED_SEMANTIC_ROLES) + def semanticRoles: Option[DirectedGraph[String]] = graphs.get(GraphMap.SEMANTIC_ROLES) - def hybridDependencies: Option[DirectedGraph[String]] = graphs.get(HYBRID_DEPENDENCIES) + def enhancedSemanticRoles: Option[DirectedGraph[String]] = graphs.get(GraphMap.ENHANCED_SEMANTIC_ROLES) + + def hybridDependencies: Option[DirectedGraph[String]] = graphs.get(GraphMap.HYBRID_DEPENDENCIES) /** * Recreates the text of the sentence, preserving the original number of white spaces between tokens @@ -178,7 +178,7 @@ class Sentence( norms: Option[Seq[String]] = norms, chunks: Option[Seq[String]] = chunks, syntacticTree: Option[Tree] = syntacticTree, - graphs: GraphMapType = graphs, + graphs: GraphMap.ImmutableType = graphs, relations: Option[Seq[RelationTriple]] = relations ): Sentence = new Sentence( @@ -223,7 +223,7 @@ object Sentence { norms: Option[Seq[String]] = None, chunks: Option[Seq[String]] = None, tree: Option[Tree] = None, - deps: GraphMapType = GraphMap.EMPTY_GRAPH, + deps: GraphMap.ImmutableType = GraphMap.immutableEmpty, relations: Option[Seq[RelationTriple]] = None ): Sentence = { new Sentence( diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index 67ef882d0..ce20b2f62 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -309,9 +309,8 @@ class BalaurProcessor protected ( words: Seq[String], lemmas: Seq[String], tags: Seq[String], termTags: Array[Array[PredictionScore]], nonTermTags: Array[Array[PredictionScore]] - ): GraphMap.GraphMapType = { + ): GraphMap.ImmutableType = { val verbose = false - val graphs = GraphMap() val size = words.length // bht is used just for debugging purposes here val (bht, deps, roots) = hexaDecoder.decode(termTags, nonTermTags, topK = 25, verbose) @@ -323,27 +322,27 @@ class BalaurProcessor protected ( println("Roots: " + roots.get.mkString(", ")) } if (deps.nonEmpty && roots.nonEmpty) { - // TODO: This can be made in one fell swoop. 
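The deleted TODO above is what the rest of this hunk resolves: the three dependency graphs are no longer accumulated into a mutable GraphMap with +=, but are produced as one immutable Map expression. Condensed to a sketch of the new shape:

    if (deps.nonEmpty && roots.nonEmpty)
      Map(
        GraphMap.UNIVERSAL_BASIC -> depGraph,
        GraphMap.UNIVERSAL_ENHANCED -> enhancedDepGraph,
        GraphMap.HYBRID_DEPENDENCIES -> enhancedDepGraph
      )
    else
      GraphMap.immutableEmpty

With no intermediate mutable state, the method can return its result directly.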
- // basic dependencies that replicate treebank annotations val depGraph = new DirectedGraph[String](deps.get, Some(size), roots) - graphs += GraphMap.UNIVERSAL_BASIC -> depGraph - // enhanced dependencies as defined by Manning val enhancedDepGraph = ToEnhancedDependencies.generateUniversalEnhancedDependencies(words, lemmas, tags, depGraph) - graphs += GraphMap.UNIVERSAL_ENHANCED -> enhancedDepGraph - // ideally, hybrid dependencies should contain both syntactic dependencies and semantic roles - // however, this processor produces only syntactic dependencies - graphs += GraphMap.HYBRID_DEPENDENCIES -> enhancedDepGraph + Map( + GraphMap.UNIVERSAL_BASIC -> depGraph, + GraphMap.UNIVERSAL_ENHANCED -> enhancedDepGraph, + // ideally, hybrid dependencies should contain both syntactic dependencies and semantic roles + // however, this processor produces only syntactic dependencies + GraphMap.HYBRID_DEPENDENCIES -> enhancedDepGraph + ) } - graphs + else + GraphMap.immutableEmpty } } object BalaurProcessor { - val logger:Logger = LoggerFactory.getLogger(classOf[BalaurProcessor]) - val prefix:String = "BalaurProcessor" + val logger: Logger = LoggerFactory.getLogger(classOf[BalaurProcessor]) + val prefix: String = "BalaurProcessor" val NER_TASK = "NER" val POS_TASK = "POS" diff --git a/library/src/main/scala/org/clulab/processors/clu/Veil.scala b/library/src/main/scala/org/clulab/processors/clu/Veil.scala index aac0bc99f..430762e83 100644 --- a/library/src/main/scala/org/clulab/processors/clu/Veil.scala +++ b/library/src/main/scala/org/clulab/processors/clu/Veil.scala @@ -2,7 +2,6 @@ package org.clulab.processors.clu import org.clulab.processors.{Document, Processor, Sentence} import org.clulab.struct.{DirectedGraph, Edge, GraphMap, RelationTriple, Tree} -import org.clulab.struct.GraphMap.GraphMapType import org.clulab.utils.WrappedArraySeq import scala.collection.mutable.{Set => MutableSet} @@ -137,18 +136,16 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) } } - def unveilGraphs(veiledGraphs: GraphMapType, sentenceIndex: Int): GraphMapType = { + def unveilGraphs(veiledGraphs: GraphMap.ImmutableType, sentenceIndex: Int): GraphMap.ImmutableType = { val unveilArray = unveilArrays(sentenceIndex) - val unveiledGraphs = GraphMap() val originalLength = originalDocument.sentences(sentenceIndex).words.length - - veiledGraphs.foreach { case (name, veiledDirectedGraph) => + val unveiledGraphs = veiledGraphs.map { case (name, veiledDirectedGraph) => val unveiledEdges = veiledDirectedGraph.allEdges.map { case (veiledSource, veiledDestination, relation) => Edge(unveilArray(veiledSource), unveilArray(veiledDestination), relation) } val unveiledRoots = veiledDirectedGraph.roots.map(unveilArray) - unveiledGraphs(name) = new DirectedGraph(unveiledEdges, Some(originalLength), Some(unveiledRoots)) + name -> new DirectedGraph(unveiledEdges, Some(originalLength), Some(unveiledRoots)) } unveiledGraphs } diff --git a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala index c2f3f885c..4e9db1baf 100644 --- a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala +++ b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala @@ -235,7 +235,7 @@ class DocumentSerializer extends Logging { assert(normBuffer.isEmpty || normBuffer.size == tokenCount) assert(chunkBuffer.isEmpty || chunkBuffer.size == tokenCount) - var deps = GraphMap() + val deps = 
GraphMap.mutableEmpty var tree:Option[Tree] = None var relations:Option[Seq[RelationTriple]] = None while ({ @@ -266,7 +266,7 @@ class DocumentSerializer extends Logging { bufferOption(entityBuffer, nilEntities), bufferOption(normBuffer, nilNorms), bufferOption(chunkBuffer, nilChunks), - tree, deps, relations + tree, deps.toMap, relations ) } diff --git a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala index 26853d11b..2e66d1f76 100644 --- a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala +++ b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala @@ -102,7 +102,7 @@ object JSONSerializer { key -> toDirectedGraph(json, Some(preferredSize)) }.toMap - GraphMap(graphs) + graphs } val relations = None // TODO: Are these not serialized? val parsedSentence = Sentence( diff --git a/library/src/main/scala/org/clulab/serialization/json/package.scala b/library/src/main/scala/org/clulab/serialization/json/package.scala index a27c14174..3d93d9cf4 100644 --- a/library/src/main/scala/org/clulab/serialization/json/package.scala +++ b/library/src/main/scala/org/clulab/serialization/json/package.scala @@ -52,8 +52,8 @@ package object json { } } - implicit class GraphMapOps(gm: GraphMapType) extends JSONSerialization { - def jsonAST: JValue = Extraction.decompose(gm.toMap.map { case (k, v) => k -> v.jsonAST }) // instead of mapValues + implicit class GraphMapOps(gm: GraphMap.ImmutableType) extends JSONSerialization { + def jsonAST: JValue = Extraction.decompose(gm.map { case (k, v) => k -> v.jsonAST }) // instead of mapValues } /** For Document */ diff --git a/library/src/main/scala/org/clulab/struct/Annotation.scala b/library/src/main/scala/org/clulab/struct/Annotation.scala index 4323cecf3..5d98de5ed 100644 --- a/library/src/main/scala/org/clulab/struct/Annotation.scala +++ b/library/src/main/scala/org/clulab/struct/Annotation.scala @@ -1,7 +1,5 @@ package org.clulab.struct -import org.clulab.struct.GraphMap.GraphMapType - // These are by the word ones and then there are relationships between words. // So parse, might not be a thing that is per word. 
//case class WordParse(tag: String, lemma: String, entity: String, norm: String, chunk: String) @@ -21,7 +19,7 @@ case class Annotation( /** Constituent tree of this sentence; includes head words */ syntacticTree: Option[Tree] = None, /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ - graphs: GraphMapType = GraphMap(), + graphs: GraphMap.ImmutableType = GraphMap.immutableEmpty, /** Relation triples from OpenIE */ relations:Option[Array[RelationTriple]] = None ) { diff --git a/library/src/main/scala/org/clulab/struct/DependencyMapNames.scala b/library/src/main/scala/org/clulab/struct/DependencyMapNames.scala deleted file mode 100644 index 82a8b39ab..000000000 --- a/library/src/main/scala/org/clulab/struct/DependencyMapNames.scala +++ /dev/null @@ -1,7 +0,0 @@ -package org.clulab.struct - -trait DependencyMapNames { - val STANFORD_BASIC = 0 // basic Stanford dependencies - val STANFORD_COLLAPSED = 1 // collapsed Stanford dependencies - val SEMANTIC_ROLES = 2 // semantic roles from CoNLL 2008-09, which includes PropBank and NomBank -} diff --git a/library/src/main/scala/org/clulab/struct/GraphMapNames.scala b/library/src/main/scala/org/clulab/struct/GraphMap.scala similarity index 68% rename from library/src/main/scala/org/clulab/struct/GraphMapNames.scala rename to library/src/main/scala/org/clulab/struct/GraphMap.scala index 012f0f52a..f9111af49 100644 --- a/library/src/main/scala/org/clulab/struct/GraphMapNames.scala +++ b/library/src/main/scala/org/clulab/struct/GraphMap.scala @@ -1,6 +1,14 @@ package org.clulab.struct -trait GraphMapNames { +import scala.collection.mutable + +object GraphMap { + type ImmutableType = Map[String, DirectedGraph[String]] + type MutableType = mutable.Map[String, DirectedGraph[String]] + + val immutableEmpty: ImmutableType = Map.empty + val mutableEmpty: MutableType = mutable.Map.empty[String, DirectedGraph[String]] + val UNIVERSAL_BASIC = "universal-basic" // basic Universal dependencies val UNIVERSAL_ENHANCED = "universal-enhanced" // collapsed (or enhanced) Universal dependencies val STANFORD_BASIC = "stanford-basic" // basic Stanford dependencies diff --git a/library/src/test/scala/org/clulab/utils/TestFindHeads.scala b/library/src/test/scala/org/clulab/utils/TestFindHeads.scala index 4fd3fdfe4..bb9ba3823 100644 --- a/library/src/test/scala/org/clulab/utils/TestFindHeads.scala +++ b/library/src/test/scala/org/clulab/utils/TestFindHeads.scala @@ -11,10 +11,10 @@ class TestFindHeads extends Test { val endOffsets = Seq(0) // unused val sentence = new Sentence( words, startOffsets, endOffsets, words, - tags = Some(words) + tags = Some(words), + graphs = Map(UNIVERSAL_BASIC -> directedGraph) ) - sentence.graphs(UNIVERSAL_BASIC) = directedGraph sentence } From 3c3f3db3f1f0b0192e6e57ad81f649cc1c3817dd Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 16:04:10 -0700 Subject: [PATCH 24/42] More GraphMap --- .../src/main/scala/org/clulab/processors/Sentence.scala | 6 +++--- .../scala/org/clulab/processors/clu/BalaurProcessor.scala | 4 ++-- .../src/main/scala/org/clulab/processors/clu/Veil.scala | 2 +- .../org/clulab/serialization/DocumentSerializer.scala | 4 ++-- .../scala/org/clulab/serialization/json/package.scala | 2 +- library/src/main/scala/org/clulab/struct/Annotation.scala | 2 +- library/src/main/scala/org/clulab/struct/GraphMap.scala | 8 ++------ 7 files changed, 12 insertions(+), 16 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/Sentence.scala 
b/library/src/main/scala/org/clulab/processors/Sentence.scala index d7589cc19..acb14b56b 100644 --- a/library/src/main/scala/org/clulab/processors/Sentence.scala +++ b/library/src/main/scala/org/clulab/processors/Sentence.scala @@ -36,7 +36,7 @@ class Sentence( /** Constituent tree of this sentence; includes head words */ val syntacticTree: Option[Tree] = None, /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ - val graphs: GraphMap.ImmutableType = GraphMap.immutableEmpty, + val graphs: GraphMap.Type = GraphMap.empty, /** Relation triples from OpenIE */ val relations:Option[Seq[RelationTriple]] = None ) extends Serializable { @@ -178,7 +178,7 @@ class Sentence( norms: Option[Seq[String]] = norms, chunks: Option[Seq[String]] = chunks, syntacticTree: Option[Tree] = syntacticTree, - graphs: GraphMap.ImmutableType = graphs, + graphs: GraphMap.Type = graphs, relations: Option[Seq[RelationTriple]] = relations ): Sentence = new Sentence( @@ -223,7 +223,7 @@ object Sentence { norms: Option[Seq[String]] = None, chunks: Option[Seq[String]] = None, tree: Option[Tree] = None, - deps: GraphMap.ImmutableType = GraphMap.immutableEmpty, + deps: GraphMap.Type = GraphMap.empty, relations: Option[Seq[RelationTriple]] = None ): Sentence = { new Sentence( diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index ce20b2f62..8404ed13f 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -309,7 +309,7 @@ class BalaurProcessor protected ( words: Seq[String], lemmas: Seq[String], tags: Seq[String], termTags: Array[Array[PredictionScore]], nonTermTags: Array[Array[PredictionScore]] - ): GraphMap.ImmutableType = { + ): GraphMap.Type = { val verbose = false val size = words.length // bht is used just for debugging purposes here @@ -336,7 +336,7 @@ class BalaurProcessor protected ( ) } else - GraphMap.immutableEmpty + GraphMap.empty } } diff --git a/library/src/main/scala/org/clulab/processors/clu/Veil.scala b/library/src/main/scala/org/clulab/processors/clu/Veil.scala index 430762e83..31d25ed9c 100644 --- a/library/src/main/scala/org/clulab/processors/clu/Veil.scala +++ b/library/src/main/scala/org/clulab/processors/clu/Veil.scala @@ -136,7 +136,7 @@ class VeiledDocument(originalDocument: Document, veiledWords: Seq[(Int, Range)]) } } - def unveilGraphs(veiledGraphs: GraphMap.ImmutableType, sentenceIndex: Int): GraphMap.ImmutableType = { + def unveilGraphs(veiledGraphs: GraphMap.Type, sentenceIndex: Int): GraphMap.Type = { val unveilArray = unveilArrays(sentenceIndex) val originalLength = originalDocument.sentences(sentenceIndex).words.length val unveiledGraphs = veiledGraphs.map { case (name, veiledDirectedGraph) => diff --git a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala index 4e9db1baf..cfae7e40b 100644 --- a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala +++ b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala @@ -235,7 +235,7 @@ class DocumentSerializer extends Logging { assert(normBuffer.isEmpty || normBuffer.size == tokenCount) assert(chunkBuffer.isEmpty || chunkBuffer.size == tokenCount) - val deps = GraphMap.mutableEmpty + var deps = GraphMap.empty var tree:Option[Tree] = None var relations:Option[Seq[RelationTriple]] = None 
while ({ @@ -266,7 +266,7 @@ class DocumentSerializer extends Logging { bufferOption(entityBuffer, nilEntities), bufferOption(normBuffer, nilNorms), bufferOption(chunkBuffer, nilChunks), - tree, deps.toMap, relations + tree, deps, relations ) } diff --git a/library/src/main/scala/org/clulab/serialization/json/package.scala b/library/src/main/scala/org/clulab/serialization/json/package.scala index 3d93d9cf4..88276826b 100644 --- a/library/src/main/scala/org/clulab/serialization/json/package.scala +++ b/library/src/main/scala/org/clulab/serialization/json/package.scala @@ -52,7 +52,7 @@ package object json { } } - implicit class GraphMapOps(gm: GraphMap.ImmutableType) extends JSONSerialization { + implicit class GraphMapOps(gm: GraphMap.Type) extends JSONSerialization { def jsonAST: JValue = Extraction.decompose(gm.map { case (k, v) => k -> v.jsonAST }) // instead of mapValues } diff --git a/library/src/main/scala/org/clulab/struct/Annotation.scala b/library/src/main/scala/org/clulab/struct/Annotation.scala index 5d98de5ed..d9f390a86 100644 --- a/library/src/main/scala/org/clulab/struct/Annotation.scala +++ b/library/src/main/scala/org/clulab/struct/Annotation.scala @@ -19,7 +19,7 @@ case class Annotation( /** Constituent tree of this sentence; includes head words */ syntacticTree: Option[Tree] = None, /** DAG of syntactic and semantic dependencies; word offsets start at 0 */ - graphs: GraphMap.ImmutableType = GraphMap.immutableEmpty, + graphs: GraphMap.Type = GraphMap.empty, /** Relation triples from OpenIE */ relations:Option[Array[RelationTriple]] = None ) { diff --git a/library/src/main/scala/org/clulab/struct/GraphMap.scala b/library/src/main/scala/org/clulab/struct/GraphMap.scala index f9111af49..6857916e3 100644 --- a/library/src/main/scala/org/clulab/struct/GraphMap.scala +++ b/library/src/main/scala/org/clulab/struct/GraphMap.scala @@ -1,13 +1,9 @@ package org.clulab.struct -import scala.collection.mutable - object GraphMap { - type ImmutableType = Map[String, DirectedGraph[String]] - type MutableType = mutable.Map[String, DirectedGraph[String]] + type Type = Map[String, DirectedGraph[String]] - val immutableEmpty: ImmutableType = Map.empty - val mutableEmpty: MutableType = mutable.Map.empty[String, DirectedGraph[String]] + val empty: Type = Map.empty val UNIVERSAL_BASIC = "universal-basic" // basic Universal dependencies val UNIVERSAL_ENHANCED = "universal-enhanced" // collapsed (or enhanced) Universal dependencies From 61d871dffc09ef2f748aed023c15e69d713e4729 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 16:43:01 -0700 Subject: [PATCH 25/42] SeqView again --- .../org/clulab/scala/SeqView.scala | 2 +- .../org/clulab/scala/package.scala | 11 --- .../scala-2.13/org/clulab/scala/SeqView.scala | 2 +- .../scala-2.13/org/clulab/scala/package.scala | 11 --- .../org/clulab/odinstarter/OdinStarter3.scala | 67 ------------------- .../scala-3/org/clulab/scala/SeqView.scala | 2 +- .../scala-3/org/clulab/scala/package.scala | 11 --- .../scala/org/clulab/odin/impl/Taxonomy.scala | 2 +- .../org/clulab/sequences/LexiconNER.scala | 19 +++--- 9 files changed, 13 insertions(+), 114 deletions(-) delete mode 100644 library/src/main/scala-2.11_2.12/org/clulab/scala/package.scala delete mode 100644 library/src/main/scala-2.13/org/clulab/scala/package.scala delete mode 100644 library/src/main/scala-3/org/clulab/odinstarter/OdinStarter3.scala delete mode 100644 library/src/main/scala-3/org/clulab/scala/package.scala diff --git 
a/library/src/main/scala-2.11_2.12/org/clulab/scala/SeqView.scala b/library/src/main/scala-2.11_2.12/org/clulab/scala/SeqView.scala index c49d930cb..649887166 100644 --- a/library/src/main/scala-2.11_2.12/org/clulab/scala/SeqView.scala +++ b/library/src/main/scala-2.11_2.12/org/clulab/scala/SeqView.scala @@ -1,5 +1,5 @@ package org.clulab.scala object SeqView { - type Immutable[T] = scala.collection.SeqView[T, Seq[T]] + type Type[T] = scala.collection.SeqView[T, Seq[T]] } diff --git a/library/src/main/scala-2.11_2.12/org/clulab/scala/package.scala b/library/src/main/scala-2.11_2.12/org/clulab/scala/package.scala deleted file mode 100644 index a6a43654c..000000000 --- a/library/src/main/scala-2.11_2.12/org/clulab/scala/package.scala +++ /dev/null @@ -1,11 +0,0 @@ -package org.clulab - -import _root_.scala.{BufferedIterator => GenericBufferedIterator} -import _root_.scala.collection.immutable.{Stream => ImmutableStream} - -package object scala { - type BufferedIterator[T] = GenericBufferedIterator[T] - - type LazyList[T] = ImmutableStream[T] - val LazyList = ImmutableStream -} diff --git a/library/src/main/scala-2.13/org/clulab/scala/SeqView.scala b/library/src/main/scala-2.13/org/clulab/scala/SeqView.scala index d55c09e97..e227e7cbb 100644 --- a/library/src/main/scala-2.13/org/clulab/scala/SeqView.scala +++ b/library/src/main/scala-2.13/org/clulab/scala/SeqView.scala @@ -1,5 +1,5 @@ package org.clulab.scala object SeqView { - type Immutable[T] = scala.collection.View[T] + type Type[T] = scala.collection.View[T] } diff --git a/library/src/main/scala-2.13/org/clulab/scala/package.scala b/library/src/main/scala-2.13/org/clulab/scala/package.scala deleted file mode 100644 index 8df18bbdf..000000000 --- a/library/src/main/scala-2.13/org/clulab/scala/package.scala +++ /dev/null @@ -1,11 +0,0 @@ -package org.clulab - -import _root_.scala.collection.{BufferedIterator => GenericBufferedIterator} -import _root_.scala.collection.immutable.{LazyList => ImmutableLazyList} - -package object scala { - type BufferedIterator[T] = GenericBufferedIterator[T] - - type LazyList[T] = ImmutableLazyList[T] - val LazyList = ImmutableLazyList -} diff --git a/library/src/main/scala-3/org/clulab/odinstarter/OdinStarter3.scala b/library/src/main/scala-3/org/clulab/odinstarter/OdinStarter3.scala deleted file mode 100644 index a1332bf6d..000000000 --- a/library/src/main/scala-3/org/clulab/odinstarter/OdinStarter3.scala +++ /dev/null @@ -1,67 +0,0 @@ -package org.clulab.odinstarter - -import org.clulab.odin.ExtractorEngine -import org.clulab.odin.Mention -import org.clulab.processors.clu.BalaurProcessor -import org.clulab.sequences.LexiconNER -import org.clulab.utils.FileUtils - -import java.io.File - -object OdinStarter3: - - // From sbt use "runMain org.clulab.odinstarter.main". - @main def main() = - // When using an IDE rather than sbt, make sure the working directory for the run - // configuration is the subproject directory so that this resourceDir is accessible. - val resourceDir: File = new File("./src/main/resources") - val customLexiconNer = // i.e., Named Entity Recognizer - val kbsAndCaseInsensitiveMatchings: Seq[(String, Boolean)] = Seq( - // You can add additional kbs (knowledge bases) and caseInsensitiveMatchings here. 
- ("org/clulab/odinstarter/FOOD.tsv", true) // , - // ("org/clulab/odinstarter/RESTAURANTS.tsv", false) - ) - val kbs = kbsAndCaseInsensitiveMatchings.map(_._1) - val caseInsensitiveMatchings = kbsAndCaseInsensitiveMatchings.map(_._2) - val isLocal = kbs.forall(new File(resourceDir, _).exists) - val baseDirOpt = if isLocal then Some(resourceDir) else None - - LexiconNER(kbs, caseInsensitiveMatchings, baseDirOpt) - val processor = new BalaurProcessor(lexiconNerOpt = Some(customLexiconNer)) - val extractorEngine = - val masterResource = "/org/clulab/odinstarter/main.yml" - // We usually want to reload rules during development, - // so we try to load them from the filesystem first, then jar. - // The resource must start with /, but the file probably shouldn't. - val masterFile = new File(resourceDir, masterResource.drop(1)) - - if masterFile.exists then - // Read rules from file in filesystem. - val rules = FileUtils.getTextFromFile(masterFile) - ExtractorEngine(rules, ruleDir = Some(resourceDir)) - else - // Read rules from resource in jar. - val rules = FileUtils.getTextFromResource(masterResource) - ExtractorEngine(rules, ruleDir = None) - val document = processor.annotate("John eats cake.") - val mentions = extractorEngine.extractFrom(document).sortBy(_.arguments.size) - - for mention <- mentions - do printMention(mention) - - def printMention(mention: Mention, nameOpt: Option[String] = None, depth: Int = 0): Unit = - val indent = " " * depth - val name = nameOpt.getOrElse("") - val labels = mention.labels - val words = mention.sentenceObj.words - val tokens = mention.tokenInterval.map(mention.sentenceObj.words) - - println(indent + " Name: " + name) - println(indent + " Labels: " + labels.mkString(" ")) - println(indent + " Sentence: " + words.mkString(" ")) - println(indent + " Tokens: " + tokens.mkString(" ")) - if mention.arguments.nonEmpty then - println(indent + "Arguments:") - for (name, mentions) <- mention.arguments; mention <- mentions - do printMention(mention, Some(name), depth + 1) - println() diff --git a/library/src/main/scala-3/org/clulab/scala/SeqView.scala b/library/src/main/scala-3/org/clulab/scala/SeqView.scala index d55c09e97..e227e7cbb 100644 --- a/library/src/main/scala-3/org/clulab/scala/SeqView.scala +++ b/library/src/main/scala-3/org/clulab/scala/SeqView.scala @@ -1,5 +1,5 @@ package org.clulab.scala object SeqView { - type Immutable[T] = scala.collection.View[T] + type Type[T] = scala.collection.View[T] } diff --git a/library/src/main/scala-3/org/clulab/scala/package.scala b/library/src/main/scala-3/org/clulab/scala/package.scala deleted file mode 100644 index 8df18bbdf..000000000 --- a/library/src/main/scala-3/org/clulab/scala/package.scala +++ /dev/null @@ -1,11 +0,0 @@ -package org.clulab - -import _root_.scala.collection.{BufferedIterator => GenericBufferedIterator} -import _root_.scala.collection.immutable.{LazyList => ImmutableLazyList} - -package object scala { - type BufferedIterator[T] = GenericBufferedIterator[T] - - type LazyList[T] = ImmutableLazyList[T] - val LazyList = ImmutableLazyList -} diff --git a/library/src/main/scala/org/clulab/odin/impl/Taxonomy.scala b/library/src/main/scala/org/clulab/odin/impl/Taxonomy.scala index 3afe9794a..96c3d2e57 100644 --- a/library/src/main/scala/org/clulab/odin/impl/Taxonomy.scala +++ b/library/src/main/scala/org/clulab/odin/impl/Taxonomy.scala @@ -1,7 +1,7 @@ package org.clulab.odin.impl -import org.clulab.scala.LazyList import java.util.{ Collection, Map => JMap } +import 
scala.collection.compat.immutable.LazyList import scala.jdk.CollectionConverters._ class Taxonomy(parents: Map[String, String]) { diff --git a/library/src/main/scala/org/clulab/sequences/LexiconNER.scala b/library/src/main/scala/org/clulab/sequences/LexiconNER.scala index 24d0b143f..688f196b6 100644 --- a/library/src/main/scala/org/clulab/sequences/LexiconNER.scala +++ b/library/src/main/scala/org/clulab/sequences/LexiconNER.scala @@ -6,7 +6,6 @@ import org.clulab.scala.WrappedArray._ import org.clulab.struct.{EntityValidator, TrueEntityValidator} import java.io.File -import scala.collection.mutable /** * The abstract base class for several concrete child classes used for Named Entity @@ -74,31 +73,31 @@ abstract class LexiconNER(val knownCaseInsensitives: Set[String], val useLemmas: } } - def hasCondition(wordsView: SeqView.Immutable[String], condition: Char => Boolean): Boolean = + def hasCondition(wordsView: SeqView.Type[String], condition: Char => Boolean): Boolean = wordsView.exists(_.exists(condition)) - def hasLetter(wordsView: SeqView.Immutable[String]): Boolean = + def hasLetter(wordsView: SeqView.Type[String]): Boolean = hasCondition(wordsView, Character.isLetter) - def hasDigit(wordsView: SeqView.Immutable[String]): Boolean = + def hasDigit(wordsView: SeqView.Type[String]): Boolean = hasCondition(wordsView, Character.isDigit) - def hasUpperCaseLetters(wordsView: SeqView.Immutable[String]): Boolean = + def hasUpperCaseLetters(wordsView: SeqView.Type[String]): Boolean = hasCondition(wordsView, Character.isUpperCase) - def hasSpace(wordsView: SeqView.Immutable[String]): Boolean = wordsView.size > 1 + def hasSpace(wordsView: SeqView.Type[String]): Boolean = wordsView.size > 1 - def countCharacters(wordsView: SeqView.Immutable[String]): Int = + def countCharacters(wordsView: SeqView.Type[String]): Int = // Go ahead and calculate them all even though we only need to know if they exceed a value. wordsView.foldLeft(0) { (sum, word) => sum + word.length } - val contentQualifiers: Array[SeqView.Immutable[String] => Boolean] = Array( + val contentQualifiers: Array[SeqView.Type[String] => Boolean] = Array( // Start with the quick and easy ones. 
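The SeqView.Type alias used in these signatures is the cross-building trick the rename preserves: each version-specific source directory defines the same alias name over whatever view type that Scala version provides, so shared code such as LexiconNER compiles unchanged everywhere. In outline, from the three files in this patch:

    // scala-2.11_2.12
    object SeqView { type Type[T] = scala.collection.SeqView[T, Seq[T]] }
    // scala-2.13 and scala-3
    object SeqView { type Type[T] = scala.collection.View[T] }
    // shared code, version-agnostic
    def hasLetter(wordsView: SeqView.Type[String]): Boolean =
      wordsView.exists(_.exists(Character.isLetter))

The qualifier list continues below.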
hasSpace, - { (wordsView: SeqView.Immutable[String]) => countCharacters(wordsView) > LexiconNER.KNOWN_CASE_INSENSITIVE_LENGTH }, + { (wordsView: SeqView.Type[String]) => countCharacters(wordsView) > LexiconNER.KNOWN_CASE_INSENSITIVE_LENGTH }, hasDigit, hasUpperCaseLetters, - { (wordsView: SeqView.Immutable[String]) => knownCaseInsensitives.contains(wordsView.head) } + { (wordsView: SeqView.Type[String]) => knownCaseInsensitives.contains(wordsView.head) } ) protected def contentfulSpan(sentence: Sentence, start: Int, length: Int): Boolean = { From e9979eabcc854752c76ac8cc400fc4a67aa68a18 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 16:51:47 -0700 Subject: [PATCH 26/42] Remove spaces --- build.sbt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/build.sbt b/build.sbt index 7a7df39bf..69a37d8d2 100644 --- a/build.sbt +++ b/build.sbt @@ -34,19 +34,19 @@ lazy val library = project lazy val apps = project .dependsOn(library % "compile -> compile; test -> test") - lazy val webapp = project - .enablePlugins(PlayScala) - .dependsOn(library % "compile -> compile; test -> test") - .settings( +lazy val webapp = project + .enablePlugins(PlayScala) + .dependsOn(library % "compile -> compile; test -> test") + .settings( // scala3 doesn't have play (for 2.8.19 as specified by the project) and is ruled out completely. // scala213 has version problems for com.fasterxml.jackson.databind.JsonMappingException. // scala212 works! // scala211 isn't compiling and complains on twirlCompileTemplates. // This isn't a library. Only one version needs to work. We shouldn't use play for this anyway. - crossScalaVersions := Seq(scala212) - ) + crossScalaVersions := Seq(scala212) + ) lazy val debugger = project - .dependsOn(library % "compile -> compile; test -> test") + .dependsOn(library % "compile -> compile; test -> test") addCommandAlias("dockerizeWebapp", ";webapp/docker:publishLocal") From 9c80f426278f8ff0f9407f9d87b25b95fb709dd8 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 17:01:04 -0700 Subject: [PATCH 27/42] Update sbt again --- project/build.properties | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/project/build.properties b/project/build.properties index 4c19fc197..29f5dd953 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1,9 +1,9 @@ -# This was last checked on 2025-05-09. +# This was last checked on 2025-05-26. # Version 1.7.2+ will cause problems when combined with the play plug-in used for the webapp! # [error] * org.scala-lang.modules:scala-xml_2.12:2.1.0 (early-semver) is selected over {1.2.0, 1.1.1} # [error] +- org.scala-lang:scala-compiler:2.12.17 (depends on 2.1.0) # [error] +- com.typesafe.sbt:sbt-native-packager:1.5.2 (scalaVersion=2.12, sbtVersion=1.0) (depends on 1.1.1) # [error] +- com.typesafe.play:twirl-api_2.12:1.5.1 (depends on 1.2.0) # This error is solved by adding a VersionScheme.Always to plugins.sbt. 
-# up to 1.10.11
-sbt.version = 1.10.11
+# up to 1.11.0
+sbt.version = 1.11.0

From 70b031b54f9b2fa0ba065aa4569bb5f156d8bf31 Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Mon, 26 May 2025 17:20:24 -0700
Subject: [PATCH 28/42] Fix a toSeq

---
 .../org/clulab/processors/apps/CommandLineInterface.scala | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/apps/src/main/scala/org/clulab/processors/apps/CommandLineInterface.scala b/apps/src/main/scala/org/clulab/processors/apps/CommandLineInterface.scala
index 0e84c662d..c0303f6ca 100644
--- a/apps/src/main/scala/org/clulab/processors/apps/CommandLineInterface.scala
+++ b/apps/src/main/scala/org/clulab/processors/apps/CommandLineInterface.scala
@@ -3,7 +3,7 @@ package org.clulab.processors.apps
 import org.clulab.processors.Document
 import org.clulab.processors.clu.BalaurProcessor
 import org.clulab.serialization.CoNLLUSerializer
-import org.clulab.utils.{FileUtils, StringUtils}
+import org.clulab.utils.{FileUtils, StringUtils, WrappedArraySeq}
 
 import java.io.PrintWriter
 import scala.util.Using
@@ -36,7 +36,11 @@ object CommandLineInterface extends App {
   }
   else if(props.containsKey(TOKENS)) { // one sentence per line; sentences are tokenized
     val sents = FileUtils.getLinesFromFile(props.getProperty(INPUT))
-    val tokenizedSents = sents.map(_.split("\\s+").toSeq)
+    val tokenizedSents = sents.map { sent =>
+      val tokens = sent.split("\\s+")
+
+      WrappedArraySeq(tokens).toImmutableSeq
+    }
     proc.annotateFromTokens(tokenizedSents)
   }
   else { // assume raw text

From de0041f73349eec040412e2df7d222b61a62db4c Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Mon, 26 May 2025 18:03:16 -0700
Subject: [PATCH 29/42] Account for immutable doc in some tests

---
 .../apps/NumericEntityRecognizerShell.scala        |  4 +++-
 .../scala/org/clulab/numeric/EvalTimeNorm.scala    | 17 +++++++++++------
 .../clulab/numeric/TestSeasonNormalizer.scala      |  4 ++--
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala b/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala
index c77688a54..d4ddd3cb4 100644
--- a/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala
+++ b/apps/src/main/scala/org/clulab/processors/apps/NumericEntityRecognizerShell.scala
@@ -34,9 +34,11 @@ class NumericEntityRecognizerShell(ruleDirOpt: Option[String]) extends Reloadabl
   /** The actual work, including printing out the output */
   def work(text: String): Unit = {
     val doc = proc.get.annotate(text)
+    // This gets the same numericEntityRecognizer already used in the annotation
+    // so that the mentions, which were thrown away, can be recalculated.
     val mentions = proc.get.numericEntityRecognizerOpt.map(_.extractFrom(doc)).getOrElse(Seq.empty)
-    NumericUtils.mkLabelsAndNorms(doc, mentions)
+    // The doc should already have been annotated two lines above.
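The same re-derivation pattern recurs throughout patch 29: because annotate now returns an immutable document, numeric mentions are no longer stashed on the doc as a side effect, so callers recompute them and receive labels and norms as return values. Roughly, assuming a BalaurProcessor whose numericEntityRecognizerOpt is populated:

    val doc = processor.annotate(text)
    val mentions = processor.numericEntityRecognizerOpt
      .map(_.extractFrom(doc))
      .getOrElse(Seq.empty)
    // mkLabelsAndNorms now returns its results instead of mutating doc.
    val (entities, norms) = NumericUtils.mkLabelsAndNorms(doc, mentions)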
NumericUtils.displayMentions(mentions, doc) } diff --git a/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala b/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala index 08acac195..db074c570 100644 --- a/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala +++ b/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala @@ -2,6 +2,7 @@ package org.clulab.numeric import org.clulab.numeric.mentions.Norm import org.clulab.processors.Processor +import org.clulab.processors.clu.BalaurProcessor import java.nio.charset.StandardCharsets import scala.io.Source @@ -9,8 +10,11 @@ import scala.util.Using object EvalTimeNorm { - def runEval(proc: Processor, ner: NumericEntityRecognizer, - testFile: String): Double = { + def runEval( + proc: Processor, + ner: NumericEntityRecognizer, + testFile: String + ): Double = { val timeNormEvalDir = "/org/clulab/numeric/TimeNormEvalSet" val goldStream = getClass.getResourceAsStream(s"$timeNormEvalDir/$testFile") val goldLines = Source.fromInputStream(goldStream).getLines() @@ -34,8 +38,9 @@ object EvalTimeNorm { } val doc = proc.annotate(docText) val mentions = ner.extractFrom(doc) - NumericUtils.mkLabelsAndNorms(doc, mentions) - val prediction = mentions.collect{ + // The following line does not change the document. + // NumericUtils.mkLabelsAndNorms(doc, mentions) + val prediction = mentions.collect{ case m: Norm if m.neLabel.equals("DATE") || m.neLabel.equals("DATE-RANGE") => (m.startOffset.toString, m.endOffset.toString, m.neNorm) }.toSet @@ -53,8 +58,8 @@ object EvalTimeNorm { fscore } - def run(proc: Processor): Double = { - val ner = NumericEntityRecognizer() + def run(proc: BalaurProcessor): Double = { + val ner = proc.numericEntityRecognizerOpt.get test(proc, ner) } diff --git a/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala b/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala index 93db9fa4d..423bd3eb7 100644 --- a/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala +++ b/library/src/test/scala/org/clulab/numeric/TestSeasonNormalizer.scala @@ -17,8 +17,8 @@ class TestSeasonNormalizer extends Test { val document = processor.annotate(text) val mentions = processor.numericEntityRecognizerOpt.get.extractFrom(document) - NumericUtils.mkLabelsAndNorms(document, mentions) - (document.sentences.head.entities.get, document.sentences.head.norms.get) + val (entities, norms) = NumericUtils.mkLabelsAndNorms(document, mentions) + (entities.head, norms.head) } behavior of "Default seasonal BalaurProcessor" From db9b5e5e2dca956931542963098051a777d0abb1 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 26 May 2025 18:28:24 -0700 Subject: [PATCH 30/42] Move evaluation resources to app --- .../CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16 | 0 .../EAST_AFRICA_Seasonal_Monitor_5-Jun-17 | 0 .../EA_Seasonal_Monitor_Aug-17 | 0 .../Enhancing_Food_Security_in_South_Sudan_Nov-15 | 0 .../Ethiopia_Food_Security_Outlook_1-Feb-17 | 0 .../FAO_GIEWS_South_Sudan_Country_Brief_Sep-17 | 0 .../FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17 | 0 .../FEWS_NET_South_Sudan_Outlook_Jan-18 | 0 .../FFP_Fact_Sheet_South_Sudan_Jan-18 | 0 ...lace_Hundreds_In_War-torn_In_South_Sudan_Sep-17 | 0 .../Food_Assistance_Outlook_Brief_1-Jan-18 | 0 .../Price_Watch_28-Feb-18/Price_Watch_28-Feb-18 | 0 .../South_Sudan_Humanitarian_Response_Plan_Jan-18 | 0 .../South_Sudanese_Risk_Facing_Famine_Jan-18 | 0 .../TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14 | 0 ...F_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17 | 0 
...pia_Drought_Emergency_Situation_Report_5_Jul-17 | 0 .../WorldModelersDatesRangesTimex.csv | 0 .../clulab/processors/apps/EvalTimeNormApp.scala | 4 +++- .../scala/org/clulab/numeric/EvalTimeNorm.scala | 14 +++++--------- 20 files changed, 8 insertions(+), 10 deletions(-) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17 (100%) rename {library => apps}/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv (100%) diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16 
b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16 similarity index 100% rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16 rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16 diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17 similarity index 100% rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17 rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17 diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17 similarity index 100% rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17 rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17 diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15 similarity index 100% rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15 rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15 diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17 similarity index 100% rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17 rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17 diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17 similarity index 100% rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17 rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17 
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17 b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17
diff --git a/library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv b/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv
similarity index 100%
rename from library/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv
rename to apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv
diff --git a/apps/src/main/scala/org/clulab/processors/apps/EvalTimeNormApp.scala b/apps/src/main/scala/org/clulab/processors/apps/EvalTimeNormApp.scala
index fdba5a609..8be6b6deb 100644
--- a/apps/src/main/scala/org/clulab/processors/apps/EvalTimeNormApp.scala
+++ b/apps/src/main/scala/org/clulab/processors/apps/EvalTimeNormApp.scala
@@ -5,6 +5,8 @@ import org.clulab.processors.clu.BalaurProcessor
 
 object EvalTimeNormApp extends App {
   val proc = new BalaurProcessor()
+  val timeNormEvalDir = "/org/clulab/numeric/TimeNormEvalSet"
+  val testFile = "WorldModelersDatesRangesTimex.csv"
 
-  EvalTimeNorm.run(proc)
+  EvalTimeNorm.run(proc, timeNormEvalDir, testFile)
 }
diff --git a/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala b/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala
index db074c570..9804190fe 100644
--- a/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala
+++ b/library/src/main/scala/org/clulab/numeric/EvalTimeNorm.scala
@@ -12,10 +12,10 @@ object EvalTimeNorm {
 
   def runEval(
       proc: Processor,
-      ner: NumericEntityRecognizer,
-      testFile: String
+      timeNormEvalDir: String,
+      testFile: String,
+      ner: NumericEntityRecognizer
   ): Double = {
-    val timeNormEvalDir = "/org/clulab/numeric/TimeNormEvalSet"
     val goldStream = getClass.getResourceAsStream(s"$timeNormEvalDir/$testFile")
     val goldLines = Source.fromInputStream(goldStream).getLines()
     // Build a Map with the gold time expressions.
@@ -58,13 +58,9 @@ object EvalTimeNorm {
     fscore
   }
 
-  def run(proc: BalaurProcessor): Double = {
+  def run(proc: BalaurProcessor, timeNormEvalDir: String, testFile: String): Double = {
     val ner = proc.numericEntityRecognizerOpt.get
-    test(proc, ner)
-  }
-
-  def test(proc: Processor, ner: NumericEntityRecognizer): Double = {
-    runEval(proc, ner, "WorldModelersDatesRangesTimex.csv")
+    runEval(proc, timeNormEvalDir, testFile, ner)
   }
 }

From 7239f2b1b0e9b4f1afa76298694f5a55b0e1e814 Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Mon, 26 May 2025 19:23:45 -0700
Subject: [PATCH 31/42] Fix test

---
 .../org/clulab/processors/apps/EvalTimeNormApp.scala  | 12 ------------
 .../CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16      |  0
 .../EAST_AFRICA_Seasonal_Monitor_5-Jun-17             |  0
 .../EA_Seasonal_Monitor_Aug-17                        |  0
 .../Enhancing_Food_Security_in_South_Sudan_Nov-15     |  0
 .../Ethiopia_Food_Security_Outlook_1-Feb-17           |  0
 .../FAO_GIEWS_South_Sudan_Country_Brief_Sep-17        |  0
 .../FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17     |  0
 .../FEWS_NET_South_Sudan_Outlook_Jan-18               |  0
 .../FFP_Fact_Sheet_South_Sudan_Jan-18                 |  0
 ...splace_Hundreds_In_War-torn_In_South_Sudan_Sep-17  |  0
 .../Food_Assistance_Outlook_Brief_1-Jan-18            |  0
 .../Price_Watch_28-Feb-18/Price_Watch_28-Feb-18       |  0
 .../South_Sudan_Humanitarian_Response_Plan_Jan-18     |  0
 .../South_Sudanese_Risk_Facing_Famine_Jan-18          |  0
 .../TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14          |  0
 ...CEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17  |  0
 ...iopia_Drought_Emergency_Situation_Report_5_Jul-17  |  0
 .../WorldModelersDatesRangesTimex.csv                 |  0
 .../scala/org/clulab/numeric/TestEvalTimeNorm.scala   | 11 ++++++-----
 20 files changed, 6 insertions(+), 17 deletions(-)
 delete mode 100644 apps/src/main/scala/org/clulab/processors/apps/EvalTimeNormApp.scala
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17 (100%)
 rename {apps/src/main => library/src/test}/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv (100%)

diff --git a/apps/src/main/scala/org/clulab/processors/apps/EvalTimeNormApp.scala b/apps/src/main/scala/org/clulab/processors/apps/EvalTimeNormApp.scala
deleted file mode 100644
index 8be6b6deb..000000000
--- a/apps/src/main/scala/org/clulab/processors/apps/EvalTimeNormApp.scala
+++ /dev/null
@@ -1,12 +0,0 @@
-package org.clulab.processors.apps
-
-import org.clulab.numeric.EvalTimeNorm
-import org.clulab.processors.clu.BalaurProcessor
-
-object EvalTimeNormApp extends App {
-  val proc = new BalaurProcessor()
-  val timeNormEvalDir = "/org/clulab/numeric/TimeNormEvalSet"
-  val testFile = "WorldModelersDatesRangesTimex.csv"
-
-  EvalTimeNorm.run(proc, timeNormEvalDir, testFile)
-}
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16/CLiMIS_FAO_UNICEF_WFP_South_Sudan_IPC_Jun-16
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/EAST_AFRICA_Seasonal_Monitor_5-Jun-17/EAST_AFRICA_Seasonal_Monitor_5-Jun-17
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/EA_Seasonal_Monitor_Aug-17/EA_Seasonal_Monitor_Aug-17
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Enhancing_Food_Security_in_South_Sudan_Nov-15/Enhancing_Food_Security_in_South_Sudan_Nov-15
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Ethiopia_Food_Security_Outlook_1-Feb-17/Ethiopia_Food_Security_Outlook_1-Feb-17
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17/FAO_GIEWS_South_Sudan_Country_Brief_Sep-17
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17/FEWS_NET_South_Sudan_Famine_Risk_Alert_Jan-17
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/FEWS_NET_South_Sudan_Outlook_Jan-18/FEWS_NET_South_Sudan_Outlook_Jan-18
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/FFP_Fact_Sheet_South_Sudan_Jan-18/FFP_Fact_Sheet_South_Sudan_Jan-18
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17/Floods_Displace_Hundreds_In_War-torn_In_South_Sudan_Sep-17
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Food_Assistance_Outlook_Brief_1-Jan-18/Food_Assistance_Outlook_Brief_1-Jan-18
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/Price_Watch_28-Feb-18/Price_Watch_28-Feb-18
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudan_Humanitarian_Response_Plan_Jan-18/South_Sudan_Humanitarian_Response_Plan_Jan-18
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/South_Sudanese_Risk_Facing_Famine_Jan-18/South_Sudanese_Risk_Facing_Famine_Jan-18
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14/TECHNICAL_BRIEF_(RE)ASSESSING_THE_Oct-14
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17/UNICEF_ETHIOPIA_HUMANITARIAN_SITUATION_REPORT_Apr-17
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17 b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17/WFP_Ethiopia_Drought_Emergency_Situation_Report_5_Jul-17
diff --git a/apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv b/library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv
similarity index 100%
rename from apps/src/main/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv
rename to library/src/test/resources/org/clulab/numeric/TimeNormEvalSet/WorldModelersDatesRangesTimex.csv
diff --git a/library/src/test/scala/org/clulab/numeric/TestEvalTimeNorm.scala b/library/src/test/scala/org/clulab/numeric/TestEvalTimeNorm.scala
index bc22534cd..b5575b1b3 100644
--- a/library/src/test/scala/org/clulab/numeric/TestEvalTimeNorm.scala
+++ b/library/src/test/scala/org/clulab/numeric/TestEvalTimeNorm.scala
@@ -8,12 +8,13 @@ class TestEvalTimeNorm extends Test {
   behavior of "temporal parser"
 
   it should "not degrade in performance" in {
+    val timeNormEvalDir = "/org/clulab/numeric/TimeNormEvalSet"
+    val testFile = "WorldModelersDatesRangesTimex.csv"
+    val seasonPath = "/org/clulab/numeric/custom/SEASON.tsv"
     val expectedFscore = 0.85
-    val proc = new BalaurProcessor(seasonPathOpt = Some("/org/clulab/numeric/custom/SEASON.tsv"))
-    val ner = NumericEntityRecognizer(seasonPath = "/org/clulab/numeric/custom/SEASON.tsv")
-    val actualFscore = EvalTimeNorm.test(proc, ner)
+    val proc = new BalaurProcessor(seasonPathOpt = Some(seasonPath))
+    val actualFscore = EvalTimeNorm.run(proc, timeNormEvalDir, testFile)
+
     actualFscore should be >= expectedFscore
   }
-}
-

From 8b1c2f3bda9857dd8faec899ca9d8a54770ff84b Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Tue, 27 May 2025 09:42:41 -0700
Subject: [PATCH 32/42] Make DocumentAttachments immutable

---
 .../org/clulab/processors/Document.scala      | 45 +++++---------
 .../org/clulab/processors/Processor.scala     | 22 ++++---
 .../serialization/DocumentSerializer.scala    | 10 ++--
 .../serialization/json/JSONSerializer.scala   | 16 ++---
 .../clulab/serialization/json/package.scala   |  5 +-
 .../struct/TestDocumentAttachment.scala       | 60 ++++++++++---------
 6 files changed, 71 insertions(+), 87 deletions(-)

diff --git a/library/src/main/scala/org/clulab/processors/Document.scala b/library/src/main/scala/org/clulab/processors/Document.scala
index 34db68688..7dd9bcfd3 100644
--- a/library/src/main/scala/org/clulab/processors/Document.scala
+++ b/library/src/main/scala/org/clulab/processors/Document.scala
@@ -8,7 +8,6 @@ import org.json4s.JValue
 import org.json4s.jackson.prettyJson
 
 import java.io.PrintWriter
-import scala.collection.mutable
 
 /**
   * Stores all annotations for one document.
@@ -24,9 +23,13 @@ class Document(
   /** The original text corresponding to this document, if it was preserved by the corresponding processor */
   val text: Option[String] = None,
   /** Map of any arbitrary document attachments such as document creation time */
-  protected val attachments: Option[mutable.HashMap[String, DocumentAttachment]] = None,
-  /** DCT is Document Creation Time */
-  protected val dct: Option[String] = None
+  val attachments: Option[DocumentAttachments.Type] = None,
+  /**
+    * The document creation time using the CoreNLP format
+    * See useFixedDate here for more details: https://stanfordnlp.github.io/CoreNLP/ner.html#setting-document-date
+    * The DCT will impact how Sentence.norms are generated for DATE expressions.
+    */
+  val dct: Option[String] = None
 ) extends Serializable {
 
   def copy(
@@ -34,19 +37,18 @@ class Document(
     id: Option[String] = id,
    coreferenceChains: Option[CorefChains] = coreferenceChains,
    text: Option[String] = text,
-    attachments: Option[mutable.HashMap[String, DocumentAttachment]] = None,
+    attachments: Option[DocumentAttachments.Type] = None,
    dct: Option[String] = dct
  ): Document = new Document(sentences, id, coreferenceChains, text, attachments, dct)

  /** Clears any internal state potentially constructed by the annotators */
-  // def clear(): Unit = { }
+  def clear(): Unit = { } // This is for subclass support.

  /**
    * Used to compare Documents.
    * @return a hash (Int) based primarily on the sentences, ignoring attachments
    */
  def equivalenceHash: Int = {
-
    val stringCode = "org.clulab.processors.Document"

    // Hash representing the sentences.
@@ -72,30 +74,6 @@ class Document(
    Hash.ordered(sentences.map(_.ambivalenceHash))
  )

-  /** Retrieves the attachment with the given name */
-  def getAttachment(name: String): Option[DocumentAttachment] = attachments.flatMap(_.get(name))
-
-  /** Retrieves keys to all attachments so that the entire collection can be read
-    * for purposes including but not limited to serialization.  If there are no
-    * attachments, that is attachments == None, an empty set is returned.
-    * This does not distinguish between None and Some(HashMap.empty), especially
-    * since the latter should not be possible because of the lazy initialization.
-    */
-  def getAttachmentKeys: collection.Set[String] = {
-    attachments.map { attachments =>
-      attachments.keySet
-    }.getOrElse(collection.Set.empty[String])
-  }
-
-  /**
-    * Sets the document creation time using the CoreNLP format.
-    * See useFixedDate here for more details: https://stanfordnlp.github.io/CoreNLP/ner.html#setting-document-date
-    * The DCT will impacts how Sentence.norms are generated for DATE expressions
-    * @param dct Document creation time
-    */
-
-  def getDCT: Option[String] = dct
-
  def prettyPrint(pw: PrintWriter): Unit = {
@@ -312,6 +290,11 @@ trait JsonSerializerAble {
  */
 trait DocumentAttachment extends DocumentAble with DocumentSerializerAble with JsonSerializerAble
 
+object DocumentAttachments {
+  type Type = Map[String, DocumentAttachment]
+}
+
+
 /**
   * Designed to store intermediate attachments that are only used to pass information between processor components.
   * Thus, these do not need to be serialized
diff --git a/library/src/main/scala/org/clulab/processors/Processor.scala b/library/src/main/scala/org/clulab/processors/Processor.scala
index 9d84e1527..e856f20a5 100644
--- a/library/src/main/scala/org/clulab/processors/Processor.scala
+++ b/library/src/main/scala/org/clulab/processors/Processor.scala
@@ -26,23 +26,21 @@ trait Processor {
     val headId = headDocument.id
     require(tailDocuments.forall(_.id == headId))
 
-    val headDctOpt = headDocument.getDCT
-    require(documents.tail.forall(_.getDCT == headDctOpt))
+    val headDctOpt = headDocument.dct
+    require(documents.tail.forall(_.dct == headDctOpt))
 
     // Coreference chains involve Mentions that include references to documents.  The Mentions are being
     // moved to a new Document and it would be infeasible to move the chains.
     require(documents.forall(_.coreferenceChains.isEmpty))
 
-    val attachments = mutable.HashMap[String, DocumentAttachment]()
-
-    documents.foreach { document =>
-      document.getAttachmentKeys.foreach { attachmentKey =>
-        val valueOpt = attachments.get(attachmentKey)
-        val isValid = valueOpt.forall(_ == document.getAttachment(attachmentKey).get)
-
-        require(isValid, "The attachments cannot contradict each other.")
-        attachments(attachmentKey) = document.getAttachment(attachmentKey).get
-      }
+    val allAttachments = documents.flatMap { document =>
+      document.attachments.getOrElse(Map.empty).toSeq
     }
+    // This will remove duplicate (key, value) pairs.
+    val distinctAttachments = allAttachments.distinct
+    // If for any key, there are different, contradictory values, only one value will make it into the map.
+    val attachments = distinctAttachments.toMap
+
+    require(attachments.size == distinctAttachments.length, "Attachments can't contradict each other.  Each key needs to map onto the same value.")
 
     val combinedSentences = documents.flatMap(_.sentences)
     val combinedDocument = new Document(
diff --git a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala
index cfae7e40b..093185b7f 100644
--- a/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala
+++ b/library/src/main/scala/org/clulab/serialization/DocumentSerializer.scala
@@ -116,10 +116,7 @@ class DocumentSerializer extends Logging {
      */
     val attachmentsOpt = namedDocumentAttachmentsOpt.map { namedDocumentAttachments =>
-      val attachments = mutable.HashMap[String, DocumentAttachment]()
-
-      attachments ++= namedDocumentAttachments
-      attachments
+      namedDocumentAttachments.toMap
     }
 
     val doc = new Document(
@@ -334,11 +331,12 @@ class DocumentSerializer extends Logging {
     }
 
     // Sort these so that serialization is the same each time.
-    val attachmentKeys = doc.getAttachmentKeys.toList.sorted
+    val attachments = doc.attachments.getOrElse(Map.empty)
+    val attachmentKeys = attachments.keySet
     if (attachmentKeys.nonEmpty) {
       os.println(START_ATTACHMENTS + SEP + attachmentKeys.size)
       attachmentKeys.foreach { key =>
-        val value = doc.getAttachment(key).get
+        val value = attachments(key)
         os.print(escapeAttachment(key))
         os.print(SEP)
         os.print(escapeAttachment(value.documentAttachmentBuilderFromTextClassName))
diff --git a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
index 2e66d1f76..ebc20d8b7 100644
--- a/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
+++ b/library/src/main/scala/org/clulab/serialization/json/JSONSerializer.scala
@@ -1,9 +1,9 @@
 package org.clulab.serialization.json
 
 import java.io.File
-import org.clulab.processors.{Document, DocumentAttachment, DocumentAttachmentBuilderFromJson, Sentence}
+import org.clulab.processors.{Document, DocumentAttachment, DocumentAttachmentBuilderFromJson, DocumentAttachments, Sentence}
 import org.clulab.struct.Edge
-import org.clulab.struct.{DirectedGraph, GraphMap}
+import org.clulab.struct.DirectedGraph
 import org.clulab.utils.FileUtils
 import org.json4s
 import org.json4s.JsonDSL._
@@ -24,13 +24,12 @@ object JSONSerializer {
 
   def jsonAST(f: File): JValue = jsonAST(FileUtils.getTextFromFile(f))
 
-  protected def getDocumentAttachments(jValue: JValue): Option[mutable.HashMap[String, DocumentAttachment]] = {
+  protected def getDocumentAttachments(jValue: JValue): Option[DocumentAttachments.Type] = {
     // See also DocumentSerializer for text version of nearly the same thing.
     (jValue \ DOCUMENT_ATTACHMENTS_KEY) match {
       case jObject: JObject =>
-        val attachments = new mutable.HashMap[String, DocumentAttachment]()
         val keys = jObject.values.keys
 
-        keys.foreach { (key: String) =>
+        val keyAndDocumentAttachmentPairs = keys.flatMap { (key: String) =>
           (jObject \ key) match {
             case jObject: JObject =>
               val documentAttachmentBuilderFromJsonClassName = (jObject \ DOCUMENT_ATTACHMENTS_BUILDER_KEY).extract[String]
@@ -40,15 +39,16 @@ object JSONSerializer {
               val documentAttachmentBuilder = obj.asInstanceOf[DocumentAttachmentBuilderFromJson]
               val value = (jObject \ DOCUMENT_ATTACHMENTS_VALUE_KEY)
               val documentAttachment = documentAttachmentBuilder.mkDocumentAttachment(value)
-              attachments(key) = documentAttachment
+
+              Some((key, documentAttachment))
             case jValue: JValue =>
               val text = prettyJson(jValue)
               throw new RuntimeException(s"ERROR: While deserializing document attachments expected JObject but found this: $text")
             // case _ => // noop. It should never get here. (Famous last words.)
-            case null => // noop. It should never get here. (Famous last words.) Scala 3 prefers null over _.
+            case null => None // noop. It should never get here. (Famous last words.) Scala 3 prefers null over _.
           }
         }
-        Some(attachments)
+        Some(keyAndDocumentAttachmentPairs.toMap)
       case _ =>
         // Leave documentAttachments as is: None
         None
     }
   }
diff --git a/library/src/main/scala/org/clulab/serialization/json/package.scala b/library/src/main/scala/org/clulab/serialization/json/package.scala
index 88276826b..a0fbf4f0e 100644
--- a/library/src/main/scala/org/clulab/serialization/json/package.scala
+++ b/library/src/main/scala/org/clulab/serialization/json/package.scala
@@ -61,10 +61,11 @@ package object json {
 
     def jsonAST: JValue = {
       // See also DocumentSerializer for a similar text implementation.
-      val attachmentKeys = doc.getAttachmentKeys.toList.sorted
+      val attachments = doc.attachments.getOrElse(Map.empty)
+      val attachmentKeys = attachments.keySet.toList.sorted
       val documentAttachments: JValue = if (attachmentKeys.nonEmpty) {
         val jFields = attachmentKeys.map { key =>
-          val value = doc.getAttachment(key).get
+          val value = attachments(key)
           JField(key, (DOCUMENT_ATTACHMENTS_BUILDER_KEY -> JString(value.documentAttachmentBuilderFromJsonClassName)) ~
               (DOCUMENT_ATTACHMENTS_VALUE_KEY -> value.toJsonSerializer)
diff --git a/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala b/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala
index 8bf2c792c..a820e26fa 100644
--- a/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala
+++ b/library/src/test/scala/org/clulab/struct/TestDocumentAttachment.scala
@@ -16,7 +16,6 @@ import java.io.ByteArrayInputStream
 import java.io.ByteArrayOutputStream
 import java.io.ObjectInputStream
 import java.io.ObjectOutputStream
-import scala.collection.mutable
 import scala.util.Using
 
 class TestDocumentAttachment extends Test {
@@ -124,7 +123,7 @@ class TestDocumentAttachment extends Test {
   // }
 
   "Document with TextNameDocumentAttachment" should "serialize as text" in {
-    val oldAttachments = mutable.HashMap[String, DocumentAttachment](
+    val oldAttachments = Map[String, DocumentAttachment](
       (FIRST_KEY, new TextNameDocumentAttachment(FIRST_NAME)),
       (MIDDLE_KEY, new TextNameDocumentAttachment(MIDDLE_NAME)),
       (LAST_KEY, new TextNameDocumentAttachment(LAST_NAME)),
@@ -136,18 +135,20 @@ class TestDocumentAttachment extends Test {
     val documentString = documentSerializer.save(oldDocument)
     val newDocument = documentSerializer.load(documentString)
-    require(newDocument.getAttachment(FIRST_KEY) == oldDocument.getAttachment(FIRST_KEY))
-    require(newDocument.getAttachment(MIDDLE_KEY) == oldDocument.getAttachment(MIDDLE_KEY))
-    require(newDocument.getAttachment(LAST_KEY) == oldDocument.getAttachment(LAST_KEY))
-    require(newDocument.getAttachment(ALIAS_KEY).get.asInstanceOf[NameDocumentAttachment].name ==
-      oldDocument.getAttachment(ALIAS_KEY).get.asInstanceOf[NameDocumentAttachment].name)
+    val newAttachments = newDocument.attachments.get
+
+    require(newAttachments(FIRST_KEY) == oldAttachments(FIRST_KEY))
+    require(newAttachments(MIDDLE_KEY) == oldAttachments(MIDDLE_KEY))
+    require(newAttachments(LAST_KEY) == oldAttachments(LAST_KEY))
+    require(newAttachments(ALIAS_KEY).asInstanceOf[NameDocumentAttachment].name ==
+      oldAttachments(ALIAS_KEY).asInstanceOf[NameDocumentAttachment].name)
 
     // This one must be avoided.
     /*require(newDocument == oldDocument)*/
   }
 
   "Document with ObjectNameDocumentAttachment" should "serialize as text" in {
-    val oldAttachments = mutable.HashMap[String, DocumentAttachment](
+    val oldAttachments = Map[String, DocumentAttachment](
       (FIRST_KEY, new ObjectNameDocumentAttachment(FIRST_NAME)),
       (MIDDLE_KEY, new ObjectNameDocumentAttachment(MIDDLE_NAME)),
       (LAST_KEY, new ObjectNameDocumentAttachment(LAST_NAME)),
@@ -158,20 +159,21 @@ class TestDocumentAttachment extends Test {
     val documentSerializer = new DocumentSerializer()
 
     // This should be a messy string.
     val documentString = documentSerializer.save(oldDocument)
-
     val newDocument = documentSerializer.load(documentString)
-    require(newDocument.getAttachment(FIRST_KEY) == oldDocument.getAttachment(FIRST_KEY))
-    require(newDocument.getAttachment(MIDDLE_KEY) == oldDocument.getAttachment(MIDDLE_KEY))
-    require(newDocument.getAttachment(LAST_KEY) == oldDocument.getAttachment(LAST_KEY))
-    require(newDocument.getAttachment(ALIAS_KEY).get.asInstanceOf[NameDocumentAttachment].name ==
-      oldDocument.getAttachment(ALIAS_KEY).get.asInstanceOf[NameDocumentAttachment].name)
+    val newAttachments = newDocument.attachments.get
+
+    require(newAttachments(FIRST_KEY) == oldAttachments(FIRST_KEY))
+    require(newAttachments(MIDDLE_KEY) == oldAttachments(MIDDLE_KEY))
+    require(newAttachments(LAST_KEY) == oldAttachments(LAST_KEY))
+    require(newAttachments(ALIAS_KEY).asInstanceOf[NameDocumentAttachment].name ==
+      oldAttachments(ALIAS_KEY).asInstanceOf[NameDocumentAttachment].name)
 
     // This one must be avoided.
     /*require(newDocument == oldDocument)*/
   }
 
   "Document with TextNameDocumentAttachments" should "serialize as json" in {
-    val oldAttachments = mutable.HashMap[String, DocumentAttachment](
+    val oldAttachments = Map[String, DocumentAttachment](
       (FIRST_KEY, new TextNameDocumentAttachment(FIRST_NAME)),
       (MIDDLE_KEY, new TextNameDocumentAttachment(MIDDLE_NAME)),
       (LAST_KEY, new TextNameDocumentAttachment(LAST_NAME)),
@@ -183,13 +185,14 @@ class TestDocumentAttachment extends Test {
     /*oldDocument.addAttachment("wrong", new NameMethodAttachment("name"))*/
 
     val documentString = prettyJson(renderJValue(oldDocument.jsonAST))
-
     val newDocument: Document = JSONSerializer.toDocument(parseJson(documentString))
+    val newAttachments = newDocument.attachments.get
 
-    newDocument.getAttachment(FIRST_KEY) should be (oldDocument.getAttachment(FIRST_KEY))
-    newDocument.getAttachment(MIDDLE_KEY) should be (oldDocument.getAttachment(MIDDLE_KEY))
-    newDocument.getAttachment(LAST_KEY) should be (oldDocument.getAttachment(LAST_KEY))
-    newDocument.getAttachment(ALIAS_KEY).asInstanceOf[Option[NameDocumentAttachment]].get.name should be (
-      oldDocument.getAttachment(ALIAS_KEY).asInstanceOf[Option[NameDocumentAttachment]].get.name
+    newAttachments(FIRST_KEY) should be (oldAttachments(FIRST_KEY))
+    newAttachments(MIDDLE_KEY) should be (oldAttachments(MIDDLE_KEY))
+    newAttachments(LAST_KEY) should be (oldAttachments(LAST_KEY))
+    newAttachments(ALIAS_KEY).asInstanceOf[NameDocumentAttachment].name should be (
+      oldAttachments(ALIAS_KEY).asInstanceOf[NameDocumentAttachment].name
     )
 
     // This one must be avoided.
@@ -197,7 +200,7 @@ class TestDocumentAttachment extends Test {
   }
 
   "Document with ObjectNameDocumentAttachment" should "serialize as json" in {
-    val oldAttachments = mutable.HashMap[String, DocumentAttachment](
+    val oldAttachments = Map[String, DocumentAttachment](
       (FIRST_KEY, new ObjectNameDocumentAttachment(FIRST_NAME)),
       (MIDDLE_KEY, new ObjectNameDocumentAttachment(MIDDLE_NAME)),
       (LAST_KEY, new ObjectNameDocumentAttachment(LAST_NAME)),
@@ -207,13 +210,14 @@ class TestDocumentAttachment extends Test {
 
     // This should be a messy string.
     val documentString = prettyJson(renderJValue(oldDocument.jsonAST))
-
     val newDocument: Document = JSONSerializer.toDocument(parseJson(documentString))
-    require(newDocument.getAttachment(FIRST_KEY) == oldDocument.getAttachment(FIRST_KEY))
-    require(newDocument.getAttachment(MIDDLE_KEY) == oldDocument.getAttachment(MIDDLE_KEY))
-    require(newDocument.getAttachment(LAST_KEY) == oldDocument.getAttachment(LAST_KEY))
-    require(newDocument.getAttachment(ALIAS_KEY).get.asInstanceOf[NameDocumentAttachment].name ==
-      oldDocument.getAttachment(ALIAS_KEY).get.asInstanceOf[NameDocumentAttachment].name)
+    val newAttachments = newDocument.attachments.get
+
+    require(newAttachments(FIRST_KEY) == oldAttachments(FIRST_KEY))
+    require(newAttachments(MIDDLE_KEY) == oldAttachments(MIDDLE_KEY))
+    require(newAttachments(LAST_KEY) == oldAttachments(LAST_KEY))
+    require(newAttachments(ALIAS_KEY).asInstanceOf[NameDocumentAttachment].name ==
+      oldAttachments(ALIAS_KEY).asInstanceOf[NameDocumentAttachment].name)
 
     // This one must be avoided.
     /*require(newDocument == oldDocument)*/

From e9864206085631bbf7b8a559948562474ad16922 Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Tue, 27 May 2025 09:58:32 -0700
Subject: [PATCH 33/42] Fix test compilation warning

---
 .../src/test/scala/org/clulab/processors/TestHashTrie.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/library/src/test/scala/org/clulab/processors/TestHashTrie.scala b/library/src/test/scala/org/clulab/processors/TestHashTrie.scala
index d304713b7..4ec8ee171 100644
--- a/library/src/test/scala/org/clulab/processors/TestHashTrie.scala
+++ b/library/src/test/scala/org/clulab/processors/TestHashTrie.scala
@@ -19,7 +19,7 @@ class TestHashTrie extends Test {
 
     //println("TRIE:\n" + trie)
 
-    val tokens = Array("a", "a", "b", "d", "a", "b", "d", "b", "b", "b")
+    val tokens = Seq("a", "a", "b", "d", "a", "b", "d", "b", "b", "b")
     val labels = trie.find(tokens, "O")
 
     //println("TOKENS: " + tokens.mkString(" "))
     //println("LABELS: " + labels.mkString(" "))
@@ -44,7 +44,7 @@ class TestHashTrie extends Test {
     trie.add(Array("this", "is", "c", "test"))
     trie.add(Array("this", "is", "b", "test"))
 
-    val labels = trie.find(Array("this", "is", "c", "test"), "o")
+    val labels = trie.find(Seq("this", "is", "c", "test"), "o")
 
     sameLabels(Array("B-hello", "I-hello", "I-hello", "I-hello"), labels)
   }
@@ -55,7 +55,7 @@ class TestHashTrie extends Test {
     trie.add(Array("this", "is", "c", "test"))
     trie.add(Array("this", "is", "d", "test"))
 
-    val labels = trie.find(Array("this", "is", "b", "test"), "o")
+    val labels = trie.find(Seq("this", "is", "b", "test"), "o")
 
     sameLabels(Array("o", "o", "o", "o"), labels)
   }

From 7d4fec111c892efd7fd69d6952ef35d8c33bd6db Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Thu, 29 May 2025 08:39:23 -0700
Subject: [PATCH 34/42] Use Option.when

---
 .../apps/InfiniteParallelProcessorsExample.scala          | 9 ++++-----
 .../scala/org/clulab/processors/apps/OdinStarter.scala    | 3 ++-
 .../scala/org/clulab/odin/impl/MarkdownGeneration.scala   | 7 ++++---
 .../scala/org/clulab/odin/impl/OdinResourceManager.scala  | 4 ++--
 .../src/main/scala/org/clulab/odin/impl/RuleReader.scala  | 4 ++--
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/apps/src/main/scala/org/clulab/processors/apps/InfiniteParallelProcessorsExample.scala b/apps/src/main/scala/org/clulab/processors/apps/InfiniteParallelProcessorsExample.scala
index f320b6f4b..3465d8758 100644
--- a/apps/src/main/scala/org/clulab/processors/apps/InfiniteParallelProcessorsExample.scala
+++ b/apps/src/main/scala/org/clulab/processors/apps/InfiniteParallelProcessorsExample.scala
@@ -2,23 +2,22 @@ package org.clulab.processors.apps
 
 import org.clulab.processors.Document
 import org.clulab.processors.Processor
+import org.clulab.processors.clu.BalaurProcessor
 import org.clulab.serialization.DocumentSerializer
 import org.clulab.utils.{FileUtils, StringUtils, ThreadUtils, Timer}
 
-import java.io.BufferedOutputStream
 import java.io.File
-import java.io.FileOutputStream
 import java.io.PrintWriter
+import scala.collection.compat._
 import scala.collection.parallel.ParSeq
 import scala.util.Using
-import org.clulab.processors.clu.BalaurProcessor
+
 
 object InfiniteParallelProcessorsExample {
 
   class ProcessorProvider(reuseProcessor: Boolean) {
     protected val processorOpt: Option[Processor] =
-      if (reuseProcessor) Some(new BalaurProcessor())
-      else None
+      Option.when(reuseProcessor)(new BalaurProcessor())
 
     def newOrReusedProcessor: Processor =
       if (reuseProcessor) processorOpt.get
diff --git a/apps/src/main/scala/org/clulab/processors/apps/OdinStarter.scala b/apps/src/main/scala/org/clulab/processors/apps/OdinStarter.scala
index 54abb3b5e..106f7d09b 100644
--- a/apps/src/main/scala/org/clulab/processors/apps/OdinStarter.scala
+++ b/apps/src/main/scala/org/clulab/processors/apps/OdinStarter.scala
@@ -6,6 +6,7 @@ import org.clulab.sequences.LexiconNER
 import org.clulab.utils.FileUtils
 
 import java.io.File
+import scala.collection.compat._
 
 object OdinStarter extends App {
   // When using an IDE rather than sbt, make sure the working directory for the run
@@ -20,7 +21,7 @@ object OdinStarter extends App {
     val kbs = kbsAndCaseInsensitiveMatchings.map(_._1)
     val caseInsensitiveMatchings = kbsAndCaseInsensitiveMatchings.map(_._2)
     val isLocal = kbs.forall(new File(resourceDir, _).exists)
-    val baseDirOpt = if (isLocal) Some(resourceDir) else None
+    val baseDirOpt = Option.when(isLocal)(resourceDir)
 
     LexiconNER(kbs, caseInsensitiveMatchings, baseDirOpt)
   }
diff --git a/library/src/main/scala/org/clulab/odin/impl/MarkdownGeneration.scala b/library/src/main/scala/org/clulab/odin/impl/MarkdownGeneration.scala
index f6e282934..26c2252e8 100644
--- a/library/src/main/scala/org/clulab/odin/impl/MarkdownGeneration.scala
+++ b/library/src/main/scala/org/clulab/odin/impl/MarkdownGeneration.scala
@@ -3,6 +3,7 @@ package org.clulab.odin.impl
 import org.clulab.odin.impl.MarkdownGeneration._
 import org.clulab.odin.impl.RuleReader.{DefaultAction, Rule}
 
+import scala.collection.compat._
 import scala.collection.mutable.ArrayBuffer
 
 case class RuleSchema(
@@ -180,7 +181,7 @@ object MarkdownGeneration {
       extractorType = "CrossSentenceExtractor",
       labels = x.labels,
       priority = priorityString(x.priority),
-      action = if (r.action != DefaultAction) Some(r.action) else None,
+      action = Option.when(r.action != DefaultAction)(r.action),
       keep = x.keep,
       additional = Map(
         "leftWindow" -> x.leftWindow.toString,
@@ -198,7 +199,7 @@ object MarkdownGeneration {
       extractorType = "TokenExtractor",
       labels = x.labels,
       priority = priorityString(x.priority),
-      action = if (r.action != DefaultAction) Some(r.action) else None,
+      action = Option.when(r.action != DefaultAction)(r.action),
       keep = x.keep,
       additional = Map.empty,
       arguments = Seq.empty
@@ -213,7 +214,7 @@ object MarkdownGeneration {
       extractorType = "GraphExtractor",
       labels = x.labels,
       priority = priorityString(x.priority),
-      action = if (r.action != DefaultAction) Some(r.action) else None,
+      action = Option.when(r.action != DefaultAction)(r.action),
       keep = x.keep,
       additional = Map.empty,
arguments = toArgSchema(x.pattern.arguments) diff --git a/library/src/main/scala/org/clulab/odin/impl/OdinResourceManager.scala b/library/src/main/scala/org/clulab/odin/impl/OdinResourceManager.scala index f6b8c2c7c..817d93d52 100644 --- a/library/src/main/scala/org/clulab/odin/impl/OdinResourceManager.scala +++ b/library/src/main/scala/org/clulab/odin/impl/OdinResourceManager.scala @@ -1,6 +1,7 @@ package org.clulab.odin.impl import java.io.{BufferedInputStream, InputStream} +import scala.collection.compat._ import scala.io.Source /** @@ -22,8 +23,7 @@ object OdinResourceManager { val embeddingsOption: Option[OdinResource] = constructorMap("embeddings") // cast as EmbeddingsResources, if present val embeddings: Option[EmbeddingsResource] = - if (embeddingsOption.nonEmpty) Some(embeddingsOption.get.asInstanceOf[EmbeddingsResource]) - else None + Option.when(embeddingsOption.nonEmpty)(embeddingsOption.get.asInstanceOf[EmbeddingsResource]) new OdinResourceManager(embeddings) } diff --git a/library/src/main/scala/org/clulab/odin/impl/RuleReader.scala b/library/src/main/scala/org/clulab/odin/impl/RuleReader.scala index a349b4193..45f9a1e35 100644 --- a/library/src/main/scala/org/clulab/odin/impl/RuleReader.scala +++ b/library/src/main/scala/org/clulab/odin/impl/RuleReader.scala @@ -13,6 +13,7 @@ import java.net.URL import java.nio.charset.Charset import java.nio.charset.StandardCharsets import java.util.{Collection, Map => JMap} +import scala.collection.compat._ import scala.io.{Codec, Source} import scala.jdk.CollectionConverters._ import scala.util.Using @@ -28,8 +29,7 @@ class RuleReader(val actions: Actions, val charset: Charset, val ruleDir: Option private val mirror = new ActionMirror(actions) val ruleYamlOpt = - if (OdinConfig.keepRule) Some(new Yaml(new Constructor(classOf[Map[String, Any]]))) - else None + Option.when(OdinConfig.keepRule)(new Yaml(new Constructor(classOf[Map[String, Any]]))) def read(input: String): Vector[Extractor] = { val rules = getRules(input) From 4f06301cfba9caba8e02c8e29697026ce8545343 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Thu, 29 May 2025 10:17:35 -0700 Subject: [PATCH 35/42] Extract the DocumentPrinter --- .../InfiniteParallelProcessorsExample.scala | 8 +- .../apps/ParallelProcessorsExample.scala | 7 +- .../apps/ProcessorsDocSerializerExample.scala | 7 +- .../processors/apps/ProcessorsShell.scala | 7 +- .../org/clulab/processors/Document.scala | 75 --------------- .../processors/clu/DocumentPrinter.scala | 91 +++++++++++++++++++ 6 files changed, 102 insertions(+), 93 deletions(-) create mode 100644 library/src/main/scala/org/clulab/processors/clu/DocumentPrinter.scala diff --git a/apps/src/main/scala/org/clulab/processors/apps/InfiniteParallelProcessorsExample.scala b/apps/src/main/scala/org/clulab/processors/apps/InfiniteParallelProcessorsExample.scala index 3465d8758..6220f625e 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/InfiniteParallelProcessorsExample.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/InfiniteParallelProcessorsExample.scala @@ -2,7 +2,7 @@ package org.clulab.processors.apps import org.clulab.processors.Document import org.clulab.processors.Processor -import org.clulab.processors.clu.BalaurProcessor +import org.clulab.processors.clu.{BalaurProcessor, DocumentPrettyPrinter} import org.clulab.serialization.DocumentSerializer import org.clulab.utils.{FileUtils, StringUtils, ThreadUtils, Timer} @@ -12,7 +12,6 @@ import scala.collection.compat._ import scala.collection.parallel.ParSeq import 
scala.util.Using - object InfiniteParallelProcessorsExample { class ProcessorProvider(reuseProcessor: Boolean) { @@ -36,9 +35,6 @@ object InfiniteParallelProcessorsExample { val documentSerializer = new DocumentSerializer def processFiles(parFiles: ParSeq[File], processor: Processor): Unit = { - - def printDocument(document: Document, printWriter: PrintWriter): Unit = document.prettyPrint(printWriter) - parFiles.foreach { file => println(s"Processing ${file.getName}...") @@ -46,7 +42,7 @@ object InfiniteParallelProcessorsExample { val outputFile = new File(outputDir + "/" + file.getName) val document = processor.annotate(text) val printedDocument = StringUtils.viaPrintWriter { printWriter => - printDocument(document, printWriter) + new DocumentPrettyPrinter(printWriter).print(document) } val savedDocument = documentSerializer.save(document) val outputDocument = printedDocument + savedDocument diff --git a/apps/src/main/scala/org/clulab/processors/apps/ParallelProcessorsExample.scala b/apps/src/main/scala/org/clulab/processors/apps/ParallelProcessorsExample.scala index 6e514c3e1..bc28048e5 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/ParallelProcessorsExample.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ParallelProcessorsExample.scala @@ -2,7 +2,7 @@ package org.clulab.processors.apps import org.clulab.processors.Document import org.clulab.processors.Processor -import org.clulab.processors.clu.BalaurProcessor +import org.clulab.processors.clu.{BalaurProcessor, DocumentPrettyPrinter} import org.clulab.serialization.DocumentSerializer import org.clulab.utils.{FileUtils, StringUtils, ThreadUtils, Timer} @@ -13,9 +13,6 @@ import scala.util.Using object ParallelProcessorsExample { def mainWithCallback(args: Array[String])(callback: (File, String) => Unit): Unit = { - - def printDocument(document: Document, printWriter: PrintWriter): Unit = document.prettyPrint(printWriter) - val inputDir = args(0) val outputDir = args(1) val extension = args(2) @@ -56,7 +53,7 @@ object ParallelProcessorsExample { throw throwable } val printedDocument = StringUtils.viaPrintWriter { printWriter => - printDocument(document, printWriter) + new DocumentPrettyPrinter(printWriter).print(document) } val savedDocument = documentSerializer.save(document) val outputDocument = printedDocument + savedDocument diff --git a/apps/src/main/scala/org/clulab/processors/apps/ProcessorsDocSerializerExample.scala b/apps/src/main/scala/org/clulab/processors/apps/ProcessorsDocSerializerExample.scala index 8bc6aa608..518e0f667 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/ProcessorsDocSerializerExample.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ProcessorsDocSerializerExample.scala @@ -1,5 +1,6 @@ package org.clulab.processors.apps +import org.clulab.processors.clu.DocumentPrettyPrinter import org.clulab.processors.{Document, Processor} import org.clulab.serialization.DocumentSerializer @@ -13,6 +14,7 @@ import java.io.PrintWriter */ object ProcessorsDocSerializerExample { def main(args:Array[String]): Unit = { + val documentPrinter = new DocumentPrettyPrinter(new PrintWriter(System.out)) // create the processor val proc = Processor() @@ -20,14 +22,11 @@ object ProcessorsDocSerializerExample { val doc = proc.annotate("John Smith went to China. He visited Beijing, on January 10th, 2013.") // you are basically done. 
the rest of this code simply prints out the annotations - printDoc(doc) + documentPrinter.print(doc) // serialize the doc using our custom serializer val ser = new DocumentSerializer val out = ser.save(doc) println("SERIALIZED DOC:\n" + out) } - - def printDoc(doc:Document): Unit = { doc.prettyPrint(new PrintWriter(System.out)) } - } diff --git a/apps/src/main/scala/org/clulab/processors/apps/ProcessorsShell.scala b/apps/src/main/scala/org/clulab/processors/apps/ProcessorsShell.scala index 012949e4a..903cf6113 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/ProcessorsShell.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ProcessorsShell.scala @@ -1,7 +1,7 @@ package org.clulab.processors.apps import org.clulab.processors.Processor -import org.clulab.processors.clu.BalaurProcessor +import org.clulab.processors.clu.{BalaurProcessor, DocumentPrettyPrinter} import org.clulab.utils.CliReader import org.clulab.utils.ExitMenuItem import org.clulab.utils.HelpMenuItem @@ -27,6 +27,7 @@ class ProcessorsShell extends Shell { val lineReader = new CliReader(proc.prompt, "user.home", ".processorshellhistory") val printWriter = new PrintWriter(System.out) + val documentPrinter = new DocumentPrettyPrinter(printWriter) def prepareProcessor(message: String, promptedReloadableProcessor: PromptedReloadableProcessor): Unit = { lineReader.setPrompt(promptedReloadableProcessor.prompt) @@ -40,8 +41,8 @@ class ProcessorsShell extends Shell { override def work(text: String): Unit = { val doc = proc.get.annotate(text) - doc.prettyPrint(printWriter) - printWriter.flush() + + documentPrinter.print(doc) } // We inherit now just from Shell, so no reloading is performed. diff --git a/library/src/main/scala/org/clulab/processors/Document.scala b/library/src/main/scala/org/clulab/processors/Document.scala index 7dd9bcfd3..eb379e3a1 100644 --- a/library/src/main/scala/org/clulab/processors/Document.scala +++ b/library/src/main/scala/org/clulab/processors/Document.scala @@ -74,81 +74,6 @@ class Document( Hash.ordered(sentences.map(_.ambivalenceHash)) ) - def prettyPrint(pw: PrintWriter): Unit = { - // let's print the sentence-level annotations - sentences.zipWithIndex.foreach { case (sentence, sentenceCount) => - pw.println("Sentence #" + sentenceCount + ":") - pw.println("Tokens: " + sentence.words.zipWithIndex.mkString(" ")) - pw.println("Start character offsets: " + sentence.startOffsets.mkString(" ")) - pw.println("End character offsets: " + sentence.endOffsets.mkString(" ")) - - // these annotations are optional, so they are stored using Option objects, hence the foreach statement - sentence.lemmas.foreach(lemmas => pw.println(s"Lemmas: ${lemmas.mkString(" ")}")) - sentence.tags.foreach(tags => pw.println(s"POS tags: ${tags.mkString(" ")}")) - sentence.chunks.foreach(chunks => pw.println(s"Chunks: ${chunks.mkString(" ")}")) - sentence.entities.foreach(entities => pw.println(s"Named entities: ${entities.mkString(" ")}")) - sentence.norms.foreach(norms => pw.println(s"Normalized entities: ${norms.mkString(" ")}")) - sentence.universalBasicDependencies.foreach(dependencies => { - pw.println("Basic syntactic dependencies:") - val iterator = new DirectedGraphEdgeIterator[String](dependencies) - while(iterator.hasNext) { - val dep = iterator.next() - // note that we use offsets starting at 0 (unlike CoreNLP, which uses offsets starting at 1) - pw.println(" head:" + dep._1 + " modifier:" + dep._2 + " label:" + dep._3) - } - }) - sentence.universalEnhancedDependencies.foreach(dependencies => { - 
pw.println("Enhanced syntactic dependencies:") - val iterator = new DirectedGraphEdgeIterator[String](dependencies) - while(iterator.hasNext) { - val dep = iterator.next() - // note that we use offsets starting at 0 (unlike CoreNLP, which uses offsets starting at 1) - pw.println(" head:" + dep._1 + " modifier:" + dep._2 + " label:" + dep._3) - } - }) - sentence.semanticRoles.foreach(dependencies => { - pw.println("Semantic dependencies:") - val iterator = new DirectedGraphEdgeIterator[String](dependencies) - while(iterator.hasNext) { - val dep = iterator.next() - // note that we use offsets starting at 0 (unlike CoreNLP, which uses offsets starting at 1) - pw.println(" head:" + dep._1 + " modifier:" + dep._2 + " label:" + dep._3) - } - }) - sentence.enhancedSemanticRoles.foreach(dependencies => { - pw.println("Enhanced semantic dependencies:") - val iterator = new DirectedGraphEdgeIterator[String](dependencies) - while(iterator.hasNext) { - val dep = iterator.next() - // note that we use offsets starting at 0 (unlike CoreNLP, which uses offsets starting at 1) - pw.println(" head:" + dep._1 + " modifier:" + dep._2 + " label:" + dep._3) - } - }) - sentence.syntacticTree.foreach(tree => { - pw.println("Constituent tree: " + tree.toStringDepth(showHead = false)) - // see the org.clulab.struct.Tree class for more information - // on syntactic trees, including access to head phrases/words - }) - - pw.println("\n") - } - - // let's print the coreference chains - coreferenceChains.foreach(chains => { - for (chain <- chains.getChains) { - pw.println("Found one coreference chain containing the following mentions:") - for (mention <- chain) { - // note that all these offsets start at 0 too - pw.println("\tsentenceIndex:" + mention.sentenceIndex + - " headIndex:" + mention.headIndex + - " startTokenOffset:" + mention.startOffset + - " endTokenOffset:" + mention.endOffset + - " text: " + sentences(mention.sentenceIndex).words.slice(mention.startOffset, mention.endOffset).mkString("[", " ", "]")) - } - } - }) - } - def offset(offset: Int): Document = // If a subclass of Document constructs itself with an attachment or a documentCreationTime that // would be overwritten on the copy(), then it should provide its own copy() method(s). 
diff --git a/library/src/main/scala/org/clulab/processors/clu/DocumentPrinter.scala b/library/src/main/scala/org/clulab/processors/clu/DocumentPrinter.scala new file mode 100644 index 000000000..22c9845c7 --- /dev/null +++ b/library/src/main/scala/org/clulab/processors/clu/DocumentPrinter.scala @@ -0,0 +1,91 @@ +package org.clulab.processors.clu + +import org.clulab.processors.Document +import org.clulab.struct.DirectedGraphEdgeIterator + +import java.io.PrintWriter + +trait DocumentPrinter { + def print(document: Document): Unit +} + +class DocumentPrettyPrinter(printWriter: PrintWriter) extends DocumentPrinter { + + def println(string: String): Unit = printWriter.println(string) + + def print(document: Document): Unit = { + // let's print the sentence-level annotations + document.sentences.zipWithIndex.foreach { case (sentence, sentenceCount) => + println("Sentence #" + sentenceCount + ":") + println("Tokens: " + sentence.words.zipWithIndex.mkString(" ")) + println("Start character offsets: " + sentence.startOffsets.mkString(" ")) + println("End character offsets: " + sentence.endOffsets.mkString(" ")) + + // these annotations are optional, so they are stored using Option objects, hence the foreach statement + sentence.lemmas.foreach(lemmas => println(s"Lemmas: ${lemmas.mkString(" ")}")) + sentence.tags.foreach(tags => println(s"POS tags: ${tags.mkString(" ")}")) + sentence.chunks.foreach(chunks => println(s"Chunks: ${chunks.mkString(" ")}")) + sentence.entities.foreach(entities => println(s"Named entities: ${entities.mkString(" ")}")) + sentence.norms.foreach(norms => println(s"Normalized entities: ${norms.mkString(" ")}")) + sentence.universalBasicDependencies.foreach(dependencies => { + println("Basic syntactic dependencies:") + val iterator = new DirectedGraphEdgeIterator[String](dependencies) + while (iterator.hasNext) { + val dep = iterator.next() + // note that we use offsets starting at 0 (unlike CoreNLP, which uses offsets starting at 1) + println(" head:" + dep._1 + " modifier:" + dep._2 + " label:" + dep._3) + } + }) + sentence.universalEnhancedDependencies.foreach(dependencies => { + println("Enhanced syntactic dependencies:") + val iterator = new DirectedGraphEdgeIterator[String](dependencies) + while (iterator.hasNext) { + val dep = iterator.next() + // note that we use offsets starting at 0 (unlike CoreNLP, which uses offsets starting at 1) + println(" head:" + dep._1 + " modifier:" + dep._2 + " label:" + dep._3) + } + }) + sentence.semanticRoles.foreach(dependencies => { + println("Semantic dependencies:") + val iterator = new DirectedGraphEdgeIterator[String](dependencies) + while (iterator.hasNext) { + val dep = iterator.next() + // note that we use offsets starting at 0 (unlike CoreNLP, which uses offsets starting at 1) + println(" head:" + dep._1 + " modifier:" + dep._2 + " label:" + dep._3) + } + }) + sentence.enhancedSemanticRoles.foreach(dependencies => { + println("Enhanced semantic dependencies:") + val iterator = new DirectedGraphEdgeIterator[String](dependencies) + while (iterator.hasNext) { + val dep = iterator.next() + // note that we use offsets starting at 0 (unlike CoreNLP, which uses offsets starting at 1) + println(" head:" + dep._1 + " modifier:" + dep._2 + " label:" + dep._3) + } + }) + sentence.syntacticTree.foreach(tree => { + println("Constituent tree: " + tree.toStringDepth(showHead = false)) + // see the org.clulab.struct.Tree class for more information + // on syntactic trees, including access to head phrases/words + }) + + println("\n") + } + + 
// let's print the coreference chains + document.coreferenceChains.foreach(chains => { + for (chain <- chains.getChains) { + println("Found one coreference chain containing the following mentions:") + for (mention <- chain) { + // note that all these offsets start at 0 too + println("\tsentenceIndex:" + mention.sentenceIndex + + " headIndex:" + mention.headIndex + + " startTokenOffset:" + mention.startOffset + + " endTokenOffset:" + mention.endOffset + + " text: " + document.sentences(mention.sentenceIndex).words.slice(mention.startOffset, mention.endOffset).mkString("[", " ", "]")) + } + } + }) + printWriter.flush() + } +} From 0b33f209be12926826a4b980f9c6ea48dd31e5a4 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Thu, 29 May 2025 11:02:38 -0700 Subject: [PATCH 36/42] Clean up DocumentMaker Clean it up more Get rid of debug files --- .../org/clulab/processors/Document.scala | 4 +- .../clulab/processors/clu/DocumentMaker.scala | 88 +++++++++++-------- 2 files changed, 51 insertions(+), 41 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/Document.scala b/library/src/main/scala/org/clulab/processors/Document.scala index eb379e3a1..1c9e1ece3 100644 --- a/library/src/main/scala/org/clulab/processors/Document.scala +++ b/library/src/main/scala/org/clulab/processors/Document.scala @@ -1,14 +1,12 @@ package org.clulab.processors -import org.clulab.struct.{CorefChains, DirectedGraphEdgeIterator} +import org.clulab.struct.CorefChains import org.clulab.utils.Hash import org.clulab.utils.Serializer import org.json4s.JString import org.json4s.JValue import org.json4s.jackson.prettyJson -import java.io.PrintWriter - /** * Stores all annotations for one document. * Written by: Mihai Surdeanu and Gus Hahn-Powell. diff --git a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala index 2e228c6f3..0a303701e 100644 --- a/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala +++ b/library/src/main/scala/org/clulab/processors/clu/DocumentMaker.scala @@ -3,7 +3,6 @@ package org.clulab.processors.clu import org.clulab.processors.Document import org.clulab.processors.Sentence import org.clulab.processors.clu.tokenizer.Tokenizer -import org.clulab.scala.WrappedArrayBuffer._ import org.clulab.utils.WrappedArraySeq import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -14,26 +13,29 @@ import scala.collection.mutable.ArrayBuffer class DocumentMaker object DocumentMaker { - val logger:Logger = LoggerFactory.getLogger(classOf[DocumentMaker]) + val logger: Logger = LoggerFactory.getLogger(classOf[DocumentMaker]) /** Constructs a document of tokens from free text; includes sentence splitting and tokenization */ - def mkDocument(tokenizer:Tokenizer, - text:String, - keepText:Boolean): Document = { - val sents = tokenizer.tokenize(text) + def mkDocument( // TODO: mkDocumentFromText + tokenizer: Tokenizer, + text: String, + keepText: Boolean + ): Document = { + val sentences = tokenizer.tokenize(text) val textOpt = Option.when(keepText)(text) - val doc = Document(sents, textOpt) + val document = Document(sentences, textOpt) - doc + document } /** Constructs a document of tokens from an array of untokenized sentences */ - def mkDocumentFromSentences( + def mkDocumentFromSentences( // TODO: mkDocumentFromTexts tokenizer: Tokenizer, texts: Iterable[String], keepText: Boolean, charactersBetweenSentences: Int ): Document = { + val sentenceSep = " " * charactersBetweenSentences var characterOffset = 0 
val sentencesArray = texts.map { text =>
       val sentence = tokenizer.tokenize(text, sentenceSplit = false, characterOffset).head // We produce a single sentence here!
@@ -42,47 +44,57 @@ object DocumentMaker {
       sentence
     }.toArray
     val sentences = WrappedArraySeq(sentencesArray).toImmutableSeq
-    val textOpt = Option.when(keepText)(texts.mkString(mkSep(charactersBetweenSentences)))
+    val textOpt = Option.when(keepText)(texts.mkString(sentenceSep))
     val document = Document(sentences, textOpt)
 
     document
   }
 
   /** Constructs a document of tokens from an array of tokenized sentences */
-  def mkDocumentFromTokens(sentences:Iterable[Iterable[String]],
-                           keepText:Boolean,
-                           charactersBetweenSentences:Int,
-                           charactersBetweenTokens:Int): Document = {
+  def mkDocumentFromTokens( // TODO: mkDocumentFromTokenizedTexts
+    tokenizedTexts: Iterable[Iterable[String]],
+    keepText: Boolean,
+    charactersBetweenSentences: Int,
+    charactersBetweenTokens: Int
+  ): Document = {
+    val sentenceSep = " " * charactersBetweenSentences
+    val tokenSep = " " * charactersBetweenTokens
     var charOffset = 0
-    val sents = new ArrayBuffer[Sentence]()
     val text = new StringBuilder
-    for(sentence <- sentences) {
-      val startOffsets = new ArrayBuffer[Int]()
-      val endOffsets = new ArrayBuffer[Int]()
-      for(word <- sentence) {
-        startOffsets += charOffset
-        charOffset += word.length
-        endOffsets += charOffset
+    // Just use one buffer for each but clear them as necessary.
+    val startOffsetsBuffer = new ArrayBuffer[Int]()
+    val endOffsetsBuffer = new ArrayBuffer[Int]()
+    val sentencesArray = tokenizedTexts.map { tokenizedTextIterable =>
+      // We are going to need the tokens in an array anyway, so make them now.
+      val tokenizedTextArray = tokenizedTextIterable.toArray
+
+      tokenizedTextArray.foreach { token =>
+        startOffsetsBuffer += charOffset
+        charOffset += token.length
+        endOffsetsBuffer += charOffset
         charOffset += charactersBetweenTokens
       }
-      // note: NO postprocessing happens in this case, so use it carefully!
-      sents += new Sentence(sentence.toSeq, startOffsets, endOffsets, sentence.toSeq)
-      charOffset += charactersBetweenSentences - charactersBetweenTokens
-      if(keepText) {
-        text.append(sentence.mkString(mkSep(charactersBetweenTokens)))
-        text.append(mkSep(charactersBetweenSentences))
-      }
-    }
+      // The simple version of this doesn't work if there were no tokens.
+      charOffset += charactersBetweenSentences - (if (tokenizedTextArray.nonEmpty) charactersBetweenTokens else 0)
 
-    val textOpt = Option.when(keepText)(text.toString)
-    val doc = Document(sents, textOpt)
+      // Note: NO postprocessing happens in this case, so use it carefully!
+ val startOffsets = WrappedArraySeq(startOffsetsBuffer.toArray).toImmutableSeq + startOffsetsBuffer.clear() + val endOffsets = WrappedArraySeq(endOffsetsBuffer.toArray).toImmutableSeq + endOffsetsBuffer.clear() + val tokens = WrappedArraySeq(tokenizedTextArray).toImmutableSeq + val sentence = new Sentence(tokens, startOffsets, endOffsets, tokens) - doc - } + if (keepText) { + text.append(tokens.mkString(tokenSep)) + text.append(sentenceSep) + } + sentence + }.toArray + val sentences = WrappedArraySeq(sentencesArray).toImmutableSeq + val textOpt = Option.when(keepText)(text.toString) + val document = Document(sentences, textOpt) - private def mkSep(size:Int):String = { - val os = new StringBuilder - for (_ <- 0 until size) os.append(" ") - os.toString() + document } } From 276e89467d4caf45e5124dd2118f610d5ee5ebfa Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Thu, 29 May 2025 17:29:29 -0700 Subject: [PATCH 37/42] Fix ColumnsToDocument --- .../processors/apps/ColumnsToDocument.scala | 134 +++++++++--------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala index e38b14615..e412c6be4 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala @@ -3,6 +3,7 @@ package org.clulab.processors.apps import org.clulab.processors.{Document, Processor, Sentence} import org.clulab.processors.clu.BalaurProcessor import org.clulab.scala.WrappedArrayBuffer._ +import org.clulab.utils.WrappedArraySeq import org.slf4j.{Logger, LoggerFactory} import java.io.InputStream @@ -18,6 +19,8 @@ class ColumnsToDocument * Last Modified: Fix compiler issue: import scala.io.Source. 
*/ object ColumnsToDocument { + type LabelSetter = (Sentence, Seq[String]) => Sentence + type Annotator = (Document) => Document val logger:Logger = LoggerFactory.getLogger(classOf[ColumnsToDocument]) val WORD_POS_CONLLX = 1 @@ -25,91 +28,94 @@ object ColumnsToDocument { val WORD_POS_CONLLU = 1 val TAG_POS_CONLLU = 3 - var proc:Processor = new BalaurProcessor() + var proc: Processor = new BalaurProcessor() var prevLang: String = "en" - def readFromFile(fn:String, - wordPos:Int = WORD_POS_CONLLX, - labelPos:Int = TAG_POS_CONLLX, - setLabels: (Sentence, Array[String]) => Unit, - annotate: (Document) => Unit, - filterOutContractions:Boolean = false, - lang: String = "en" - ): Document = { - - // redefine proc acording to the language used + protected def setProcessor(lang: String): Unit = { if (lang != prevLang) { if (lang == "pt") { println("Using Portuguese processors") throw new RuntimeException(s"ERROR: language '$lang' not supported!") //this.proc = new PortugueseCluProcessor() - } else if (lang == "es") { + } + else if (lang == "es") { println("Using Spanish processors") //this.proc = new SpanishCluProcessor() throw new RuntimeException(s"ERROR: language '$lang' not supported!") - } else { + } + else { println("Using English processors") this.proc = new BalaurProcessor() } this.prevLang = lang } + } + def readFromFile( + fn: String, + wordPos: Int = WORD_POS_CONLLX, + labelPos: Int = TAG_POS_CONLLX, + setLabels: LabelSetter, + annotate: Annotator, + filterOutContractions: Boolean = false, + lang: String = "en" + ): Document = { + setProcessor(lang) Using.resource(Source.fromFile(fn)) { source => readFromSource(source, wordPos, labelPos, setLabels, annotate, filterOutContractions) } } - def readFromStream(stream:InputStream, - wordPos:Int = WORD_POS_CONLLX, - labelPos:Int = TAG_POS_CONLLX, - setLabels: (Sentence, Array[String]) => Unit, - annotate: (Document) => Unit, - filterOutContractions:Boolean = false, - lang: String = "en"): Document = { - - // redefine proc acording to the language used - if (lang == "pt"){ - println("Using Portuguese processors") - //this.proc = new PortugueseCluProcessor() - throw new RuntimeException(s"ERROR: language '$lang' not supported!") - } else if(lang == "es") { - println("Using Spanish processors") - //this.proc = new SpanishCluProcessor() - throw new RuntimeException(s"ERROR: language '$lang' not supported!") - } else { - println("Using English processors") - this.proc = new BalaurProcessor() - } - + def readFromStream( + stream: InputStream, + wordPos: Int = WORD_POS_CONLLX, + labelPos: Int = TAG_POS_CONLLX, + setLabels: LabelSetter, + annotate: Annotator, + filterOutContractions: Boolean = false, + lang: String = "en" + ): Document = { + setProcessor(lang) Using.resource(Source.fromInputStream(stream)) { source => readFromSource(source, wordPos, labelPos, setLabels, annotate, filterOutContractions) } } - def readFromSource(source:Source, - wordPos:Int, - labelPos:Int, - setLabels: (Sentence, Array[String]) => Unit, - annotate: (Document) => Unit, - filterOutContractions:Boolean): Document = { - var words = new ArrayBuffer[String]() - var startOffsets = new ArrayBuffer[Int]() - var endOffsets = new ArrayBuffer[Int]() - var labels = new ArrayBuffer[String]() - var charOffset = 0 + def readFromSource( + source: Source, + wordPos: Int, + labelPos: Int, + setLabels: LabelSetter, + annotate: Annotator, + filterOutContractions:Boolean + ): Document = { + val words = new ArrayBuffer[String]() + val startOffsets = new ArrayBuffer[Int]() + val endOffsets = 
new ArrayBuffer[Int]()
+    val labels = new ArrayBuffer[String]()
     val sentences = new ArrayBuffer[Sentence]()
-    for(line <- source.getLines()) {
+    var charOffset = 0
+
+    def mkSentence(): Sentence = {
+      val wordsSeq = WrappedArraySeq(words.toArray).toImmutableSeq
+      val unlabeledSentence = new Sentence(wordsSeq, startOffsets, endOffsets, wordsSeq)
+
+      words.clear()
+      startOffsets.clear()
+      endOffsets.clear()
+
+      val labeledSentence = setLabels(unlabeledSentence, labels.toSeq)
+
+      labels.clear()
+      labeledSentence
+    }
+
+    for (line <- source.getLines()) {
       val l = line.trim
       if (l.isEmpty) {
         // end of sentence
         if (words.nonEmpty) {
-          val s = new Sentence(words, startOffsets, endOffsets, words)
-          setLabels(s, labels.toArray)
-          sentences += s
-          words = new ArrayBuffer[String]()
-          startOffsets = new ArrayBuffer[Int]()
-          endOffsets = new ArrayBuffer[Int]()
-          labels = new ArrayBuffer[String]()
+          sentences += mkSentence()
          charOffset += 1
        }
      } else {
@@ -126,7 +132,7 @@ object ColumnsToDocument {
        // 10	as	o	DET	_	Gender=Fem|Number=Plur	11	det	_	_
        //
        val offset = bits(0) // we assume token offsets are always in column 0!
-        if(! filterOutContractions || ! offset.contains("-")) {
+        if (!filterOutContractions || ! offset.contains("-")) {
          words += bits(wordPos)
          labels += bits(labelPos)
          startOffsets += charOffset
@@ -138,21 +144,15 @@ object ColumnsToDocument {
        }
      }
    }
-    if(words.nonEmpty) {
-      val s = new Sentence(
-        words, startOffsets, endOffsets, words,
-        tags = Some(labels)
-      )
-      sentences += s
-    }
+    if (words.nonEmpty)
+      sentences += mkSentence()
 
     logger.debug(s"Loaded ${sentences.size} sentences.")
 
-    val d = new Document(sentences)
-    annotate(d)
-
-    d
+    val unannotatedDocument = new Document(sentences)
+    val annotatedDocument = annotate(unannotatedDocument)
+    annotatedDocument
   }
 
-  def annotateNil(doc:Document): Unit = {}
+  def annotateNil(document: Document): Document = document
 }

From 0b933796e715a417a18174b63dee5a2e1011ca0e Mon Sep 17 00:00:00 2001
From: Keith Alcock
Date: Thu, 29 May 2025 17:30:23 -0700
Subject: [PATCH 38/42] Remove unused and duplicate code in NumericUtils

---
 .../processors/apps/ColumnsToDocument.scala   |  9 ++--
 .../org/clulab/numeric/NumericUtils.scala     | 47 -------------------
 2 files changed, 5 insertions(+), 51 deletions(-)

diff --git a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala
index e412c6be4..23ad73ca1 100644
--- a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala
+++ b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala
@@ -110,15 +110,15 @@ object ColumnsToDocument {
       labeledSentence
     }
 
-    for (line <- source.getLines()) {
-      val l = line.trim
+    source.getLines().map(_.trim).foreach { l =>
       if (l.isEmpty) {
         // end of sentence
         if (words.nonEmpty) {
           sentences += mkSentence()
           charOffset += 1
         }
-      } else {
+      }
+      else {
         // within the same sentence
         val bits = l.split("\\s+")
         if (bits.length < 2)
@@ -139,7 +139,8 @@ object ColumnsToDocument {
           charOffset += bits(wordPos).length
           endOffsets += charOffset
           charOffset += 1
-        } else {
+        }
+        else {
           // println("Skipped line: " + l)
         }
       }
diff --git a/library/src/main/scala/org/clulab/numeric/NumericUtils.scala b/library/src/main/scala/org/clulab/numeric/NumericUtils.scala
index e60bb225d..ba9bcd84b 100644
--- a/library/src/main/scala/org/clulab/numeric/NumericUtils.scala
+++ b/library/src/main/scala/org/clulab/numeric/NumericUtils.scala
@@ -8,7 +8,6 @@ import org.clulab.struct.Interval
 import 
org.clulab.utils.WrappedArraySeq import scala.collection.mutable -import _root_.scala.util.control.Breaks._ object NumericUtils { def displayMentions(mentions: Seq[Mention], doc: Document): Unit = { @@ -128,54 +127,8 @@ object NumericUtils { triggered = false } } - - - // removes entities and norms for unallowable entity sequences, e.g., don't extract 'in' as 'inch' before B-LOC in '... Sahal 108 in Senegal' - // toBeRemovedShortened is entity without BIO- - val zippedEntities = entities.zipWithIndex - - // So remove all consecutive MEASREMENT-LENGTH in front of a B-LOC - // Can it just be done backwards in one pass in a state matchine? - - zippedEntities.foreach { case (outerEntity, outerIndex) => - if (outerIndex > 0 && outerEntity == triggerEntity && entities(outerIndex - 1).endsWith(toBeRemovedShortened)) { - // Go in reverse replacing indices and norms in the immediate preceding mention. - breakable { // TODO: rewrite - for ((innerEntity, innerIndex) <- zippedEntities.slice(0, outerIndex).reverse) { - if (innerEntity.endsWith(toBeRemovedShortened)) { - entities(innerIndex) = "O" - norms(innerIndex) = "" - } else break() - } - } - } - } - } - - def removeOneEntityBeforeAnother2(entities: mutable.Seq[String], norms: mutable.Seq[String], triggerEntity: String, toBeRemovedShortened: String): Unit = { - // removes entities and norms for unallowable entity sequences, e.g., don't extract 'in' as 'inch' before B-LOC in '... Sahal 108 in Senegal' - // toBeRemovedShortened is entity without BIO- - val zippedEntities = entities.zipWithIndex - - // So remove all consecutive MEASREMENT-LENGTH in front of a B-LOC - // Can it just be done backwards in one pass in a state matchine? - - zippedEntities.foreach { case (outerEntity, outerIndex) => - if (outerIndex > 0 && outerEntity == triggerEntity && entities(outerIndex - 1).endsWith(toBeRemovedShortened)) { - // Go in reverse replacing indices and norms in the immediate preceding mention. - breakable { // TODO: rewrite - for ((innerEntity, innerIndex) <- zippedEntities.slice(0, outerIndex).reverse) { - if (innerEntity.endsWith(toBeRemovedShortened)) { - entities(innerIndex) = "O" - norms(innerIndex) = "" - } else break() - } - } - } - } } - // TODO: These need to be mutable private def addLabelsAndNorms(label: String, norm: String, tokenInt: Interval, entities: mutable.Seq[String], norms: mutable.Seq[String]): Unit = { // careful here: we may override some existing entities and norms // but, given that the numeric entity rules tend to be high precision, this is probably Ok... 
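The duplicated block deleted above carried a TODO asking whether the scan could be done backwards in one pass as a small state machine. For reference, a hedged sketch of that formulation; this is not code from this series, and the method name and example labels are illustrative:

    import scala.collection.mutable

    // Walk right to left: arm on the trigger label (e.g., "B-LOC"), then clear the
    // consecutive run of labels ending in toBeRemovedShortened (the entity without
    // its BIO- prefix, e.g., "MEASUREMENT-LENGTH") sitting directly before it.
    def removeOneEntityBeforeAnotherSketch(
      entities: mutable.Seq[String],
      norms: mutable.Seq[String],
      triggerEntity: String,
      toBeRemovedShortened: String
    ): Unit = {
      var armed = false // true while still inside a run that precedes a trigger

      entities.indices.reverse.foreach { i =>
        if (entities(i) == triggerEntity)
          armed = true
        else if (armed && entities(i).endsWith(toBeRemovedShortened)) {
          entities(i) = "O"
          norms(i) = ""
        }
        else
          armed = false
      }
    }
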
From e8262754fb4bc45be0188aef4db8beea5315e595 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Fri, 30 May 2025 08:23:27 -0700 Subject: [PATCH 39/42] Fix typos --- .../scala/org/clulab/processors/apps/ColumnsToDocument.scala | 2 +- .../main/scala/org/clulab/sequences/MEMMSequenceTagger.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala index 23ad73ca1..506486e88 100644 --- a/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala +++ b/apps/src/main/scala/org/clulab/processors/apps/ColumnsToDocument.scala @@ -21,7 +21,7 @@ class ColumnsToDocument object ColumnsToDocument { type LabelSetter = (Sentence, Seq[String]) => Sentence type Annotator = (Document) => Document - val logger:Logger = LoggerFactory.getLogger(classOf[ColumnsToDocument]) + val logger: Logger = LoggerFactory.getLogger(classOf[ColumnsToDocument]) val WORD_POS_CONLLX = 1 val TAG_POS_CONLLX = 4 diff --git a/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala b/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala index 7cba53724..ff2dacaab 100644 --- a/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala +++ b/library/src/main/scala/org/clulab/sequences/MEMMSequenceTagger.scala @@ -14,7 +14,7 @@ import scala.reflect.ClassTag import scala.util.Using /** - * Sequence tagger using a maximum entrop Markov model (MEMM) + * Sequence tagger using a maximum entropy Markov model (MEMM) * User: mihais * Date: 8/26/17 */ From b355af7e0cb4de684e68092fcc1a6bc2f2a9409a Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 2 Jun 2025 10:21:16 -0700 Subject: [PATCH 40/42] Combine named entity without exposing array --- .../org/clulab/processors/clu/BalaurProcessor.scala | 9 +++------ .../main/scala/org/clulab/sequences/NamedEntity.scala | 10 +++++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala index 8404ed13f..2af5a1d3a 100644 --- a/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala +++ b/library/src/main/scala/org/clulab/processors/clu/BalaurProcessor.scala @@ -250,15 +250,12 @@ class BalaurProcessor protected ( if (customNamedEntities.isEmpty) generic else { - val genericNamedEntities = NamedEntity.collect(generic) - val result = generic.toArray // A copy of the generic labels is created here. - //println(s"Generic NamedEntity: ${genericNamedEntities.mkString(", ")}") //println(s"Custom NamedEntity: ${customNamedEntities.mkString(", ")}") + val genericNamedEntities = NamedEntity.collect(generic) + val combinedNamedEntities = NamedEntity.combine(generic, genericNamedEntities, customNamedEntities) - // The custom labels override the generic ones! 
- NamedEntity.combine(result, genericNamedEntities, customNamedEntities) - WrappedArraySeq(result).toImmutableSeq + combinedNamedEntities } } diff --git a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala index 2b74c5b6d..a8f2a8da8 100644 --- a/library/src/main/scala/org/clulab/sequences/NamedEntity.scala +++ b/library/src/main/scala/org/clulab/sequences/NamedEntity.scala @@ -1,5 +1,7 @@ package org.clulab.sequences +import org.clulab.utils.WrappedArraySeq + import scala.collection.mutable // This is definitely not the most efficient as far as number of objects @@ -43,11 +45,12 @@ object NamedEntity { namedEntities } - def combine(bioLabels: Array[String], genericNamedEntities: Seq[NamedEntity], customNamedEntities: Seq[NamedEntity]): Unit = { + def combine(bioLabels: Seq[String], genericNamedEntities: Seq[NamedEntity], customNamedEntities: Seq[NamedEntity]): Seq[String] = { + val bioLabelsArray = bioLabels.toArray // Neither named entities sequence can contain overlapping elements within the sequence. // At most, there is overlap between sequences. Use is made of that fact. // The NamedEntities never have empty Ranges, so end - 1 is always at least start. - val outsides = bioLabels.indices.filter(bioLabels(_) == OUTSIDE) + val outsides = bioLabelsArray.indices.filter(bioLabelsArray(_) == OUTSIDE) val validStarts = (genericNamedEntities.map(_.range.start) ++ outsides).toSet // The -1 is used to coordinate ends (exclusive) with the OUTSIDE positions (inclusive). val validEnds = (genericNamedEntities.map(_.range.end - 1) ++ outsides).toSet @@ -56,8 +59,9 @@ object NamedEntity { } validCustomNamedEntities.foreach { customNamedEntity => - customNamedEntity.fill(bioLabels) + customNamedEntity.fill(bioLabelsArray) } + WrappedArraySeq(bioLabelsArray).toImmutableSeq } // Only INSIDEs can be invalid, and they are made valid by From 000f0ed06ea1175ddbb00a03272809caa79610a6 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 2 Jun 2025 10:21:24 -0700 Subject: [PATCH 41/42] Update sbt again --- project/build.properties | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/project/build.properties b/project/build.properties index 29f5dd953..75ac47aaa 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1,9 +1,9 @@ -# This was last checked on 2025-05-26. +# This was last checked on 2025-06-02. # Version 1.7.2+ will cause problems when combined with the play plug-in used for the webapp! # [error] * org.scala-lang.modules:scala-xml_2.12:2.1.0 (early-semver) is selected over {1.2.0, 1.1.1} # [error] +- org.scala-lang:scala-compiler:2.12.17 (depends on 2.1.0) # [error] +- com.typesafe.sbt:sbt-native-packager:1.5.2 (scalaVersion=2.12, sbtVersion=1.0) (depends on 1.1.1) # [error] +- com.typesafe.play:twirl-api_2.12:1.5.1 (depends on 1.2.0) # This error is solved by adding a VersionScheme.Always to plugins.sbt. 
-# up to 1.11.0 -sbt.version = 1.11.0 +# up to 1.11.1 +sbt.version = 1.11.1 From 143298578c2447da25e65d0c2fe8e2b6b25606a4 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 2 Jun 2025 11:01:47 -0700 Subject: [PATCH 42/42] Fix test --- .../src/test/scala/org/clulab/sequences/TestNamedEntity.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/library/src/test/scala/org/clulab/sequences/TestNamedEntity.scala b/library/src/test/scala/org/clulab/sequences/TestNamedEntity.scala index cd731635f..08a774400 100644 --- a/library/src/test/scala/org/clulab/sequences/TestNamedEntity.scala +++ b/library/src/test/scala/org/clulab/sequences/TestNamedEntity.scala @@ -45,8 +45,7 @@ class TestNamedEntity extends Test { val customBioLabels = customBioLabelString.split(" +") val genericNamedEntities = NamedEntity.collect(genericBioLabels) val customNamedEntities = NamedEntity.collect(customBioLabels) - NamedEntity.combine(genericBioLabels, genericNamedEntities, customNamedEntities) - val actualCombinedBioLabels = genericBioLabels + val actualCombinedBioLabels = NamedEntity.combine(genericBioLabels, genericNamedEntities, customNamedEntities) val actualCombinedBioLabelString = actualCombinedBioLabels.mkString(" ") val formattedExpectedCombinedBioLabelString = expectedCombinedBioLabelString.split(" +").mkString(" ")
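
The test now captures combine()'s return value because, as of patch 40, NamedEntity.combine no longer mutates its first argument in place; it returns the merged labels as an immutable Seq. A hedged usage sketch with illustrative labels, not data from the suite:

    import org.clulab.sequences.NamedEntity

    val genericBioLabels = Seq("B-PER", "I-PER", "O", "O", "B-LOC")
    val customBioLabels  = Seq("O", "O", "B-ORG", "O", "O")
    val genericNamedEntities = NamedEntity.collect(genericBioLabels)
    val customNamedEntities = NamedEntity.collect(customBioLabels)
    // combine() leaves genericBioLabels untouched and returns the merged sequence,
    // with custom entities overriding generic ones where their boundaries are valid.
    val combinedBioLabels = NamedEntity.combine(genericBioLabels, genericNamedEntities, customNamedEntities)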