DDMAL · timothydereuse · Feb 1, 2024 · Feb 1, 2024 · Feb 9, 2024 · Feb 9, 2024
diff --git a/rodan-main/code/rodan/jobs/MEI_encoding/build_mei_file.py b/rodan-main/code/rodan/jobs/MEI_encoding/build_mei_file.py
@@ -12,12 +12,16 @@
 import math
 import numpy as np
 import json
+
 from rodan.jobs.MEI_encoding import parse_classifier_table as pct  # for rodan
 
-# import parse_classifier_table as pct #---> for testing locally
+# import parse_classifier_table as pct  # ---> for testing locally
+
 from itertools import groupby
+
+# from state_machine import SylMachine  # ---> for testing locally
+
 from rodan.jobs.MEI_encoding.state_machine import SylMachine
-# from state_machine import SylMachine #---> for testing locally
 
 try:
     from rodan.jobs.MEI_encoding import __version__
@@ -178,7 +182,7 @@ def generate_base_document(column_split_info: Optional[dict]):
 
     mei = new_el("mei")
     mei.set("xmlns", "http://www.music-encoding.org/ns/mei")
-    mei.set("meiversion", "5.0.0-dev")
+    mei.set("meiversion", "5.0.0")
 
     meiHead = new_el("meiHead", mei)
 
@@ -569,6 +573,7 @@ def build_mei(
     staves: List[dict],
     page: dict,
     column_split_info: Optional[dict],
+    verbose: bool = False,
 ):
     """
     Encodes the final MEI document using:
@@ -643,6 +648,9 @@ def build_mei(
 
     # add to the MEI document, syllable by syllable
     for glyphs, syl_box in pairs:
+        if verbose:
+            print("processing syl: ", syl_box["syl"], [g["name"] for g in glyphs])
+
         bb = {
             "ulx": syl_box["ul"][0],
             "uly": syl_box["ul"][1],
@@ -667,25 +675,33 @@ def build_mei(
 
         # iterate over glyphs belonging to this syllable up to and including the final neume
         for i, glyph in enumerate(glyphs):
-            # if this glyph is a custos, make it the same pitch as next neume
-            if glyph["name"] == "custos":
+            # if this glyph is a custos and we've already added one to this syllable, disregard it
+            prev_custos = "custos" in [x.tag for x in machine.layer]
+            if glyph["name"] == "custos" and prev_custos:
+                continue
+            elif glyph["name"] == "custos":
+                # else, if this glyph is a custos, make it the same pitch as next neume
                 note, octave = get_custos_pitch_heuristic(all_glyphs, glyph)
                 glyph["note"] = note
                 glyph["octave"] = octave
 
-            # new_element is of type ET.Element. <neueme>, <divLine>, <clef>, <accid>, <custos>, etc.
+            # new_element is of type ET.Element. <neume>, <divLine>, <clef>, <accid>, <custos>, etc.
             new_element = glyph_to_element(classifier, width_container, glyph, surface)
-            # TODO
-            # Investigate why new_element can be None
+
+            # new_element can be None iff the name is not found in the classifier
             if new_element is None:
                 continue
-            # tag is "neume", "divLine", "clef", "accid", "custos", etc.
+
+            # tag is one of "neume", "divLine", "clef", "accid", "custos", etc.
             tag = new_element.tag
 
             # the state machine is responsible for abstracting the confusing logic of when to add
             # an element inside vs outside the syllable. An optimization we make is that we find where the last
             # neume is, and consider that the true end of the syllable, and every glyph associated with this syllable
             # that comes after that are added outside the syllable.
+            if verbose:
+                print(f"machine_state: {machine.prev_state} glyph: {tag}")
+
             if i <= last_neume_index:
                 machine.read(tag, new_element)
             else:
@@ -832,7 +848,7 @@ def process(
     width_mult: float,
     width_container: dict,
     column_split_info: dict,
-    verbose: bool = True,
+    verbose: bool = False,
 ):
     """
     Runs the entire MEI encoding process given the three inputs to the rodan job and the
@@ -851,8 +867,15 @@ def process(
         )
         column_split_info["staff_to_column"] = staff_to_column
         precompute_multi_column(glyphs, column_split_info, staves)
+
     meiDoc = build_mei(
-        pairs, classifier, width_container, staves, jsomr["page"], column_split_info
+        pairs,
+        classifier,
+        width_container,
+        staves,
+        jsomr["page"],
+        column_split_info,
+        verbose,
     )
 
     if width_mult > 0:
@@ -889,4 +912,4 @@ def translate_bbox(bbox: dict, ranges: list, height: int, col: int):
         new_box["nrows"] = bbox["nrows"]
     if "ncols" in bbox:
         new_box["ncols"] = bbox["ncols"]
-    return new_box
+    return new_box
diff --git a/rodan-main/code/rodan/jobs/MEI_encoding/scripts/parse_local.py b/rodan-main/code/rodan/jobs/MEI_encoding/scripts/parse_local.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
-import sys
- 
+import os, sys
+
 # setting path
-sys.path.append('..')
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 # To run this file, some imports in build_mei_file.py will need to be changed
 # The correct ones for local development are there commented out
@@ -14,58 +14,78 @@
 
 
 def run_my_task(inputs, settings, outputs):
-
-    jsomr_path = inputs['JSOMR'][0]['resource_path']
-    with open(jsomr_path, 'r') as file:
+    jsomr_path = inputs["JSOMR"][0]["resource_path"]
+    with open(jsomr_path, "r") as file:
         jsomr = json.loads(file.read())
 
-    if 'Column Splitting Data' in inputs:
-        split_ranges_path = inputs['Column Splitting Data'][0]['resource_path']
-        with open(split_ranges_path, 'r') as file:
+    if "Column Splitting Data" in inputs:
+        split_ranges_path = inputs["Column Splitting Data"][0]["resource_path"]
+        with open(split_ranges_path, "r") as file:
             split_ranges = json.loads(file.read())
     else:
         split_ranges = None
 
-
     try:
-        alignment_path = inputs['Text Alignment JSON'][0]['resource_path']
+        alignment_path = inputs["Text Alignment JSON"][0]["resource_path"]
     except KeyError:
         syls = None
     else:
-        with open(alignment_path, 'r') as file:
+        with open(alignment_path, "r") as file:
             syls = json.loads(file.read())
 
-    classifier_table, width_container = pct.fetch_table_from_csv(inputs['MEI Mapping CSV'][0]['resource_path'])
-    width_mult = settings[u'Neume Component Spacing']
-    mei_string = bm.process(jsomr, syls, classifier_table, width_mult, width_container, split_ranges)
-
-    outfile_path = outputs['MEI'][0]['resource_path']
-    with open(outfile_path, 'w') as file:
+    classifier_table, width_container = pct.fetch_table_from_csv(
+        inputs["MEI Mapping CSV"][0]["resource_path"]
+    )
+    width_mult = settings["Neume Component Spacing"]
+    verbose = settings.get("verbose", False)
+    mei_string = bm.process(
+        jsomr,
+        syls,
+        classifier_table,
+        width_mult,
+        width_container,
+        split_ranges,
+        verbose,
+    )
+
+    outfile_path = outputs["MEI"][0]["resource_path"]
+    with open(outfile_path, "w") as file:
         file.write(mei_string)
 
-
     return True
 
+
 if __name__ == "__main__":
-    import re
-    input_jsomr = "../debug/mei-encoding-test-hpf.json" # path to hpf output
-    input_text = "../debug/mei-encoding-test-ta.json" # path to text alignment json
-    input_csd = "../debug/mei-encoding-test-csd.json" # path to column splitting data
-    input_mei_mapping = "../meimapping.csv" # path to mei mapping csv
-    output_path = "../debug/result.mei" # path to output mei
-    gt_output_path = "/code/Rodan/rodan/test/files/mei-encoding-test.mei" # path to ground truth mei
+    import re
+
+    base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "test", "files")
+    # path to hpf output
+    # input_jsomr = os.path.join(base_path, "mei-encoding-test-hpf.json")
+    # path to text alignment json
+    # input_text = os.path.join(base_path, "mei-encoding-test-ta.json")
+    # path to column splitting data
+    # input_csd = os.path.join(base_path, "mei-encoding-test-csd.json")
+    # path to mei mapping csv
+    input_mei_mapping = os.path.join(base_path, "mei-encoding-test.csv")
+    # path to output mei
+    output_path = os.path.join(base_path, "mei-result.mei")
+    # path to ground truth mei
+    gt_output_path = os.path.join(base_path, "mei-encoding-test.mei")
+    # NOTE(review): machine-specific input paths below; change these to files on your machine
+    input_jsomr = r"C:\Users\tim\Desktop\manuscript\165v.PF.json"
+    input_text = r"C:\Users\tim\Desktop\manuscript\165v.TA.json"
+    input_mei_mapping = (
+        r"C:\Users\tim\Desktop\manuscript\csv-square_notation_neume_level_newest.csv"
+    )
+
     inputs = {
-        "JSOMR": [{"resource_path":input_jsomr}],
-        "Text Alignment JSON": [{"resource_path":input_text}],
-        "MEI Mapping CSV": [{"resource_path":input_mei_mapping}],
-        "Column Splitting Data": [{"resource_path":input_csd}]
-    }
-    outputs = {
-        "MEI": [{"resource_path":output_path}]
-    }
-    settings = {
-        "Neume Component Spacing":0.5
+        "JSOMR": [{"resource_path": input_jsomr}],
+        "Text Alignment JSON": [{"resource_path": input_text}],
+        "MEI Mapping CSV": [{"resource_path": input_mei_mapping}],
+        # "Column Splitting Data": [{"resource_path": input_csd}],
     }
+    outputs = {"MEI": [{"resource_path": output_path}]}
+    settings = {"Neume Component Spacing": 0.5, "verbose": True}
 
     run_my_task(inputs=inputs, outputs=outputs, settings=settings)
 
@@ -82,7 +102,7 @@ def run_my_task(inputs, settings, outputs):
         gt_line = pattern.sub("_", gt_line)
         pred_line = pattern.sub("_", pred_line)
         # and compare if two meis are identical to each other
-        if(gt_line != pred_line):
+        if gt_line != pred_line:
             print("failed at line {}".format(i))
         else:
             print("passed at line {}".format(i))