From bbb5524515336e2e73c00592e4278961e8604505 Mon Sep 17 00:00:00 2001
From: Bre Naidu <brehananaidu@gmail.com>
Date: Mon, 9 Feb 2026 15:08:54 -0600
Subject: [PATCH 1/2] Add XML files to gitignore

---
 .gitignore | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 539a250d..3a4e3009 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,4 +21,7 @@ Thumbs.db
 
 # Cache files
 __pycache__/
-*.pyc
\ No newline at end of file
+*.pyc
+
+# XML files
+*.xml
\ No newline at end of file

From 2b22cc0bcfd6b8b0a44e2942b6fb1fa76dcfb315 Mon Sep 17 00:00:00 2001
From: Bre Naidu <brehananaidu@gmail.com>
Date: Mon, 9 Feb 2026 15:12:52 -0600
Subject: [PATCH 2/2] Delete xml_boneset_reader.py

---
 data_extraction/xml_boneset_reader.py | 100 --------------------------
 1 file changed, 100 deletions(-)
 delete mode 100644 data_extraction/xml_boneset_reader.py

diff --git a/data_extraction/xml_boneset_reader.py b/data_extraction/xml_boneset_reader.py
deleted file mode 100644
index a09ec540..00000000
--- a/data_extraction/xml_boneset_reader.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import os
-import xml.etree.ElementTree as ET
-import json
-import argparse
-
-def extract_bones_from_xml(xml_path):
-    """
-    Parses the XML file and extracts bonesets and their associated bones.
-    Bonesets are determined by hyperlink text with size 1200.
-    Bones with size 900 are assigned to the most recent bolded boneset.
-    """
-    try:
-        print(f"Parsing XML: {xml_path}")
-        tree = ET.parse(xml_path)
-        root = tree.getroot()
-    except ET.ParseError as e:
-        print(f"Error parsing {xml_path}: {e}")
-        return {}
-
-    # Namespace handling for XML
-    ns = {
-        'p': 'http://schemas.openxmlformats.org/presentationml/2006/main',
-        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
-        'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
-    }
-
-    bonesets = {}  # Dictionary to store bonesets
-    bonesetContent =[]
-    total_boneset = None
-    bolded_set = None
-    boldedList=[]
-
-    # Extract bonesets based on hyperlinks and size attributes
-    for sp_element in root.findall(".//p:sp", ns):
-        for r_element in sp_element.findall(".//p:txBody//a:r", ns):
-            rPr_element = r_element.find("a:rPr", ns)
-            text_element = r_element.find("a:t", ns)
-
-            if rPr_element is not None and text_element is not None:
-                text = text_element.text.strip()
-                size = rPr_element.get("sz")
-                is_bold = rPr_element.get("b") == "1"
-                has_hyperlink = rPr_element.find("a:hlinkClick", ns) is not None
-
-                if has_hyperlink:
-                    if size == "1200":
-                        if is_bold:
-                            bolded_set = text
-                            bonesets[bolded_set] = list()
-
-                        if total_boneset is None:
-                            total_boneset = text
-                            bonesets[total_boneset] = list()
-                            continue
-                        # These are their own bonesets                        
-                        bonesets[total_boneset].append(text.capitalize())
-                    elif size == "900":
-                        if not bolded_set:
-                            bonesetContent.append(text.capitalize())
-                        else:
-                            bonesets[bolded_set].append(text.capitalize())
-    for i in boldedList:
-        bonesets[bolded_set].append(i)
-                        
-
-    return bonesets, bonesetContent
-
-def generate_json_output(bonesets, output_json_path):
-    """
-    Converts bonesets dictionary into a structured JSON format and writes it to a file.
-    """
-    structured_data = []
-
-    for boneset_name, bonesetContent in bonesets.items():
-        structured_data.append({
-            "name": boneset_name,
-            "id": boneset_name.lower().replace(" ", "_"),
-            "bones": bonesetContent
-        })
-
-    # Save to JSON file
-    try:
-        with open(output_json_path, 'w') as json_file:
-            json.dump(structured_data, json_file, indent=4)
-            print(f"JSON file saved: {output_json_path}")
-    except IOError as e:
-        print(f"Error writing to {output_json_path}: {e}")
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Extract bonesets from XML.")
-    parser.add_argument("--xml-file", required=True, help="Path to the XML file.")
-    parser.add_argument("--json-file", required=True, help="Path to the output JSON file.")
-    
-    args = parser.parse_args()
-    
-    # Extract bonesets and their bones
-    bonesets, bonesetContent = extract_bones_from_xml(args.xml_file)
-
-    # Generate and save JSON output
-    generate_json_output(bonesets, args.json_file)