diff --git a/.gitignore b/.gitignore index 539a250..3a4e300 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,7 @@ Thumbs.db # Cache files __pycache__/ -*.pyc \ No newline at end of file +*.pyc + +# XML files +*.xml \ No newline at end of file diff --git a/boneset-api/package-lock.json b/boneset-api/package-lock.json index 4811db2..df6755a 100644 --- a/boneset-api/package-lock.json +++ b/boneset-api/package-lock.json @@ -477,6 +477,8 @@ "resolved": "https://registry.npmjs.org/express/-/express-4.22.1.tgz", "integrity": "sha512-F2X8g9P1X7uCPZMA3MVf9wcTqlyNp7IhH5qPCI0izhaOIYXaW9L535tGA3qmjRzpH+bZczqq7hVKxTR4NWnu+g==", "license": "MIT", + "peer": true, +>>>>>>>>> Temporary merge branch 2 "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", diff --git a/data_extraction/Extract_Bone_Descriptions.py b/data_extraction/Extract_Bone_Descriptions.py index 6ff956e..6085bf8 100644 --- a/data_extraction/Extract_Bone_Descriptions.py +++ b/data_extraction/Extract_Bone_Descriptions.py @@ -1,9 +1,11 @@ import xml.etree.ElementTree as ET import json import os -import argparse import sys +slides_dir = "ppt/slides" +output_filename = "all_bone_descriptions.json" + def extract_descriptions_from_slide(xml_file): # Extract descriptions from a single slide XML file try: tree = ET.parse(xml_file) @@ -206,4 +208,13 @@ def process_all_slides(ppt_dir, output_dir): os.makedirs(args.output_dir, exist_ok=True) success = process_all_slides(args.ppt_dir, args.output_dir) - sys.exit(0 if success else 1) \ No newline at end of file + sys.exit(0 if success else 1) + with open(output_json_path, 'w') as f: + json.dump(bone_data, f, indent=4) + + print(f"Descriptions saved to {output_json_path}") + +# Example usage +xml_file = "/Users/joshbudzynski/Downloads/example_folder/ppt/slides/slide3.xml" +output_json = "slide3_Descriptions.json" +parse_slide_xml(xml_file, output_json)