From a4b78ebd16eb92f856e9fd2bc3ddc54e7c1d46b6 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 9 Mar 2024 15:47:04 -0800 Subject: [PATCH 1/7] Add support for cheffing QTI IMSCP files. --- ricecooker/classes/files.py | 151 +++++++++++++++++++++++++++++++++++- ricecooker/classes/nodes.py | 75 ++++++++++++++++++ setup.py | 1 + 3 files changed, 225 insertions(+), 2 deletions(-) diff --git a/ricecooker/classes/files.py b/ricecooker/classes/files.py index f6451533..b5a997d8 100644 --- a/ricecooker/classes/files.py +++ b/ricecooker/classes/files.py @@ -2,8 +2,10 @@ from __future__ import unicode_literals import hashlib +import io import json import os +import re import shutil import tempfile import zipfile @@ -12,13 +14,16 @@ from urllib.parse import urlparse from xml.etree import ElementTree +import chardet import filetype +import xmltodict import yt_dlp from cachecontrol.caches.file_cache import FileCache from le_utils.constants import exercises from le_utils.constants import file_formats from le_utils.constants import format_presets from le_utils.constants import languages +from lxml import etree from PIL import Image from PIL import UnidentifiedImageError from requests.exceptions import ConnectionError @@ -745,6 +750,9 @@ def get_preset(self): return self.preset or format_presets.EPUB +no_index_zips = set([format_presets.HTML5_DEPENDENCY_ZIP, format_presets.QTI_ZIP]) + + class HTMLZipFile(DownloadFile): default_ext = file_formats.HTML5 allowed_formats = [file_formats.HTML5] @@ -757,8 +765,8 @@ def process_file(self): self.filename = super(HTMLZipFile, self).process_file() if self.filename: try: - # make sure index.html exists unless this is a dependency (i.e. shared resources) zip - if not self.get_preset() == format_presets.HTML5_DEPENDENCY_ZIP: + # make sure index.html exists unless this is a dependency file or IMS content package + if self.get_preset() not in no_index_zips: with zipfile.ZipFile(config.get_storage_path(self.filename)) as zf: _ = zf.getinfo("index.html") except KeyError as err: @@ -768,6 +776,145 @@ def process_file(self): return self.filename +class IMSCPZipFile(HTMLZipFile): + def strip_ns_prefix(self, tree): + """Strip namespace prefixes from an LXML tree. + From https://stackoverflow.com/a/30233635 + """ + for element in tree.xpath("descendant-or-self::*[namespace-uri()!='']"): + element.tag = etree.QName(element).localname + + def strip_langstring(self, tree): + """Replace all langstring elements with their text value.""" + for ls in tree.xpath(".//langstring"): + ls.tail = ls.text + ls.tail if ls.tail else ls.text + etree.strip_elements(tree, "langstring", with_tail=False) + + def collect_metadata(self, metadata_elem): + self.strip_ns_prefix(metadata_elem) + self.strip_langstring(metadata_elem) + metadata_dict = {} + + for tag in ("general", "rights", "educational", "lifecycle"): + elem = metadata_elem.find("lom/%s" % tag) + if elem: + metadata_dict.update(xmltodict.parse(etree.tostring(elem))) + + return metadata_dict + + def get_manifest(self): + with zipfile.ZipFile(config.get_storage_path(self.filename)) as zf: + manifest_file = zf.open("imsmanifest.xml") + try: + return etree.parse(manifest_file).getroot() + except etree.XMLSyntaxError: + # we've run across XML files that are marked as UTF-8 encoded but which have non-UTF-8 characters in them + # for this case, detect the 'real' encoding and decode it as unicode, then make it actual UTF-8 and parse. + f = zf.open("imsmanifest.xml", "rb") + data = f.read() + f.close() + + info = chardet.detect(data) + data = data.decode(info["encoding"]) + return etree.parse(io.BytesIO(data.encode("utf-8"))).getroot() + + def walk_items(self, root): + root_dict = dict(root.items()) + + title_elem = root.find("title", root.nsmap) + if title_elem is not None: + # title_elem.text has issues when there are BR tags. Instead get ALL text, ignoring BR tags. + # As BR tags do not make sense in metadata, we can assume it's an editor glitch causing it. + text = "" + for child in title_elem.iter(): + if child.text: + text += child.text + if child.tail: + text += child.tail + assert text.strip(), "Title element has no title: {}".format( + etree.tostring(title_elem, pretty_print=True) + ) + root_dict["title"] = text.strip() + + metadata_elem = root.find("metadata", root.nsmap) + if metadata_elem is not None: + root_dict["metadata"] = self.collect_metadata(metadata_elem) + + children = [] + for item in root.findall("item", root.nsmap): + children.append(self.walk_items(item)) + + if children: + root_dict["children"] = children + + return root_dict + + def derive_content_files_dict(self, resource_elem, resources_dict): + nsmap = resource_elem.nsmap + file_elements = resource_elem.findall("file", nsmap) + base = "./" + ( + resource_elem.get("{http://www.w3.org/XML/1998/namespace}base") or "" + ) + file_paths = [base + fe.get("href") for fe in file_elements] + dep_elements = resource_elem.findall("dependency", nsmap) + dep_paths = [] + for de in dep_elements: + dre = resources_dict[de.get("identifierref")] + dep_paths.extend(self.derive_content_files_dict(dre, resources_dict)) + return file_paths + dep_paths + + def collect_resources(self, item, resources_dict): + if item.get("children"): + for child in item["children"]: + self.collect_resources(child, resources_dict) + elif item.get("identifierref"): + resource_elem = resources_dict[item["identifierref"]] + + # Add all resource attrs to item dict + for key, value in resource_elem.items(): + key_stripped = re.sub("^{.*}", "", key) # Strip any namespace prefix + item[key_stripped] = value + + if resource_elem.get("type") == "webcontent": + item["index_file"] = resource_elem.get("href") + item["files"] = self.derive_content_files_dict( + resource_elem, resources_dict + ) + + def extract_metadata(self): + """Extract metadata and topic tree info from an IMSCP file. + Return a dict {'metadata': {...}, 'organizations': [list of topic dicts]} + """ + manifest = self.get_manifest() + + nsmap = manifest.nsmap + + metadata_elem = manifest.find("metadata", nsmap) + metadata = {} + if metadata_elem is not None: + metadata = self.collect_metadata(metadata_elem) + + resources_elem = manifest.find("resources", nsmap) + resources_dict = dict((r.get("identifier"), r) for r in resources_elem) + + organizations = [] + for org_elem in manifest.findall("organizations/organization", nsmap): + item_tree = self.walk_items(org_elem) + self.collect_resources(license, item_tree, resources_dict) + organizations.append(item_tree) + + return { + "identifier": manifest.get("identifier"), + "metadata": metadata, + "organizations": organizations, + } + + +class QTIZipFile(IMSCPZipFile): + def get_preset(self): + return format_presets.QTI_ZIP + + class H5PFile(DownloadFile): default_ext = file_formats.H5P allowed_formats = [file_formats.H5P] diff --git a/ricecooker/classes/nodes.py b/ricecooker/classes/nodes.py index efdd012c..acf4f721 100644 --- a/ricecooker/classes/nodes.py +++ b/ricecooker/classes/nodes.py @@ -1647,3 +1647,78 @@ def to_dict(self): # add alias for back-compatibility RemoteContentNode = StudioContentNode + + +def get_by_path(obj, *args): + for arg in args: + obj = obj.get(arg, {}) + if obj == {}: + return None + return obj + + +QTI_PLACEHOLDER_SOURCE_ID = "A SOURCE ID THAT WILL BE REMOVED BEFORE UPLOAD" + + +class QTINode(ContentNode): + """ + Node representing QTI exercise + """ + + kind = content_kinds.EXERCISE + + def __init__( + self, source_id=QTI_PLACEHOLDER_SOURCE_ID, title=None, license=None, **kwargs + ): + super(QTINode, self).__init__(source_id, title, license, **kwargs) + from .files import QTIZipFile + + qti_files = [f for f in self.files if isinstance(f, QTIZipFile)] + qti_file = qti_files[0] + qti_file.process_file() + metadata = qti_file.extract_metadata() + self.source_id = ( + metadata.get("identifer") + if self.source_id == QTI_PLACEHOLDER_SOURCE_ID + else self.source_id + ) + if self.source_id is None: + raise InvalidNodeException( + "No source_id was provided and the QTI file {} does not have an identifier".format( + qti_file.path + ) + ) + self.title = self.title or get_by_path( + metadata, "metadata", "general", "title", "string" + ) + if self.title is None: + raise InvalidNodeException( + "No title was provided and the QTI file {} does not have a title".format( + qti_file.path + ) + ) + + def validate(self): + """validate: Makes sure QTI is valid + Args: None + Returns: boolean indicating if QTI is valid + """ + from .files import QTIZipFile + + try: + assert ( + self.kind == content_kinds.EXERCISE + ), "Assumption Failed: Node should be an Exercise" + assert ( + self.questions == [] + ), "Assumption Failed: QTI should not have questions" + assert [ + f for f in self.files if isinstance(f, QTIZipFile) + ], "Assumption Failed: QTI should have at least one QTI file" + return super(QTINode, self).validate() + except AssertionError as ae: + raise InvalidNodeException( + "Invalid node ({}): {} - {}".format( + ae.args[0], self.title, self.__dict__ + ) + ) diff --git a/setup.py b/setup.py index 1b84ac24..c6c05f5f 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,7 @@ "EbookLib>=0.17.1", "filetype>=1.1.0", "urllib3==2.2.1", + "xmltodict==0.13.0", ], python_requires=">=3.8, <3.11", license="MIT license", From 58fb6fed225e94ed264ca0509f3c73abd2c056d2 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sun, 18 Feb 2024 16:49:12 -0800 Subject: [PATCH 2/7] Add utilities to map SCORM metadata to our own metadata schema where possible. --- ricecooker/utils/SCORM_metadata.py | 158 +++++++++++++++++++++++++++++ tests/test_SCORM_metadata.py | 88 ++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 ricecooker/utils/SCORM_metadata.py create mode 100644 tests/test_SCORM_metadata.py diff --git a/ricecooker/utils/SCORM_metadata.py b/ricecooker/utils/SCORM_metadata.py new file mode 100644 index 00000000..bd1db071 --- /dev/null +++ b/ricecooker/utils/SCORM_metadata.py @@ -0,0 +1,158 @@ +""" +Utilities for mapping from SCORM metadata to LE Utils metadata. +""" +from le_utils.constants.labels import learning_activities +from le_utils.constants.labels import needs +from le_utils.constants.labels import resource_type + + +imscp_metadata_keys = { + "general": ["title", "description", "language", "keyword"], + "rights": [], + "educational": [ + "interactivityType", + "interactivityLevel", + "learningResourceType", + "intendedEndUserRole", + ], + "lifecycle": [], +} + + +# Define mappings from SCORM educational types to LE Utils activity types +SCORM_to_learning_activities_mappings = { + "exercise": learning_activities.PRACTICE, + "simulation": learning_activities.EXPLORE, + "questionnaire": learning_activities.PRACTICE, + "diagram": learning_activities.EXPLORE, + "figure": learning_activities.EXPLORE, + "graph": learning_activities.EXPLORE, + "index": learning_activities.READ, + "slide": learning_activities.READ, + "table": learning_activities.READ, + "narrative text": learning_activities.READ, + "exam": learning_activities.PRACTICE, + "experiment": learning_activities.EXPLORE, + "problem statement": learning_activities.REFLECT, + "self assessment": learning_activities.REFLECT, + "lecture": learning_activities.WATCH, +} + + +def map_scorm_to_le_utils_activities(scorm_dict): + le_utils_activities = [] + + # Adjustments based on interactivity + interactive_adjustments = { + learning_activities.EXPLORE: ( + learning_activities.READ, + learning_activities.WATCH, + ) + } + + # Determine the interactivity level and type + interactivity_type = scorm_dict.get("educational", {}).get("interactivityType") + interactivity_level = scorm_dict.get("educational", {}).get("interactivityLevel") + + is_interactive = ( + interactivity_type + in [ + "active", + "mixed", + ] + or interactivity_level in ["medium", "high"] + ) + + # Extract the learning resource types from the SCORM data + learning_resource_types = scorm_dict.get("educational", {}).get( + "learningResourceType", [] + ) + + # Map each SCORM type to an LE Utils activity type + for learning_resource_type in learning_resource_types: + le_utils_type = SCORM_to_learning_activities_mappings.get( + learning_resource_type + ) + # Adjust based on interactivity + if not is_interactive and le_utils_type in interactive_adjustments: + le_utils_type = ( + interactive_adjustments[le_utils_type][0] + if learning_resource_type == "simulation" + else interactive_adjustments[le_utils_type][1] + ) + + if le_utils_type and le_utils_type not in le_utils_activities: + le_utils_activities.append(le_utils_type) + + return le_utils_activities + + +# Define mappings from SCORM educational types to educator-focused resource types +SCORM_to_resource_type_mappings = { + "exercise": resource_type.EXERCISE, + "simulation": resource_type.ACTIVITY, + "questionnaire": resource_type.ACTIVITY, + "diagram": resource_type.MEDIA, + "figure": resource_type.MEDIA, + "graph": resource_type.MEDIA, + "index": resource_type.GUIDE, + "slide": resource_type.LESSON, + "table": resource_type.TUTORIAL, + "narrative text": resource_type.TEXTBOOK, + "exam": resource_type.EXERCISE, + "experiment": resource_type.ACTIVITY, + "problem statement": resource_type.ACTIVITY, + "self assessment": resource_type.ACTIVITY, + "lecture": resource_type.LESSON, +} + + +# Mapping for intendedEndUserRole when the resource is for educators +SCORM_intended_role_to_resource_type_mapping = { + "teacher": resource_type.LESSON_PLAN, + "author": resource_type.GUIDE, + "manager": resource_type.GUIDE, +} + + +def map_scorm_to_educator_resource_types(scorm_dict): + educator_resource_types = [] + + # Extract the learning resource types and intended end user role from the SCORM data + learning_resource_types = scorm_dict.get("educational", {}).get( + "learningResourceType", [] + ) + intended_roles = scorm_dict.get("educational", {}).get("intendedEndUserRole", []) + + # Map each SCORM type to an educator-focused resource type + for learning_resource_type in learning_resource_types: + mapped_type = SCORM_to_resource_type_mappings.get(learning_resource_type) + if mapped_type and mapped_type not in educator_resource_types: + educator_resource_types.append(mapped_type) + + # Check if the intended end user role matches any educator roles + for role in intended_roles: + if ( + role in SCORM_intended_role_to_resource_type_mapping + and SCORM_intended_role_to_resource_type_mapping[role] + not in educator_resource_types + ): + educator_resource_types.append( + SCORM_intended_role_to_resource_type_mapping[role] + ) + + return educator_resource_types + + +def infer_beginner_level_from_difficulty(scorm_dict): + # Beginner difficulty levels + beginner_difficulties = {"very easy", "easy"} + + educational_metadata = scorm_dict.get("educational", {}) + + # Check if the difficulty level indicates beginner content + difficulty = educational_metadata.get("difficulty") + if difficulty in beginner_difficulties: + return [needs.FOR_BEGINNERS] + + return [] diff --git a/tests/test_SCORM_metadata.py b/tests/test_SCORM_metadata.py new file mode 100644 index 00000000..297e6416 --- /dev/null +++ b/tests/test_SCORM_metadata.py @@ -0,0 +1,88 @@ +import pytest +from le_utils.constants.labels import learning_activities +from le_utils.constants.labels import needs +from le_utils.constants.labels import resource_type + +from ricecooker.utils.SCORM_metadata import infer_beginner_level_from_difficulty +from ricecooker.utils.SCORM_metadata import map_scorm_to_educator_resource_types +from ricecooker.utils.SCORM_metadata import map_scorm_to_le_utils_activities + + +@pytest.mark.parametrize( + "scorm_dict, expected_result", + [ + ( + { + "educational": { + "interactivityType": "active", + "interactivityLevel": "high", + "learningResourceType": ["exercise", "simulation"], + } + }, + [learning_activities.PRACTICE, learning_activities.EXPLORE], + ), + ( + {"educational": {"learningResourceType": ["lecture", "self assessment"]}}, + [learning_activities.REFLECT, learning_activities.WATCH], + ), + ( + { + "educational": { + "interactivityType": "mixed", + "interactivityLevel": "medium", + "learningResourceType": ["simulation", "graph"], + } + }, + [learning_activities.EXPLORE], + ), + ( + { + "educational": { + "interactivityType": "expositive", + "interactivityLevel": "low", + "learningResourceType": ["simulation", "graph"], + } + }, + [learning_activities.READ, learning_activities.WATCH], + ), + ], +) +def test_map_scorm_to_le_utils_activities(scorm_dict, expected_result): + assert set(map_scorm_to_le_utils_activities(scorm_dict)) == set(expected_result) + + +@pytest.mark.parametrize( + "scorm_dict, expected_result", + [ + ( + { + "educational": { + "learningResourceType": ["exercise", "lecture"], + "intendedEndUserRole": ["teacher"], + } + }, + [resource_type.EXERCISE, resource_type.LESSON, resource_type.LESSON_PLAN], + ), + ( + { + "educational": { + "learningResourceType": ["simulation", "figure"], + "intendedEndUserRole": ["author"], + } + }, + [resource_type.ACTIVITY, resource_type.MEDIA, resource_type.GUIDE], + ), + ], +) +def test_map_scorm_to_educator_resource_types(scorm_dict, expected_result): + assert set(map_scorm_to_educator_resource_types(scorm_dict)) == set(expected_result) + + +def test_infer_beginner_level_from_difficulty(): + scorm_dict_easy = {"educational": {"difficulty": "easy"}} + assert infer_beginner_level_from_difficulty(scorm_dict_easy) == [ + needs.FOR_BEGINNERS + ] + + scorm_dict_hard = {"educational": {"difficulty": "difficult"}} + assert infer_beginner_level_from_difficulty(scorm_dict_hard) == [] From d13181b946c09dc70dec741bce1bda5a083e69c0 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sun, 18 Feb 2024 16:52:30 -0800 Subject: [PATCH 3/7] Update IMSCP file parsing to more fully parse metadata and external metadata files. --- ricecooker/classes/files.py | 135 +++++++--- tests/test_files.py | 206 ++++++++++++++++ ...mplete_manifest_with_external_metadata.xml | 102 ++++++++ .../ims_xml/metadata_hummingbirds_course.xml | 232 ++++++++++++++++++ .../metadata_hummingbirds_organization.xml | 14 ++ .../samples/ims_xml/simple_manifest.xml | 63 +++++ 6 files changed, 716 insertions(+), 36 deletions(-) create mode 100644 tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml create mode 100644 tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml create mode 100644 tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml create mode 100644 tests/testcontent/samples/ims_xml/simple_manifest.xml diff --git a/ricecooker/classes/files.py b/ricecooker/classes/files.py index b5a997d8..2d9d1c33 100644 --- a/ricecooker/classes/files.py +++ b/ricecooker/classes/files.py @@ -42,6 +42,7 @@ from ricecooker.utils.images import create_image_from_zip from ricecooker.utils.images import create_tiled_image from ricecooker.utils.images import ThumbnailGenerationError +from ricecooker.utils.SCORM_metadata import imscp_metadata_keys from ricecooker.utils.subtitles import build_subtitle_converter_from_file from ricecooker.utils.subtitles import InvalidSubtitleFormatError from ricecooker.utils.subtitles import InvalidSubtitleLanguageError @@ -750,9 +751,6 @@ def get_preset(self): return self.preset or format_presets.EPUB -no_index_zips = set([format_presets.HTML5_DEPENDENCY_ZIP, format_presets.QTI_ZIP]) - - class HTMLZipFile(DownloadFile): default_ext = file_formats.HTML5 allowed_formats = [file_formats.HTML5] @@ -765,8 +763,8 @@ def process_file(self): self.filename = super(HTMLZipFile, self).process_file() if self.filename: try: - # make sure index.html exists unless this is a dependency file or IMS content package - if self.get_preset() not in no_index_zips: + # make sure index.html exists unless this is a dependency file + if self.get_preset() != format_presets.HTML5_DEPENDENCY_ZIP: with zipfile.ZipFile(config.get_storage_path(self.filename)) as zf: _ = zf.getinfo("index.html") except KeyError as err: @@ -776,7 +774,38 @@ def process_file(self): return self.filename -class IMSCPZipFile(HTMLZipFile): +def denest_xml_value(value, preferred_language): + if isinstance(value, dict): + # Handle the 'string' -> '#text' nested structure + if "string" in value: + return denest_xml_value(value["string"], preferred_language) + elif "langstring" in value: + return denest_xml_value(value["langstring"], preferred_language) + # Handle other simple text and key-value pairs + elif "#text" in value: + return value["#text"] + elif "value" in value: + return value["value"] + elif isinstance(value, list): + try: + return next( + denest_xml_value(item, preferred_language) + for item in value + if item.get("@language", "").startswith(preferred_language) + ) + except StopIteration: + return [denest_xml_value(item, preferred_language) for item in value] + return value + + +class IMSCPZipFile(DownloadFile): + default_ext = file_formats.HTML5 + allowed_formats = [file_formats.HTML5] + is_primary = True + + def get_preset(self): + return self.preset or format_presets.IMSCP_ZIP + def strip_ns_prefix(self, tree): """Strip namespace prefixes from an LXML tree. From https://stackoverflow.com/a/30233635 @@ -784,33 +813,70 @@ def strip_ns_prefix(self, tree): for element in tree.xpath("descendant-or-self::*[namespace-uri()!='']"): element.tag = etree.QName(element).localname - def strip_langstring(self, tree): - """Replace all langstring elements with their text value.""" - for ls in tree.xpath(".//langstring"): - ls.tail = ls.text + ls.tail if ls.tail else ls.text - etree.strip_elements(tree, "langstring", with_tail=False) + def _get_elem_for_tag(self, root, tag): + elem = root.find("lom/%s" % tag) + if elem is not None: + return elem + return root.find(tag) - def collect_metadata(self, metadata_elem): - self.strip_ns_prefix(metadata_elem) - self.strip_langstring(metadata_elem) + def collect_metadata(self, root): metadata_dict = {} - for tag in ("general", "rights", "educational", "lifecycle"): - elem = metadata_elem.find("lom/%s" % tag) - if elem: - metadata_dict.update(xmltodict.parse(etree.tostring(elem))) + metadata_elem = root.find("metadata", root.nsmap) + + if metadata_elem is None: + return metadata_dict + + # Check for external metadata reference + external_metadata_ref = metadata_elem.find( + "adlcp:location", + namespaces={"adlcp": "http://www.adlnet.org/xsd/adlcp_v1p3"}, + ) + if external_metadata_ref is not None: + # External metadata file path + external_file_path = external_metadata_ref.text + with self.open_zip() as zip_file: + with zip_file.open(external_file_path) as external_file: + metadata_elem = etree.parse(external_file).getroot() + + self.strip_ns_prefix(metadata_elem) + preferred_language = self.language + + if preferred_language is None: + elem = self._get_elem_for_tag(metadata_elem, "general") + if elem is not None: + values = xmltodict.parse(etree.tostring(elem)) + if "language" in values["general"]: + preferred_language = denest_xml_value( + values["general"]["language"], None + ) + + for tag, fields in imscp_metadata_keys.items(): + elem = self._get_elem_for_tag(metadata_elem, tag) + if elem is not None: + values = xmltodict.parse(etree.tostring(elem)) + for field in fields: + if field in values[tag]: + metadata_dict[field] = denest_xml_value( + values[tag][field], preferred_language + ) return metadata_dict + @contextmanager + def open_zip(self): + with zipfile.ZipFile(config.get_storage_path(self.get_filename())) as zf: + yield zf + def get_manifest(self): - with zipfile.ZipFile(config.get_storage_path(self.filename)) as zf: - manifest_file = zf.open("imsmanifest.xml") + with self.open_zip() as zf: try: - return etree.parse(manifest_file).getroot() + with zf.open("imsmanifest.xml") as manifest_file: + return etree.parse(manifest_file).getroot() except etree.XMLSyntaxError: # we've run across XML files that are marked as UTF-8 encoded but which have non-UTF-8 characters in them # for this case, detect the 'real' encoding and decode it as unicode, then make it actual UTF-8 and parse. - f = zf.open("imsmanifest.xml", "rb") + f = zf.open("imsmanifest.xml", "r") data = f.read() f.close() @@ -836,9 +902,7 @@ def walk_items(self, root): ) root_dict["title"] = text.strip() - metadata_elem = root.find("metadata", root.nsmap) - if metadata_elem is not None: - root_dict["metadata"] = self.collect_metadata(metadata_elem) + root_dict["metadata"] = self.collect_metadata(root) children = [] for item in root.findall("item", root.nsmap): @@ -852,9 +916,7 @@ def walk_items(self, root): def derive_content_files_dict(self, resource_elem, resources_dict): nsmap = resource_elem.nsmap file_elements = resource_elem.findall("file", nsmap) - base = "./" + ( - resource_elem.get("{http://www.w3.org/XML/1998/namespace}base") or "" - ) + base = resource_elem.get("{http://www.w3.org/XML/1998/namespace}base") or "" file_paths = [base + fe.get("href") for fe in file_elements] dep_elements = resource_elem.findall("dependency", nsmap) dep_paths = [] @@ -873,10 +935,11 @@ def collect_resources(self, item, resources_dict): # Add all resource attrs to item dict for key, value in resource_elem.items(): key_stripped = re.sub("^{.*}", "", key) # Strip any namespace prefix - item[key_stripped] = value + # Don't overwrite existing keys + if key_stripped not in item: + item[key_stripped] = value if resource_elem.get("type") == "webcontent": - item["index_file"] = resource_elem.get("href") item["files"] = self.derive_content_files_dict( resource_elem, resources_dict ) @@ -889,10 +952,10 @@ def extract_metadata(self): nsmap = manifest.nsmap - metadata_elem = manifest.find("metadata", nsmap) - metadata = {} - if metadata_elem is not None: - metadata = self.collect_metadata(metadata_elem) + metadata = self.collect_metadata(manifest) + + if self.language is None and metadata.get("language"): + self.set_language(metadata.get("language")) resources_elem = manifest.find("resources", nsmap) resources_dict = dict((r.get("identifier"), r) for r in resources_elem) @@ -900,7 +963,7 @@ def extract_metadata(self): organizations = [] for org_elem in manifest.findall("organizations/organization", nsmap): item_tree = self.walk_items(org_elem) - self.collect_resources(license, item_tree, resources_dict) + self.collect_resources(item_tree, resources_dict) organizations.append(item_tree) return { @@ -912,7 +975,7 @@ def extract_metadata(self): class QTIZipFile(IMSCPZipFile): def get_preset(self): - return format_presets.QTI_ZIP + return self.preset or format_presets.QTI_ZIP class H5PFile(DownloadFile): diff --git a/tests/test_files.py b/tests/test_files.py index 9c23da2e..fadbd4f8 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -2,6 +2,8 @@ import hashlib import os.path import tempfile +import zipfile +from contextlib import contextmanager from shutil import copyfile import pytest @@ -10,6 +12,7 @@ from ricecooker import config from ricecooker.classes.files import _get_language_with_alpha2_fallback +from ricecooker.classes.files import IMSCPZipFile from ricecooker.classes.files import is_youtube_subtitle_file_supported_language from ricecooker.classes.files import SubtitleFile from ricecooker.classes.files import YouTubeSubtitleFile @@ -631,3 +634,206 @@ def test_convertible_substitles_weirdext_subtitlesformat(): assert ( "El total de los protones y neutrones de un átomo" in filecontents ), "missing check words in converted subs" + + +@contextmanager +def create_zip_with_manifest(manifest_filename, *additional_files): + # Create a temporary file for the zipfile + with tempfile.NamedTemporaryFile(suffix=".zip", delete=True) as temp_zip: + # Define the paths + base_path = os.path.dirname(os.path.abspath(__file__)) + source_folder = os.path.join(base_path, "testcontent", "samples", "ims_xml") + manifest_file = os.path.join(source_folder, manifest_filename) + + # Create the zipfile and add the manifest file + with zipfile.ZipFile(temp_zip.name, "w") as zf: + zf.write(manifest_file, "imsmanifest.xml") + for additional_file in additional_files: + zf.write(os.path.join(source_folder, additional_file), additional_file) + + yield temp_zip.name + + # Clean up the zipfile + try: + os.remove(temp_zip.name) + except FileNotFoundError: + pass + + +def test_extract_metadata_from_simple_manifest(): + with create_zip_with_manifest("simple_manifest.xml") as zip_path: + imscp_file = IMSCPZipFile(zip_path) + assert imscp_file.extract_metadata() == { + "identifier": None, + "metadata": { + "description": "Example of test file", + "language": "en", + "title": "Test File", + }, + "organizations": [ + { + "children": [ + { + "files": [], + "href": "file1.html", + "identifier": "file1Ref", + "identifierref": "file1Ref", + "title": "Test File1", + "type": "webcontent", + "metadata": {}, + }, + { + "files": [], + "href": "file2.html", + "identifier": "file2Ref", + "identifierref": "file2Ref", + "title": "Test File2", + "type": "webcontent", + "metadata": {}, + }, + { + "children": [ + { + "files": [], + "href": "file1.html", + "identifier": "file1Ref", + "identifierref": "file1Ref", + "metadata": {}, + "title": "Test File1 Nested", + "type": "webcontent", + }, + { + "files": [], + "href": "file2.html", + "identifier": "file2Ref", + "identifierref": "file2Ref", + "metadata": {}, + "title": "Test File2 Nested", + "type": "webcontent", + }, + ], + "identifier": "folder2", + "metadata": {}, + "title": "Folder 2", + }, + { + "children": [ + { + "files": [], + "href": "file3.html", + "identifier": "file3Ref", + "identifierref": "file3Ref", + "metadata": {}, + "title": "Test File3 Nested", + "type": "webcontent", + }, + { + "files": [], + "href": "file4.html", + "identifier": "file4Ref", + "identifierref": "file4Ref", + "metadata": {}, + "title": "Test File4 Nested", + "type": "webcontent", + }, + ], + "identifier": "folder3", + "metadata": {}, + "title": "Folder 3", + }, + ], + "title": "Folder 1", + "metadata": {}, + }, + { + "children": [ + { + "files": [], + "href": "file3.html", + "identifier": "file3Ref", + "identifierref": "file3Ref", + "title": "Test File3", + "type": "webcontent", + "metadata": {}, + }, + { + "files": [], + "href": "file4.html", + "identifier": "file4Ref", + "identifierref": "file4Ref", + "title": "Test File4", + "type": "webcontent", + "metadata": {}, + }, + ], + "title": "Folder 4", + "metadata": {}, + }, + ], + } + + +def test_extract_metadata_from_complex_manifest(): + # Additional metadata files are passed as arguments to the context manager + with create_zip_with_manifest( + "complete_manifest_with_external_metadata.xml", + "metadata_hummingbirds_course.xml", + "metadata_hummingbirds_organization.xml", + ) as zip_path: + imscp_file = IMSCPZipFile(zip_path) + assert imscp_file.extract_metadata() == { + "identifier": "com.example.hummingbirds.contentpackaging.metadata.2024", + "metadata": { + "description": "An engaging overview of hummingbirds, covering their biology, behavior, and habitats. This course is designed for enthusiasts and bird watchers of all levels.", # noqa E501 + "language": "en", + "title": "Discovering Hummingbirds", + "keyword": ["hummingbirds", "bird watching", "ornithology"], + "intendedEndUserRole": "learner", + "interactivityLevel": "medium", + "learningResourceType": ["documentary", "interactive lesson"], + }, + "organizations": [ + { + "title": "Discovering Hummingbirds", + "identifier": "hummingbirds_default_org", + "metadata": { + "description": "This metadata provides information about the structure and organization of the Discovering Hummingbirds course. The course is organized in a hierarchical manner, guiding learners from basic concepts to more detailed aspects of hummingbird biology and behavior.", # noqa E501 + }, + "children": [ + { + "title": "Introduction to Hummingbirds", + "metadata": { + "description": "Explore the fascinating world of hummingbirds, their unique characteristics and behaviors.", + }, + "identifier": "item1", + "identifierref": "resource1", + "href": "intro.html", + "type": "webcontent", + "files": ["intro.html", "shared/style.css"], + }, + { + "title": "Hummingbird Habitats", + "metadata": { + "description": "Learn about various habitats of hummingbirds, from tropical jungles to backyard gardens.", + }, + "identifier": "item2", + "identifierref": "resource2", + "href": "habitats.html", + "type": "webcontent", + "files": ["habitats.html", "shared/image1.jpg"], + }, + { + "title": "Hummingbird Species", + "metadata": { + "description": "Discover the diversity of hummingbird species and their unique adaptations.", + }, + "identifier": "item3", + "identifierref": "resource3", + "href": "species.html", + "type": "webcontent", + "files": ["species.html", "shared/image2.jpg"], + }, + ], + }, + ], + } diff --git a/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml b/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml new file mode 100644 index 00000000..98afd161 --- /dev/null +++ b/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml @@ -0,0 +1,102 @@ + + + + + + ADL SCORM + 2004 3rd Edition + + metadata_hummingbirds_course.xml + + + + + Discovering Hummingbirds + + + metadata_hummingbirds_organization.xml + + + + + Introduction to Hummingbirds + + + + + Explore the fascinating world of hummingbirds, their unique characteristics and behaviors. + + + + + + + + + Hummingbird Habitats + + + + + Learn about various habitats of hummingbirds, from tropical jungles to backyard gardens. + + + + + + + + + Hummingbird Species + + + + + Discover the diversity of hummingbird species and their unique adaptations. + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml new file mode 100644 index 00000000..cb3c7346 --- /dev/null +++ b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml @@ -0,0 +1,232 @@ + + + + + URI + com.example.hummingbirds.contentpackaging.metadata.2024 + + + <string language="en-US">Discovering Hummingbirds</string> + <string language="es">Descubriendo Colibríes</string> + + en + + An engaging overview of hummingbirds, covering their biology, behavior, and habitats. This course is designed for enthusiasts and bird watchers of all levels. + + + hummingbirds + + + bird watching + + + ornithology + + + Global range, with a focus on the Americas. + + + LOMv1.0 + hierarchical + + + LOMv1.0 + 2 + + + + + + 1.0 + + + LOMv1.0 + final + + + + LOMv1.0 + publisher + + BEGIN:VCARD +VERSION:2.1 +FN:John Doe +ORG:Example Organization +TEL;WORK;VOICE:(555) 123-4567 +ADR;WORK:;;1234 Main St;Anytown;AN;12345;Country +EMAIL;PREF;INTERNET:john.doe@example.com +END:VCARD + + + 2024-01-01 + + Initial publication date of the course. + + + + + + + + URI + com.example.hummingbirds.metadata.2024 + + LOMv1.0 + SCORM_CAM_v1.3 + en-us + + + + text/html + image/jpeg + video/mp4 + 1048576 + http://www.example.com/hummingbirds + + + + LOMv1.0 + browser + + + LOMv1.0 + any + + + + + No specific installation requirements, accessible via standard web browsers. + + + Optimized for both desktop and mobile platforms. + + + P1H + + The average time to complete the course is approximately 1 hour. + + + + + + + LOMv1.0 + documentary + + + LOMv1.0 + interactive lesson + + + LOMv1.0 + medium + + + LOMv1.0 + medium + + + LOMv1.0 + learner + + + LOMv1.0 + training + + + Age 12 and up + + + LOMv1.0 + easy + + + P1H + + Approximately 1 hour is needed to complete the course. + + + + This course provides an introduction to hummingbirds, suitable for beginners in ornithology and bird watching. + + en-us + + + + + LOMv1.0 + no + + + LOMv1.0 + yes + + + Content is protected under Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. + + + + + + LOMv1.0 + ispartof + + + + URI + com.example.biologyseries + + + Part of a larger series on biology and wildlife. + + + + + + BEGIN:VCARD +VERSION:2.1 +FN:Jane Smith +ORG:Example Organization +TEL;WORK;VOICE:(555) 987-6543 +ADR;WORK:;;4321 Side St;Anytown;AN;54321;Country +EMAIL;PREF;INTERNET:jane.smith@example.com +END:VCARD + + + 2024-02-01 + + Annotation added to provide additional insights into course content. + + + + This course includes interactive elements to enhance learner engagement with the subject of hummingbirds. + + + + + + LOMv1.0 + discipline + + + + Example Organization's catalog of biology courses + + + ornithology_basics + + Basic Ornithology + + + + + This course serves as an introductory module in the Basic Ornithology series, focusing on hummingbirds. + + + biology + + + wildlife + + + diff --git a/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml new file mode 100644 index 00000000..87018dfa --- /dev/null +++ b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml @@ -0,0 +1,14 @@ + + + + + + This metadata provides information about the structure and organization of the Discovering Hummingbirds course. The course is organized in a hierarchical manner, guiding learners from basic concepts to more detailed aspects of hummingbird biology and behavior. + + + + LOMv1.0 + hierarchical + + + diff --git a/tests/testcontent/samples/ims_xml/simple_manifest.xml b/tests/testcontent/samples/ims_xml/simple_manifest.xml new file mode 100644 index 00000000..bcbb2f3a --- /dev/null +++ b/tests/testcontent/samples/ims_xml/simple_manifest.xml @@ -0,0 +1,63 @@ + + + + + + Test File + + en + + Example of test file + + + + + + + Folder 1 + + Test File1 + + + Test File2 + + + Folder 2 + + Test File1 Nested + + + Test File2 Nested + + + + Folder 3 + + Test File3 Nested + + + Test File4 Nested + + + + + Folder 4 + + Test File3 + + + Test File4 + + + + + + + + + + + + + + From e7d5aefe824a8bfcbc68b74b0363531fb3b66166 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sun, 18 Feb 2024 20:40:32 -0800 Subject: [PATCH 4/7] Update metadata reading to assume a flat metadata dict. Add general purpose utility for setting metadata on a node. --- ricecooker/utils/SCORM_metadata.py | 45 ++++++++++++++++++++---------- tests/test_SCORM_metadata.py | 42 +++++++++++----------------- 2 files changed, 46 insertions(+), 41 deletions(-) diff --git a/ricecooker/utils/SCORM_metadata.py b/ricecooker/utils/SCORM_metadata.py index bd1db071..d1dfe21a 100644 --- a/ricecooker/utils/SCORM_metadata.py +++ b/ricecooker/utils/SCORM_metadata.py @@ -39,7 +39,7 @@ } -def map_scorm_to_le_utils_activities(scorm_dict): +def map_scorm_to_le_utils_activities(metadata_dict): le_utils_activities = [] # Adjustments based on interactivity @@ -51,8 +51,8 @@ def map_scorm_to_le_utils_activities(scorm_dict): } # Determine the interactivity level and type - interactivity_type = scorm_dict.get("educational", {}).get("interactivityType") - interactivity_level = scorm_dict.get("educational", {}).get("interactivityLevel") + interactivity_type = metadata_dict.get("interactivityType") + interactivity_level = metadata_dict.get("interactivityLevel") is_interactive = ( interactivity_type @@ -64,9 +64,7 @@ def map_scorm_to_le_utils_activities(scorm_dict): ) # Extract the learning resource types from the SCORM data - learning_resource_types = scorm_dict.get("educational", {}).get( - "learningResourceType", [] - ) + learning_resource_types = metadata_dict.get("learningResourceType", []) # Map each SCORM type to an LE Utils activity type for learning_resource_type in learning_resource_types: @@ -115,14 +113,12 @@ def map_scorm_to_le_utils_activities(scorm_dict): } -def map_scorm_to_educator_resource_types(scorm_dict): +def map_scorm_to_educator_resource_types(metadata_dict): educator_resource_types = [] # Extract the learning resource types and intended end user role from the SCORM data - learning_resource_types = scorm_dict.get("educational", {}).get( - "learningResourceType", [] - ) - intended_roles = scorm_dict.get("educational", {}).get("intendedEndUserRole", []) + learning_resource_types = metadata_dict.get("learningResourceType", []) + intended_roles = metadata_dict.get("intendedEndUserRole", []) # Map each SCORM type to an educator-focused resource type for learning_resource_type in learning_resource_types: @@ -144,15 +140,34 @@ def map_scorm_to_educator_resource_types(scorm_dict): return educator_resource_types -def infer_beginner_level_from_difficulty(scorm_dict): +def infer_beginner_level_from_difficulty(metadata_dict): # Beginner difficulty levels beginner_difficulties = {"very easy", "easy"} - educational_metadata = scorm_dict.get("educational", {}) - # Check if the difficulty level indicates beginner content - difficulty = educational_metadata.get("difficulty") + difficulty = metadata_dict.get("difficulty") if difficulty in beginner_difficulties: return [needs.FOR_BEGINNERS] return [] + + +def update_node_from_metadata(node, metadata_dict): + # Update the node with the general metadata + node.description = metadata_dict.get("description") or node.description + if metadata_dict.get("language"): + node.set_language(metadata_dict.get("language")) + node.tags = node.tags + metadata_dict.get("keyword", []) + + # Update the node with the educational metadata + node.learning_activities = ( + node.learning_activities + map_scorm_to_le_utils_activities(metadata_dict) + ) + node.resource_types = node.resource_types + map_scorm_to_educator_resource_types( + metadata_dict + ) + node.learner_needs = node.learner_needs + infer_beginner_level_from_difficulty( + metadata_dict + ) + + return node diff --git a/tests/test_SCORM_metadata.py b/tests/test_SCORM_metadata.py index 297e6416..3e0d2a31 100644 --- a/tests/test_SCORM_metadata.py +++ b/tests/test_SCORM_metadata.py @@ -13,35 +13,29 @@ [ ( { - "educational": { - "interactivityType": "active", - "interactivityLevel": "high", - "learningResourceType": ["exercise", "simulation"], - } + "interactivityType": "active", + "interactivityLevel": "high", + "learningResourceType": ["exercise", "simulation"], }, [learning_activities.PRACTICE, learning_activities.EXPLORE], ), ( - {"educational": {"learningResourceType": ["lecture", "self assessment"]}}, + {"learningResourceType": ["lecture", "self assessment"]}, [learning_activities.REFLECT, learning_activities.WATCH], ), ( { - "educational": { - "interactivityType": "mixed", - "interactivityLevel": "medium", - "learningResourceType": ["simulation", "graph"], - } + "interactivityType": "mixed", + "interactivityLevel": "medium", + "learningResourceType": ["simulation", "graph"], }, [learning_activities.EXPLORE], ), ( { - "educational": { - "interactivityType": "expositive", - "interactivityLevel": "low", - "learningResourceType": ["simulation", "graph"], - } + "interactivityType": "expositive", + "interactivityLevel": "low", + "learningResourceType": ["simulation", "graph"], }, [learning_activities.READ, learning_activities.WATCH], ), @@ -56,19 +50,15 @@ def test_map_scorm_to_le_utils_activities(scorm_dict, expected_result): [ ( { - "educational": { - "learningResourceType": ["exercise", "lecture"], - "intendedEndUserRole": ["teacher"], - } + "learningResourceType": ["exercise", "lecture"], + "intendedEndUserRole": ["teacher"], }, [resource_type.EXERCISE, resource_type.LESSON, resource_type.LESSON_PLAN], ), ( { - "educational": { - "learningResourceType": ["simulation", "figure"], - "intendedEndUserRole": ["author"], - } + "learningResourceType": ["simulation", "figure"], + "intendedEndUserRole": ["author"], }, [resource_type.ACTIVITY, resource_type.MEDIA, resource_type.GUIDE], ), @@ -79,10 +69,10 @@ def test_map_scorm_to_educator_resource_types(scorm_dict, expected_result): def test_infer_beginner_level_from_difficulty(): - scorm_dict_easy = {"educational": {"difficulty": "easy"}} + scorm_dict_easy = {"difficulty": "easy"} assert infer_beginner_level_from_difficulty(scorm_dict_easy) == [ needs.FOR_BEGINNERS ] - scorm_dict_hard = {"educational": {"difficulty": "difficult"}} + scorm_dict_hard = {"difficulty": "difficult"} assert infer_beginner_level_from_difficulty(scorm_dict_hard) == [] From 42c78615ec652dcf44208ab82f232e109cda17d3 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sun, 18 Feb 2024 21:27:32 -0800 Subject: [PATCH 5/7] Do recursive tree generation for IMSCP nodes. Write basic tests to confirm behaviour. --- ricecooker/classes/nodes.py | 115 ++++++++++------ tests/test_files.py | 262 ++++++++++++++++++------------------ tests/test_tree.py | 43 ++++++ 3 files changed, 252 insertions(+), 168 deletions(-) diff --git a/ricecooker/classes/nodes.py b/ricecooker/classes/nodes.py index acf4f721..bacb93f1 100644 --- a/ricecooker/classes/nodes.py +++ b/ricecooker/classes/nodes.py @@ -21,6 +21,7 @@ from ..exceptions import InvalidNodeException from ..utils.utils import is_valid_uuid_string from .licenses import License +from ricecooker.utils.SCORM_metadata import update_node_from_metadata MASTERY_MODELS = [id for id, name in exercises.MASTERY_MODELS] ROLES = [id for id, name in roles.choices] @@ -31,6 +32,7 @@ class Node(object): license = None language = None + kind = None def __init__( self, @@ -40,7 +42,7 @@ def __init__( thumbnail=None, files=None, derive_thumbnail=False, - node_modifications={}, + node_modifications=None, extra_fields=None, ): self.files = [] @@ -60,7 +62,7 @@ def __init__( self.set_thumbnail(thumbnail) # save modifications passed in by csv - self.node_modifications = node_modifications + self.node_modifications = node_modifications or {} def set_language(self, language): """Set self.language to internal lang. repr. code from str or Language object.""" @@ -1649,54 +1651,87 @@ def to_dict(self): RemoteContentNode = StudioContentNode -def get_by_path(obj, *args): - for arg in args: - obj = obj.get(arg, {}) - if obj == {}: - return None - return obj - - -QTI_PLACEHOLDER_SOURCE_ID = "A SOURCE ID THAT WILL BE REMOVED BEFORE UPLOAD" - +class IMSCPNode(TreeNode): -class QTINode(ContentNode): - """ - Node representing QTI exercise - """ + kind = content_kinds.HTML5 - kind = content_kinds.EXERCISE + @classmethod + def _recurse_and_add_children(cls, parent, item_data, license): + source_id = item_data.get("identifier", item_data["title"]) + if item_data.get("children") is not None: + node = TopicNode( + source_id, + item_data["title"], + files=parent.files, + ) + for child in item_data["children"]: + cls._recurse_and_add_children(node, child, license) + else: + node = ContentNode( + source_id, + item_data["title"], + license, + files=parent.files, + extra_fields={ + "options": { + "entry": item_data["href"] + item_data.get("parameters", "") + } + }, + ) + node.kind = cls.kind + update_node_from_metadata(node, item_data["metadata"]) + parent.add_child(node) - def __init__( - self, source_id=QTI_PLACEHOLDER_SOURCE_ID, title=None, license=None, **kwargs - ): - super(QTINode, self).__init__(source_id, title, license, **kwargs) - from .files import QTIZipFile + def __new__(cls, *args, **kwargs): + from .files import IMSCPZipFile - qti_files = [f for f in self.files if isinstance(f, QTIZipFile)] - qti_file = qti_files[0] - qti_file.process_file() - metadata = qti_file.extract_metadata() - self.source_id = ( - metadata.get("identifer") - if self.source_id == QTI_PLACEHOLDER_SOURCE_ID - else self.source_id - ) - if self.source_id is None: + imscp_files = [f for f in kwargs["files"] if isinstance(f, IMSCPZipFile)] + if not imscp_files or len(imscp_files) > 1: + raise InvalidNodeException( + "IMSCPNode must be instantiated with exactly one IMSCPZipFile" + ) + imscp_file = imscp_files[0] + metadata = imscp_file.extract_metadata() + if "title" in metadata["metadata"] and kwargs.get("title") is None: + kwargs["title"] = metadata["metadata"]["title"] + if kwargs.get("title") is None: raise InvalidNodeException( - "No source_id was provided and the QTI file {} does not have an identifier".format( - qti_file.path + "No title was provided and the IMSCP file {} does not have a title".format( + imscp_file.path ) ) - self.title = self.title or get_by_path( - metadata, "metadata", "general", "title", "string" - ) - if self.title is None: + + if "identifier" in metadata and kwargs.get("source_id") is None: + kwargs["source_id"] = metadata["identifier"] + if kwargs.get("source_id") is None: raise InvalidNodeException( - "No title was provided and the QTI file {} does not have a title".format( - qti_file.path + "No source_id was provided and the IMSCP file {} does not have an identifier".format( + imscp_file.path ) ) + license = kwargs.pop("license") + if license is None: + raise InvalidNodeException( + "No license was provided and we cannot infer license from an IMSCP file" + ) + if metadata.get("organizations"): + node = TopicNode(*args, **kwargs) + + for child in metadata["organizations"]: + cls._recurse_and_add_children(node, child, license) + else: + node = ContentNode(*args, **kwargs) + node.kind = cls.kind + update_node_from_metadata(node, metadata["metadata"]) + return node + + +class QTINode(IMSCPNode): + """ + Node representing QTI exercise + """ + + kind = content_kinds.EXERCISE def validate(self): """validate: Makes sure QTI is valid diff --git a/tests/test_files.py b/tests/test_files.py index fadbd4f8..c8d7ede2 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -660,17 +660,34 @@ def create_zip_with_manifest(manifest_filename, *additional_files): pass -def test_extract_metadata_from_simple_manifest(): - with create_zip_with_manifest("simple_manifest.xml") as zip_path: - imscp_file = IMSCPZipFile(zip_path) - assert imscp_file.extract_metadata() == { - "identifier": None, - "metadata": { - "description": "Example of test file", - "language": "en", - "title": "Test File", - }, - "organizations": [ +expected_simple_metadata = { + "identifier": None, + "metadata": { + "description": "Example of test file", + "language": "en", + "title": "Test File", + }, + "organizations": [ + { + "children": [ + { + "files": [], + "href": "file1.html", + "identifier": "file1Ref", + "identifierref": "file1Ref", + "title": "Test File1", + "type": "webcontent", + "metadata": {}, + }, + { + "files": [], + "href": "file2.html", + "identifier": "file2Ref", + "identifierref": "file2Ref", + "title": "Test File2", + "type": "webcontent", + "metadata": {}, + }, { "children": [ { @@ -678,72 +695,23 @@ def test_extract_metadata_from_simple_manifest(): "href": "file1.html", "identifier": "file1Ref", "identifierref": "file1Ref", - "title": "Test File1", - "type": "webcontent", "metadata": {}, + "title": "Test File1 Nested", + "type": "webcontent", }, { "files": [], "href": "file2.html", "identifier": "file2Ref", "identifierref": "file2Ref", - "title": "Test File2", - "type": "webcontent", - "metadata": {}, - }, - { - "children": [ - { - "files": [], - "href": "file1.html", - "identifier": "file1Ref", - "identifierref": "file1Ref", - "metadata": {}, - "title": "Test File1 Nested", - "type": "webcontent", - }, - { - "files": [], - "href": "file2.html", - "identifier": "file2Ref", - "identifierref": "file2Ref", - "metadata": {}, - "title": "Test File2 Nested", - "type": "webcontent", - }, - ], - "identifier": "folder2", "metadata": {}, - "title": "Folder 2", - }, - { - "children": [ - { - "files": [], - "href": "file3.html", - "identifier": "file3Ref", - "identifierref": "file3Ref", - "metadata": {}, - "title": "Test File3 Nested", - "type": "webcontent", - }, - { - "files": [], - "href": "file4.html", - "identifier": "file4Ref", - "identifierref": "file4Ref", - "metadata": {}, - "title": "Test File4 Nested", - "type": "webcontent", - }, - ], - "identifier": "folder3", - "metadata": {}, - "title": "Folder 3", + "title": "Test File2 Nested", + "type": "webcontent", }, ], - "title": "Folder 1", + "identifier": "folder2", "metadata": {}, + "title": "Folder 2", }, { "children": [ @@ -752,88 +720,126 @@ def test_extract_metadata_from_simple_manifest(): "href": "file3.html", "identifier": "file3Ref", "identifierref": "file3Ref", - "title": "Test File3", - "type": "webcontent", "metadata": {}, + "title": "Test File3 Nested", + "type": "webcontent", }, { "files": [], "href": "file4.html", "identifier": "file4Ref", "identifierref": "file4Ref", - "title": "Test File4", - "type": "webcontent", "metadata": {}, + "title": "Test File4 Nested", + "type": "webcontent", }, ], - "title": "Folder 4", + "identifier": "folder3", "metadata": {}, + "title": "Folder 3", }, ], - } + "title": "Folder 1", + "metadata": {}, + }, + { + "children": [ + { + "files": [], + "href": "file3.html", + "identifier": "file3Ref", + "identifierref": "file3Ref", + "title": "Test File3", + "type": "webcontent", + "metadata": {}, + }, + { + "files": [], + "href": "file4.html", + "identifier": "file4Ref", + "identifierref": "file4Ref", + "title": "Test File4", + "type": "webcontent", + "metadata": {}, + }, + ], + "title": "Folder 4", + "metadata": {}, + }, + ], +} -def test_extract_metadata_from_complex_manifest(): - # Additional metadata files are passed as arguments to the context manager - with create_zip_with_manifest( - "complete_manifest_with_external_metadata.xml", - "metadata_hummingbirds_course.xml", - "metadata_hummingbirds_organization.xml", - ) as zip_path: +def test_extract_metadata_from_simple_manifest(): + with create_zip_with_manifest("simple_manifest.xml") as zip_path: imscp_file = IMSCPZipFile(zip_path) - assert imscp_file.extract_metadata() == { - "identifier": "com.example.hummingbirds.contentpackaging.metadata.2024", + assert imscp_file.extract_metadata() == expected_simple_metadata + + +expected_complex_metadata = { + "identifier": "com.example.hummingbirds.contentpackaging.metadata.2024", + "metadata": { + "description": "An engaging overview of hummingbirds, covering their biology, behavior, and habitats. This course is designed for enthusiasts and bird watchers of all levels.", # noqa E501 + "language": "en", + "title": "Discovering Hummingbirds", + "keyword": ["hummingbirds", "bird watching", "ornithology"], + "intendedEndUserRole": "learner", + "interactivityLevel": "medium", + "learningResourceType": ["documentary", "interactive lesson"], + }, + "organizations": [ + { + "title": "Discovering Hummingbirds", + "identifier": "hummingbirds_default_org", "metadata": { - "description": "An engaging overview of hummingbirds, covering their biology, behavior, and habitats. This course is designed for enthusiasts and bird watchers of all levels.", # noqa E501 - "language": "en", - "title": "Discovering Hummingbirds", - "keyword": ["hummingbirds", "bird watching", "ornithology"], - "intendedEndUserRole": "learner", - "interactivityLevel": "medium", - "learningResourceType": ["documentary", "interactive lesson"], + "description": "This metadata provides information about the structure and organization of the Discovering Hummingbirds course. The course is organized in a hierarchical manner, guiding learners from basic concepts to more detailed aspects of hummingbird biology and behavior.", # noqa E501 }, - "organizations": [ + "children": [ { - "title": "Discovering Hummingbirds", - "identifier": "hummingbirds_default_org", + "title": "Introduction to Hummingbirds", "metadata": { - "description": "This metadata provides information about the structure and organization of the Discovering Hummingbirds course. The course is organized in a hierarchical manner, guiding learners from basic concepts to more detailed aspects of hummingbird biology and behavior.", # noqa E501 + "description": "Explore the fascinating world of hummingbirds, their unique characteristics and behaviors.", }, - "children": [ - { - "title": "Introduction to Hummingbirds", - "metadata": { - "description": "Explore the fascinating world of hummingbirds, their unique characteristics and behaviors.", - }, - "identifier": "item1", - "identifierref": "resource1", - "href": "intro.html", - "type": "webcontent", - "files": ["intro.html", "shared/style.css"], - }, - { - "title": "Hummingbird Habitats", - "metadata": { - "description": "Learn about various habitats of hummingbirds, from tropical jungles to backyard gardens.", - }, - "identifier": "item2", - "identifierref": "resource2", - "href": "habitats.html", - "type": "webcontent", - "files": ["habitats.html", "shared/image1.jpg"], - }, - { - "title": "Hummingbird Species", - "metadata": { - "description": "Discover the diversity of hummingbird species and their unique adaptations.", - }, - "identifier": "item3", - "identifierref": "resource3", - "href": "species.html", - "type": "webcontent", - "files": ["species.html", "shared/image2.jpg"], - }, - ], + "identifier": "item1", + "identifierref": "resource1", + "href": "intro.html", + "type": "webcontent", + "files": ["intro.html", "shared/style.css"], + }, + { + "title": "Hummingbird Habitats", + "metadata": { + "description": "Learn about various habitats of hummingbirds, from tropical jungles to backyard gardens.", + }, + "identifier": "item2", + "identifierref": "resource2", + "href": "habitats.html", + "type": "webcontent", + "files": ["habitats.html", "shared/image1.jpg"], + }, + { + "title": "Hummingbird Species", + "metadata": { + "description": "Discover the diversity of hummingbird species and their unique adaptations.", + }, + "identifier": "item3", + "identifierref": "resource3", + "href": "species.html", + "type": "webcontent", + "files": ["species.html", "shared/image2.jpg"], }, ], - } + }, + ], +} + + +def test_extract_metadata_from_complex_manifest(): + # Additional metadata files are passed as arguments to the context manager + with create_zip_with_manifest( + "complete_manifest_with_external_metadata.xml", + "metadata_hummingbirds_course.xml", + "metadata_hummingbirds_organization.xml", + ) as zip_path: + imscp_file = IMSCPZipFile(zip_path) + assert imscp_file.extract_metadata() == expected_complex_metadata diff --git a/tests/test_tree.py b/tests/test_tree.py index d844d0f9..5ba9f7d9 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -11,16 +11,22 @@ from le_utils.constants import licenses from le_utils.constants.labels import levels from le_utils.constants.languages import getlang +from test_files import create_zip_with_manifest +from test_files import expected_complex_metadata +from test_files import expected_simple_metadata from ricecooker.classes.files import DocumentFile from ricecooker.classes.files import HTMLZipFile +from ricecooker.classes.files import IMSCPZipFile from ricecooker.classes.files import SlideImageFile from ricecooker.classes.files import ThumbnailFile from ricecooker.classes.licenses import get_license from ricecooker.classes.licenses import License +from ricecooker.classes.nodes import ContentNode from ricecooker.classes.nodes import CustomNavigationChannelNode from ricecooker.classes.nodes import CustomNavigationNode from ricecooker.classes.nodes import DocumentNode +from ricecooker.classes.nodes import IMSCPNode from ricecooker.classes.nodes import RemoteContentNode from ricecooker.classes.nodes import SlideshowNode from ricecooker.classes.nodes import TopicNode @@ -688,3 +694,40 @@ def test_remote_content_node_with_invalid_overridden_field(): author="Such disallowed. Computer says no.", ) node.validate_tree() + + +def assert_node_matches_metadata(node, metadata, files): + + extra_metadata = metadata["metadata"] + + assert node.title == extra_metadata["title"] + if metadata.get("identifier"): + assert node.source_id == metadata.get("identifier") + + # Check for ContentNode or TopicNode + if isinstance(node, ContentNode): + assert node.files == files + assert node.extra_fields.get("options", {}).get("entry") == metadata.get("href") + elif isinstance(node, TopicNode): + for child_node, child_metadata in zip( + node.children, metadata.get("children", []) + ): + assert_node_matches_metadata(child_node, child_metadata, files) + + +def test_imscp_node_simple_manifest(): + with create_zip_with_manifest("simple_manifest.xml") as zip_path: + imscp_file = IMSCPZipFile(zip_path) + node = IMSCPNode(source_id="test", license="CC BY", files=[imscp_file]) + assert_node_matches_metadata(node, expected_simple_metadata, [imscp_file]) + + +def test_imscp_node_complex_manifest(): + with create_zip_with_manifest( + "complete_manifest_with_external_metadata.xml", + "metadata_hummingbirds_course.xml", + "metadata_hummingbirds_organization.xml", + ) as zip_path: + imscp_file = IMSCPZipFile(zip_path) + node = IMSCPNode(license="CC BY", files=[imscp_file]) + assert_node_matches_metadata(node, expected_complex_metadata, [imscp_file]) From 3583a03eade2eb66cc3c655324adf2816ec9c590 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 20 Feb 2024 21:12:32 -0800 Subject: [PATCH 6/7] Tweaks to ensure proper parsing of language codes and keywords. --- ricecooker/utils/SCORM_metadata.py | 14 +++++++++++--- .../complete_manifest_with_external_metadata.xml | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ricecooker/utils/SCORM_metadata.py b/ricecooker/utils/SCORM_metadata.py index d1dfe21a..12abb47e 100644 --- a/ricecooker/utils/SCORM_metadata.py +++ b/ricecooker/utils/SCORM_metadata.py @@ -4,6 +4,7 @@ from le_utils.constants.labels import learning_activities from le_utils.constants.labels import needs from le_utils.constants.labels import resource_type +from le_utils.constants.languages import getlang imscp_metadata_keys = { @@ -155,9 +156,16 @@ def infer_beginner_level_from_difficulty(metadata_dict): def update_node_from_metadata(node, metadata_dict): # Update the node with the general metadata node.description = metadata_dict.get("description") or node.description - if metadata_dict.get("language"): - node.set_language(metadata_dict.get("language")) - node.tags = node.tags + metadata_dict.get("keyword", []) + lang_code = metadata_dict.get("language", "") + lang_code = ( + lang_code.split("-")[0].lower() if getlang(lang_code) is None else lang_code + ) + if getlang(lang_code): + node.set_language(lang_code) + keyword = metadata_dict.get("keyword", []) + if keyword and isinstance(keyword, str): + keyword = [keyword] + node.tags = node.tags + keyword # Update the node with the educational metadata node.learning_activities = ( diff --git a/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml b/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml index 98afd161..804647ec 100644 --- a/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml +++ b/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml @@ -45,6 +45,7 @@ example of proper metadata element usage. Explore the fascinating world of hummingbirds, their unique characteristics and behaviors. + en-US From affca1637aa2abb1bcdde26246a40a1e1fdad0a8 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 9 Mar 2024 15:55:32 -0800 Subject: [PATCH 7/7] Update tests for windows. --- tests/test_files.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/test_files.py b/tests/test_files.py index c8d7ede2..3aab32c8 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -638,26 +638,29 @@ def test_convertible_substitles_weirdext_subtitlesformat(): @contextmanager def create_zip_with_manifest(manifest_filename, *additional_files): - # Create a temporary file for the zipfile - with tempfile.NamedTemporaryFile(suffix=".zip", delete=True) as temp_zip: + # Create a temporary file for the zipfile but close it immediately for Windows compatibility + temp_zip = tempfile.NamedTemporaryFile(suffix=".zip", delete=False) + temp_zip_path = temp_zip.name + temp_zip.close() # Close the file, so it's not locked + try: # Define the paths base_path = os.path.dirname(os.path.abspath(__file__)) source_folder = os.path.join(base_path, "testcontent", "samples", "ims_xml") manifest_file = os.path.join(source_folder, manifest_filename) # Create the zipfile and add the manifest file - with zipfile.ZipFile(temp_zip.name, "w") as zf: + with zipfile.ZipFile(temp_zip_path, "w") as zf: zf.write(manifest_file, "imsmanifest.xml") for additional_file in additional_files: zf.write(os.path.join(source_folder, additional_file), additional_file) yield temp_zip.name - - # Clean up the zipfile - try: - os.remove(temp_zip.name) - except FileNotFoundError: - pass + finally: + # Clean up the zipfile + try: + os.remove(temp_zip.name) + except (FileNotFoundError, OSError): + pass expected_simple_metadata = { @@ -799,6 +802,7 @@ def test_extract_metadata_from_simple_manifest(): "title": "Introduction to Hummingbirds", "metadata": { "description": "Explore the fascinating world of hummingbirds, their unique characteristics and behaviors.", + "language": "en-US", }, "identifier": "item1", "identifierref": "resource1",