diff --git a/ricecooker/classes/files.py b/ricecooker/classes/files.py index f6451533..2d9d1c33 100644 --- a/ricecooker/classes/files.py +++ b/ricecooker/classes/files.py @@ -2,8 +2,10 @@ from __future__ import unicode_literals import hashlib +import io import json import os +import re import shutil import tempfile import zipfile @@ -12,13 +14,16 @@ from urllib.parse import urlparse from xml.etree import ElementTree +import chardet import filetype +import xmltodict import yt_dlp from cachecontrol.caches.file_cache import FileCache from le_utils.constants import exercises from le_utils.constants import file_formats from le_utils.constants import format_presets from le_utils.constants import languages +from lxml import etree from PIL import Image from PIL import UnidentifiedImageError from requests.exceptions import ConnectionError @@ -37,6 +42,7 @@ from ricecooker.utils.images import create_image_from_zip from ricecooker.utils.images import create_tiled_image from ricecooker.utils.images import ThumbnailGenerationError +from ricecooker.utils.SCORM_metadata import imscp_metadata_keys from ricecooker.utils.subtitles import build_subtitle_converter_from_file from ricecooker.utils.subtitles import InvalidSubtitleFormatError from ricecooker.utils.subtitles import InvalidSubtitleLanguageError @@ -757,8 +763,8 @@ def process_file(self): self.filename = super(HTMLZipFile, self).process_file() if self.filename: try: - # make sure index.html exists unless this is a dependency (i.e. shared resources) zip - if not self.get_preset() == format_presets.HTML5_DEPENDENCY_ZIP: + # make sure index.html exists unless this is a dependency file + if self.get_preset() != format_presets.HTML5_DEPENDENCY_ZIP: with zipfile.ZipFile(config.get_storage_path(self.filename)) as zf: _ = zf.getinfo("index.html") except KeyError as err: @@ -768,6 +774,210 @@ def process_file(self): return self.filename +def denest_xml_value(value, preferred_language): + if isinstance(value, dict): + # Handle the 'string' -> '#text' nested structure + if "string" in value: + return denest_xml_value(value["string"], preferred_language) + elif "langstring" in value: + return denest_xml_value(value["langstring"], preferred_language) + # Handle other simple text and key-value pairs + elif "#text" in value: + return value["#text"] + elif "value" in value: + return value["value"] + elif isinstance(value, list): + try: + return next( + denest_xml_value(item, preferred_language) + for item in value + if item.get("@language", "").startswith(preferred_language) + ) + except StopIteration: + return [denest_xml_value(item, preferred_language) for item in value] + return value + + +class IMSCPZipFile(DownloadFile): + default_ext = file_formats.HTML5 + allowed_formats = [file_formats.HTML5] + is_primary = True + + def get_preset(self): + return self.preset or format_presets.IMSCP_ZIP + + def strip_ns_prefix(self, tree): + """Strip namespace prefixes from an LXML tree. + From https://stackoverflow.com/a/30233635 + """ + for element in tree.xpath("descendant-or-self::*[namespace-uri()!='']"): + element.tag = etree.QName(element).localname + + def _get_elem_for_tag(self, root, tag): + elem = root.find("lom/%s" % tag) + if elem is not None: + return elem + return root.find(tag) + + def collect_metadata(self, root): + metadata_dict = {} + + metadata_elem = root.find("metadata", root.nsmap) + + if metadata_elem is None: + return metadata_dict + + # Check for external metadata reference + external_metadata_ref = metadata_elem.find( + "adlcp:location", + namespaces={"adlcp": "http://www.adlnet.org/xsd/adlcp_v1p3"}, + ) + if external_metadata_ref is not None: + # External metadata file path + external_file_path = external_metadata_ref.text + with self.open_zip() as zip_file: + with zip_file.open(external_file_path) as external_file: + metadata_elem = etree.parse(external_file).getroot() + + self.strip_ns_prefix(metadata_elem) + preferred_language = self.language + + if preferred_language is None: + elem = self._get_elem_for_tag(metadata_elem, "general") + if elem is not None: + values = xmltodict.parse(etree.tostring(elem)) + if "language" in values["general"]: + preferred_language = denest_xml_value( + values["general"]["language"], None + ) + + for tag, fields in imscp_metadata_keys.items(): + elem = self._get_elem_for_tag(metadata_elem, tag) + if elem is not None: + values = xmltodict.parse(etree.tostring(elem)) + for field in fields: + if field in values[tag]: + metadata_dict[field] = denest_xml_value( + values[tag][field], preferred_language + ) + + return metadata_dict + + @contextmanager + def open_zip(self): + with zipfile.ZipFile(config.get_storage_path(self.get_filename())) as zf: + yield zf + + def get_manifest(self): + with self.open_zip() as zf: + try: + with zf.open("imsmanifest.xml") as manifest_file: + return etree.parse(manifest_file).getroot() + except etree.XMLSyntaxError: + # we've run across XML files that are marked as UTF-8 encoded but which have non-UTF-8 characters in them + # for this case, detect the 'real' encoding and decode it as unicode, then make it actual UTF-8 and parse. + f = zf.open("imsmanifest.xml", "r") + data = f.read() + f.close() + + info = chardet.detect(data) + data = data.decode(info["encoding"]) + return etree.parse(io.BytesIO(data.encode("utf-8"))).getroot() + + def walk_items(self, root): + root_dict = dict(root.items()) + + title_elem = root.find("title", root.nsmap) + if title_elem is not None: + # title_elem.text has issues when there are BR tags. Instead get ALL text, ignoring BR tags. + # As BR tags do not make sense in metadata, we can assume it's an editor glitch causing it. + text = "" + for child in title_elem.iter(): + if child.text: + text += child.text + if child.tail: + text += child.tail + assert text.strip(), "Title element has no title: {}".format( + etree.tostring(title_elem, pretty_print=True) + ) + root_dict["title"] = text.strip() + + root_dict["metadata"] = self.collect_metadata(root) + + children = [] + for item in root.findall("item", root.nsmap): + children.append(self.walk_items(item)) + + if children: + root_dict["children"] = children + + return root_dict + + def derive_content_files_dict(self, resource_elem, resources_dict): + nsmap = resource_elem.nsmap + file_elements = resource_elem.findall("file", nsmap) + base = resource_elem.get("{http://www.w3.org/XML/1998/namespace}base") or "" + file_paths = [base + fe.get("href") for fe in file_elements] + dep_elements = resource_elem.findall("dependency", nsmap) + dep_paths = [] + for de in dep_elements: + dre = resources_dict[de.get("identifierref")] + dep_paths.extend(self.derive_content_files_dict(dre, resources_dict)) + return file_paths + dep_paths + + def collect_resources(self, item, resources_dict): + if item.get("children"): + for child in item["children"]: + self.collect_resources(child, resources_dict) + elif item.get("identifierref"): + resource_elem = resources_dict[item["identifierref"]] + + # Add all resource attrs to item dict + for key, value in resource_elem.items(): + key_stripped = re.sub("^{.*}", "", key) # Strip any namespace prefix + # Don't overwrite existing keys + if key_stripped not in item: + item[key_stripped] = value + + if resource_elem.get("type") == "webcontent": + item["files"] = self.derive_content_files_dict( + resource_elem, resources_dict + ) + + def extract_metadata(self): + """Extract metadata and topic tree info from an IMSCP file. + Return a dict {'metadata': {...}, 'organizations': [list of topic dicts]} + """ + manifest = self.get_manifest() + + nsmap = manifest.nsmap + + metadata = self.collect_metadata(manifest) + + if self.language is None and metadata.get("language"): + self.set_language(metadata.get("language")) + + resources_elem = manifest.find("resources", nsmap) + resources_dict = dict((r.get("identifier"), r) for r in resources_elem) + + organizations = [] + for org_elem in manifest.findall("organizations/organization", nsmap): + item_tree = self.walk_items(org_elem) + self.collect_resources(item_tree, resources_dict) + organizations.append(item_tree) + + return { + "identifier": manifest.get("identifier"), + "metadata": metadata, + "organizations": organizations, + } + + +class QTIZipFile(IMSCPZipFile): + def get_preset(self): + return self.preset or format_presets.QTI_ZIP + + class H5PFile(DownloadFile): default_ext = file_formats.H5P allowed_formats = [file_formats.H5P] diff --git a/ricecooker/classes/nodes.py b/ricecooker/classes/nodes.py index efdd012c..bacb93f1 100644 --- a/ricecooker/classes/nodes.py +++ b/ricecooker/classes/nodes.py @@ -21,6 +21,7 @@ from ..exceptions import InvalidNodeException from ..utils.utils import is_valid_uuid_string from .licenses import License +from ricecooker.utils.SCORM_metadata import update_node_from_metadata MASTERY_MODELS = [id for id, name in exercises.MASTERY_MODELS] ROLES = [id for id, name in roles.choices] @@ -31,6 +32,7 @@ class Node(object): license = None language = None + kind = None def __init__( self, @@ -40,7 +42,7 @@ def __init__( thumbnail=None, files=None, derive_thumbnail=False, - node_modifications={}, + node_modifications=None, extra_fields=None, ): self.files = [] @@ -60,7 +62,7 @@ def __init__( self.set_thumbnail(thumbnail) # save modifications passed in by csv - self.node_modifications = node_modifications + self.node_modifications = node_modifications or {} def set_language(self, language): """Set self.language to internal lang. repr. code from str or Language object.""" @@ -1647,3 +1649,111 @@ def to_dict(self): # add alias for back-compatibility RemoteContentNode = StudioContentNode + + +class IMSCPNode(TreeNode): + + kind = content_kinds.HTML5 + + @classmethod + def _recurse_and_add_children(cls, parent, item_data, license): + source_id = item_data.get("identifier", item_data["title"]) + if item_data.get("children") is not None: + node = TopicNode( + source_id, + item_data["title"], + files=parent.files, + ) + for child in item_data["children"]: + cls._recurse_and_add_children(node, child, license) + else: + node = ContentNode( + source_id, + item_data["title"], + license, + files=parent.files, + extra_fields={ + "options": { + "entry": item_data["href"] + item_data.get("parameters", "") + } + }, + ) + node.kind = cls.kind + update_node_from_metadata(node, item_data["metadata"]) + parent.add_child(node) + + def __new__(cls, *args, **kwargs): + from .files import IMSCPZipFile + + imscp_files = [f for f in kwargs["files"] if isinstance(f, IMSCPZipFile)] + if not imscp_files or len(imscp_files) > 1: + raise InvalidNodeException( + "IMSCPNode must be instantiated with exactly one IMSCPZipFile" + ) + imscp_file = imscp_files[0] + metadata = imscp_file.extract_metadata() + if "title" in metadata["metadata"] and kwargs.get("title") is None: + kwargs["title"] = metadata["metadata"]["title"] + if kwargs.get("title") is None: + raise InvalidNodeException( + "No title was provided and the IMSCP file {} does not have a title".format( + imscp_file.path + ) + ) + + if "identifier" in metadata and kwargs.get("source_id") is None: + kwargs["source_id"] = metadata["identifier"] + if kwargs.get("source_id") is None: + raise InvalidNodeException( + "No source_id was provided and the IMSCP file {} does not have an identifier".format( + imscp_file.path + ) + ) + license = kwargs.pop("license") + if license is None: + raise InvalidNodeException( + "No license was provided and we cannot infer license from an IMSCP file" + ) + if metadata.get("organizations"): + node = TopicNode(*args, **kwargs) + + for child in metadata["organizations"]: + cls._recurse_and_add_children(node, child, license) + else: + node = ContentNode(*args, **kwargs) + node.kind = cls.kind + update_node_from_metadata(node, metadata["metadata"]) + return node + + +class QTINode(IMSCPNode): + """ + Node representing QTI exercise + """ + + kind = content_kinds.EXERCISE + + def validate(self): + """validate: Makes sure QTI is valid + Args: None + Returns: boolean indicating if QTI is valid + """ + from .files import QTIZipFile + + try: + assert ( + self.kind == content_kinds.EXERCISE + ), "Assumption Failed: Node should be an Exercise" + assert ( + self.questions == [] + ), "Assumption Failed: QTI should not have questions" + assert [ + f for f in self.files if isinstance(f, QTIZipFile) + ], "Assumption Failed: QTI should have at least one QTI file" + return super(QTINode, self).validate() + except AssertionError as ae: + raise InvalidNodeException( + "Invalid node ({}): {} - {}".format( + ae.args[0], self.title, self.__dict__ + ) + ) diff --git a/ricecooker/utils/SCORM_metadata.py b/ricecooker/utils/SCORM_metadata.py new file mode 100644 index 00000000..12abb47e --- /dev/null +++ b/ricecooker/utils/SCORM_metadata.py @@ -0,0 +1,181 @@ +""" +Utilities for mapping from SCORM metadata to LE Utils metadata. +""" +from le_utils.constants.labels import learning_activities +from le_utils.constants.labels import needs +from le_utils.constants.labels import resource_type +from le_utils.constants.languages import getlang + + +imscp_metadata_keys = { + "general": ["title", "description", "language", "keyword"], + "rights": [], + "educational": [ + "interactivityType", + "interactivityLevel", + "learningResourceType", + "intendedEndUserRole", + ], + "lifecycle": [], +} + + +# Define mappings from SCORM educational types to LE Utils activity types +SCORM_to_learning_activities_mappings = { + "exercise": learning_activities.PRACTICE, + "simulation": learning_activities.EXPLORE, + "questionnaire": learning_activities.PRACTICE, + "diagram": learning_activities.EXPLORE, + "figure": learning_activities.EXPLORE, + "graph": learning_activities.EXPLORE, + "index": learning_activities.READ, + "slide": learning_activities.READ, + "table": learning_activities.READ, + "narrative text": learning_activities.READ, + "exam": learning_activities.PRACTICE, + "experiment": learning_activities.EXPLORE, + "problem statement": learning_activities.REFLECT, + "self assessment": learning_activities.REFLECT, + "lecture": learning_activities.WATCH, +} + + +def map_scorm_to_le_utils_activities(metadata_dict): + le_utils_activities = [] + + # Adjustments based on interactivity + interactive_adjustments = { + learning_activities.EXPLORE: ( + learning_activities.READ, + learning_activities.WATCH, + ) + } + + # Determine the interactivity level and type + interactivity_type = metadata_dict.get("interactivityType") + interactivity_level = metadata_dict.get("interactivityLevel") + + is_interactive = ( + interactivity_type + in [ + "active", + "mixed", + ] + or interactivity_level in ["medium", "high"] + ) + + # Extract the learning resource types from the SCORM data + learning_resource_types = metadata_dict.get("learningResourceType", []) + + # Map each SCORM type to an LE Utils activity type + for learning_resource_type in learning_resource_types: + le_utils_type = SCORM_to_learning_activities_mappings.get( + learning_resource_type + ) + # Adjust based on interactivity + if not is_interactive and le_utils_type in interactive_adjustments: + le_utils_type = ( + interactive_adjustments[le_utils_type][0] + if learning_resource_type == "simulation" + else interactive_adjustments[le_utils_type][1] + ) + + if le_utils_type and le_utils_type not in le_utils_activities: + le_utils_activities.append(le_utils_type) + + return le_utils_activities + + +# Define mappings from SCORM educational types to educator-focused resource types +SCORM_to_resource_type_mappings = { + "exercise": resource_type.EXERCISE, + "simulation": resource_type.ACTIVITY, + "questionnaire": resource_type.ACTIVITY, + "diagram": resource_type.MEDIA, + "figure": resource_type.MEDIA, + "graph": resource_type.MEDIA, + "index": resource_type.GUIDE, + "slide": resource_type.LESSON, + "table": resource_type.TUTORIAL, + "narrative text": resource_type.TEXTBOOK, + "exam": resource_type.EXERCISE, + "experiment": resource_type.ACTIVITY, + "problem statement": resource_type.ACTIVITY, + "self assessment": resource_type.ACTIVITY, + "lecture": resource_type.LESSON, +} + + +# Mapping for intendedEndUserRole when the resource is for educators +SCORM_intended_role_to_resource_type_mapping = { + "teacher": resource_type.LESSON_PLAN, + "author": resource_type.GUIDE, + "manager": resource_type.GUIDE, +} + + +def map_scorm_to_educator_resource_types(metadata_dict): + educator_resource_types = [] + + # Extract the learning resource types and intended end user role from the SCORM data + learning_resource_types = metadata_dict.get("learningResourceType", []) + intended_roles = metadata_dict.get("intendedEndUserRole", []) + + # Map each SCORM type to an educator-focused resource type + for learning_resource_type in learning_resource_types: + mapped_type = SCORM_to_resource_type_mappings.get(learning_resource_type) + if mapped_type and mapped_type not in educator_resource_types: + educator_resource_types.append(mapped_type) + + # Check if the intended end user role matches any educator roles + for role in intended_roles: + if ( + role in SCORM_intended_role_to_resource_type_mapping + and SCORM_intended_role_to_resource_type_mapping[role] + not in educator_resource_types + ): + educator_resource_types.append( + SCORM_intended_role_to_resource_type_mapping[role] + ) + + return educator_resource_types + + +def infer_beginner_level_from_difficulty(metadata_dict): + # Beginner difficulty levels + beginner_difficulties = {"very easy", "easy"} + + # Check if the difficulty level indicates beginner content + difficulty = metadata_dict.get("difficulty") + if difficulty in beginner_difficulties: + return [needs.FOR_BEGINNERS] + + return [] + + +def update_node_from_metadata(node, metadata_dict): + # Update the node with the general metadata + node.description = metadata_dict.get("description") or node.description + lang_code = metadata_dict.get("language", "") + lang_code = ( + lang_code.split("-")[0].lower() if getlang(lang_code) is None else lang_code + ) + if getlang(lang_code): + node.set_language(lang_code) + keyword = metadata_dict.get("keyword", []) + if keyword and isinstance(keyword, str): + keyword = [keyword] + node.tags = node.tags + keyword + + # Update the node with the educational metadata + node.learning_activities = ( + node.learning_activities + map_scorm_to_le_utils_activities(metadata_dict) + ) + node.resource_types = node.resource_types + map_scorm_to_educator_resource_types( + metadata_dict + ) + node.learner_needs = node.learner_needs + infer_beginner_level_from_difficulty( + metadata_dict + ) + + return node diff --git a/setup.py b/setup.py index 1b84ac24..c6c05f5f 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,7 @@ "EbookLib>=0.17.1", "filetype>=1.1.0", "urllib3==2.2.1", + "xmltodict==0.13.0", ], python_requires=">=3.8, <3.11", license="MIT license", diff --git a/tests/test_SCORM_metadata.py b/tests/test_SCORM_metadata.py new file mode 100644 index 00000000..3e0d2a31 --- /dev/null +++ b/tests/test_SCORM_metadata.py @@ -0,0 +1,78 @@ +import pytest +from le_utils.constants.labels import learning_activities +from le_utils.constants.labels import needs +from le_utils.constants.labels import resource_type + +from ricecooker.utils.SCORM_metadata import infer_beginner_level_from_difficulty +from ricecooker.utils.SCORM_metadata import map_scorm_to_educator_resource_types +from ricecooker.utils.SCORM_metadata import map_scorm_to_le_utils_activities + + +@pytest.mark.parametrize( + "scorm_dict, expected_result", + [ + ( + { + "interactivityType": "active", + "interactivityLevel": "high", + "learningResourceType": ["exercise", "simulation"], + }, + [learning_activities.PRACTICE, learning_activities.EXPLORE], + ), + ( + {"learningResourceType": ["lecture", "self assessment"]}, + [learning_activities.REFLECT, learning_activities.WATCH], + ), + ( + { + "interactivityType": "mixed", + "interactivityLevel": "medium", + "learningResourceType": ["simulation", "graph"], + }, + [learning_activities.EXPLORE], + ), + ( + { + "interactivityType": "expositive", + "interactivityLevel": "low", + "learningResourceType": ["simulation", "graph"], + }, + [learning_activities.READ, learning_activities.WATCH], + ), + ], +) +def test_map_scorm_to_le_utils_activities(scorm_dict, expected_result): + assert set(map_scorm_to_le_utils_activities(scorm_dict)) == set(expected_result) + + +@pytest.mark.parametrize( + "scorm_dict, expected_result", + [ + ( + { + "learningResourceType": ["exercise", "lecture"], + "intendedEndUserRole": ["teacher"], + }, + [resource_type.EXERCISE, resource_type.LESSON, resource_type.LESSON_PLAN], + ), + ( + { + "learningResourceType": ["simulation", "figure"], + "intendedEndUserRole": ["author"], + }, + [resource_type.ACTIVITY, resource_type.MEDIA, resource_type.GUIDE], + ), + ], +) +def test_map_scorm_to_educator_resource_types(scorm_dict, expected_result): + assert set(map_scorm_to_educator_resource_types(scorm_dict)) == set(expected_result) + + +def test_infer_beginner_level_from_difficulty(): + scorm_dict_easy = {"difficulty": "easy"} + assert infer_beginner_level_from_difficulty(scorm_dict_easy) == [ + needs.FOR_BEGINNERS + ] + + scorm_dict_hard = {"difficulty": "difficult"} + assert infer_beginner_level_from_difficulty(scorm_dict_hard) == [] diff --git a/tests/test_files.py b/tests/test_files.py index 9c23da2e..3aab32c8 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -2,6 +2,8 @@ import hashlib import os.path import tempfile +import zipfile +from contextlib import contextmanager from shutil import copyfile import pytest @@ -10,6 +12,7 @@ from ricecooker import config from ricecooker.classes.files import _get_language_with_alpha2_fallback +from ricecooker.classes.files import IMSCPZipFile from ricecooker.classes.files import is_youtube_subtitle_file_supported_language from ricecooker.classes.files import SubtitleFile from ricecooker.classes.files import YouTubeSubtitleFile @@ -631,3 +634,216 @@ def test_convertible_substitles_weirdext_subtitlesformat(): assert ( "El total de los protones y neutrones de un átomo" in filecontents ), "missing check words in converted subs" + + +@contextmanager +def create_zip_with_manifest(manifest_filename, *additional_files): + # Create a temporary file for the zipfile but close it immediately for Windows compatibility + temp_zip = tempfile.NamedTemporaryFile(suffix=".zip", delete=False) + temp_zip_path = temp_zip.name + temp_zip.close() # Close the file, so it's not locked + try: + # Define the paths + base_path = os.path.dirname(os.path.abspath(__file__)) + source_folder = os.path.join(base_path, "testcontent", "samples", "ims_xml") + manifest_file = os.path.join(source_folder, manifest_filename) + + # Create the zipfile and add the manifest file + with zipfile.ZipFile(temp_zip_path, "w") as zf: + zf.write(manifest_file, "imsmanifest.xml") + for additional_file in additional_files: + zf.write(os.path.join(source_folder, additional_file), additional_file) + + yield temp_zip.name + finally: + # Clean up the zipfile + try: + os.remove(temp_zip.name) + except (FileNotFoundError, OSError): + pass + + +expected_simple_metadata = { + "identifier": None, + "metadata": { + "description": "Example of test file", + "language": "en", + "title": "Test File", + }, + "organizations": [ + { + "children": [ + { + "files": [], + "href": "file1.html", + "identifier": "file1Ref", + "identifierref": "file1Ref", + "title": "Test File1", + "type": "webcontent", + "metadata": {}, + }, + { + "files": [], + "href": "file2.html", + "identifier": "file2Ref", + "identifierref": "file2Ref", + "title": "Test File2", + "type": "webcontent", + "metadata": {}, + }, + { + "children": [ + { + "files": [], + "href": "file1.html", + "identifier": "file1Ref", + "identifierref": "file1Ref", + "metadata": {}, + "title": "Test File1 Nested", + "type": "webcontent", + }, + { + "files": [], + "href": "file2.html", + "identifier": "file2Ref", + "identifierref": "file2Ref", + "metadata": {}, + "title": "Test File2 Nested", + "type": "webcontent", + }, + ], + "identifier": "folder2", + "metadata": {}, + "title": "Folder 2", + }, + { + "children": [ + { + "files": [], + "href": "file3.html", + "identifier": "file3Ref", + "identifierref": "file3Ref", + "metadata": {}, + "title": "Test File3 Nested", + "type": "webcontent", + }, + { + "files": [], + "href": "file4.html", + "identifier": "file4Ref", + "identifierref": "file4Ref", + "metadata": {}, + "title": "Test File4 Nested", + "type": "webcontent", + }, + ], + "identifier": "folder3", + "metadata": {}, + "title": "Folder 3", + }, + ], + "title": "Folder 1", + "metadata": {}, + }, + { + "children": [ + { + "files": [], + "href": "file3.html", + "identifier": "file3Ref", + "identifierref": "file3Ref", + "title": "Test File3", + "type": "webcontent", + "metadata": {}, + }, + { + "files": [], + "href": "file4.html", + "identifier": "file4Ref", + "identifierref": "file4Ref", + "title": "Test File4", + "type": "webcontent", + "metadata": {}, + }, + ], + "title": "Folder 4", + "metadata": {}, + }, + ], +} + + +def test_extract_metadata_from_simple_manifest(): + with create_zip_with_manifest("simple_manifest.xml") as zip_path: + imscp_file = IMSCPZipFile(zip_path) + assert imscp_file.extract_metadata() == expected_simple_metadata + + +expected_complex_metadata = { + "identifier": "com.example.hummingbirds.contentpackaging.metadata.2024", + "metadata": { + "description": "An engaging overview of hummingbirds, covering their biology, behavior, and habitats. This course is designed for enthusiasts and bird watchers of all levels.", # noqa E501 + "language": "en", + "title": "Discovering Hummingbirds", + "keyword": ["hummingbirds", "bird watching", "ornithology"], + "intendedEndUserRole": "learner", + "interactivityLevel": "medium", + "learningResourceType": ["documentary", "interactive lesson"], + }, + "organizations": [ + { + "title": "Discovering Hummingbirds", + "identifier": "hummingbirds_default_org", + "metadata": { + "description": "This metadata provides information about the structure and organization of the Discovering Hummingbirds course. The course is organized in a hierarchical manner, guiding learners from basic concepts to more detailed aspects of hummingbird biology and behavior.", # noqa E501 + }, + "children": [ + { + "title": "Introduction to Hummingbirds", + "metadata": { + "description": "Explore the fascinating world of hummingbirds, their unique characteristics and behaviors.", + "language": "en-US", + }, + "identifier": "item1", + "identifierref": "resource1", + "href": "intro.html", + "type": "webcontent", + "files": ["intro.html", "shared/style.css"], + }, + { + "title": "Hummingbird Habitats", + "metadata": { + "description": "Learn about various habitats of hummingbirds, from tropical jungles to backyard gardens.", + }, + "identifier": "item2", + "identifierref": "resource2", + "href": "habitats.html", + "type": "webcontent", + "files": ["habitats.html", "shared/image1.jpg"], + }, + { + "title": "Hummingbird Species", + "metadata": { + "description": "Discover the diversity of hummingbird species and their unique adaptations.", + }, + "identifier": "item3", + "identifierref": "resource3", + "href": "species.html", + "type": "webcontent", + "files": ["species.html", "shared/image2.jpg"], + }, + ], + }, + ], +} + + +def test_extract_metadata_from_complex_manifest(): + # Additional metadata files are passed as arguments to the context manager + with create_zip_with_manifest( + "complete_manifest_with_external_metadata.xml", + "metadata_hummingbirds_course.xml", + "metadata_hummingbirds_organization.xml", + ) as zip_path: + imscp_file = IMSCPZipFile(zip_path) + assert imscp_file.extract_metadata() == expected_complex_metadata diff --git a/tests/test_tree.py b/tests/test_tree.py index d844d0f9..5ba9f7d9 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -11,16 +11,22 @@ from le_utils.constants import licenses from le_utils.constants.labels import levels from le_utils.constants.languages import getlang +from test_files import create_zip_with_manifest +from test_files import expected_complex_metadata +from test_files import expected_simple_metadata from ricecooker.classes.files import DocumentFile from ricecooker.classes.files import HTMLZipFile +from ricecooker.classes.files import IMSCPZipFile from ricecooker.classes.files import SlideImageFile from ricecooker.classes.files import ThumbnailFile from ricecooker.classes.licenses import get_license from ricecooker.classes.licenses import License +from ricecooker.classes.nodes import ContentNode from ricecooker.classes.nodes import CustomNavigationChannelNode from ricecooker.classes.nodes import CustomNavigationNode from ricecooker.classes.nodes import DocumentNode +from ricecooker.classes.nodes import IMSCPNode from ricecooker.classes.nodes import RemoteContentNode from ricecooker.classes.nodes import SlideshowNode from ricecooker.classes.nodes import TopicNode @@ -688,3 +694,40 @@ def test_remote_content_node_with_invalid_overridden_field(): author="Such disallowed. Computer says no.", ) node.validate_tree() + + +def assert_node_matches_metadata(node, metadata, files): + + extra_metadata = metadata["metadata"] + + assert node.title == extra_metadata["title"] + if metadata.get("identifier"): + assert node.source_id == metadata.get("identifier") + + # Check for ContentNode or TopicNode + if isinstance(node, ContentNode): + assert node.files == files + assert node.extra_fields.get("options", {}).get("entry") == metadata.get("href") + elif isinstance(node, TopicNode): + for child_node, child_metadata in zip( + node.children, metadata.get("children", []) + ): + assert_node_matches_metadata(child_node, child_metadata, files) + + +def test_imscp_node_simple_manifest(): + with create_zip_with_manifest("simple_manifest.xml") as zip_path: + imscp_file = IMSCPZipFile(zip_path) + node = IMSCPNode(source_id="test", license="CC BY", files=[imscp_file]) + assert_node_matches_metadata(node, expected_simple_metadata, [imscp_file]) + + +def test_imscp_node_complex_manifest(): + with create_zip_with_manifest( + "complete_manifest_with_external_metadata.xml", + "metadata_hummingbirds_course.xml", + "metadata_hummingbirds_organization.xml", + ) as zip_path: + imscp_file = IMSCPZipFile(zip_path) + node = IMSCPNode(license="CC BY", files=[imscp_file]) + assert_node_matches_metadata(node, expected_complex_metadata, [imscp_file]) diff --git a/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml b/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml new file mode 100644 index 00000000..804647ec --- /dev/null +++ b/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml @@ -0,0 +1,103 @@ + + + + + + ADL SCORM + 2004 3rd Edition + + metadata_hummingbirds_course.xml + + + + + Discovering Hummingbirds + + + metadata_hummingbirds_organization.xml + + + + + Introduction to Hummingbirds + + + + + Explore the fascinating world of hummingbirds, their unique characteristics and behaviors. + + en-US + + + + + + + + Hummingbird Habitats + + + + + Learn about various habitats of hummingbirds, from tropical jungles to backyard gardens. + + + + + + + + + Hummingbird Species + + + + + Discover the diversity of hummingbird species and their unique adaptations. + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml new file mode 100644 index 00000000..cb3c7346 --- /dev/null +++ b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml @@ -0,0 +1,232 @@ + + + + + URI + com.example.hummingbirds.contentpackaging.metadata.2024 + + + <string language="en-US">Discovering Hummingbirds</string> + <string language="es">Descubriendo Colibríes</string> + + en + + An engaging overview of hummingbirds, covering their biology, behavior, and habitats. This course is designed for enthusiasts and bird watchers of all levels. + + + hummingbirds + + + bird watching + + + ornithology + + + Global range, with a focus on the Americas. + + + LOMv1.0 + hierarchical + + + LOMv1.0 + 2 + + + + + + 1.0 + + + LOMv1.0 + final + + + + LOMv1.0 + publisher + + BEGIN:VCARD +VERSION:2.1 +FN:John Doe +ORG:Example Organization +TEL;WORK;VOICE:(555) 123-4567 +ADR;WORK:;;1234 Main St;Anytown;AN;12345;Country +EMAIL;PREF;INTERNET:john.doe@example.com +END:VCARD + + + 2024-01-01 + + Initial publication date of the course. + + + + + + + + URI + com.example.hummingbirds.metadata.2024 + + LOMv1.0 + SCORM_CAM_v1.3 + en-us + + + + text/html + image/jpeg + video/mp4 + 1048576 + http://www.example.com/hummingbirds + + + + LOMv1.0 + browser + + + LOMv1.0 + any + + + + + No specific installation requirements, accessible via standard web browsers. + + + Optimized for both desktop and mobile platforms. + + + P1H + + The average time to complete the course is approximately 1 hour. + + + + + + + LOMv1.0 + documentary + + + LOMv1.0 + interactive lesson + + + LOMv1.0 + medium + + + LOMv1.0 + medium + + + LOMv1.0 + learner + + + LOMv1.0 + training + + + Age 12 and up + + + LOMv1.0 + easy + + + P1H + + Approximately 1 hour is needed to complete the course. + + + + This course provides an introduction to hummingbirds, suitable for beginners in ornithology and bird watching. + + en-us + + + + + LOMv1.0 + no + + + LOMv1.0 + yes + + + Content is protected under Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. + + + + + + LOMv1.0 + ispartof + + + + URI + com.example.biologyseries + + + Part of a larger series on biology and wildlife. + + + + + + BEGIN:VCARD +VERSION:2.1 +FN:Jane Smith +ORG:Example Organization +TEL;WORK;VOICE:(555) 987-6543 +ADR;WORK:;;4321 Side St;Anytown;AN;54321;Country +EMAIL;PREF;INTERNET:jane.smith@example.com +END:VCARD + + + 2024-02-01 + + Annotation added to provide additional insights into course content. + + + + This course includes interactive elements to enhance learner engagement with the subject of hummingbirds. + + + + + + LOMv1.0 + discipline + + + + Example Organization's catalog of biology courses + + + ornithology_basics + + Basic Ornithology + + + + + This course serves as an introductory module in the Basic Ornithology series, focusing on hummingbirds. + + + biology + + + wildlife + + + diff --git a/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml new file mode 100644 index 00000000..87018dfa --- /dev/null +++ b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml @@ -0,0 +1,14 @@ + + + + + + This metadata provides information about the structure and organization of the Discovering Hummingbirds course. The course is organized in a hierarchical manner, guiding learners from basic concepts to more detailed aspects of hummingbird biology and behavior. + + + + LOMv1.0 + hierarchical + + + diff --git a/tests/testcontent/samples/ims_xml/simple_manifest.xml b/tests/testcontent/samples/ims_xml/simple_manifest.xml new file mode 100644 index 00000000..bcbb2f3a --- /dev/null +++ b/tests/testcontent/samples/ims_xml/simple_manifest.xml @@ -0,0 +1,63 @@ + + + + + + Test File + + en + + Example of test file + + + + + + + Folder 1 + + Test File1 + + + Test File2 + + + Folder 2 + + Test File1 Nested + + + Test File2 Nested + + + + Folder 3 + + Test File3 Nested + + + Test File4 Nested + + + + + Folder 4 + + Test File3 + + + Test File4 + + + + + + + + + + + + + +