diff --git a/ricecooker/classes/files.py b/ricecooker/classes/files.py
index f6451533..2d9d1c33 100644
--- a/ricecooker/classes/files.py
+++ b/ricecooker/classes/files.py
@@ -2,8 +2,10 @@
from __future__ import unicode_literals
import hashlib
+import io
import json
import os
+import re
import shutil
import tempfile
import zipfile
@@ -12,13 +14,16 @@
from urllib.parse import urlparse
from xml.etree import ElementTree
+import chardet
import filetype
+import xmltodict
import yt_dlp
from cachecontrol.caches.file_cache import FileCache
from le_utils.constants import exercises
from le_utils.constants import file_formats
from le_utils.constants import format_presets
from le_utils.constants import languages
+from lxml import etree
from PIL import Image
from PIL import UnidentifiedImageError
from requests.exceptions import ConnectionError
@@ -37,6 +42,7 @@
from ricecooker.utils.images import create_image_from_zip
from ricecooker.utils.images import create_tiled_image
from ricecooker.utils.images import ThumbnailGenerationError
+from ricecooker.utils.SCORM_metadata import imscp_metadata_keys
from ricecooker.utils.subtitles import build_subtitle_converter_from_file
from ricecooker.utils.subtitles import InvalidSubtitleFormatError
from ricecooker.utils.subtitles import InvalidSubtitleLanguageError
@@ -757,8 +763,8 @@ def process_file(self):
self.filename = super(HTMLZipFile, self).process_file()
if self.filename:
try:
- # make sure index.html exists unless this is a dependency (i.e. shared resources) zip
- if not self.get_preset() == format_presets.HTML5_DEPENDENCY_ZIP:
+ # make sure index.html exists unless this is a dependency file
+ if self.get_preset() != format_presets.HTML5_DEPENDENCY_ZIP:
with zipfile.ZipFile(config.get_storage_path(self.filename)) as zf:
_ = zf.getinfo("index.html")
except KeyError as err:
@@ -768,6 +774,210 @@ def process_file(self):
return self.filename
+def denest_xml_value(value, preferred_language):
+ if isinstance(value, dict):
+ # Handle the 'string' -> '#text' nested structure
+ if "string" in value:
+ return denest_xml_value(value["string"], preferred_language)
+ elif "langstring" in value:
+ return denest_xml_value(value["langstring"], preferred_language)
+ # Handle other simple text and key-value pairs
+ elif "#text" in value:
+ return value["#text"]
+ elif "value" in value:
+ return value["value"]
+ elif isinstance(value, list):
+ try:
+ return next(
+ denest_xml_value(item, preferred_language)
+ for item in value
+ if item.get("@language", "").startswith(preferred_language)
+ )
+ except StopIteration:
+ return [denest_xml_value(item, preferred_language) for item in value]
+ return value
+
+
+class IMSCPZipFile(DownloadFile):
+ default_ext = file_formats.HTML5
+ allowed_formats = [file_formats.HTML5]
+ is_primary = True
+
+ def get_preset(self):
+ return self.preset or format_presets.IMSCP_ZIP
+
+ def strip_ns_prefix(self, tree):
+ """Strip namespace prefixes from an LXML tree.
+ From https://stackoverflow.com/a/30233635
+ """
+ for element in tree.xpath("descendant-or-self::*[namespace-uri()!='']"):
+ element.tag = etree.QName(element).localname
+
+ def _get_elem_for_tag(self, root, tag):
+ elem = root.find("lom/%s" % tag)
+ if elem is not None:
+ return elem
+ return root.find(tag)
+
+ def collect_metadata(self, root):
+ metadata_dict = {}
+
+ metadata_elem = root.find("metadata", root.nsmap)
+
+ if metadata_elem is None:
+ return metadata_dict
+
+ # Check for external metadata reference
+ external_metadata_ref = metadata_elem.find(
+ "adlcp:location",
+ namespaces={"adlcp": "http://www.adlnet.org/xsd/adlcp_v1p3"},
+ )
+ if external_metadata_ref is not None:
+ # External metadata file path
+ external_file_path = external_metadata_ref.text
+ with self.open_zip() as zip_file:
+ with zip_file.open(external_file_path) as external_file:
+ metadata_elem = etree.parse(external_file).getroot()
+
+ self.strip_ns_prefix(metadata_elem)
+ preferred_language = self.language
+
+ if preferred_language is None:
+ elem = self._get_elem_for_tag(metadata_elem, "general")
+ if elem is not None:
+ values = xmltodict.parse(etree.tostring(elem))
+ if "language" in values["general"]:
+ preferred_language = denest_xml_value(
+ values["general"]["language"], None
+ )
+
+ for tag, fields in imscp_metadata_keys.items():
+ elem = self._get_elem_for_tag(metadata_elem, tag)
+ if elem is not None:
+ values = xmltodict.parse(etree.tostring(elem))
+ for field in fields:
+ if field in values[tag]:
+ metadata_dict[field] = denest_xml_value(
+ values[tag][field], preferred_language
+ )
+
+ return metadata_dict
+
+ @contextmanager
+ def open_zip(self):
+ with zipfile.ZipFile(config.get_storage_path(self.get_filename())) as zf:
+ yield zf
+
+ def get_manifest(self):
+ with self.open_zip() as zf:
+ try:
+ with zf.open("imsmanifest.xml") as manifest_file:
+ return etree.parse(manifest_file).getroot()
+ except etree.XMLSyntaxError:
+ # we've run across XML files that are marked as UTF-8 encoded but which have non-UTF-8 characters in them
+ # for this case, detect the 'real' encoding and decode it as unicode, then make it actual UTF-8 and parse.
+ f = zf.open("imsmanifest.xml", "r")
+ data = f.read()
+ f.close()
+
+ info = chardet.detect(data)
+ data = data.decode(info["encoding"])
+ return etree.parse(io.BytesIO(data.encode("utf-8"))).getroot()
+
+ def walk_items(self, root):
+ root_dict = dict(root.items())
+
+ title_elem = root.find("title", root.nsmap)
+ if title_elem is not None:
+ # title_elem.text has issues when there are BR tags. Instead get ALL text, ignoring BR tags.
+ # As BR tags do not make sense in metadata, we can assume it's an editor glitch causing it.
+ text = ""
+ for child in title_elem.iter():
+ if child.text:
+ text += child.text
+ if child.tail:
+ text += child.tail
+ assert text.strip(), "Title element has no title: {}".format(
+ etree.tostring(title_elem, pretty_print=True)
+ )
+ root_dict["title"] = text.strip()
+
+ root_dict["metadata"] = self.collect_metadata(root)
+
+ children = []
+ for item in root.findall("item", root.nsmap):
+ children.append(self.walk_items(item))
+
+ if children:
+ root_dict["children"] = children
+
+ return root_dict
+
+ def derive_content_files_dict(self, resource_elem, resources_dict):
+ nsmap = resource_elem.nsmap
+ file_elements = resource_elem.findall("file", nsmap)
+ base = resource_elem.get("{http://www.w3.org/XML/1998/namespace}base") or ""
+ file_paths = [base + fe.get("href") for fe in file_elements]
+ dep_elements = resource_elem.findall("dependency", nsmap)
+ dep_paths = []
+ for de in dep_elements:
+ dre = resources_dict[de.get("identifierref")]
+ dep_paths.extend(self.derive_content_files_dict(dre, resources_dict))
+ return file_paths + dep_paths
+
+ def collect_resources(self, item, resources_dict):
+ if item.get("children"):
+ for child in item["children"]:
+ self.collect_resources(child, resources_dict)
+ elif item.get("identifierref"):
+ resource_elem = resources_dict[item["identifierref"]]
+
+ # Add all resource attrs to item dict
+ for key, value in resource_elem.items():
+ key_stripped = re.sub("^{.*}", "", key) # Strip any namespace prefix
+ # Don't overwrite existing keys
+ if key_stripped not in item:
+ item[key_stripped] = value
+
+ if resource_elem.get("type") == "webcontent":
+ item["files"] = self.derive_content_files_dict(
+ resource_elem, resources_dict
+ )
+
+ def extract_metadata(self):
+ """Extract metadata and topic tree info from an IMSCP file.
+ Return a dict {'metadata': {...}, 'organizations': [list of topic dicts]}
+ """
+ manifest = self.get_manifest()
+
+ nsmap = manifest.nsmap
+
+ metadata = self.collect_metadata(manifest)
+
+ if self.language is None and metadata.get("language"):
+ self.set_language(metadata.get("language"))
+
+ resources_elem = manifest.find("resources", nsmap)
+ resources_dict = dict((r.get("identifier"), r) for r in resources_elem)
+
+ organizations = []
+ for org_elem in manifest.findall("organizations/organization", nsmap):
+ item_tree = self.walk_items(org_elem)
+ self.collect_resources(item_tree, resources_dict)
+ organizations.append(item_tree)
+
+ return {
+ "identifier": manifest.get("identifier"),
+ "metadata": metadata,
+ "organizations": organizations,
+ }
+
+
+class QTIZipFile(IMSCPZipFile):
+ def get_preset(self):
+ return self.preset or format_presets.QTI_ZIP
+
+
class H5PFile(DownloadFile):
default_ext = file_formats.H5P
allowed_formats = [file_formats.H5P]
diff --git a/ricecooker/classes/nodes.py b/ricecooker/classes/nodes.py
index efdd012c..bacb93f1 100644
--- a/ricecooker/classes/nodes.py
+++ b/ricecooker/classes/nodes.py
@@ -21,6 +21,7 @@
from ..exceptions import InvalidNodeException
from ..utils.utils import is_valid_uuid_string
from .licenses import License
+from ricecooker.utils.SCORM_metadata import update_node_from_metadata
MASTERY_MODELS = [id for id, name in exercises.MASTERY_MODELS]
ROLES = [id for id, name in roles.choices]
@@ -31,6 +32,7 @@ class Node(object):
license = None
language = None
+ kind = None
def __init__(
self,
@@ -40,7 +42,7 @@ def __init__(
thumbnail=None,
files=None,
derive_thumbnail=False,
- node_modifications={},
+ node_modifications=None,
extra_fields=None,
):
self.files = []
@@ -60,7 +62,7 @@ def __init__(
self.set_thumbnail(thumbnail)
# save modifications passed in by csv
- self.node_modifications = node_modifications
+ self.node_modifications = node_modifications or {}
def set_language(self, language):
"""Set self.language to internal lang. repr. code from str or Language object."""
@@ -1647,3 +1649,111 @@ def to_dict(self):
# add alias for back-compatibility
RemoteContentNode = StudioContentNode
+
+
+class IMSCPNode(TreeNode):
+
+ kind = content_kinds.HTML5
+
+ @classmethod
+ def _recurse_and_add_children(cls, parent, item_data, license):
+ source_id = item_data.get("identifier", item_data["title"])
+ if item_data.get("children") is not None:
+ node = TopicNode(
+ source_id,
+ item_data["title"],
+ files=parent.files,
+ )
+ for child in item_data["children"]:
+ cls._recurse_and_add_children(node, child, license)
+ else:
+ node = ContentNode(
+ source_id,
+ item_data["title"],
+ license,
+ files=parent.files,
+ extra_fields={
+ "options": {
+ "entry": item_data["href"] + item_data.get("parameters", "")
+ }
+ },
+ )
+ node.kind = cls.kind
+ update_node_from_metadata(node, item_data["metadata"])
+ parent.add_child(node)
+
+ def __new__(cls, *args, **kwargs):
+ from .files import IMSCPZipFile
+
+ imscp_files = [f for f in kwargs["files"] if isinstance(f, IMSCPZipFile)]
+ if not imscp_files or len(imscp_files) > 1:
+ raise InvalidNodeException(
+ "IMSCPNode must be instantiated with exactly one IMSCPZipFile"
+ )
+ imscp_file = imscp_files[0]
+ metadata = imscp_file.extract_metadata()
+ if "title" in metadata["metadata"] and kwargs.get("title") is None:
+ kwargs["title"] = metadata["metadata"]["title"]
+ if kwargs.get("title") is None:
+ raise InvalidNodeException(
+ "No title was provided and the IMSCP file {} does not have a title".format(
+ imscp_file.path
+ )
+ )
+
+ if "identifier" in metadata and kwargs.get("source_id") is None:
+ kwargs["source_id"] = metadata["identifier"]
+ if kwargs.get("source_id") is None:
+ raise InvalidNodeException(
+ "No source_id was provided and the IMSCP file {} does not have an identifier".format(
+ imscp_file.path
+ )
+ )
+ license = kwargs.pop("license")
+ if license is None:
+ raise InvalidNodeException(
+ "No license was provided and we cannot infer license from an IMSCP file"
+ )
+ if metadata.get("organizations"):
+ node = TopicNode(*args, **kwargs)
+
+ for child in metadata["organizations"]:
+ cls._recurse_and_add_children(node, child, license)
+ else:
+ node = ContentNode(*args, **kwargs)
+ node.kind = cls.kind
+ update_node_from_metadata(node, metadata["metadata"])
+ return node
+
+
+class QTINode(IMSCPNode):
+ """
+ Node representing QTI exercise
+ """
+
+ kind = content_kinds.EXERCISE
+
+ def validate(self):
+ """validate: Makes sure QTI is valid
+ Args: None
+ Returns: boolean indicating if QTI is valid
+ """
+ from .files import QTIZipFile
+
+ try:
+ assert (
+ self.kind == content_kinds.EXERCISE
+ ), "Assumption Failed: Node should be an Exercise"
+ assert (
+ self.questions == []
+ ), "Assumption Failed: QTI should not have questions"
+ assert [
+ f for f in self.files if isinstance(f, QTIZipFile)
+ ], "Assumption Failed: QTI should have at least one QTI file"
+ return super(QTINode, self).validate()
+ except AssertionError as ae:
+ raise InvalidNodeException(
+ "Invalid node ({}): {} - {}".format(
+ ae.args[0], self.title, self.__dict__
+ )
+ )
diff --git a/ricecooker/utils/SCORM_metadata.py b/ricecooker/utils/SCORM_metadata.py
new file mode 100644
index 00000000..12abb47e
--- /dev/null
+++ b/ricecooker/utils/SCORM_metadata.py
@@ -0,0 +1,181 @@
+"""
+Utilities for mapping from SCORM metadata to LE Utils metadata.
+"""
+from le_utils.constants.labels import learning_activities
+from le_utils.constants.labels import needs
+from le_utils.constants.labels import resource_type
+from le_utils.constants.languages import getlang
+
+
+imscp_metadata_keys = {
+ "general": ["title", "description", "language", "keyword"],
+ "rights": [],
+ "educational": [
+ "interactivityType",
+ "interactivityLevel",
+ "learningResourceType",
+ "intendedEndUserRole",
+ ],
+ "lifecycle": [],
+}
+
+
+# Define mappings from SCORM educational types to LE Utils activity types
+SCORM_to_learning_activities_mappings = {
+ "exercise": learning_activities.PRACTICE,
+ "simulation": learning_activities.EXPLORE,
+ "questionnaire": learning_activities.PRACTICE,
+ "diagram": learning_activities.EXPLORE,
+ "figure": learning_activities.EXPLORE,
+ "graph": learning_activities.EXPLORE,
+ "index": learning_activities.READ,
+ "slide": learning_activities.READ,
+ "table": learning_activities.READ,
+ "narrative text": learning_activities.READ,
+ "exam": learning_activities.PRACTICE,
+ "experiment": learning_activities.EXPLORE,
+ "problem statement": learning_activities.REFLECT,
+ "self assessment": learning_activities.REFLECT,
+ "lecture": learning_activities.WATCH,
+}
+
+
+def map_scorm_to_le_utils_activities(metadata_dict):
+ le_utils_activities = []
+
+ # Adjustments based on interactivity
+ interactive_adjustments = {
+ learning_activities.EXPLORE: (
+ learning_activities.READ,
+ learning_activities.WATCH,
+ )
+ }
+
+ # Determine the interactivity level and type
+ interactivity_type = metadata_dict.get("interactivityType")
+ interactivity_level = metadata_dict.get("interactivityLevel")
+
+ is_interactive = (
+ interactivity_type
+ in [
+ "active",
+ "mixed",
+ ]
+ or interactivity_level in ["medium", "high"]
+ )
+
+ # Extract the learning resource types from the SCORM data
+ learning_resource_types = metadata_dict.get("learningResourceType", [])
+
+ # Map each SCORM type to an LE Utils activity type
+ for learning_resource_type in learning_resource_types:
+ le_utils_type = SCORM_to_learning_activities_mappings.get(
+ learning_resource_type
+ )
+ # Adjust based on interactivity
+ if not is_interactive and le_utils_type in interactive_adjustments:
+ le_utils_type = (
+ interactive_adjustments[le_utils_type][0]
+ if learning_resource_type == "simulation"
+ else interactive_adjustments[le_utils_type][1]
+ )
+
+ if le_utils_type and le_utils_type not in le_utils_activities:
+ le_utils_activities.append(le_utils_type)
+
+ return le_utils_activities
+
+
+# Define mappings from SCORM educational types to educator-focused resource types
+SCORM_to_resource_type_mappings = {
+ "exercise": resource_type.EXERCISE,
+ "simulation": resource_type.ACTIVITY,
+ "questionnaire": resource_type.ACTIVITY,
+ "diagram": resource_type.MEDIA,
+ "figure": resource_type.MEDIA,
+ "graph": resource_type.MEDIA,
+ "index": resource_type.GUIDE,
+ "slide": resource_type.LESSON,
+ "table": resource_type.TUTORIAL,
+ "narrative text": resource_type.TEXTBOOK,
+ "exam": resource_type.EXERCISE,
+ "experiment": resource_type.ACTIVITY,
+ "problem statement": resource_type.ACTIVITY,
+ "self assessment": resource_type.ACTIVITY,
+ "lecture": resource_type.LESSON,
+}
+
+
+# Mapping for intendedEndUserRole when the resource is for educators
+SCORM_intended_role_to_resource_type_mapping = {
+ "teacher": resource_type.LESSON_PLAN,
+ "author": resource_type.GUIDE,
+ "manager": resource_type.GUIDE,
+}
+
+
+def map_scorm_to_educator_resource_types(metadata_dict):
+ educator_resource_types = []
+
+ # Extract the learning resource types and intended end user role from the SCORM data
+ learning_resource_types = metadata_dict.get("learningResourceType", [])
+ intended_roles = metadata_dict.get("intendedEndUserRole", [])
+
+ # Map each SCORM type to an educator-focused resource type
+ for learning_resource_type in learning_resource_types:
+ mapped_type = SCORM_to_resource_type_mappings.get(learning_resource_type)
+ if mapped_type and mapped_type not in educator_resource_types:
+ educator_resource_types.append(mapped_type)
+
+ # Check if the intended end user role matches any educator roles
+ for role in intended_roles:
+ if (
+ role in SCORM_intended_role_to_resource_type_mapping
+ and SCORM_intended_role_to_resource_type_mapping[role]
+ not in educator_resource_types
+ ):
+ educator_resource_types.append(
+ SCORM_intended_role_to_resource_type_mapping[role]
+ )
+
+ return educator_resource_types
+
+
+def infer_beginner_level_from_difficulty(metadata_dict):
+ # Beginner difficulty levels
+ beginner_difficulties = {"very easy", "easy"}
+
+ # Check if the difficulty level indicates beginner content
+ difficulty = metadata_dict.get("difficulty")
+ if difficulty in beginner_difficulties:
+ return [needs.FOR_BEGINNERS]
+
+ return []
+
+
+def update_node_from_metadata(node, metadata_dict):
+ # Update the node with the general metadata
+ node.description = metadata_dict.get("description") or node.description
+ lang_code = metadata_dict.get("language", "")
+ lang_code = (
+ lang_code.split("-")[0].lower() if getlang(lang_code) is None else lang_code
+ )
+ if getlang(lang_code):
+ node.set_language(lang_code)
+ keyword = metadata_dict.get("keyword", [])
+ if keyword and isinstance(keyword, str):
+ keyword = [keyword]
+ node.tags = node.tags + keyword
+
+ # Update the node with the educational metadata
+ node.learning_activities = (
+ node.learning_activities + map_scorm_to_le_utils_activities(metadata_dict)
+ )
+ node.resource_types = node.resource_types + map_scorm_to_educator_resource_types(
+ metadata_dict
+ )
+ node.learner_needs = node.learner_needs + infer_beginner_level_from_difficulty(
+ metadata_dict
+ )
+
+ return node
diff --git a/setup.py b/setup.py
index 1b84ac24..c6c05f5f 100644
--- a/setup.py
+++ b/setup.py
@@ -53,6 +53,7 @@
"EbookLib>=0.17.1",
"filetype>=1.1.0",
"urllib3==2.2.1",
+ "xmltodict==0.13.0",
],
python_requires=">=3.8, <3.11",
license="MIT license",
diff --git a/tests/test_SCORM_metadata.py b/tests/test_SCORM_metadata.py
new file mode 100644
index 00000000..3e0d2a31
--- /dev/null
+++ b/tests/test_SCORM_metadata.py
@@ -0,0 +1,78 @@
+import pytest
+from le_utils.constants.labels import learning_activities
+from le_utils.constants.labels import needs
+from le_utils.constants.labels import resource_type
+
+from ricecooker.utils.SCORM_metadata import infer_beginner_level_from_difficulty
+from ricecooker.utils.SCORM_metadata import map_scorm_to_educator_resource_types
+from ricecooker.utils.SCORM_metadata import map_scorm_to_le_utils_activities
+
+
+@pytest.mark.parametrize(
+ "scorm_dict, expected_result",
+ [
+ (
+ {
+ "interactivityType": "active",
+ "interactivityLevel": "high",
+ "learningResourceType": ["exercise", "simulation"],
+ },
+ [learning_activities.PRACTICE, learning_activities.EXPLORE],
+ ),
+ (
+ {"learningResourceType": ["lecture", "self assessment"]},
+ [learning_activities.REFLECT, learning_activities.WATCH],
+ ),
+ (
+ {
+ "interactivityType": "mixed",
+ "interactivityLevel": "medium",
+ "learningResourceType": ["simulation", "graph"],
+ },
+ [learning_activities.EXPLORE],
+ ),
+ (
+ {
+ "interactivityType": "expositive",
+ "interactivityLevel": "low",
+ "learningResourceType": ["simulation", "graph"],
+ },
+ [learning_activities.READ, learning_activities.WATCH],
+ ),
+ ],
+)
+def test_map_scorm_to_le_utils_activities(scorm_dict, expected_result):
+ assert set(map_scorm_to_le_utils_activities(scorm_dict)) == set(expected_result)
+
+
+@pytest.mark.parametrize(
+ "scorm_dict, expected_result",
+ [
+ (
+ {
+ "learningResourceType": ["exercise", "lecture"],
+ "intendedEndUserRole": ["teacher"],
+ },
+ [resource_type.EXERCISE, resource_type.LESSON, resource_type.LESSON_PLAN],
+ ),
+ (
+ {
+ "learningResourceType": ["simulation", "figure"],
+ "intendedEndUserRole": ["author"],
+ },
+ [resource_type.ACTIVITY, resource_type.MEDIA, resource_type.GUIDE],
+ ),
+ ],
+)
+def test_map_scorm_to_educator_resource_types(scorm_dict, expected_result):
+ assert set(map_scorm_to_educator_resource_types(scorm_dict)) == set(expected_result)
+
+
+def test_infer_beginner_level_from_difficulty():
+ scorm_dict_easy = {"difficulty": "easy"}
+ assert infer_beginner_level_from_difficulty(scorm_dict_easy) == [
+ needs.FOR_BEGINNERS
+ ]
+
+ scorm_dict_hard = {"difficulty": "difficult"}
+ assert infer_beginner_level_from_difficulty(scorm_dict_hard) == []
diff --git a/tests/test_files.py b/tests/test_files.py
index 9c23da2e..3aab32c8 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -2,6 +2,8 @@
import hashlib
import os.path
import tempfile
+import zipfile
+from contextlib import contextmanager
from shutil import copyfile
import pytest
@@ -10,6 +12,7 @@
from ricecooker import config
from ricecooker.classes.files import _get_language_with_alpha2_fallback
+from ricecooker.classes.files import IMSCPZipFile
from ricecooker.classes.files import is_youtube_subtitle_file_supported_language
from ricecooker.classes.files import SubtitleFile
from ricecooker.classes.files import YouTubeSubtitleFile
@@ -631,3 +634,216 @@ def test_convertible_substitles_weirdext_subtitlesformat():
assert (
"El total de los protones y neutrones de un átomo" in filecontents
), "missing check words in converted subs"
+
+
+@contextmanager
+def create_zip_with_manifest(manifest_filename, *additional_files):
+ # Create a temporary file for the zipfile but close it immediately for Windows compatibility
+ temp_zip = tempfile.NamedTemporaryFile(suffix=".zip", delete=False)
+ temp_zip_path = temp_zip.name
+ temp_zip.close() # Close the file, so it's not locked
+ try:
+ # Define the paths
+ base_path = os.path.dirname(os.path.abspath(__file__))
+ source_folder = os.path.join(base_path, "testcontent", "samples", "ims_xml")
+ manifest_file = os.path.join(source_folder, manifest_filename)
+
+ # Create the zipfile and add the manifest file
+ with zipfile.ZipFile(temp_zip_path, "w") as zf:
+ zf.write(manifest_file, "imsmanifest.xml")
+ for additional_file in additional_files:
+ zf.write(os.path.join(source_folder, additional_file), additional_file)
+
+ yield temp_zip.name
+ finally:
+ # Clean up the zipfile
+ try:
+ os.remove(temp_zip.name)
+ except (FileNotFoundError, OSError):
+ pass
+
+
+expected_simple_metadata = {
+ "identifier": None,
+ "metadata": {
+ "description": "Example of test file",
+ "language": "en",
+ "title": "Test File",
+ },
+ "organizations": [
+ {
+ "children": [
+ {
+ "files": [],
+ "href": "file1.html",
+ "identifier": "file1Ref",
+ "identifierref": "file1Ref",
+ "title": "Test File1",
+ "type": "webcontent",
+ "metadata": {},
+ },
+ {
+ "files": [],
+ "href": "file2.html",
+ "identifier": "file2Ref",
+ "identifierref": "file2Ref",
+ "title": "Test File2",
+ "type": "webcontent",
+ "metadata": {},
+ },
+ {
+ "children": [
+ {
+ "files": [],
+ "href": "file1.html",
+ "identifier": "file1Ref",
+ "identifierref": "file1Ref",
+ "metadata": {},
+ "title": "Test File1 Nested",
+ "type": "webcontent",
+ },
+ {
+ "files": [],
+ "href": "file2.html",
+ "identifier": "file2Ref",
+ "identifierref": "file2Ref",
+ "metadata": {},
+ "title": "Test File2 Nested",
+ "type": "webcontent",
+ },
+ ],
+ "identifier": "folder2",
+ "metadata": {},
+ "title": "Folder 2",
+ },
+ {
+ "children": [
+ {
+ "files": [],
+ "href": "file3.html",
+ "identifier": "file3Ref",
+ "identifierref": "file3Ref",
+ "metadata": {},
+ "title": "Test File3 Nested",
+ "type": "webcontent",
+ },
+ {
+ "files": [],
+ "href": "file4.html",
+ "identifier": "file4Ref",
+ "identifierref": "file4Ref",
+ "metadata": {},
+ "title": "Test File4 Nested",
+ "type": "webcontent",
+ },
+ ],
+ "identifier": "folder3",
+ "metadata": {},
+ "title": "Folder 3",
+ },
+ ],
+ "title": "Folder 1",
+ "metadata": {},
+ },
+ {
+ "children": [
+ {
+ "files": [],
+ "href": "file3.html",
+ "identifier": "file3Ref",
+ "identifierref": "file3Ref",
+ "title": "Test File3",
+ "type": "webcontent",
+ "metadata": {},
+ },
+ {
+ "files": [],
+ "href": "file4.html",
+ "identifier": "file4Ref",
+ "identifierref": "file4Ref",
+ "title": "Test File4",
+ "type": "webcontent",
+ "metadata": {},
+ },
+ ],
+ "title": "Folder 4",
+ "metadata": {},
+ },
+ ],
+}
+
+
+def test_extract_metadata_from_simple_manifest():
+ with create_zip_with_manifest("simple_manifest.xml") as zip_path:
+ imscp_file = IMSCPZipFile(zip_path)
+ assert imscp_file.extract_metadata() == expected_simple_metadata
+
+
+expected_complex_metadata = {
+ "identifier": "com.example.hummingbirds.contentpackaging.metadata.2024",
+ "metadata": {
+ "description": "An engaging overview of hummingbirds, covering their biology, behavior, and habitats. This course is designed for enthusiasts and bird watchers of all levels.", # noqa E501
+ "language": "en",
+ "title": "Discovering Hummingbirds",
+ "keyword": ["hummingbirds", "bird watching", "ornithology"],
+ "intendedEndUserRole": "learner",
+ "interactivityLevel": "medium",
+ "learningResourceType": ["documentary", "interactive lesson"],
+ },
+ "organizations": [
+ {
+ "title": "Discovering Hummingbirds",
+ "identifier": "hummingbirds_default_org",
+ "metadata": {
+ "description": "This metadata provides information about the structure and organization of the Discovering Hummingbirds course. The course is organized in a hierarchical manner, guiding learners from basic concepts to more detailed aspects of hummingbird biology and behavior.", # noqa E501
+ },
+ "children": [
+ {
+ "title": "Introduction to Hummingbirds",
+ "metadata": {
+ "description": "Explore the fascinating world of hummingbirds, their unique characteristics and behaviors.",
+ "language": "en-US",
+ },
+ "identifier": "item1",
+ "identifierref": "resource1",
+ "href": "intro.html",
+ "type": "webcontent",
+ "files": ["intro.html", "shared/style.css"],
+ },
+ {
+ "title": "Hummingbird Habitats",
+ "metadata": {
+ "description": "Learn about various habitats of hummingbirds, from tropical jungles to backyard gardens.",
+ },
+ "identifier": "item2",
+ "identifierref": "resource2",
+ "href": "habitats.html",
+ "type": "webcontent",
+ "files": ["habitats.html", "shared/image1.jpg"],
+ },
+ {
+ "title": "Hummingbird Species",
+ "metadata": {
+ "description": "Discover the diversity of hummingbird species and their unique adaptations.",
+ },
+ "identifier": "item3",
+ "identifierref": "resource3",
+ "href": "species.html",
+ "type": "webcontent",
+ "files": ["species.html", "shared/image2.jpg"],
+ },
+ ],
+ },
+ ],
+}
+
+
+def test_extract_metadata_from_complex_manifest():
+ # Additional metadata files are passed as arguments to the context manager
+ with create_zip_with_manifest(
+ "complete_manifest_with_external_metadata.xml",
+ "metadata_hummingbirds_course.xml",
+ "metadata_hummingbirds_organization.xml",
+ ) as zip_path:
+ imscp_file = IMSCPZipFile(zip_path)
+ assert imscp_file.extract_metadata() == expected_complex_metadata
diff --git a/tests/test_tree.py b/tests/test_tree.py
index d844d0f9..5ba9f7d9 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -11,16 +11,22 @@
from le_utils.constants import licenses
from le_utils.constants.labels import levels
from le_utils.constants.languages import getlang
+from test_files import create_zip_with_manifest
+from test_files import expected_complex_metadata
+from test_files import expected_simple_metadata
from ricecooker.classes.files import DocumentFile
from ricecooker.classes.files import HTMLZipFile
+from ricecooker.classes.files import IMSCPZipFile
from ricecooker.classes.files import SlideImageFile
from ricecooker.classes.files import ThumbnailFile
from ricecooker.classes.licenses import get_license
from ricecooker.classes.licenses import License
+from ricecooker.classes.nodes import ContentNode
from ricecooker.classes.nodes import CustomNavigationChannelNode
from ricecooker.classes.nodes import CustomNavigationNode
from ricecooker.classes.nodes import DocumentNode
+from ricecooker.classes.nodes import IMSCPNode
from ricecooker.classes.nodes import RemoteContentNode
from ricecooker.classes.nodes import SlideshowNode
from ricecooker.classes.nodes import TopicNode
@@ -688,3 +694,40 @@ def test_remote_content_node_with_invalid_overridden_field():
author="Such disallowed. Computer says no.",
)
node.validate_tree()
+
+
+def assert_node_matches_metadata(node, metadata, files):
+
+ extra_metadata = metadata["metadata"]
+
+ assert node.title == extra_metadata["title"]
+ if metadata.get("identifier"):
+ assert node.source_id == metadata.get("identifier")
+
+ # Check for ContentNode or TopicNode
+ if isinstance(node, ContentNode):
+ assert node.files == files
+ assert node.extra_fields.get("options", {}).get("entry") == metadata.get("href")
+ elif isinstance(node, TopicNode):
+ for child_node, child_metadata in zip(
+ node.children, metadata.get("children", [])
+ ):
+ assert_node_matches_metadata(child_node, child_metadata, files)
+
+
+def test_imscp_node_simple_manifest():
+ with create_zip_with_manifest("simple_manifest.xml") as zip_path:
+ imscp_file = IMSCPZipFile(zip_path)
+ node = IMSCPNode(source_id="test", license="CC BY", files=[imscp_file])
+ assert_node_matches_metadata(node, expected_simple_metadata, [imscp_file])
+
+
+def test_imscp_node_complex_manifest():
+ with create_zip_with_manifest(
+ "complete_manifest_with_external_metadata.xml",
+ "metadata_hummingbirds_course.xml",
+ "metadata_hummingbirds_organization.xml",
+ ) as zip_path:
+ imscp_file = IMSCPZipFile(zip_path)
+ node = IMSCPNode(license="CC BY", files=[imscp_file])
+ assert_node_matches_metadata(node, expected_complex_metadata, [imscp_file])
diff --git a/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml b/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml
new file mode 100644
index 00000000..804647ec
--- /dev/null
+++ b/tests/testcontent/samples/ims_xml/complete_manifest_with_external_metadata.xml
@@ -0,0 +1,103 @@
+
+
+
+
+
+ ADL SCORM
+ 2004 3rd Edition
+
+ metadata_hummingbirds_course.xml
+
+
+
+
+ Discovering Hummingbirds
+
+
+ metadata_hummingbirds_organization.xml
+
+
+
+ -
+ Introduction to Hummingbirds
+
+
+
+
+ Explore the fascinating world of hummingbirds, their unique characteristics and behaviors.
+
+ en-US
+
+
+
+
+
+
+ -
+ Hummingbird Habitats
+
+
+
+
+ Learn about various habitats of hummingbirds, from tropical jungles to backyard gardens.
+
+
+
+
+
+
+
+ -
+ Hummingbird Species
+
+
+
+
+ Discover the diversity of hummingbird species and their unique adaptations.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml
new file mode 100644
index 00000000..cb3c7346
--- /dev/null
+++ b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_course.xml
@@ -0,0 +1,232 @@
+
+
+
+
+ URI
+ com.example.hummingbirds.contentpackaging.metadata.2024
+
+
+ Discovering Hummingbirds
+ Descubriendo Colibríes
+
+ en
+
+ An engaging overview of hummingbirds, covering their biology, behavior, and habitats. This course is designed for enthusiasts and bird watchers of all levels.
+
+
+ hummingbirds
+
+
+ bird watching
+
+
+ ornithology
+
+
+ Global range, with a focus on the Americas.
+
+
+ LOMv1.0
+ hierarchical
+
+
+ LOMv1.0
+ 2
+
+
+
+
+
+ 1.0
+
+
+ LOMv1.0
+ final
+
+
+
+ LOMv1.0
+ publisher
+
+ BEGIN:VCARD
+VERSION:2.1
+FN:John Doe
+ORG:Example Organization
+TEL;WORK;VOICE:(555) 123-4567
+ADR;WORK:;;1234 Main St;Anytown;AN;12345;Country
+EMAIL;PREF;INTERNET:john.doe@example.com
+END:VCARD
+
+
+ 2024-01-01
+
+ Initial publication date of the course.
+
+
+
+
+
+
+
+ URI
+ com.example.hummingbirds.metadata.2024
+
+ LOMv1.0
+ SCORM_CAM_v1.3
+ en-us
+
+
+
+ text/html
+ image/jpeg
+ video/mp4
+ 1048576
+ http://www.example.com/hummingbirds
+
+
+
+ LOMv1.0
+ browser
+
+
+ LOMv1.0
+ any
+
+
+
+
+ No specific installation requirements, accessible via standard web browsers.
+
+
+ Optimized for both desktop and mobile platforms.
+
+
+ P1H
+
+ The average time to complete the course is approximately 1 hour.
+
+
+
+
+
+
+ LOMv1.0
+ documentary
+
+
+ LOMv1.0
+ interactive lesson
+
+
+ LOMv1.0
+ medium
+
+
+ LOMv1.0
+ medium
+
+
+ LOMv1.0
+ learner
+
+
+ LOMv1.0
+ training
+
+
+ Age 12 and up
+
+
+ LOMv1.0
+ easy
+
+
+ P1H
+
+ Approximately 1 hour is needed to complete the course.
+
+
+
+ This course provides an introduction to hummingbirds, suitable for beginners in ornithology and bird watching.
+
+ en-us
+
+
+
+
+ LOMv1.0
+ no
+
+
+ LOMv1.0
+ yes
+
+
+ Content is protected under Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
+
+
+
+
+
+ LOMv1.0
+ ispartof
+
+
+
+ URI
+ com.example.biologyseries
+
+
+ Part of a larger series on biology and wildlife.
+
+
+
+
+
+ BEGIN:VCARD
+VERSION:2.1
+FN:Jane Smith
+ORG:Example Organization
+TEL;WORK;VOICE:(555) 987-6543
+ADR;WORK:;;4321 Side St;Anytown;AN;54321;Country
+EMAIL;PREF;INTERNET:jane.smith@example.com
+END:VCARD
+
+
+ 2024-02-01
+
+ Annotation added to provide additional insights into course content.
+
+
+
+ This course includes interactive elements to enhance learner engagement with the subject of hummingbirds.
+
+
+
+
+
+ LOMv1.0
+ discipline
+
+
+
+ Example Organization's catalog of biology courses
+
+
+ ornithology_basics
+
+ Basic Ornithology
+
+
+
+
+ This course serves as an introductory module in the Basic Ornithology series, focusing on hummingbirds.
+
+
+ biology
+
+
+ wildlife
+
+
+
diff --git a/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml
new file mode 100644
index 00000000..87018dfa
--- /dev/null
+++ b/tests/testcontent/samples/ims_xml/metadata_hummingbirds_organization.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+ This metadata provides information about the structure and organization of the Discovering Hummingbirds course. The course is organized in a hierarchical manner, guiding learners from basic concepts to more detailed aspects of hummingbird biology and behavior.
+
+
+
+ LOMv1.0
+ hierarchical
+
+
+
diff --git a/tests/testcontent/samples/ims_xml/simple_manifest.xml b/tests/testcontent/samples/ims_xml/simple_manifest.xml
new file mode 100644
index 00000000..bcbb2f3a
--- /dev/null
+++ b/tests/testcontent/samples/ims_xml/simple_manifest.xml
@@ -0,0 +1,63 @@
+
+
+
+
+
+ Test File
+
+ en
+
+ Example of test file
+
+
+
+
+
+
+ Folder 1
+ -
+ Test File1
+
+ -
+ Test File2
+
+ -
+ Folder 2
+
-
+ Test File1 Nested
+
+ -
+ Test File2 Nested
+
+
+ -
+ Folder 3
+
-
+ Test File3 Nested
+
+ -
+ Test File4 Nested
+
+
+
+
+ Folder 4
+ -
+ Test File3
+
+ -
+ Test File4
+
+
+
+
+
+
+
+
+
+
+
+
+
+