From 4d508f94aef6976162993b7d92dc2347120ee306 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 11:19:52 +0200 Subject: [PATCH 01/15] Moved mediafile.py into mediafile/__init__.py to allow easier refactoring. --- mediafile.py => mediafile/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename mediafile.py => mediafile/__init__.py (100%) diff --git a/mediafile.py b/mediafile/__init__.py similarity index 100% rename from mediafile.py rename to mediafile/__init__.py From 075749781172208a6ce6bfda0e31e9ab6e42f340 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 11:20:59 +0200 Subject: [PATCH 02/15] Moved exceptions into own file. --- mediafile/__init__.py | 35 ++--------------------------------- mediafile/exceptions.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 33 deletions(-) create mode 100644 mediafile/exceptions.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 03df854..572e555 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -56,6 +56,8 @@ import mutagen.mp3 import mutagen.mp4 +from .exceptions import FileTypeError, MutagenError, UnreadableFileError + __version__ = "0.13.0" __all__ = ["UnreadableFileError", "FileTypeError", "MediaFile"] @@ -79,39 +81,6 @@ } -# Exceptions. - - -class UnreadableFileError(Exception): - """Mutagen is not able to extract information from the file.""" - - def __init__(self, filename, msg): - Exception.__init__(self, msg if msg else repr(filename)) - - -class FileTypeError(UnreadableFileError): - """Reading this type of file is not supported. - - If passed the `mutagen_type` argument this indicates that the - mutagen type is not supported by `Mediafile`. - """ - - def __init__(self, filename, mutagen_type=None): - if mutagen_type is None: - msg = "{0!r}: not in a recognized format".format(filename) - else: - msg = "{0}: of mutagen type {1}".format(repr(filename), mutagen_type) - Exception.__init__(self, msg) - - -class MutagenError(UnreadableFileError): - """Raised when Mutagen fails unexpectedly---probably due to a bug.""" - - def __init__(self, filename, mutagen_exc): - msg = "{0}: {1}".format(repr(filename), mutagen_exc) - Exception.__init__(self, msg) - - # Interacting with Mutagen. diff --git a/mediafile/exceptions.py b/mediafile/exceptions.py new file mode 100644 index 0000000..5b33fce --- /dev/null +++ b/mediafile/exceptions.py @@ -0,0 +1,34 @@ +"""Custom exceptions for MediaFile metadata handling.""" + + +class UnreadableFileError(Exception): + """Mutagen is not able to extract information from the file.""" + + def __init__(self, filename, msg): + Exception.__init__(self, msg if msg else repr(filename)) + + +class FileTypeError(UnreadableFileError): + """Reading this type of file is not supported. + + If passed the `mutagen_type` argument this indicates that the + mutagen type is not supported by `Mediafile`. + """ + + def __init__(self, filename, mutagen_type=None): + if mutagen_type is None: + msg = "{0!r}: not in a recognized format".format(filename) + else: + msg = "{0}: of mutagen type {1}".format(repr(filename), mutagen_type) + Exception.__init__(self, msg) + + +class MutagenError(UnreadableFileError): + """Raised when Mutagen fails unexpectedly---probably due to a bug.""" + + def __init__(self, filename, mutagen_exc): + msg = "{0}: {1}".format(repr(filename), mutagen_exc) + Exception.__init__(self, msg) + + +__all__ = ["UnreadableFileError", "FileTypeError", "MutagenError"] From 891eef5b2d117b07dd74b17fb84e509db9c24f4c Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 11:23:22 +0200 Subject: [PATCH 03/15] Modernized exceptions and added common type. --- mediafile/exceptions.py | 62 +++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/mediafile/exceptions.py b/mediafile/exceptions.py index 5b33fce..ec26c43 100644 --- a/mediafile/exceptions.py +++ b/mediafile/exceptions.py @@ -1,11 +1,41 @@ """Custom exceptions for MediaFile metadata handling.""" +from __future__ import annotations -class UnreadableFileError(Exception): - """Mutagen is not able to extract information from the file.""" - def __init__(self, filename, msg): - Exception.__init__(self, msg if msg else repr(filename)) +class MediaFileError(Exception): + """Base exception for all MediaFile-related errors.""" + + def __init__( + self, + message: str, + filename: str | None = None, + ): + self.filename = filename + self.message = message + super().__init__(self._format_message()) + + def _format_message(self) -> str: + if self.filename: + return f"{self.filename}: {self.message}" + return self.message + + def __str__(self) -> str: + return self._format_message() + + +class UnreadableFileError(MediaFileError): + """Raised when Mutagen cannot extract information from the file.""" + + def __init__( + self, + filename: str, + message: str, + ): + super().__init__( + message, + filename, + ) class FileTypeError(UnreadableFileError): @@ -15,20 +45,28 @@ class FileTypeError(UnreadableFileError): mutagen type is not supported by `Mediafile`. """ - def __init__(self, filename, mutagen_type=None): + def __init__( + self, + filename: str, + mutagen_type: str | None = None, + ): if mutagen_type is None: - msg = "{0!r}: not in a recognized format".format(filename) + msg = "File is not in a recognized format" else: - msg = "{0}: of mutagen type {1}".format(repr(filename), mutagen_type) - Exception.__init__(self, msg) + msg = f"File type '{mutagen_type}' is not supported" + + super().__init__(filename, msg) class MutagenError(UnreadableFileError): - """Raised when Mutagen fails unexpectedly---probably due to a bug.""" + """Raised when Mutagen fails unexpectedly, likely due to a bug.""" + + mutagen_exception: Exception - def __init__(self, filename, mutagen_exc): - msg = "{0}: {1}".format(repr(filename), mutagen_exc) - Exception.__init__(self, msg) + def __init__(self, filename: str, mutagen_exception: Exception): + self.mutagen_exception = mutagen_exception + message = f"Mutagen internal error: {mutagen_exception}" + super().__init__(filename, message) __all__ = ["UnreadableFileError", "FileTypeError", "MutagenError"] From 9470f8e4d7b3bd17372d6c82aa19834f2148a795 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 11:50:42 +0200 Subject: [PATCH 04/15] Moved constants into own file. --- mediafile/__init__.py | 43 ----------------------------------------- mediafile/constants.py | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 43 deletions(-) create mode 100644 mediafile/constants.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 572e555..8ca3207 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -63,23 +63,6 @@ log = logging.getLogger(__name__) -# Human-readable type names. -TYPES = { - "mp3": "MP3", - "aac": "AAC", - "alac": "ALAC", - "ogg": "OGG", - "opus": "Opus", - "flac": "FLAC", - "ape": "APE", - "wv": "WavPack", - "mpc": "Musepack", - "asf": "Windows Media", - "aiff": "AIFF", - "dsf": "DSD Stream File", - "wav": "WAVE", -} - # Interacting with Mutagen. @@ -326,32 +309,6 @@ def image_extension(data): return ext if ext != "tif" else "tiff" -class ImageType(enum.Enum): - """Indicates the kind of an `Image` stored in a file's tag.""" - - other = 0 - icon = 1 - other_icon = 2 - front = 3 - back = 4 - leaflet = 5 - media = 6 - lead_artist = 7 - artist = 8 - conductor = 9 - group = 10 - composer = 11 - lyricist = 12 - recording_location = 13 - recording_session = 14 - performance = 15 - screen_capture = 16 - fish = 17 - illustration = 18 - artist_logo = 19 - publisher_logo = 20 - - class Image(object): """Structure representing image data and metadata that can be stored and retrieved from tags. diff --git a/mediafile/constants.py b/mediafile/constants.py new file mode 100644 index 0000000..17a6342 --- /dev/null +++ b/mediafile/constants.py @@ -0,0 +1,44 @@ +import enum + +# Human-readable type names. +TYPES = { + "mp3": "MP3", + "aac": "AAC", + "alac": "ALAC", + "ogg": "OGG", + "opus": "Opus", + "flac": "FLAC", + "ape": "APE", + "wv": "WavPack", + "mpc": "Musepack", + "asf": "Windows Media", + "aiff": "AIFF", + "dsf": "DSD Stream File", + "wav": "WAVE", +} + + +class ImageType(enum.Enum): + """Indicates the kind of an `Image` stored in a file's tag.""" + + other = 0 + icon = 1 + other_icon = 2 + front = 3 + back = 4 + leaflet = 5 + media = 6 + lead_artist = 7 + artist = 8 + conductor = 9 + group = 10 + composer = 11 + lyricist = 12 + recording_location = 13 + recording_session = 14 + performance = 15 + screen_capture = 16 + fish = 17 + illustration = 18 + artist_logo = 19 + publisher_logo = 20 From 146a462e44e0bd4efa4f67a06378686adc8d38b4 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 11:57:44 +0200 Subject: [PATCH 05/15] Moved storage classes into own files and folder. --- mediafile/__init__.py | 792 ---------------------------------- mediafile/storage/__init__.py | 20 + mediafile/storage/afs.py | 40 ++ mediafile/storage/ape.py | 70 +++ mediafile/storage/base.py | 220 ++++++++++ mediafile/storage/flac.py | 34 ++ mediafile/storage/mp3.py | 287 ++++++++++++ mediafile/storage/mp4.py | 100 +++++ mediafile/storage/vorbis.py | 56 +++ 9 files changed, 827 insertions(+), 792 deletions(-) create mode 100644 mediafile/storage/__init__.py create mode 100644 mediafile/storage/afs.py create mode 100644 mediafile/storage/ape.py create mode 100644 mediafile/storage/base.py create mode 100644 mediafile/storage/flac.py create mode 100644 mediafile/storage/mp3.py create mode 100644 mediafile/storage/mp4.py create mode 100644 mediafile/storage/vorbis.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 8ca3207..36be8d8 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -349,798 +349,6 @@ def type_index(self): return self.type.value -# StorageStyle classes describe strategies for accessing values in -# Mutagen file objects. - - -class StorageStyle(object): - """A strategy for storing a value for a certain tag format (or set - of tag formats). This basic StorageStyle describes simple 1:1 - mapping from raw values to keys in a Mutagen file object; subclasses - describe more sophisticated translations or format-specific access - strategies. - - MediaFile uses a StorageStyle via three methods: ``get()``, - ``set()``, and ``delete()``. It passes a Mutagen file object to - each. - - Internally, the StorageStyle implements ``get()`` and ``set()`` - using two steps that may be overridden by subtypes. To get a value, - the StorageStyle first calls ``fetch()`` to retrieve the value - corresponding to a key and then ``deserialize()`` to convert the raw - Mutagen value to a consumable Python value. Similarly, to set a - field, we call ``serialize()`` to encode the value and then - ``store()`` to assign the result into the Mutagen object. - - Each StorageStyle type has a class-level `formats` attribute that is - a list of strings indicating the formats that the style applies to. - MediaFile only uses StorageStyles that apply to the correct type for - a given audio file. - """ - - formats = [ - "FLAC", - "OggOpus", - "OggTheora", - "OggSpeex", - "OggVorbis", - "OggFlac", - "APEv2File", - "WavPack", - "Musepack", - "MonkeysAudio", - ] - """List of mutagen classes the StorageStyle can handle. - """ - - def __init__(self, key, as_type=str, suffix=None, float_places=2, read_only=False): - """Create a basic storage strategy. Parameters: - - - `key`: The key on the Mutagen file object used to access the - field's data. - - `as_type`: The Python type that the value is stored as - internally (`unicode`, `int`, `bool`, or `bytes`). - - `suffix`: When `as_type` is a string type, append this before - storing the value. - - `float_places`: When the value is a floating-point number and - encoded as a string, the number of digits to store after the - decimal point. - - `read_only`: When true, writing to this field is disabled. - Primary use case is so wrongly named fields can be addressed - in a graceful manner. This does not block the delete method. - - """ - self.key = key - self.as_type = as_type - self.suffix = suffix - self.float_places = float_places - self.read_only = read_only - - # Convert suffix to correct string type. - if self.suffix and self.as_type is str and not isinstance(self.suffix, str): - self.suffix = self.suffix.decode("utf-8") - - # Getter. - - def get(self, mutagen_file): - """Get the value for the field using this style.""" - return self.deserialize(self.fetch(mutagen_file)) - - def fetch(self, mutagen_file): - """Retrieve the raw value of for this tag from the Mutagen file - object. - """ - try: - return mutagen_file[self.key][0] - except (KeyError, IndexError): - return None - - def deserialize(self, mutagen_value): - """Given a raw value stored on a Mutagen object, decode and - return the represented value. - """ - if ( - self.suffix - and isinstance(mutagen_value, str) - and mutagen_value.endswith(self.suffix) - ): - return mutagen_value[: -len(self.suffix)] - else: - return mutagen_value - - # Setter. - - def set(self, mutagen_file, value): - """Assign the value for the field using this style.""" - self.store(mutagen_file, self.serialize(value)) - - def store(self, mutagen_file, value): - """Store a serialized value in the Mutagen file object.""" - mutagen_file[self.key] = [value] - - def serialize(self, value): - """Convert the external Python value to a type that is suitable for - storing in a Mutagen file object. - """ - if isinstance(value, float) and self.as_type is str: - value = "{0:.{1}f}".format(value, self.float_places) - value = self.as_type(value) - elif self.as_type is str: - if isinstance(value, bool): - # Store bools as 1/0 instead of True/False. - value = str(int(bool(value))) - elif isinstance(value, bytes): - value = value.decode("utf-8", "ignore") - else: - value = str(value) - else: - value = self.as_type(value) - - if self.suffix: - value += self.suffix - - return value - - def delete(self, mutagen_file): - """Remove the tag from the file.""" - if self.key in mutagen_file: - del mutagen_file[self.key] - - -class ListStorageStyle(StorageStyle): - """Abstract storage style that provides access to lists. - - The ListMediaField descriptor uses a ListStorageStyle via two - methods: ``get_list()`` and ``set_list()``. It passes a Mutagen file - object to each. - - Subclasses may overwrite ``fetch`` and ``store``. ``fetch`` must - return a (possibly empty) list or `None` if the tag does not exist. - ``store`` receives a serialized list of values as the second argument. - - The `serialize` and `deserialize` methods (from the base - `StorageStyle`) are still called with individual values. This class - handles packing and unpacking the values into lists. - """ - - def get(self, mutagen_file): - """Get the first value in the field's value list.""" - values = self.get_list(mutagen_file) - if values is None: - return None - - try: - return values[0] - except IndexError: - return None - - def get_list(self, mutagen_file): - """Get a list of all values for the field using this style.""" - raw_values = self.fetch(mutagen_file) - if raw_values is None: - return None - - return [self.deserialize(item) for item in raw_values] - - def fetch(self, mutagen_file): - """Get the list of raw (serialized) values.""" - try: - return mutagen_file[self.key] - except KeyError: - return None - - def set(self, mutagen_file, value): - """Set an individual value as the only value for the field using - this style. - """ - if value is None: - self.store(mutagen_file, None) - else: - self.set_list(mutagen_file, [value]) - - def set_list(self, mutagen_file, values): - """Set all values for the field using this style. `values` - should be an iterable. - """ - if values is None: - self.delete(mutagen_file) - else: - self.store(mutagen_file, [self.serialize(value) for value in values]) - - def store(self, mutagen_file, values): - """Set the list of all raw (serialized) values for this field.""" - mutagen_file[self.key] = values - - -class SoundCheckStorageStyleMixin(object): - """A mixin for storage styles that read and write iTunes SoundCheck - analysis values. The object must have an `index` field that - indicates which half of the gain/peak pair---0 or 1---the field - represents. - """ - - def get(self, mutagen_file): - data = self.fetch(mutagen_file) - if data is not None: - return _sc_decode(data)[self.index] - - def set(self, mutagen_file, value): - data = self.fetch(mutagen_file) - if data is None: - gain_peak = [0, 0] - else: - gain_peak = list(_sc_decode(data)) - gain_peak[self.index] = value or 0 - data = self.serialize(_sc_encode(*gain_peak)) - self.store(mutagen_file, data) - - -class ASFStorageStyle(ListStorageStyle): - """A general storage style for Windows Media/ASF files.""" - - formats = ["ASF"] - - def deserialize(self, data): - if isinstance(data, mutagen.asf.ASFBaseAttribute): - data = data.value - return data - - -class MP4StorageStyle(StorageStyle): - """A general storage style for MPEG-4 tags.""" - - formats = ["MP4"] - - def serialize(self, value): - value = super(MP4StorageStyle, self).serialize(value) - if self.key.startswith("----:") and isinstance(value, str): - value = value.encode("utf-8") - return value - - -class MP4TupleStorageStyle(MP4StorageStyle): - """A style for storing values as part of a pair of numbers in an - MPEG-4 file. - """ - - def __init__(self, key, index=0, **kwargs): - super(MP4TupleStorageStyle, self).__init__(key, **kwargs) - self.index = index - - def deserialize(self, mutagen_value): - items = mutagen_value or [] - packing_length = 2 - return list(items) + [0] * (packing_length - len(items)) - - def get(self, mutagen_file): - value = super(MP4TupleStorageStyle, self).get(mutagen_file)[self.index] - if value == 0: - # The values are always present and saved as integers. So we - # assume that "0" indicates it is not set. - return None - else: - return value - - def set(self, mutagen_file, value): - if value is None: - value = 0 - items = self.deserialize(self.fetch(mutagen_file)) - items[self.index] = int(value) - self.store(mutagen_file, items) - - def delete(self, mutagen_file): - if self.index == 0: - super(MP4TupleStorageStyle, self).delete(mutagen_file) - else: - self.set(mutagen_file, None) - - -class MP4ListStorageStyle(ListStorageStyle, MP4StorageStyle): - pass - - -class MP4SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP4StorageStyle): - def __init__(self, key, index=0, **kwargs): - super(MP4SoundCheckStorageStyle, self).__init__(key, **kwargs) - self.index = index - - -class MP4BoolStorageStyle(MP4StorageStyle): - """A style for booleans in MPEG-4 files. (MPEG-4 has an atom type - specifically for representing booleans.) - """ - - def get(self, mutagen_file): - try: - return mutagen_file[self.key] - except KeyError: - return None - - def get_list(self, mutagen_file): - raise NotImplementedError("MP4 bool storage does not support lists") - - def set(self, mutagen_file, value): - mutagen_file[self.key] = value - - def set_list(self, mutagen_file, values): - raise NotImplementedError("MP4 bool storage does not support lists") - - -class MP4ImageStorageStyle(MP4ListStorageStyle): - """Store images as MPEG-4 image atoms. Values are `Image` objects.""" - - def __init__(self, **kwargs): - super(MP4ImageStorageStyle, self).__init__(key="covr", **kwargs) - - def deserialize(self, data): - return Image(data) - - def serialize(self, image): - if image.mime_type == "image/png": - kind = mutagen.mp4.MP4Cover.FORMAT_PNG - elif image.mime_type == "image/jpeg": - kind = mutagen.mp4.MP4Cover.FORMAT_JPEG - else: - raise ValueError("MP4 files only supports PNG and JPEG images") - return mutagen.mp4.MP4Cover(image.data, kind) - - -class MP3StorageStyle(StorageStyle): - """Store data in ID3 frames.""" - - formats = ["MP3", "AIFF", "DSF", "WAVE"] - - def __init__(self, key, id3_lang=None, **kwargs): - """Create a new ID3 storage style. `id3_lang` is the value for - the language field of newly created frames. - """ - self.id3_lang = id3_lang - super(MP3StorageStyle, self).__init__(key, **kwargs) - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].text[0] - except (KeyError, IndexError): - return None - - def store(self, mutagen_file, value): - frame = mutagen.id3.Frames[self.key](encoding=3, text=[value]) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3PeopleStorageStyle(MP3StorageStyle): - """Store list of people in ID3 frames.""" - - def __init__(self, key, involvement="", **kwargs): - self.involvement = involvement - super(MP3PeopleStorageStyle, self).__init__(key, **kwargs) - - def store(self, mutagen_file, value): - frames = mutagen_file.tags.getall(self.key) - - # Try modifying in place. - found = False - for frame in frames: - if frame.encoding == mutagen.id3.Encoding.UTF8: - for pair in frame.people: - if pair[0].lower() == self.involvement.lower(): - pair[1] = value - found = True - - # Try creating a new frame. - if not found: - frame = mutagen.id3.Frames[self.key]( - encoding=mutagen.id3.Encoding.UTF8, people=[[self.involvement, value]] - ) - mutagen_file.tags.add(frame) - - def fetch(self, mutagen_file): - for frame in mutagen_file.tags.getall(self.key): - for pair in frame.people: - if pair[0].lower() == self.involvement.lower(): - try: - return pair[1] - except IndexError: - return None - - -class MP3ListStorageStyle(ListStorageStyle, MP3StorageStyle): - """Store lists of data in multiple ID3 frames.""" - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].text - except KeyError: - return [] - - def store(self, mutagen_file, values): - frame = mutagen.id3.Frames[self.key](encoding=3, text=values) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3UFIDStorageStyle(MP3StorageStyle): - """Store string data in a UFID ID3 frame with a particular owner.""" - - def __init__(self, owner, **kwargs): - self.owner = owner - super(MP3UFIDStorageStyle, self).__init__("UFID:" + owner, **kwargs) - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].data - except KeyError: - return None - - def store(self, mutagen_file, value): - # This field type stores text data as encoded data. - assert isinstance(value, str) - value = value.encode("utf-8") - - frames = mutagen_file.tags.getall(self.key) - for frame in frames: - # Replace existing frame data. - if frame.owner == self.owner: - frame.data = value - else: - # New frame. - frame = mutagen.id3.UFID(owner=self.owner, data=value) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3DescStorageStyle(MP3StorageStyle): - """Store data in a TXXX (or similar) ID3 frame. The frame is - selected based its ``desc`` field. - ``attr`` allows to specify name of data accessor property in the frame. - Most of frames use `text`. - ``multispec`` specifies if frame data is ``mutagen.id3.MultiSpec`` - which means that the data is being packed in the list. - """ - - def __init__(self, desc="", key="TXXX", attr="text", multispec=True, **kwargs): - assert isinstance(desc, str) - self.description = desc - self.attr = attr - self.multispec = multispec - super(MP3DescStorageStyle, self).__init__(key=key, **kwargs) - - def store(self, mutagen_file, value): - frames = mutagen_file.tags.getall(self.key) - if self.multispec: - value = [value] - - # Try modifying in place. - found = False - for frame in frames: - if frame.desc.lower() == self.description.lower(): - setattr(frame, self.attr, value) - frame.encoding = mutagen.id3.Encoding.UTF8 - found = True - - # Try creating a new frame. - if not found: - frame = mutagen.id3.Frames[self.key]( - desc=self.description, - encoding=mutagen.id3.Encoding.UTF8, - **{self.attr: value}, - ) - if self.id3_lang: - frame.lang = self.id3_lang - mutagen_file.tags.add(frame) - - def fetch(self, mutagen_file): - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - if not self.multispec: - return getattr(frame, self.attr) - try: - return getattr(frame, self.attr)[0] - except IndexError: - return None - - def delete(self, mutagen_file): - found_frame = None - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - found_frame = frame - break - if found_frame is not None: - del mutagen_file[frame.HashKey] - - -class MP3ListDescStorageStyle(MP3DescStorageStyle, ListStorageStyle): - def __init__(self, desc="", key="TXXX", split_v23=False, **kwargs): - self.split_v23 = split_v23 - super(MP3ListDescStorageStyle, self).__init__(desc=desc, key=key, **kwargs) - - def fetch(self, mutagen_file): - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - if mutagen_file.tags.version == (2, 3, 0) and self.split_v23: - return sum((el.split("/") for el in frame.text), []) - else: - return frame.text - return [] - - def store(self, mutagen_file, values): - self.delete(mutagen_file) - frame = mutagen.id3.Frames[self.key]( - desc=self.description, - text=values, - encoding=mutagen.id3.Encoding.UTF8, - ) - if self.id3_lang: - frame.lang = self.id3_lang - mutagen_file.tags.add(frame) - - -class MP3SlashPackStorageStyle(MP3StorageStyle): - """Store value as part of pair that is serialized as a slash- - separated string. - """ - - def __init__(self, key, pack_pos=0, **kwargs): - super(MP3SlashPackStorageStyle, self).__init__(key, **kwargs) - self.pack_pos = pack_pos - - def _fetch_unpacked(self, mutagen_file): - data = self.fetch(mutagen_file) - if data: - items = str(data).split("/") - else: - items = [] - packing_length = 2 - return list(items) + [None] * (packing_length - len(items)) - - def get(self, mutagen_file): - return self._fetch_unpacked(mutagen_file)[self.pack_pos] - - def set(self, mutagen_file, value): - items = self._fetch_unpacked(mutagen_file) - items[self.pack_pos] = value - if items[0] is None: - items[0] = "" - if items[1] is None: - items.pop() # Do not store last value - self.store(mutagen_file, "/".join(map(str, items))) - - def delete(self, mutagen_file): - if self.pack_pos == 0: - super(MP3SlashPackStorageStyle, self).delete(mutagen_file) - else: - self.set(mutagen_file, None) - - -class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): - """Converts between APIC frames and ``Image`` instances. - - The `get_list` method inherited from ``ListStorageStyle`` returns a - list of ``Image``s. Similarly, the `set_list` method accepts a - list of ``Image``s as its ``values`` argument. - """ - - def __init__(self): - super(MP3ImageStorageStyle, self).__init__(key="APIC") - self.as_type = bytes - - def deserialize(self, apic_frame): - """Convert APIC frame into Image.""" - return Image(data=apic_frame.data, desc=apic_frame.desc, type=apic_frame.type) - - def fetch(self, mutagen_file): - return mutagen_file.tags.getall(self.key) - - def store(self, mutagen_file, frames): - mutagen_file.tags.setall(self.key, frames) - - def delete(self, mutagen_file): - mutagen_file.tags.delall(self.key) - - def serialize(self, image): - """Return an APIC frame populated with data from ``image``.""" - assert isinstance(image, Image) - frame = mutagen.id3.Frames[self.key]() - frame.data = image.data - frame.mime = image.mime_type - frame.desc = image.desc or "" - - # For compatibility with OS X/iTunes prefer latin-1 if possible. - # See issue #899 - try: - frame.desc.encode("latin-1") - except UnicodeEncodeError: - frame.encoding = mutagen.id3.Encoding.UTF16 - else: - frame.encoding = mutagen.id3.Encoding.LATIN1 - - frame.type = image.type_index - return frame - - -class MP3SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP3DescStorageStyle): - def __init__(self, index=0, **kwargs): - super(MP3SoundCheckStorageStyle, self).__init__(**kwargs) - self.index = index - - -class ASFImageStorageStyle(ListStorageStyle): - """Store images packed into Windows Media/ASF byte array attributes. - Values are `Image` objects. - """ - - formats = ["ASF"] - - def __init__(self): - super(ASFImageStorageStyle, self).__init__(key="WM/Picture") - - def deserialize(self, asf_picture): - mime, data, type, desc = _unpack_asf_image(asf_picture.value) - return Image(data, desc=desc, type=type) - - def serialize(self, image): - pic = mutagen.asf.ASFByteArrayAttribute() - pic.value = _pack_asf_image( - image.mime_type, - image.data, - type=image.type_index, - description=image.desc or "", - ) - return pic - - -class VorbisImageStorageStyle(ListStorageStyle): - """Store images in Vorbis comments. Both legacy COVERART fields and - modern METADATA_BLOCK_PICTURE tags are supported. Data is - base64-encoded. Values are `Image` objects. - """ - - formats = ["OggOpus", "OggTheora", "OggSpeex", "OggVorbis", "OggFlac"] - - def __init__(self): - super(VorbisImageStorageStyle, self).__init__(key="metadata_block_picture") - self.as_type = bytes - - def fetch(self, mutagen_file): - images = [] - if "metadata_block_picture" not in mutagen_file: - # Try legacy COVERART tags. - if "coverart" in mutagen_file: - for data in mutagen_file["coverart"]: - images.append(Image(base64.b64decode(data))) - return images - for data in mutagen_file["metadata_block_picture"]: - try: - pic = mutagen.flac.Picture(base64.b64decode(data)) - except (TypeError, AttributeError): - continue - images.append(Image(data=pic.data, desc=pic.desc, type=pic.type)) - return images - - def store(self, mutagen_file, image_data): - # Strip all art, including legacy COVERART. - if "coverart" in mutagen_file: - del mutagen_file["coverart"] - if "coverartmime" in mutagen_file: - del mutagen_file["coverartmime"] - super(VorbisImageStorageStyle, self).store(mutagen_file, image_data) - - def serialize(self, image): - """Turn a Image into a base64 encoded FLAC picture block.""" - pic = mutagen.flac.Picture() - pic.data = image.data - pic.type = image.type_index - pic.mime = image.mime_type - pic.desc = image.desc or "" - - # Encoding with base64 returns bytes on both Python 2 and 3. - # Mutagen requires the data to be a Unicode string, so we decode - # it before passing it along. - return base64.b64encode(pic.write()).decode("ascii") - - -class FlacImageStorageStyle(ListStorageStyle): - """Converts between ``mutagen.flac.Picture`` and ``Image`` instances.""" - - formats = ["FLAC"] - - def __init__(self): - super(FlacImageStorageStyle, self).__init__(key="") - - def fetch(self, mutagen_file): - return mutagen_file.pictures - - def deserialize(self, flac_picture): - return Image( - data=flac_picture.data, desc=flac_picture.desc, type=flac_picture.type - ) - - def store(self, mutagen_file, pictures): - """``pictures`` is a list of mutagen.flac.Picture instances.""" - mutagen_file.clear_pictures() - for pic in pictures: - mutagen_file.add_picture(pic) - - def serialize(self, image): - """Turn a Image into a mutagen.flac.Picture.""" - pic = mutagen.flac.Picture() - pic.data = image.data - pic.type = image.type_index - pic.mime = image.mime_type - pic.desc = image.desc or "" - return pic - - def delete(self, mutagen_file): - """Remove all images from the file.""" - mutagen_file.clear_pictures() - - -class APEv2ImageStorageStyle(ListStorageStyle): - """Store images in APEv2 tags. Values are `Image` objects.""" - - formats = ["APEv2File", "WavPack", "Musepack", "MonkeysAudio", "OptimFROG"] - - TAG_NAMES = { - ImageType.other: "Cover Art (other)", - ImageType.icon: "Cover Art (icon)", - ImageType.other_icon: "Cover Art (other icon)", - ImageType.front: "Cover Art (front)", - ImageType.back: "Cover Art (back)", - ImageType.leaflet: "Cover Art (leaflet)", - ImageType.media: "Cover Art (media)", - ImageType.lead_artist: "Cover Art (lead)", - ImageType.artist: "Cover Art (artist)", - ImageType.conductor: "Cover Art (conductor)", - ImageType.group: "Cover Art (band)", - ImageType.composer: "Cover Art (composer)", - ImageType.lyricist: "Cover Art (lyricist)", - ImageType.recording_location: "Cover Art (studio)", - ImageType.recording_session: "Cover Art (recording)", - ImageType.performance: "Cover Art (performance)", - ImageType.screen_capture: "Cover Art (movie scene)", - ImageType.fish: "Cover Art (colored fish)", - ImageType.illustration: "Cover Art (illustration)", - ImageType.artist_logo: "Cover Art (band logo)", - ImageType.publisher_logo: "Cover Art (publisher logo)", - } - - def __init__(self): - super(APEv2ImageStorageStyle, self).__init__(key="") - - def fetch(self, mutagen_file): - images = [] - for cover_type, cover_tag in self.TAG_NAMES.items(): - try: - frame = mutagen_file[cover_tag] - text_delimiter_index = frame.value.find(b"\x00") - if text_delimiter_index > 0: - comment = frame.value[0:text_delimiter_index] - comment = comment.decode("utf-8", "replace") - else: - comment = None - image_data = frame.value[text_delimiter_index + 1 :] - images.append(Image(data=image_data, type=cover_type, desc=comment)) - except KeyError: - pass - - return images - - def set_list(self, mutagen_file, values): - self.delete(mutagen_file) - - for image in values: - image_type = image.type or ImageType.other - comment = image.desc or "" - image_data = comment.encode("utf-8") + b"\x00" + image.data - cover_tag = self.TAG_NAMES[image_type] - mutagen_file[cover_tag] = image_data - - def delete(self, mutagen_file): - """Remove all images from the file.""" - for cover_tag in self.TAG_NAMES.values(): - try: - del mutagen_file[cover_tag] - except KeyError: - pass - # MediaField is a descriptor that represents a single logical field. It # aggregates several StorageStyles describing how to access the data for diff --git a/mediafile/storage/__init__.py b/mediafile/storage/__init__.py new file mode 100644 index 0000000..0340ee6 --- /dev/null +++ b/mediafile/storage/__init__.py @@ -0,0 +1,20 @@ +from .afs import ASFImageStorageStyle, ASFStorageStyle +from .ape import APEv2ImageStorageStyle +from .flac import FlacImageStorageStyle +from .mp3 import MP3ImageStorageStyle, MP3PeopleStorageStyle, MP3StorageStyle +from .mp4 import MP4ImageStorageStyle, MP4SoundCheckStorageStyle, MP4StorageStyle +from .vorbis import VorbisImageStorageStyle + +__all__ = [ + "ASFStorageStyle", + "ASFImageStorageStyle", + "APEv2ImageStorageStyle", + "FlacImageStorageStyle", + "MP3StorageStyle", + "MP3PeopleStorageStyle", + "MP3ImageStorageStyle", + "MP4StorageStyle", + "MP4SoundCheckStorageStyle", + "MP4ImageStorageStyle", + "VorbisImageStorageStyle", +] diff --git a/mediafile/storage/afs.py b/mediafile/storage/afs.py new file mode 100644 index 0000000..4d47cf2 --- /dev/null +++ b/mediafile/storage/afs.py @@ -0,0 +1,40 @@ +import mutagen +import mutagen.asf + +from .base import ListStorageStyle + + +class ASFStorageStyle(ListStorageStyle): + """A general storage style for Windows Media/ASF files.""" + + formats = ["ASF"] + + def deserialize(self, data): + if isinstance(data, mutagen.asf.ASFBaseAttribute): + data = data.value + return data + + +class ASFImageStorageStyle(ListStorageStyle): + """Store images packed into Windows Media/ASF byte array attributes. + Values are `Image` objects. + """ + + formats = ["ASF"] + + def __init__(self): + super(ASFImageStorageStyle, self).__init__(key="WM/Picture") + + def deserialize(self, asf_picture): + mime, data, type, desc = _unpack_asf_image(asf_picture.value) + return Image(data, desc=desc, type=type) + + def serialize(self, image): + pic = mutagen.asf.ASFByteArrayAttribute() + pic.value = _pack_asf_image( + image.mime_type, + image.data, + type=image.type_index, + description=image.desc or "", + ) + return pic diff --git a/mediafile/storage/ape.py b/mediafile/storage/ape.py new file mode 100644 index 0000000..6d5e648 --- /dev/null +++ b/mediafile/storage/ape.py @@ -0,0 +1,70 @@ +from .base import ListStorageStyle + + +class APEv2ImageStorageStyle(ListStorageStyle): + """Store images in APEv2 tags. Values are `Image` objects.""" + + formats = ["APEv2File", "WavPack", "Musepack", "MonkeysAudio", "OptimFROG"] + + TAG_NAMES = { + ImageType.other: "Cover Art (other)", + ImageType.icon: "Cover Art (icon)", + ImageType.other_icon: "Cover Art (other icon)", + ImageType.front: "Cover Art (front)", + ImageType.back: "Cover Art (back)", + ImageType.leaflet: "Cover Art (leaflet)", + ImageType.media: "Cover Art (media)", + ImageType.lead_artist: "Cover Art (lead)", + ImageType.artist: "Cover Art (artist)", + ImageType.conductor: "Cover Art (conductor)", + ImageType.group: "Cover Art (band)", + ImageType.composer: "Cover Art (composer)", + ImageType.lyricist: "Cover Art (lyricist)", + ImageType.recording_location: "Cover Art (studio)", + ImageType.recording_session: "Cover Art (recording)", + ImageType.performance: "Cover Art (performance)", + ImageType.screen_capture: "Cover Art (movie scene)", + ImageType.fish: "Cover Art (colored fish)", + ImageType.illustration: "Cover Art (illustration)", + ImageType.artist_logo: "Cover Art (band logo)", + ImageType.publisher_logo: "Cover Art (publisher logo)", + } + + def __init__(self): + super(APEv2ImageStorageStyle, self).__init__(key="") + + def fetch(self, mutagen_file): + images = [] + for cover_type, cover_tag in self.TAG_NAMES.items(): + try: + frame = mutagen_file[cover_tag] + text_delimiter_index = frame.value.find(b"\x00") + if text_delimiter_index > 0: + comment = frame.value[0:text_delimiter_index] + comment = comment.decode("utf-8", "replace") + else: + comment = None + image_data = frame.value[text_delimiter_index + 1 :] + images.append(Image(data=image_data, type=cover_type, desc=comment)) + except KeyError: + pass + + return images + + def set_list(self, mutagen_file, values): + self.delete(mutagen_file) + + for image in values: + image_type = image.type or ImageType.other + comment = image.desc or "" + image_data = comment.encode("utf-8") + b"\x00" + image.data + cover_tag = self.TAG_NAMES[image_type] + mutagen_file[cover_tag] = image_data + + def delete(self, mutagen_file): + """Remove all images from the file.""" + for cover_tag in self.TAG_NAMES.values(): + try: + del mutagen_file[cover_tag] + except KeyError: + pass diff --git a/mediafile/storage/base.py b/mediafile/storage/base.py new file mode 100644 index 0000000..604a9a0 --- /dev/null +++ b/mediafile/storage/base.py @@ -0,0 +1,220 @@ +class StorageStyle(object): + """A strategy for storing a value for a certain tag format (or set + of tag formats). This basic StorageStyle describes simple 1:1 + mapping from raw values to keys in a Mutagen file object; subclasses + describe more sophisticated translations or format-specific access + strategies. + + MediaFile uses a StorageStyle via three methods: ``get()``, + ``set()``, and ``delete()``. It passes a Mutagen file object to + each. + + Internally, the StorageStyle implements ``get()`` and ``set()`` + using two steps that may be overridden by subtypes. To get a value, + the StorageStyle first calls ``fetch()`` to retrieve the value + corresponding to a key and then ``deserialize()`` to convert the raw + Mutagen value to a consumable Python value. Similarly, to set a + field, we call ``serialize()`` to encode the value and then + ``store()`` to assign the result into the Mutagen object. + + Each StorageStyle type has a class-level `formats` attribute that is + a list of strings indicating the formats that the style applies to. + MediaFile only uses StorageStyles that apply to the correct type for + a given audio file. + """ + + formats = [ + "FLAC", + "OggOpus", + "OggTheora", + "OggSpeex", + "OggVorbis", + "OggFlac", + "APEv2File", + "WavPack", + "Musepack", + "MonkeysAudio", + ] + """List of mutagen classes the StorageStyle can handle. + """ + + def __init__(self, key, as_type=str, suffix=None, float_places=2, read_only=False): + """Create a basic storage strategy. Parameters: + + - `key`: The key on the Mutagen file object used to access the + field's data. + - `as_type`: The Python type that the value is stored as + internally (`unicode`, `int`, `bool`, or `bytes`). + - `suffix`: When `as_type` is a string type, append this before + storing the value. + - `float_places`: When the value is a floating-point number and + encoded as a string, the number of digits to store after the + decimal point. + - `read_only`: When true, writing to this field is disabled. + Primary use case is so wrongly named fields can be addressed + in a graceful manner. This does not block the delete method. + + """ + self.key = key + self.as_type = as_type + self.suffix = suffix + self.float_places = float_places + self.read_only = read_only + + # Convert suffix to correct string type. + if self.suffix and self.as_type is str and not isinstance(self.suffix, str): + self.suffix = self.suffix.decode("utf-8") + + # Getter. + + def get(self, mutagen_file): + """Get the value for the field using this style.""" + return self.deserialize(self.fetch(mutagen_file)) + + def fetch(self, mutagen_file): + """Retrieve the raw value of for this tag from the Mutagen file + object. + """ + try: + return mutagen_file[self.key][0] + except (KeyError, IndexError): + return None + + def deserialize(self, mutagen_value): + """Given a raw value stored on a Mutagen object, decode and + return the represented value. + """ + if ( + self.suffix + and isinstance(mutagen_value, str) + and mutagen_value.endswith(self.suffix) + ): + return mutagen_value[: -len(self.suffix)] + else: + return mutagen_value + + # Setter. + + def set(self, mutagen_file, value): + """Assign the value for the field using this style.""" + self.store(mutagen_file, self.serialize(value)) + + def store(self, mutagen_file, value): + """Store a serialized value in the Mutagen file object.""" + mutagen_file[self.key] = [value] + + def serialize(self, value): + """Convert the external Python value to a type that is suitable for + storing in a Mutagen file object. + """ + if isinstance(value, float) and self.as_type is str: + value = "{0:.{1}f}".format(value, self.float_places) + value = self.as_type(value) + elif self.as_type is str: + if isinstance(value, bool): + # Store bools as 1/0 instead of True/False. + value = str(int(bool(value))) + elif isinstance(value, bytes): + value = value.decode("utf-8", "ignore") + else: + value = str(value) + else: + value = self.as_type(value) + + if self.suffix: + value += self.suffix + + return value + + def delete(self, mutagen_file): + """Remove the tag from the file.""" + if self.key in mutagen_file: + del mutagen_file[self.key] + + +class ListStorageStyle(StorageStyle): + """Abstract storage style that provides access to lists. + + The ListMediaField descriptor uses a ListStorageStyle via two + methods: ``get_list()`` and ``set_list()``. It passes a Mutagen file + object to each. + + Subclasses may overwrite ``fetch`` and ``store``. ``fetch`` must + return a (possibly empty) list or `None` if the tag does not exist. + ``store`` receives a serialized list of values as the second argument. + + The `serialize` and `deserialize` methods (from the base + `StorageStyle`) are still called with individual values. This class + handles packing and unpacking the values into lists. + """ + + def get(self, mutagen_file): + """Get the first value in the field's value list.""" + values = self.get_list(mutagen_file) + if values is None: + return None + + try: + return values[0] + except IndexError: + return None + + def get_list(self, mutagen_file): + """Get a list of all values for the field using this style.""" + raw_values = self.fetch(mutagen_file) + if raw_values is None: + return None + + return [self.deserialize(item) for item in raw_values] + + def fetch(self, mutagen_file): + """Get the list of raw (serialized) values.""" + try: + return mutagen_file[self.key] + except KeyError: + return None + + def set(self, mutagen_file, value): + """Set an individual value as the only value for the field using + this style. + """ + if value is None: + self.store(mutagen_file, None) + else: + self.set_list(mutagen_file, [value]) + + def set_list(self, mutagen_file, values): + """Set all values for the field using this style. `values` + should be an iterable. + """ + if values is None: + self.delete(mutagen_file) + else: + self.store(mutagen_file, [self.serialize(value) for value in values]) + + def store(self, mutagen_file, values): + """Set the list of all raw (serialized) values for this field.""" + mutagen_file[self.key] = values + + +class SoundCheckStorageStyleMixin(object): + """A mixin for storage styles that read and write iTunes SoundCheck + analysis values. The object must have an `index` field that + indicates which half of the gain/peak pair---0 or 1---the field + represents. + """ + + def get(self, mutagen_file): + data = self.fetch(mutagen_file) + if data is not None: + return _sc_decode(data)[self.index] + + def set(self, mutagen_file, value): + data = self.fetch(mutagen_file) + if data is None: + gain_peak = [0, 0] + else: + gain_peak = list(_sc_decode(data)) + gain_peak[self.index] = value or 0 + data = self.serialize(_sc_encode(*gain_peak)) + self.store(mutagen_file, data) diff --git a/mediafile/storage/flac.py b/mediafile/storage/flac.py new file mode 100644 index 0000000..c4a7555 --- /dev/null +++ b/mediafile/storage/flac.py @@ -0,0 +1,34 @@ +class FlacImageStorageStyle(ListStorageStyle): + """Converts between ``mutagen.flac.Picture`` and ``Image`` instances.""" + + formats = ["FLAC"] + + def __init__(self): + super(FlacImageStorageStyle, self).__init__(key="") + + def fetch(self, mutagen_file): + return mutagen_file.pictures + + def deserialize(self, flac_picture): + return Image( + data=flac_picture.data, desc=flac_picture.desc, type=flac_picture.type + ) + + def store(self, mutagen_file, pictures): + """``pictures`` is a list of mutagen.flac.Picture instances.""" + mutagen_file.clear_pictures() + for pic in pictures: + mutagen_file.add_picture(pic) + + def serialize(self, image): + """Turn a Image into a mutagen.flac.Picture.""" + pic = mutagen.flac.Picture() + pic.data = image.data + pic.type = image.type_index + pic.mime = image.mime_type + pic.desc = image.desc or "" + return pic + + def delete(self, mutagen_file): + """Remove all images from the file.""" + mutagen_file.clear_pictures() diff --git a/mediafile/storage/mp3.py b/mediafile/storage/mp3.py new file mode 100644 index 0000000..384c73f --- /dev/null +++ b/mediafile/storage/mp3.py @@ -0,0 +1,287 @@ +import mutagen +import mutagen._util +import mutagen.asf +import mutagen.flac +import mutagen.id3 +import mutagen.mp3 +import mutagen.mp4 + +from .base import ListStorageStyle, StorageStyle + + +class MP3StorageStyle(StorageStyle): + """Store data in ID3 frames.""" + + formats = ["MP3", "AIFF", "DSF", "WAVE"] + + def __init__(self, key, id3_lang=None, **kwargs): + """Create a new ID3 storage style. `id3_lang` is the value for + the language field of newly created frames. + """ + self.id3_lang = id3_lang + super(MP3StorageStyle, self).__init__(key, **kwargs) + + def fetch(self, mutagen_file): + try: + return mutagen_file[self.key].text[0] + except (KeyError, IndexError): + return None + + def store(self, mutagen_file, value): + frame = mutagen.id3.Frames[self.key](encoding=3, text=[value]) + mutagen_file.tags.setall(self.key, [frame]) + + +class MP3PeopleStorageStyle(MP3StorageStyle): + """Store list of people in ID3 frames.""" + + def __init__(self, key, involvement="", **kwargs): + self.involvement = involvement + super(MP3PeopleStorageStyle, self).__init__(key, **kwargs) + + def store(self, mutagen_file, value): + frames = mutagen_file.tags.getall(self.key) + + # Try modifying in place. + found = False + for frame in frames: + if frame.encoding == mutagen.id3._specs.Encoding.UTF8: + for pair in frame.people: + if pair[0].lower() == self.involvement.lower(): + pair[1] = value + found = True + + # Try creating a new frame. + if not found: + frame = mutagen.id3.Frames[self.key]( + encoding=mutagen.id3._specs.Encoding.UTF8, + people=[[self.involvement, value]], + ) + mutagen_file.tags.add(frame) + + def fetch(self, mutagen_file): + for frame in mutagen_file.tags.getall(self.key): + for pair in frame.people: + if pair[0].lower() == self.involvement.lower(): + try: + return pair[1] + except IndexError: + return None + + +class MP3ListStorageStyle(ListStorageStyle, MP3StorageStyle): + """Store lists of data in multiple ID3 frames.""" + + def fetch(self, mutagen_file): + try: + return mutagen_file[self.key].text + except KeyError: + return [] + + def store(self, mutagen_file, values): + frame = mutagen.id3.Frames[self.key](encoding=3, text=values) + mutagen_file.tags.setall(self.key, [frame]) + + +class MP3UFIDStorageStyle(MP3StorageStyle): + """Store string data in a UFID ID3 frame with a particular owner.""" + + def __init__(self, owner, **kwargs): + self.owner = owner + super(MP3UFIDStorageStyle, self).__init__("UFID:" + owner, **kwargs) + + def fetch(self, mutagen_file): + try: + return mutagen_file[self.key].data + except KeyError: + return None + + def store(self, mutagen_file, value): + # This field type stores text data as encoded data. + assert isinstance(value, str) + value = value.encode("utf-8") + + frames = mutagen_file.tags.getall(self.key) + for frame in frames: + # Replace existing frame data. + if frame.owner == self.owner: + frame.data = value + else: + # New frame. + frame = mutagen.id3._frames.UFID(owner=self.owner, data=value) + mutagen_file.tags.setall(self.key, [frame]) + + +class MP3DescStorageStyle(MP3StorageStyle): + """Store data in a TXXX (or similar) ID3 frame. The frame is + selected based its ``desc`` field. + ``attr`` allows to specify name of data accessor property in the frame. + Most of frames use `text`. + ``multispec`` specifies if frame data is ``mutagen.id3.MultiSpec`` + which means that the data is being packed in the list. + """ + + def __init__(self, desc="", key="TXXX", attr="text", multispec=True, **kwargs): + assert isinstance(desc, str) + self.description = desc + self.attr = attr + self.multispec = multispec + super(MP3DescStorageStyle, self).__init__(key=key, **kwargs) + + def store(self, mutagen_file, value): + frames = mutagen_file.tags.getall(self.key) + if self.multispec: + value = [value] + + # Try modifying in place. + found = False + for frame in frames: + if frame.desc.lower() == self.description.lower(): + setattr(frame, self.attr, value) + frame.encoding = mutagen.id3._specs.Encoding.UTF8 + found = True + + # Try creating a new frame. + if not found: + frame = mutagen.id3.Frames[self.key]( + desc=self.description, + encoding=mutagen.id3._specs.Encoding.UTF8, + **{self.attr: value}, + ) + if self.id3_lang: + frame.lang = self.id3_lang + mutagen_file.tags.add(frame) + + def fetch(self, mutagen_file): + for frame in mutagen_file.tags.getall(self.key): + if frame.desc.lower() == self.description.lower(): + if not self.multispec: + return getattr(frame, self.attr) + try: + return getattr(frame, self.attr)[0] + except IndexError: + return None + + def delete(self, mutagen_file): + found_frame = None + for frame in mutagen_file.tags.getall(self.key): + if frame.desc.lower() == self.description.lower(): + found_frame = frame + break + if found_frame is not None: + del mutagen_file[frame.HashKey] + + +class MP3ListDescStorageStyle(MP3DescStorageStyle, ListStorageStyle): + def __init__(self, desc="", key="TXXX", split_v23=False, **kwargs): + self.split_v23 = split_v23 + super(MP3ListDescStorageStyle, self).__init__(desc=desc, key=key, **kwargs) + + def fetch(self, mutagen_file): + for frame in mutagen_file.tags.getall(self.key): + if frame.desc.lower() == self.description.lower(): + if mutagen_file.tags.version == (2, 3, 0) and self.split_v23: + return sum((el.split("/") for el in frame.text), []) + else: + return frame.text + return [] + + def store(self, mutagen_file, values): + self.delete(mutagen_file) + frame = mutagen.id3.Frames[self.key]( + desc=self.description, + text=values, + encoding=mutagen.id3._specs.Encoding.UTF8, + ) + if self.id3_lang: + frame.lang = self.id3_lang + mutagen_file.tags.add(frame) + + +class MP3SlashPackStorageStyle(MP3StorageStyle): + """Store value as part of pair that is serialized as a slash- + separated string. + """ + + def __init__(self, key, pack_pos=0, **kwargs): + super(MP3SlashPackStorageStyle, self).__init__(key, **kwargs) + self.pack_pos = pack_pos + + def _fetch_unpacked(self, mutagen_file): + data = self.fetch(mutagen_file) + if data: + items = str(data).split("/") + else: + items = [] + packing_length = 2 + return list(items) + [None] * (packing_length - len(items)) + + def get(self, mutagen_file): + return self._fetch_unpacked(mutagen_file)[self.pack_pos] + + def set(self, mutagen_file, value): + items = self._fetch_unpacked(mutagen_file) + items[self.pack_pos] = value + if items[0] is None: + items[0] = "" + if items[1] is None: + items.pop() # Do not store last value + self.store(mutagen_file, "/".join(map(str, items))) + + def delete(self, mutagen_file): + if self.pack_pos == 0: + super(MP3SlashPackStorageStyle, self).delete(mutagen_file) + else: + self.set(mutagen_file, None) + + +class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): + """Converts between APIC frames and ``Image`` instances. + + The `get_list` method inherited from ``ListStorageStyle`` returns a + list of ``Image``s. Similarly, the `set_list` method accepts a + list of ``Image``s as its ``values`` argument. + """ + + def __init__(self): + super(MP3ImageStorageStyle, self).__init__(key="APIC") + self.as_type = bytes + + def deserialize(self, apic_frame): + """Convert APIC frame into Image.""" + return Image(data=apic_frame.data, desc=apic_frame.desc, type=apic_frame.type) + + def fetch(self, mutagen_file): + return mutagen_file.tags.getall(self.key) + + def store(self, mutagen_file, frames): + mutagen_file.tags.setall(self.key, frames) + + def delete(self, mutagen_file): + mutagen_file.tags.delall(self.key) + + def serialize(self, image): + """Return an APIC frame populated with data from ``image``.""" + assert isinstance(image, Image) + frame = mutagen.id3.Frames[self.key]() + frame.data = image.data + frame.mime = image.mime_type + frame.desc = image.desc or "" + + # For compatibility with OS X/iTunes prefer latin-1 if possible. + # See issue #899 + try: + frame.desc.encode("latin-1") + except UnicodeEncodeError: + frame.encoding = mutagen.id3._specs.Encoding.UTF16 + else: + frame.encoding = mutagen.id3._specs.Encoding.LATIN1 + + frame.type = image.type_index + return frame + + +class MP3SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP3DescStorageStyle): + def __init__(self, index=0, **kwargs): + super(MP3SoundCheckStorageStyle, self).__init__(**kwargs) + self.index = index diff --git a/mediafile/storage/mp4.py b/mediafile/storage/mp4.py new file mode 100644 index 0000000..51f14e4 --- /dev/null +++ b/mediafile/storage/mp4.py @@ -0,0 +1,100 @@ +from .base import ListStorageStyle, SoundCheckStorageStyleMixin, StorageStyle + + +class MP4StorageStyle(StorageStyle): + """A general storage style for MPEG-4 tags.""" + + formats = ["MP4"] + + def serialize(self, value): + value = super(MP4StorageStyle, self).serialize(value) + if self.key.startswith("----:") and isinstance(value, str): + value = value.encode("utf-8") + return value + + +class MP4TupleStorageStyle(MP4StorageStyle): + """A style for storing values as part of a pair of numbers in an + MPEG-4 file. + """ + + def __init__(self, key, index=0, **kwargs): + super(MP4TupleStorageStyle, self).__init__(key, **kwargs) + self.index = index + + def deserialize(self, mutagen_value): + items = mutagen_value or [] + packing_length = 2 + return list(items) + [0] * (packing_length - len(items)) + + def get(self, mutagen_file): + value = super(MP4TupleStorageStyle, self).get(mutagen_file)[self.index] + if value == 0: + # The values are always present and saved as integers. So we + # assume that "0" indicates it is not set. + return None + else: + return value + + def set(self, mutagen_file, value): + if value is None: + value = 0 + items = self.deserialize(self.fetch(mutagen_file)) + items[self.index] = int(value) + self.store(mutagen_file, items) + + def delete(self, mutagen_file): + if self.index == 0: + super(MP4TupleStorageStyle, self).delete(mutagen_file) + else: + self.set(mutagen_file, None) + + +class MP4ListStorageStyle(ListStorageStyle, MP4StorageStyle): + pass + + +class MP4SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP4StorageStyle): + def __init__(self, key, index=0, **kwargs): + super(MP4SoundCheckStorageStyle, self).__init__(key, **kwargs) + self.index = index + + +class MP4BoolStorageStyle(MP4StorageStyle): + """A style for booleans in MPEG-4 files. (MPEG-4 has an atom type + specifically for representing booleans.) + """ + + def get(self, mutagen_file): + try: + return mutagen_file[self.key] + except KeyError: + return None + + def get_list(self, mutagen_file): + raise NotImplementedError("MP4 bool storage does not support lists") + + def set(self, mutagen_file, value): + mutagen_file[self.key] = value + + def set_list(self, mutagen_file, values): + raise NotImplementedError("MP4 bool storage does not support lists") + + +class MP4ImageStorageStyle(MP4ListStorageStyle): + """Store images as MPEG-4 image atoms. Values are `Image` objects.""" + + def __init__(self, **kwargs): + super(MP4ImageStorageStyle, self).__init__(key="covr", **kwargs) + + def deserialize(self, data): + return Image(data) + + def serialize(self, image): + if image.mime_type == "image/png": + kind = mutagen.mp4.MP4Cover.FORMAT_PNG + elif image.mime_type == "image/jpeg": + kind = mutagen.mp4.MP4Cover.FORMAT_JPEG + else: + raise ValueError("MP4 files only supports PNG and JPEG images") + return mutagen.mp4.MP4Cover(image.data, kind) diff --git a/mediafile/storage/vorbis.py b/mediafile/storage/vorbis.py new file mode 100644 index 0000000..1ca9654 --- /dev/null +++ b/mediafile/storage/vorbis.py @@ -0,0 +1,56 @@ +import base64 + +import mutagen +import mutagen.flac + +from .base import ListStorageStyle + + +class VorbisImageStorageStyle(ListStorageStyle): + """Store images in Vorbis comments. Both legacy COVERART fields and + modern METADATA_BLOCK_PICTURE tags are supported. Data is + base64-encoded. Values are `Image` objects. + """ + + formats = ["OggOpus", "OggTheora", "OggSpeex", "OggVorbis", "OggFlac"] + + def __init__(self): + super(VorbisImageStorageStyle, self).__init__(key="metadata_block_picture") + self.as_type = bytes + + def fetch(self, mutagen_file): + images = [] + if "metadata_block_picture" not in mutagen_file: + # Try legacy COVERART tags. + if "coverart" in mutagen_file: + for data in mutagen_file["coverart"]: + images.append(Image(base64.b64decode(data))) + return images + for data in mutagen_file["metadata_block_picture"]: + try: + pic = mutagen.flac.Picture(base64.b64decode(data)) + except (TypeError, AttributeError): + continue + images.append(Image(data=pic.data, desc=pic.desc, type=pic.type)) + return images + + def store(self, mutagen_file, image_data): + # Strip all art, including legacy COVERART. + if "coverart" in mutagen_file: + del mutagen_file["coverart"] + if "coverartmime" in mutagen_file: + del mutagen_file["coverartmime"] + super(VorbisImageStorageStyle, self).store(mutagen_file, image_data) + + def serialize(self, image): + """Turn a Image into a base64 encoded FLAC picture block.""" + pic = mutagen.flac.Picture() + pic.data = image.data + pic.type = image.type_index + pic.mime = image.mime_type + pic.desc = image.desc or "" + + # Encoding with base64 returns bytes on both Python 2 and 3. + # Mutagen requires the data to be a Unicode string, so we decode + # it before passing it along. + return base64.b64encode(pic.write()).decode("ascii") From 80145c8c2e0a031c1d4da7a872ca200cc0e2d3af Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 11:58:51 +0200 Subject: [PATCH 06/15] Moved field classes to own file. --- mediafile/__init__.py | 304 ---------------------------------------- mediafile/fields.py | 315 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 315 insertions(+), 304 deletions(-) create mode 100644 mediafile/fields.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 36be8d8..5ab5b4a 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -350,310 +350,6 @@ def type_index(self): -# MediaField is a descriptor that represents a single logical field. It -# aggregates several StorageStyles describing how to access the data for -# each file type. - - -class MediaField(object): - """A descriptor providing access to a particular (abstract) metadata - field. - """ - - def __init__(self, *styles, **kwargs): - """Creates a new MediaField. - - :param styles: `StorageStyle` instances that describe the strategy - for reading and writing the field in particular - formats. There must be at least one style for - each possible file format. - - :param out_type: the type of the value that should be returned when - getting this property. - - """ - self.out_type = kwargs.get("out_type", str) - self._styles = styles - - def styles(self, mutagen_file): - """Yields the list of storage styles of this field that can - handle the MediaFile's format. - """ - for style in self._styles: - if mutagen_file.__class__.__name__ in style.formats: - yield style - - def __get__(self, mediafile, owner=None): - out = None - for style in self.styles(mediafile.mgfile): - out = style.get(mediafile.mgfile) - if out: - break - return _safe_cast(self.out_type, out) - - def __set__(self, mediafile, value): - if value is None: - value = self._none_value() - for style in self.styles(mediafile.mgfile): - if not style.read_only: - style.set(mediafile.mgfile, value) - - def __delete__(self, mediafile): - for style in self.styles(mediafile.mgfile): - style.delete(mediafile.mgfile) - - def _none_value(self): - """Get an appropriate "null" value for this field's type. This - is used internally when setting the field to None. - """ - if self.out_type is int: - return 0 - elif self.out_type is float: - return 0.0 - elif self.out_type is bool: - return False - elif self.out_type is str: - return "" - - -class ListMediaField(MediaField): - """Property descriptor that retrieves a list of multiple values from - a tag. - - Uses ``get_list`` and set_list`` methods of its ``StorageStyle`` - strategies to do the actual work. - """ - - def __get__(self, mediafile, _=None): - for style in self.styles(mediafile.mgfile): - values = style.get_list(mediafile.mgfile) - if values: - return [_safe_cast(self.out_type, value) for value in values] - return None - - def __set__(self, mediafile, values): - for style in self.styles(mediafile.mgfile): - if not style.read_only: - style.set_list(mediafile.mgfile, values) - - def single_field(self): - """Returns a ``MediaField`` descriptor that gets and sets the - first item. - """ - options = {"out_type": self.out_type} - return MediaField(*self._styles, **options) - - -class DateField(MediaField): - """Descriptor that handles serializing and deserializing dates - - The getter parses value from tags into a ``datetime.date`` instance - and setter serializes such an instance into a string. - - For granular access to year, month, and day, use the ``*_field`` - methods to create corresponding `DateItemField`s. - """ - - def __init__(self, *date_styles, **kwargs): - """``date_styles`` is a list of ``StorageStyle``s to store and - retrieve the whole date from. The ``year`` option is an - additional list of fallback styles for the year. The year is - always set on this style, but is only retrieved if the main - storage styles do not return a value. - """ - super(DateField, self).__init__(*date_styles) - year_style = kwargs.get("year", None) - if year_style: - self._year_field = MediaField(*year_style) - - def __get__(self, mediafile, owner=None): - year, month, day = self._get_date_tuple(mediafile) - if not year: - return None - try: - return datetime.date(year, month or 1, day or 1) - except ValueError: # Out of range values. - return None - - def __set__(self, mediafile, date): - if date is None: - self._set_date_tuple(mediafile, None, None, None) - else: - self._set_date_tuple(mediafile, date.year, date.month, date.day) - - def __delete__(self, mediafile): - super(DateField, self).__delete__(mediafile) - if hasattr(self, "_year_field"): - self._year_field.__delete__(mediafile) - - def _get_date_tuple(self, mediafile): - """Get a 3-item sequence representing the date consisting of a - year, month, and day number. Each number is either an integer or - None. - """ - # Get the underlying data and split on hyphens and slashes. - datestring = super(DateField, self).__get__(mediafile, None) - if isinstance(datestring, str): - datestring = re.sub(r"[Tt ].*$", "", str(datestring)) - items = re.split("[-/]", str(datestring)) - else: - items = [] - - # Ensure that we have exactly 3 components, possibly by - # truncating or padding. - items = items[:3] - if len(items) < 3: - items += [None] * (3 - len(items)) - - # Use year field if year is missing. - if not items[0] and hasattr(self, "_year_field"): - items[0] = self._year_field.__get__(mediafile) - - # Convert each component to an integer if possible. - items_ = [] - for item in items: - try: - items_.append(int(item)) - except (TypeError, ValueError): - items_.append(None) - return items_ - - def _set_date_tuple(self, mediafile, year, month=None, day=None): - """Set the value of the field given a year, month, and day - number. Each number can be an integer or None to indicate an - unset component. - """ - if year is None: - self.__delete__(mediafile) - return - - date = ["{0:04d}".format(int(year))] - if month: - date.append("{0:02d}".format(int(month))) - if month and day: - date.append("{0:02d}".format(int(day))) - date = map(str, date) - super(DateField, self).__set__(mediafile, "-".join(date)) - - if hasattr(self, "_year_field"): - self._year_field.__set__(mediafile, year) - - def year_field(self): - return DateItemField(self, 0) - - def month_field(self): - return DateItemField(self, 1) - - def day_field(self): - return DateItemField(self, 2) - - -class DateItemField(MediaField): - """Descriptor that gets and sets constituent parts of a `DateField`: - the month, day, or year. - """ - - def __init__(self, date_field, item_pos): - self.date_field = date_field - self.item_pos = item_pos - - def __get__(self, mediafile, _): - return self.date_field._get_date_tuple(mediafile)[self.item_pos] - - def __set__(self, mediafile, value): - items = self.date_field._get_date_tuple(mediafile) - items[self.item_pos] = value - self.date_field._set_date_tuple(mediafile, *items) - - def __delete__(self, mediafile): - self.__set__(mediafile, None) - - -class CoverArtField(MediaField): - """A descriptor that provides access to the *raw image data* for the - cover image on a file. This is used for backwards compatibility: the - full `ImageListField` provides richer `Image` objects. - - When there are multiple images we try to pick the most likely to be a front - cover. - """ - - def __init__(self): - pass - - def __get__(self, mediafile, _): - candidates = mediafile.images - if candidates: - return self.guess_cover_image(candidates).data - else: - return None - - @staticmethod - def guess_cover_image(candidates): - if len(candidates) == 1: - return candidates[0] - try: - return next(c for c in candidates if c.type == ImageType.front) - except StopIteration: - return candidates[0] - - def __set__(self, mediafile, data): - if data: - mediafile.images = [Image(data=data)] - else: - mediafile.images = [] - - def __delete__(self, mediafile): - delattr(mediafile, "images") - - -class QNumberField(MediaField): - """Access integer-represented Q number fields. - - Access a fixed-point fraction as a float. The stored value is shifted by - `fraction_bits` binary digits to the left and then rounded, yielding a - simple integer. - """ - - def __init__(self, fraction_bits, *args, **kwargs): - super(QNumberField, self).__init__(out_type=int, *args, **kwargs) - self.__fraction_bits = fraction_bits - - def __get__(self, mediafile, owner=None): - q_num = super(QNumberField, self).__get__(mediafile, owner) - if q_num is None: - return None - return q_num / pow(2, self.__fraction_bits) - - def __set__(self, mediafile, value): - q_num = round(value * pow(2, self.__fraction_bits)) - q_num = int(q_num) # needed for py2.7 - super(QNumberField, self).__set__(mediafile, q_num) - - -class ImageListField(ListMediaField): - """Descriptor to access the list of images embedded in tags. - - The getter returns a list of `Image` instances obtained from - the tags. The setter accepts a list of `Image` instances to be - written to the tags. - """ - - def __init__(self): - # The storage styles used here must implement the - # `ListStorageStyle` interface and get and set lists of - # `Image`s. - super(ImageListField, self).__init__( - MP3ImageStorageStyle(), - MP4ImageStorageStyle(), - ASFImageStorageStyle(), - VorbisImageStorageStyle(), - FlacImageStorageStyle(), - APEv2ImageStorageStyle(), - out_type=Image, - ) - - # MediaFile is a collection of fields. diff --git a/mediafile/fields.py b/mediafile/fields.py new file mode 100644 index 0000000..e7b665b --- /dev/null +++ b/mediafile/fields.py @@ -0,0 +1,315 @@ +import datetime +import re + +from .constants import ImageType +from .storage import ( + APEv2ImageStorageStyle, + ASFImageStorageStyle, + FlacImageStorageStyle, + MP3ImageStorageStyle, + MP4ImageStorageStyle, + VorbisImageStorageStyle, +) + +# MediaField is a descriptor that represents a single logical field. It +# aggregates several StorageStyles describing how to access the data for +# each file type. + + +class MediaField(object): + """A descriptor providing access to a particular (abstract) metadata + field. + """ + + def __init__(self, *styles, **kwargs): + """Creates a new MediaField. + + :param styles: `StorageStyle` instances that describe the strategy + for reading and writing the field in particular + formats. There must be at least one style for + each possible file format. + + :param out_type: the type of the value that should be returned when + getting this property. + + """ + self.out_type = kwargs.get("out_type", str) + self._styles = styles + + def styles(self, mutagen_file): + """Yields the list of storage styles of this field that can + handle the MediaFile's format. + """ + for style in self._styles: + if mutagen_file.__class__.__name__ in style.formats: + yield style + + def __get__(self, mediafile, owner=None): + out = None + for style in self.styles(mediafile.mgfile): + out = style.get(mediafile.mgfile) + if out: + break + return _safe_cast(self.out_type, out) + + def __set__(self, mediafile, value): + if value is None: + value = self._none_value() + for style in self.styles(mediafile.mgfile): + if not style.read_only: + style.set(mediafile.mgfile, value) + + def __delete__(self, mediafile): + for style in self.styles(mediafile.mgfile): + style.delete(mediafile.mgfile) + + def _none_value(self): + """Get an appropriate "null" value for this field's type. This + is used internally when setting the field to None. + """ + if self.out_type is int: + return 0 + elif self.out_type is float: + return 0.0 + elif self.out_type is bool: + return False + elif self.out_type is str: + return "" + + +class ListMediaField(MediaField): + """Property descriptor that retrieves a list of multiple values from + a tag. + + Uses ``get_list`` and set_list`` methods of its ``StorageStyle`` + strategies to do the actual work. + """ + + def __get__(self, mediafile, _=None): + for style in self.styles(mediafile.mgfile): + values = style.get_list(mediafile.mgfile) + if values: + return [_safe_cast(self.out_type, value) for value in values] + return None + + def __set__(self, mediafile, values): + for style in self.styles(mediafile.mgfile): + if not style.read_only: + style.set_list(mediafile.mgfile, values) + + def single_field(self): + """Returns a ``MediaField`` descriptor that gets and sets the + first item. + """ + options = {"out_type": self.out_type} + return MediaField(*self._styles, **options) + + +class DateField(MediaField): + """Descriptor that handles serializing and deserializing dates + + The getter parses value from tags into a ``datetime.date`` instance + and setter serializes such an instance into a string. + + For granular access to year, month, and day, use the ``*_field`` + methods to create corresponding `DateItemField`s. + """ + + def __init__(self, *date_styles, **kwargs): + """``date_styles`` is a list of ``StorageStyle``s to store and + retrieve the whole date from. The ``year`` option is an + additional list of fallback styles for the year. The year is + always set on this style, but is only retrieved if the main + storage styles do not return a value. + """ + super(DateField, self).__init__(*date_styles) + year_style = kwargs.get("year", None) + if year_style: + self._year_field = MediaField(*year_style) + + def __get__(self, mediafile, owner=None): + year, month, day = self._get_date_tuple(mediafile) + if not year: + return None + try: + return datetime.date(year, month or 1, day or 1) + except ValueError: # Out of range values. + return None + + def __set__(self, mediafile, date): + if date is None: + self._set_date_tuple(mediafile, None, None, None) + else: + self._set_date_tuple(mediafile, date.year, date.month, date.day) + + def __delete__(self, mediafile): + super(DateField, self).__delete__(mediafile) + if hasattr(self, "_year_field"): + self._year_field.__delete__(mediafile) + + def _get_date_tuple(self, mediafile): + """Get a 3-item sequence representing the date consisting of a + year, month, and day number. Each number is either an integer or + None. + """ + # Get the underlying data and split on hyphens and slashes. + datestring = super(DateField, self).__get__(mediafile, None) + if isinstance(datestring, str): + datestring = re.sub(r"[Tt ].*$", "", str(datestring)) + items = re.split("[-/]", str(datestring)) + else: + items = [] + + # Ensure that we have exactly 3 components, possibly by + # truncating or padding. + items = items[:3] + if len(items) < 3: + items += [None] * (3 - len(items)) + + # Use year field if year is missing. + if not items[0] and hasattr(self, "_year_field"): + items[0] = self._year_field.__get__(mediafile) + + # Convert each component to an integer if possible. + items_ = [] + for item in items: + try: + items_.append(int(item)) + except (TypeError, ValueError): + items_.append(None) + return items_ + + def _set_date_tuple(self, mediafile, year, month=None, day=None): + """Set the value of the field given a year, month, and day + number. Each number can be an integer or None to indicate an + unset component. + """ + if year is None: + self.__delete__(mediafile) + return + + date = ["{0:04d}".format(int(year))] + if month: + date.append("{0:02d}".format(int(month))) + if month and day: + date.append("{0:02d}".format(int(day))) + date = map(str, date) + super(DateField, self).__set__(mediafile, "-".join(date)) + + if hasattr(self, "_year_field"): + self._year_field.__set__(mediafile, year) + + def year_field(self): + return DateItemField(self, 0) + + def month_field(self): + return DateItemField(self, 1) + + def day_field(self): + return DateItemField(self, 2) + + +class DateItemField(MediaField): + """Descriptor that gets and sets constituent parts of a `DateField`: + the month, day, or year. + """ + + def __init__(self, date_field, item_pos): + self.date_field = date_field + self.item_pos = item_pos + + def __get__(self, mediafile, _): + return self.date_field._get_date_tuple(mediafile)[self.item_pos] + + def __set__(self, mediafile, value): + items = self.date_field._get_date_tuple(mediafile) + items[self.item_pos] = value + self.date_field._set_date_tuple(mediafile, *items) + + def __delete__(self, mediafile): + self.__set__(mediafile, None) + + +class CoverArtField(MediaField): + """A descriptor that provides access to the *raw image data* for the + cover image on a file. This is used for backwards compatibility: the + full `ImageListField` provides richer `Image` objects. + + When there are multiple images we try to pick the most likely to be a front + cover. + """ + + def __init__(self): + pass + + def __get__(self, mediafile, _): + candidates = mediafile.images + if candidates: + return self.guess_cover_image(candidates).data + else: + return None + + @staticmethod + def guess_cover_image(candidates): + if len(candidates) == 1: + return candidates[0] + try: + return next(c for c in candidates if c.type == ImageType.front) + except StopIteration: + return candidates[0] + + def __set__(self, mediafile, data): + if data: + mediafile.images = [Image(data=data)] + else: + mediafile.images = [] + + def __delete__(self, mediafile): + delattr(mediafile, "images") + + +class QNumberField(MediaField): + """Access integer-represented Q number fields. + + Access a fixed-point fraction as a float. The stored value is shifted by + `fraction_bits` binary digits to the left and then rounded, yielding a + simple integer. + """ + + def __init__(self, fraction_bits, *args, **kwargs): + super(QNumberField, self).__init__(out_type=int, *args, **kwargs) + self.__fraction_bits = fraction_bits + + def __get__(self, mediafile, owner=None): + q_num = super(QNumberField, self).__get__(mediafile, owner) + if q_num is None: + return None + return q_num / pow(2, self.__fraction_bits) + + def __set__(self, mediafile, value): + q_num = round(value * pow(2, self.__fraction_bits)) + q_num = int(q_num) # needed for py2.7 + super(QNumberField, self).__set__(mediafile, q_num) + + +class ImageListField(ListMediaField): + """Descriptor to access the list of images embedded in tags. + + The getter returns a list of `Image` instances obtained from + the tags. The setter accepts a list of `Image` instances to be + written to the tags. + """ + + def __init__(self): + # The storage styles used here must implement the + # `ListStorageStyle` interface and get and set lists of + # `Image`s. + super(ImageListField, self).__init__( + MP3ImageStorageStyle(), + MP4ImageStorageStyle(), + ASFImageStorageStyle(), + VorbisImageStorageStyle(), + FlacImageStorageStyle(), + APEv2ImageStorageStyle(), + out_type=Image, + ) From 5bfd2cb144ce618c7a8fff7a713addc661bd7465 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 12:01:10 +0200 Subject: [PATCH 07/15] Moved afs unpack util functions to asf module. --- mediafile/__init__.py | 37 ------------------------------------- mediafile/storage/afs.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 5ab5b4a..ca33924 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -191,43 +191,6 @@ def _safe_cast(out_type, val): return val -# Image coding for ASF/WMA. - - -def _unpack_asf_image(data): - """Unpack image data from a WM/Picture tag. Return a tuple - containing the MIME type, the raw image data, a type indicator, and - the image's description. - - This function is treated as "untrusted" and could throw all manner - of exceptions (out-of-bounds, etc.). We should clean this up - sometime so that the failure modes are well-defined. - """ - type, size = struct.unpack_from(" Date: Tue, 14 Oct 2025 12:06:10 +0200 Subject: [PATCH 08/15] Moved mutagen util functions --- mediafile/__init__.py | 67 --------------------------- mediafile/utils/mutagen_wrapper.py | 72 ++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 67 deletions(-) create mode 100644 mediafile/utils/mutagen_wrapper.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index ca33924..ae53c38 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -64,73 +64,6 @@ log = logging.getLogger(__name__) -# Interacting with Mutagen. - - -def mutagen_call(action, filename, func, *args, **kwargs): - """Call a Mutagen function with appropriate error handling. - - `action` is a string describing what the function is trying to do, - and `filename` is the relevant filename. The rest of the arguments - describe the callable to invoke. - - We require at least Mutagen 1.33, where `IOError` is *never* used, - neither for internal parsing errors *nor* for ordinary IO error - conditions such as a bad filename. Mutagen-specific parsing errors and IO - errors are reraised as `UnreadableFileError`. Other exceptions - raised inside Mutagen---i.e., bugs---are reraised as `MutagenError`. - """ - try: - return func(*args, **kwargs) - except mutagen.MutagenError as exc: - log.debug("%s failed: %s", action, str(exc)) - raise UnreadableFileError(filename, str(exc)) - except UnreadableFileError: - # Reraise our errors without changes. - # Used in case of decorating functions (e.g. by `loadfile`). - raise - except Exception as exc: - # Isolate bugs in Mutagen. - log.debug("%s", traceback.format_exc()) - log.error("uncaught Mutagen exception in %s: %s", action, exc) - raise MutagenError(filename, exc) - - -def loadfile(method=True, writable=False, create=False): - """A decorator that works like `mutagen._util.loadfile` but with - additional error handling. - - Opens a file and passes a `mutagen._utils.FileThing` to the - decorated function. Should be used as a decorator for functions - using a `filething` parameter. - """ - - def decorator(func): - f = mutagen._util.loadfile(method, writable, create)(func) - - @functools.wraps(func) - def wrapper(*args, **kwargs): - return mutagen_call("loadfile", "", f, *args, **kwargs) - - return wrapper - - return decorator - - -# Utility. - - -def _update_filething(filething): - """Reopen a `filething` if it's a local file. - - A filething that is *not* an actual file is left unchanged; a - filething with a filename is reopened and a new object is returned. - """ - if filething.filename: - return mutagen._util.FileThing(None, filething.filename, filething.name) - else: - return filething - def _safe_cast(out_type, val): """Try to covert val to out_type but never raise an exception. diff --git a/mediafile/utils/mutagen_wrapper.py b/mediafile/utils/mutagen_wrapper.py new file mode 100644 index 0000000..f3fbf8a --- /dev/null +++ b/mediafile/utils/mutagen_wrapper.py @@ -0,0 +1,72 @@ +import functools +import logging +import traceback + +import mutagen +import mutagen._util + +from mediafile.exceptions import MutagenError, UnreadableFileError + +log = logging.getLogger(__name__) + + +def mutagen_call(action, filename, func, *args, **kwargs): + """Call a Mutagen function with appropriate error handling. + + `action` is a string describing what the function is trying to do, + and `filename` is the relevant filename. The rest of the arguments + describe the callable to invoke. + + We require at least Mutagen 1.33, where `IOError` is *never* used, + neither for internal parsing errors *nor* for ordinary IO error + conditions such as a bad filename. Mutagen-specific parsing errors and IO + errors are reraised as `UnreadableFileError`. Other exceptions + raised inside Mutagen---i.e., bugs---are reraised as `MutagenError`. + """ + try: + return func(*args, **kwargs) + except mutagen._util.MutagenError as exc: + log.debug("%s failed: %s", action, str(exc)) + raise UnreadableFileError(filename, str(exc)) + except UnreadableFileError: + # Reraise our errors without changes. + # Used in case of decorating functions (e.g. by `loadfile`). + raise + except Exception as exc: + # Isolate bugs in Mutagen. + log.debug("%s", traceback.format_exc()) + log.error("uncaught Mutagen exception in %s: %s", action, exc) + raise MutagenError(filename, exc) + + +def loadfile(method=True, writable=False, create=False): + """A decorator that works like `mutagen._util.loadfile` but with + additional error handling. + + Opens a file and passes a `mutagen._utils.FileThing` to the + decorated function. Should be used as a decorator for functions + using a `filething` parameter. + """ + + def decorator(func): + f = mutagen._util.loadfile(method, writable, create)(func) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + return mutagen_call("loadfile", "", f, *args, **kwargs) + + return wrapper + + return decorator + + +def update_filething(filething): + """Reopen a `filething` if it's a local file. + + A filething that is *not* an actual file is left unchanged; a + filething with a filename is reopened and a new object is returned. + """ + if filething.filename: + return mutagen._util.FileThing(None, filething.filename, filething.name) + else: + return filething From 8a1769f9731cc9aec44c707b07941ce3824e6e0b Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 12:07:36 +0200 Subject: [PATCH 09/15] Moved soundcheck util functions into own file. --- mediafile/__init__.py | 67 --------------------------------- mediafile/utils/soundcheck.py | 71 +++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 67 deletions(-) create mode 100644 mediafile/utils/soundcheck.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index ae53c38..6a9c400 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -124,73 +124,6 @@ def _safe_cast(out_type, val): return val -# iTunes Sound Check encoding. - - -def _sc_decode(soundcheck): - """Convert a Sound Check bytestring value to a (gain, peak) tuple as - used by ReplayGain. - """ - # We decode binary data. If one of the formats gives us a text - # string, interpret it as UTF-8. - if isinstance(soundcheck, str): - soundcheck = soundcheck.encode("utf-8") - - # SoundCheck tags consist of 10 numbers, each represented by 8 - # characters of ASCII hex preceded by a space. - try: - soundcheck = codecs.decode(soundcheck.replace(b" ", b""), "hex") - soundcheck = struct.unpack("!iiiiiiiiii", soundcheck) - except (struct.error, TypeError, binascii.Error): - # SoundCheck isn't in the format we expect, so return default - # values. - return 0.0, 0.0 - - # SoundCheck stores absolute calculated/measured RMS value in an - # unknown unit. We need to find the ratio of this measurement - # compared to a reference value of 1000 to get our gain in dB. We - # play it safe by using the larger of the two values (i.e., the most - # attenuation). - maxgain = max(soundcheck[:2]) - if maxgain > 0: - gain = math.log10(maxgain / 1000.0) * -10 - else: - # Invalid gain value found. - gain = 0.0 - - # SoundCheck stores peak values as the actual value of the sample, - # and again separately for the left and right channels. We need to - # convert this to a percentage of full scale, which is 32768 for a - # 16 bit sample. Once again, we play it safe by using the larger of - # the two values. - peak = max(soundcheck[6:8]) / 32768.0 - - return round(gain, 2), round(peak, 6) - - -def _sc_encode(gain, peak): - """Encode ReplayGain gain/peak values as a Sound Check string.""" - # SoundCheck stores the peak value as the actual value of the - # sample, rather than the percentage of full scale that RG uses, so - # we do a simple conversion assuming 16 bit samples. - peak *= 32768.0 - - # SoundCheck stores absolute RMS values in some unknown units rather - # than the dB values RG uses. We can calculate these absolute values - # from the gain ratio using a reference value of 1000 units. We also - # enforce the maximum and minimum value here, which is equivalent to - # about -18.2dB and 30.0dB. - g1 = int(min(round((10 ** (gain / -10)) * 1000), 65534)) or 1 - # Same as above, except our reference level is 2500 units. - g2 = int(min(round((10 ** (gain / -10)) * 2500), 65534)) or 1 - - # The purpose of these values are unknown, but they also seem to be - # unused so we just use zero. - uk = 0 - values = (g1, g1, g2, g2, uk, uk, int(peak), int(peak), uk, uk) - return (" %08X" * 10) % values - - # Cover art and other images. diff --git a/mediafile/utils/soundcheck.py b/mediafile/utils/soundcheck.py new file mode 100644 index 0000000..a06102d --- /dev/null +++ b/mediafile/utils/soundcheck.py @@ -0,0 +1,71 @@ +# iTunes Sound Check encoding. + + +import binascii +import codecs +import math +import struct + + +def sc_decode(soundcheck): + """Convert a Sound Check bytestring value to a (gain, peak) tuple as + used by ReplayGain. + """ + # We decode binary data. If one of the formats gives us a text + # string, interpret it as UTF-8. + if isinstance(soundcheck, str): + soundcheck = soundcheck.encode("utf-8") + + # SoundCheck tags consist of 10 numbers, each represented by 8 + # characters of ASCII hex preceded by a space. + try: + soundcheck = codecs.decode(soundcheck.replace(b" ", b""), "hex") + soundcheck = struct.unpack("!iiiiiiiiii", soundcheck) + except (struct.error, TypeError, binascii.Error): + # SoundCheck isn't in the format we expect, so return default + # values. + return 0.0, 0.0 + + # SoundCheck stores absolute calculated/measured RMS value in an + # unknown unit. We need to find the ratio of this measurement + # compared to a reference value of 1000 to get our gain in dB. We + # play it safe by using the larger of the two values (i.e., the most + # attenuation). + maxgain = max(soundcheck[:2]) + if maxgain > 0: + gain = math.log10(maxgain / 1000.0) * -10 + else: + # Invalid gain value found. + gain = 0.0 + + # SoundCheck stores peak values as the actual value of the sample, + # and again separately for the left and right channels. We need to + # convert this to a percentage of full scale, which is 32768 for a + # 16 bit sample. Once again, we play it safe by using the larger of + # the two values. + peak = max(soundcheck[6:8]) / 32768.0 + + return round(gain, 2), round(peak, 6) + + +def sc_encode(gain, peak): + """Encode ReplayGain gain/peak values as a Sound Check string.""" + # SoundCheck stores the peak value as the actual value of the + # sample, rather than the percentage of full scale that RG uses, so + # we do a simple conversion assuming 16 bit samples. + peak *= 32768.0 + + # SoundCheck stores absolute RMS values in some unknown units rather + # than the dB values RG uses. We can calculate these absolute values + # from the gain ratio using a reference value of 1000 units. We also + # enforce the maximum and minimum value here, which is equivalent to + # about -18.2dB and 30.0dB. + g1 = int(min(round((10 ** (gain / -10)) * 1000), 65534)) or 1 + # Same as above, except our reference level is 2500 units. + g2 = int(min(round((10 ** (gain / -10)) * 2500), 65534)) or 1 + + # The purpose of these values are unknown, but they also seem to be + # unused so we just use zero. + uk = 0 + values = (g1, g1, g2, g2, uk, uk, int(peak), int(peak), uk, uk) + return (" %08X" * 10) % values From 184f59c727612978e7c5f64ff24e07019094599e Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 12:08:13 +0200 Subject: [PATCH 10/15] Moved image util functions into own file --- mediafile/__init__.py | 54 ------------------------------------ mediafile/utils/image.py | 60 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 54 deletions(-) create mode 100644 mediafile/utils/image.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 6a9c400..73bb5ab 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -124,60 +124,6 @@ def _safe_cast(out_type, val): return val -# Cover art and other images. - - -def image_mime_type(data): - """Return the MIME type of the image data (a bytestring).""" - return filetype.guess_mime(data) - - -def image_extension(data): - ext = filetype.guess_extension(data) - # imghdr returned "tiff", so we should keep returning it with filetype. - return ext if ext != "tif" else "tiff" - - -class Image(object): - """Structure representing image data and metadata that can be - stored and retrieved from tags. - - The structure has four properties. - * ``data`` The binary data of the image - * ``desc`` An optional description of the image - * ``type`` An instance of `ImageType` indicating the kind of image - * ``mime_type`` Read-only property that contains the mime type of - the binary data - """ - - def __init__(self, data, desc=None, type=None): - assert isinstance(data, bytes) - if desc is not None: - assert isinstance(desc, str) - self.data = data - self.desc = desc - if isinstance(type, int): - try: - type = list(ImageType)[type] - except IndexError: - log.debug("ignoring unknown image type index %s", type) - type = ImageType.other - self.type = type - - @property - def mime_type(self): - if self.data: - return image_mime_type(self.data) - - @property - def type_index(self): - if self.type is None: - # This method is used when a tag format requires the type - # index to be set, so we return "other" as the default value. - return 0 - return self.type.value - - # MediaFile is a collection of fields. diff --git a/mediafile/utils/image.py b/mediafile/utils/image.py new file mode 100644 index 0000000..9dd8276 --- /dev/null +++ b/mediafile/utils/image.py @@ -0,0 +1,60 @@ +# Cover art and other images. + +import logging + +import filetype + +from mediafile.constants import ImageType + +log = logging.getLogger(__name__) + + +def image_mime_type(data): + """Return the MIME type of the image data (a bytestring).""" + return filetype.guess_mime(data) + + +def image_extension(data): + ext = filetype.guess_extension(data) + # imghdr returned "tiff", so we should keep returning it with filetype. + return ext if ext != "tif" else "tiff" + + +class Image(object): + """Structure representing image data and metadata that can be + stored and retrieved from tags. + + The structure has four properties. + * ``data`` The binary data of the image + * ``desc`` An optional description of the image + * ``type`` An instance of `ImageType` indicating the kind of image + * ``mime_type`` Read-only property that contains the mime type of + the binary data + """ + + def __init__(self, data, desc=None, type=None): + assert isinstance(data, bytes) + if desc is not None: + assert isinstance(desc, str) + self.data = data + self.desc = desc + if isinstance(type, int): + try: + type = list(ImageType)[type] + except IndexError: + log.debug("ignoring unknown image type index %s", type) + type = ImageType.other + self.type = type + + @property + def mime_type(self): + if self.data: + return image_mime_type(self.data) + + @property + def type_index(self): + if self.type is None: + # This method is used when a tag format requires the type + # index to be set, so we return "other" as the default value. + return 0 + return self.type.value From a5b5c6c8ff61b0271df36e17633c34503f1de189 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 12:09:24 +0200 Subject: [PATCH 11/15] Moved safecast util functions. --- mediafile/__init__.py | 57 ---------------------------- mediafile/utils/type_conversion.py | 60 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 57 deletions(-) create mode 100644 mediafile/utils/type_conversion.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 73bb5ab..51bac83 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -65,63 +65,6 @@ -def _safe_cast(out_type, val): - """Try to covert val to out_type but never raise an exception. - - If the value does not exist, return None. Or, if the value - can't be converted, then a sensible default value is returned. - out_type should be bool, int, or unicode; otherwise, the value - is just passed through. - """ - if val is None: - return None - - if out_type is int: - if isinstance(val, int) or isinstance(val, float): - # Just a number. - return int(val) - else: - # Process any other type as a string. - if isinstance(val, bytes): - val = val.decode("utf-8", "ignore") - elif not isinstance(val, str): - val = str(val) - # Get a number from the front of the string. - match = re.match(r"[\+-]?[0-9]+", val.strip()) - return int(match.group(0)) if match else 0 - - elif out_type is bool: - try: - # Should work for strings, bools, ints: - return bool(int(val)) - except ValueError: - return False - - elif out_type is str: - if isinstance(val, bytes): - return val.decode("utf-8", "ignore") - elif isinstance(val, str): - return val - else: - return str(val) - - elif out_type is float: - if isinstance(val, int) or isinstance(val, float): - return float(val) - else: - if isinstance(val, bytes): - val = val.decode("utf-8", "ignore") - else: - val = str(val) - match = re.match(r"[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)", val.strip()) - if match: - val = match.group(0) - if val: - return float(val) - return 0.0 - - else: - return val diff --git a/mediafile/utils/type_conversion.py b/mediafile/utils/type_conversion.py new file mode 100644 index 0000000..95398ea --- /dev/null +++ b/mediafile/utils/type_conversion.py @@ -0,0 +1,60 @@ +import re + + +def safe_cast(out_type, val): + """Try to covert val to out_type but never raise an exception. + + If the value does not exist, return None. Or, if the value + can't be converted, then a sensible default value is returned. + out_type should be bool, int, or unicode; otherwise, the value + is just passed through. + """ + if val is None: + return None + + if out_type is int: + if isinstance(val, int) or isinstance(val, float): + # Just a number. + return int(val) + else: + # Process any other type as a string. + if isinstance(val, bytes): + val = val.decode("utf-8", "ignore") + elif not isinstance(val, str): + val = str(val) + # Get a number from the front of the string. + match = re.match(r"[\+-]?[0-9]+", val.strip()) + return int(match.group(0)) if match else 0 + + elif out_type is bool: + try: + # Should work for strings, bools, ints: + return bool(int(val)) + except ValueError: + return False + + elif out_type is str: + if isinstance(val, bytes): + return val.decode("utf-8", "ignore") + elif isinstance(val, str): + return val + else: + return str(val) + + elif out_type is float: + if isinstance(val, int) or isinstance(val, float): + return float(val) + else: + if isinstance(val, bytes): + val = val.decode("utf-8", "ignore") + else: + val = str(val) + match = re.match(r"[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)", val.strip()) + if match: + val = match.group(0) + if val: + return float(val) + return 0.0 + + else: + return val From bb3857d297596b589302c9d7b4bd166e5184a59b Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 14 Oct 2025 12:19:45 +0200 Subject: [PATCH 12/15] Fixed imports --- mediafile/__init__.py | 67 ++++++++++++++++++++++------------- mediafile/fields.py | 13 +++---- mediafile/storage/__init__.py | 34 ++++++++++++++++-- mediafile/storage/afs.py | 2 ++ mediafile/storage/ape.py | 3 ++ mediafile/storage/base.py | 9 +++-- mediafile/storage/flac.py | 7 ++++ mediafile/storage/mp3.py | 9 ++--- mediafile/storage/mp4.py | 4 +++ mediafile/storage/vorbis.py | 2 ++ mediafile/utils/__init__.py | 16 +++++++++ test/test_mediafile_edge.py | 18 +++++----- 12 files changed, 133 insertions(+), 51 deletions(-) create mode 100644 mediafile/utils/__init__.py diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 51bac83..0587ec5 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -34,40 +34,57 @@ ``StorageStyle`` strategies to handle format specific logic. """ -import base64 -import binascii -import codecs -import datetime -import enum -import functools import logging -import math import os import re -import struct -import traceback -import filetype import mutagen -import mutagen._util -import mutagen.asf -import mutagen.flac -import mutagen.id3 import mutagen.mp3 -import mutagen.mp4 -from .exceptions import FileTypeError, MutagenError, UnreadableFileError - -__version__ = "0.13.0" -__all__ = ["UnreadableFileError", "FileTypeError", "MediaFile"] +from .constants import TYPES, ImageType +from .exceptions import FileTypeError, UnreadableFileError +from .fields import ( + CoverArtField, + DateField, + DateItemField, + ImageListField, + ListMediaField, + MediaField, + QNumberField, +) +from .storage import ( + ASFStorageStyle, + ListStorageStyle, + MP3DescStorageStyle, + MP3ListDescStorageStyle, + MP3ListStorageStyle, + MP3PeopleStorageStyle, + MP3SlashPackStorageStyle, + MP3SoundCheckStorageStyle, + MP3StorageStyle, + MP3UFIDStorageStyle, + MP4BoolStorageStyle, + MP4ListStorageStyle, + MP4SoundCheckStorageStyle, + MP4StorageStyle, + MP4TupleStorageStyle, + StorageStyle, +) +from .utils import Image, loadfile, mutagen_call, update_filething + +__version__ = "1.0.0-rc1" +__all__ = [ + "UnreadableFileError", + "FileTypeError", + "MediaFile", + "Image", + "TYPES", + "ImageType", +] log = logging.getLogger(__name__) - - - - # MediaFile is a collection of fields. @@ -190,7 +207,7 @@ def save(self, **kwargs): "save", self.filename, self.mgfile.save, - _update_filething(self.filething), + update_filething(self.filething), **kwargs, ) @@ -202,7 +219,7 @@ def delete(self): "delete", self.filename, self.mgfile.delete, - _update_filething(self.filething), + update_filething(self.filething), ) # Convenient access to the set of available fields. diff --git a/mediafile/fields.py b/mediafile/fields.py index e7b665b..cda788e 100644 --- a/mediafile/fields.py +++ b/mediafile/fields.py @@ -1,6 +1,11 @@ +# MediaField is a descriptor that represents a single logical field. It +# aggregates several StorageStyles describing how to access the data for +# each file type. import datetime import re +from mediafile.utils import Image, safe_cast + from .constants import ImageType from .storage import ( APEv2ImageStorageStyle, @@ -11,10 +16,6 @@ VorbisImageStorageStyle, ) -# MediaField is a descriptor that represents a single logical field. It -# aggregates several StorageStyles describing how to access the data for -# each file type. - class MediaField(object): """A descriptor providing access to a particular (abstract) metadata @@ -50,7 +51,7 @@ def __get__(self, mediafile, owner=None): out = style.get(mediafile.mgfile) if out: break - return _safe_cast(self.out_type, out) + return safe_cast(self.out_type, out) def __set__(self, mediafile, value): if value is None: @@ -89,7 +90,7 @@ def __get__(self, mediafile, _=None): for style in self.styles(mediafile.mgfile): values = style.get_list(mediafile.mgfile) if values: - return [_safe_cast(self.out_type, value) for value in values] + return [safe_cast(self.out_type, value) for value in values] return None def __set__(self, mediafile, values): diff --git a/mediafile/storage/__init__.py b/mediafile/storage/__init__.py index 0340ee6..0bcb98b 100644 --- a/mediafile/storage/__init__.py +++ b/mediafile/storage/__init__.py @@ -1,20 +1,50 @@ from .afs import ASFImageStorageStyle, ASFStorageStyle from .ape import APEv2ImageStorageStyle +from .base import ListStorageStyle, SoundCheckStorageStyleMixin, StorageStyle from .flac import FlacImageStorageStyle -from .mp3 import MP3ImageStorageStyle, MP3PeopleStorageStyle, MP3StorageStyle -from .mp4 import MP4ImageStorageStyle, MP4SoundCheckStorageStyle, MP4StorageStyle +from .mp3 import ( + MP3DescStorageStyle, + MP3ImageStorageStyle, + MP3ListDescStorageStyle, + MP3ListStorageStyle, + MP3PeopleStorageStyle, + MP3SlashPackStorageStyle, + MP3SoundCheckStorageStyle, + MP3StorageStyle, + MP3UFIDStorageStyle, +) +from .mp4 import ( + MP4BoolStorageStyle, + MP4ImageStorageStyle, + MP4ListStorageStyle, + MP4SoundCheckStorageStyle, + MP4StorageStyle, + MP4TupleStorageStyle, +) from .vorbis import VorbisImageStorageStyle __all__ = [ + "StorageStyle", + "ListStorageStyle", + "SoundCheckStorageStyleMixin", "ASFStorageStyle", "ASFImageStorageStyle", "APEv2ImageStorageStyle", "FlacImageStorageStyle", "MP3StorageStyle", + "MP3SoundCheckStorageStyle", + "MP3DescStorageStyle", "MP3PeopleStorageStyle", + "MP3SlashPackStorageStyle", "MP3ImageStorageStyle", + "MP4TupleStorageStyle", + "MP3ListStorageStyle", + "MP3UFIDStorageStyle", + "MP3ListDescStorageStyle", "MP4StorageStyle", + "MP4BoolStorageStyle", "MP4SoundCheckStorageStyle", "MP4ImageStorageStyle", + "MP4ListStorageStyle", "VorbisImageStorageStyle", ] diff --git a/mediafile/storage/afs.py b/mediafile/storage/afs.py index 738f2f1..c9a766d 100644 --- a/mediafile/storage/afs.py +++ b/mediafile/storage/afs.py @@ -3,6 +3,8 @@ import mutagen import mutagen.asf +from mediafile.utils import Image + from .base import ListStorageStyle diff --git a/mediafile/storage/ape.py b/mediafile/storage/ape.py index 6d5e648..c272bdd 100644 --- a/mediafile/storage/ape.py +++ b/mediafile/storage/ape.py @@ -1,3 +1,6 @@ +from mediafile.constants import ImageType +from mediafile.utils import Image + from .base import ListStorageStyle diff --git a/mediafile/storage/base.py b/mediafile/storage/base.py index 604a9a0..d566f1b 100644 --- a/mediafile/storage/base.py +++ b/mediafile/storage/base.py @@ -1,3 +1,6 @@ +from mediafile.utils import sc_decode, sc_encode + + class StorageStyle(object): """A strategy for storing a value for a certain tag format (or set of tag formats). This basic StorageStyle describes simple 1:1 @@ -207,14 +210,14 @@ class SoundCheckStorageStyleMixin(object): def get(self, mutagen_file): data = self.fetch(mutagen_file) if data is not None: - return _sc_decode(data)[self.index] + return sc_decode(data)[self.index] def set(self, mutagen_file, value): data = self.fetch(mutagen_file) if data is None: gain_peak = [0, 0] else: - gain_peak = list(_sc_decode(data)) + gain_peak = list(sc_decode(data)) gain_peak[self.index] = value or 0 - data = self.serialize(_sc_encode(*gain_peak)) + data = self.serialize(sc_encode(*gain_peak)) self.store(mutagen_file, data) diff --git a/mediafile/storage/flac.py b/mediafile/storage/flac.py index c4a7555..403e6df 100644 --- a/mediafile/storage/flac.py +++ b/mediafile/storage/flac.py @@ -1,3 +1,10 @@ +import mutagen.flac + +from mediafile.utils import Image + +from .base import ListStorageStyle + + class FlacImageStorageStyle(ListStorageStyle): """Converts between ``mutagen.flac.Picture`` and ``Image`` instances.""" diff --git a/mediafile/storage/mp3.py b/mediafile/storage/mp3.py index 384c73f..de34b87 100644 --- a/mediafile/storage/mp3.py +++ b/mediafile/storage/mp3.py @@ -1,12 +1,9 @@ import mutagen -import mutagen._util -import mutagen.asf -import mutagen.flac import mutagen.id3 -import mutagen.mp3 -import mutagen.mp4 -from .base import ListStorageStyle, StorageStyle +from mediafile.utils import Image + +from .base import ListStorageStyle, SoundCheckStorageStyleMixin, StorageStyle class MP3StorageStyle(StorageStyle): diff --git a/mediafile/storage/mp4.py b/mediafile/storage/mp4.py index 51f14e4..3d2abbe 100644 --- a/mediafile/storage/mp4.py +++ b/mediafile/storage/mp4.py @@ -1,3 +1,7 @@ +import mutagen.mp4 + +from mediafile.utils import Image + from .base import ListStorageStyle, SoundCheckStorageStyleMixin, StorageStyle diff --git a/mediafile/storage/vorbis.py b/mediafile/storage/vorbis.py index 1ca9654..9b75ea9 100644 --- a/mediafile/storage/vorbis.py +++ b/mediafile/storage/vorbis.py @@ -3,6 +3,8 @@ import mutagen import mutagen.flac +from mediafile.utils import Image + from .base import ListStorageStyle diff --git a/mediafile/utils/__init__.py b/mediafile/utils/__init__.py new file mode 100644 index 0000000..eccb6b2 --- /dev/null +++ b/mediafile/utils/__init__.py @@ -0,0 +1,16 @@ +from .image import Image, image_extension, image_mime_type +from .mutagen_wrapper import loadfile, mutagen_call, update_filething +from .soundcheck import sc_decode, sc_encode +from .type_conversion import safe_cast + +__all__ = [ + "Image", + "image_mime_type", + "image_extension", + "loadfile", + "mutagen_call", + "update_filething", + "sc_encode", + "sc_decode", + "safe_cast", +] diff --git a/test/test_mediafile_edge.py b/test/test_mediafile_edge.py index 7dd40a1..1f84977 100644 --- a/test/test_mediafile_edge.py +++ b/test/test_mediafile_edge.py @@ -24,7 +24,7 @@ import mediafile from test import _common -_sc = mediafile._safe_cast +_sc = mediafile.utils.safe_cast class EdgeTest(unittest.TestCase): @@ -278,8 +278,8 @@ def test_set_date_to_none(self): class SoundCheckTest(unittest.TestCase): def test_round_trip(self): - data = mediafile._sc_encode(1.0, 1.0) - gain, peak = mediafile._sc_decode(data) + data = mediafile.utils.sc_encode(1.0, 1.0) + gain, peak = mediafile.utils.sc_decode(data) self.assertEqual(gain, 1.0) self.assertEqual(peak, 1.0) @@ -288,31 +288,31 @@ def test_decode_zero(self): b" 80000000 80000000 00000000 00000000 00000000 00000000 " b"00000000 00000000 00000000 00000000" ) - gain, peak = mediafile._sc_decode(data) + gain, peak = mediafile.utils.sc_decode(data) self.assertEqual(gain, 0.0) self.assertEqual(peak, 0.0) def test_malformatted(self): - gain, peak = mediafile._sc_decode(b"foo") + gain, peak = mediafile.utils.sc_decode(b"foo") self.assertEqual(gain, 0.0) self.assertEqual(peak, 0.0) def test_special_characters(self): - gain, peak = mediafile._sc_decode("caf\xe9".encode("utf-8")) + gain, peak = mediafile.utils.sc_decode("caf\xe9".encode("utf-8")) self.assertEqual(gain, 0.0) self.assertEqual(peak, 0.0) def test_decode_handles_unicode(self): # Most of the time, we expect to decode the raw bytes. But some formats # might give us text strings, which we need to handle. - gain, peak = mediafile._sc_decode("caf\xe9") + gain, peak = mediafile.utils.sc_decode("caf\xe9") self.assertEqual(gain, 0.0) self.assertEqual(peak, 0.0) def test_encode_excessive_gain(self): # The mimimum value of SoundCheck gain is 30.0dB. - data = mediafile._sc_encode(60.0, 1.0) - gain, _ = mediafile._sc_decode(data) + data = mediafile.utils.sc_encode(60.0, 1.0) + gain, _ = mediafile.utils.sc_decode(data) self.assertEqual(gain, 30.0) From c05f65cb02c4fa3aa464538f54f51d0327e6a2ff Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 21 Oct 2025 15:59:22 +0200 Subject: [PATCH 13/15] Added commits to git blame. --- .git-blame-ignore-revs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 43d31b8..5523df0 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,17 @@ # Migrated to ruff -7ff9d471b23c45b6d957e7507035af39885546ab \ No newline at end of file +7ff9d471b23c45b6d957e7507035af39885546ab + +# Major refactor 2025 +710b62ceb63b4fd7c2f03acef3ccbf9e1b0ce1ac +50599d09236e408e6f0d473e2daf536876eaa762 +921f0882bf21d0f43b68a6d356dde9a8f901a0aa +ad4d35c81b4739da9e61375607d77fe2a9a5a84e +3579aaa4aa1992f13bf7716605936a63ce0e4f9d +ddc2f69bdb7e4acca5d85c869c9c4b888a51a3ec +3fa7f2d0c6e4facb9fa6559c43cebfc21495a07f +5eb62e2c099e38d3beefda7c7266247b4286aa80 +97bb4b43f82eb2cf6067ebdb9d626a4471559e55 +dbd0ca86085792a2c358fcd2778a8de0cd403a58 +00942a696db8912cbe6b71cd4daf960d91faf979 +58da1e7cfdd3d17f20d7de175a81931d840e145c +cbc64e3e0903585e1865f7e80f92a2b066776ac7 From c089f066fccccba773a89d5bd4cfbeb4833a9b7d Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sun, 26 Oct 2025 09:27:37 +0100 Subject: [PATCH 14/15] Added missing commit for ruff migration to git blame ignore --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 5523df0..a89e309 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,5 +1,6 @@ # Migrated to ruff 7ff9d471b23c45b6d957e7507035af39885546ab +226e455a5605cf70a4387ba6a17da85d41c6ce87 # Major refactor 2025 710b62ceb63b4fd7c2f03acef3ccbf9e1b0ce1ac From 3bb85f5a261f7bfea048e10aea04b2aed3fecec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 17 Nov 2025 07:21:20 +0000 Subject: [PATCH 15/15] Update commit hashes in blame ignore revs --- .git-blame-ignore-revs | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index a89e309..7525b4f 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,18 +1,29 @@ -# Migrated to ruff +# Moved changelog into its own file and added changelog reminder. 7ff9d471b23c45b6d957e7507035af39885546ab +# Removed flask8 in favor of ruff. 226e455a5605cf70a4387ba6a17da85d41c6ce87 -# Major refactor 2025 -710b62ceb63b4fd7c2f03acef3ccbf9e1b0ce1ac -50599d09236e408e6f0d473e2daf536876eaa762 -921f0882bf21d0f43b68a6d356dde9a8f901a0aa -ad4d35c81b4739da9e61375607d77fe2a9a5a84e -3579aaa4aa1992f13bf7716605936a63ce0e4f9d -ddc2f69bdb7e4acca5d85c869c9c4b888a51a3ec -3fa7f2d0c6e4facb9fa6559c43cebfc21495a07f -5eb62e2c099e38d3beefda7c7266247b4286aa80 -97bb4b43f82eb2cf6067ebdb9d626a4471559e55 -dbd0ca86085792a2c358fcd2778a8de0cd403a58 -00942a696db8912cbe6b71cd4daf960d91faf979 -58da1e7cfdd3d17f20d7de175a81931d840e145c -cbc64e3e0903585e1865f7e80f92a2b066776ac7 +# Moved mediafile.py into mediafile/__init__.py to allow easier refactoring. +4d508f94aef6976162993b7d92dc2347120ee306 +# Moved exceptions into own file. +075749781172208a6ce6bfda0e31e9ab6e42f340 +# Modernized exceptions and added common type. +891eef5b2d117b07dd74b17fb84e509db9c24f4c +# Moved constants into own file. +9470f8e4d7b3bd17372d6c82aa19834f2148a795 +# Moved storage classes into own files and folder. +146a462e44e0bd4efa4f67a06378686adc8d38b4 +# Moved field classes to own file. +80145c8c2e0a031c1d4da7a872ca200cc0e2d3af +# Moved afs unpack util functions to asf module. +5bfd2cb144ce618c7a8fff7a713addc661bd7465 +# Moved mutagen util functions +28f336523d26f8ba1d3996c9b90ae5b8c8411b5d +# Moved soundcheck util functions into own file. +8a1769f9731cc9aec44c707b07941ce3824e6e0b +# Moved image util functions into own file +184f59c727612978e7c5f64ff24e07019094599e +# Moved safecast util functions. +a5b5c6c8ff61b0271df36e17633c34503f1de189 +# Fixed imports +bb3857d297596b589302c9d7b4bd166e5184a59b