UMID Implementation

2026-07-02 04:10:53 +00:00 · 2020-01-06 08:27:34 -08:00
parent 1b9547e8c2
commit 5a30ce3afc
4 changed files with 185 additions and 203 deletions
@@ -1,5 +1,10 @@
-import struct
 from typing import Union
+import binascii
+
+
+def binary_to_string(binary_value):
+    return str(binascii.hexlify(binary_value), encoding='ascii')
+

 class UMIDParser:
    """
@@ -9,121 +14,111 @@ class UMIDParser:
    """
    def __init__(self, raw_umid: bytearray):
        self.raw_umid = raw_umid
-
-    @classmethod
-    def binary_to_string(cls, binary_value):
-        result_str = ''
-        for n in range(len(binary_value)):
-            result_str = '{:x}'.format(binary_value[n]) + result_str
-
-        return result_str
-
-    @property
-    def universal_label(self) -> bytearray:
-        return self.raw_umid[0:12]
-
-    @property
-    def basic_umid(self):
-        return self.raw_umid[0:32]
+    #
+    # @property
+    # def universal_label(self) -> bytearray:
+    #     return self.raw_umid[0:12]
+    #
+    # @property
+    # def basic_umid(self):
+    #     return self.raw_umid[0:32]

    def basic_umid_to_str(self):
-        return "%024x-%06x-%032x" % (self.binary_to_string(self.universal_label),
-                                     self.binary_to_string(self.instance_number),
-                                     self.binary_to_string(self.material_number))
-
-    @property
-    def universal_label_is_valid(self) -> bool:
-        valid_preamble = b'\x06\x0a\x2b\x34\x01\x01\x01\x05\x01\x01'
-        return self.universal_label[0:len(valid_preamble)] == valid_preamble
-
-    @property
-    def material_type(self) -> str:
-        material_byte = self.raw_umid[10]
-        if material_byte == 0x1:
-            return 'picture'
-        elif material_byte == 0x2:
-            return 'audio'
-        elif material_byte == 0x3:
-            return 'data'
-        elif material_byte == 0x4:
-            return 'other'
-        elif material_byte == 0x5:
-            return 'picture_single_component'
-        elif material_byte == 0x6:
-            return 'picture_multiple_component'
-        elif material_byte == 0x7:
-            return 'audio_single_component'
-        elif material_byte == 0x9:
-            return 'audio_multiple_component'
-        elif material_byte == 0xb:
-            return 'auxiliary_single_component'
-        elif material_byte == 0xc:
-            return 'auxiliary_multiple_component'
-        elif material_byte == 0xd:
-            return 'mixed_components'
-        elif material_byte == 0xf:
-            return 'not_identified'
-        else:
-            return 'not_recognized'
-
-    @property
-    def material_number_creation_method(self) -> str:
-        method_byte = self.raw_umid[11]
-        method_byte = (method_byte << 4) & 0xf
-        if method_byte == 0x0:
-            return 'undefined'
-        elif method_byte == 0x1:
-            return 'smpte'
-        elif method_byte == 0x2:
-            return 'uuid'
-        elif method_byte == 0x3:
-            return 'masked'
-        elif method_byte == 0x4:
-            return 'ieee1394'
-        elif 0x5 <= method_byte <= 0x7:
-            return 'reserved_undefined'
-        else:
-            return 'unrecognized'
-
-    @property
-    def instance_number_creation_method(self) -> str:
-        method_byte = self.raw_umid[11]
-        method_byte = method_byte & 0xf
-        if method_byte == 0x0:
-            return 'undefined'
-        elif method_byte == 0x01:
-            return 'local_registration'
-        elif method_byte == 0x02:
-            return '24_bit_prs'
-        elif method_byte == 0x03:
-            return 'copy_number_and_16_bit_prs'
-        elif 0x04 <= method_byte <= 0x0e:
-            return 'reserved_undefined'
-        elif method_byte == 0x0f:
-            return 'live_stream'
-        else:
-            return 'unrecognized'
-
-    @property
-    def indicated_length(self) -> str:
-        if self.raw_umid[12] == 0x13:
-            return 'basic'
-        elif self.raw_umid[12] == 0x33:
-            return 'extended'
-
-    @property
-    def instance_number(self) -> bytearray:
-        return self.raw_umid[13:3]
-
-    @property
-    def material_number(self) -> bytearray:
-        return self.raw_umid[16:16]
-
-    @property
-    def source_pack(self) -> Union[bytearray, None]:
-        if self.indicated_length == 'extended':
-            return self.raw_umid[32:32]
-        else:
-            return None
+        return binary_to_string(self.raw_umid[0:13]) + '-' + binary_to_string(self.raw_umid[13:3])
+    #
+    # @property
+    # def universal_label_is_valid(self) -> bool:
+    #     valid_preamble = b'\x06\x0a\x2b\x34\x01\x01\x01\x05\x01\x01'
+    #     return self.universal_label[0:len(valid_preamble)] == valid_preamble
+    #
+    # @property
+    # def material_type(self) -> str:
+    #     material_byte = self.raw_umid[10]
+    #     if material_byte == 0x1:
+    #         return 'picture'
+    #     elif material_byte == 0x2:
+    #         return 'audio'
+    #     elif material_byte == 0x3:
+    #         return 'data'
+    #     elif material_byte == 0x4:
+    #         return 'other'
+    #     elif material_byte == 0x5:
+    #         return 'picture_single_component'
+    #     elif material_byte == 0x6:
+    #         return 'picture_multiple_component'
+    #     elif material_byte == 0x7:
+    #         return 'audio_single_component'
+    #     elif material_byte == 0x9:
+    #         return 'audio_multiple_component'
+    #     elif material_byte == 0xb:
+    #         return 'auxiliary_single_component'
+    #     elif material_byte == 0xc:
+    #         return 'auxiliary_multiple_component'
+    #     elif material_byte == 0xd:
+    #         return 'mixed_components'
+    #     elif material_byte == 0xf:
+    #         return 'not_identified'
+    #     else:
+    #         return 'not_recognized'
+    #
+    # @property
+    # def material_number_creation_method(self) -> str:
+    #     method_byte = self.raw_umid[11]
+    #     method_byte = (method_byte << 4) & 0xf
+    #     if method_byte == 0x0:
+    #         return 'undefined'
+    #     elif method_byte == 0x1:
+    #         return 'smpte'
+    #     elif method_byte == 0x2:
+    #         return 'uuid'
+    #     elif method_byte == 0x3:
+    #         return 'masked'
+    #     elif method_byte == 0x4:
+    #         return 'ieee1394'
+    #     elif 0x5 <= method_byte <= 0x7:
+    #         return 'reserved_undefined'
+    #     else:
+    #         return 'unrecognized'
+    #
+    # @property
+    # def instance_number_creation_method(self) -> str:
+    #     method_byte = self.raw_umid[11]
+    #     method_byte = method_byte & 0xf
+    #     if method_byte == 0x0:
+    #         return 'undefined'
+    #     elif method_byte == 0x01:
+    #         return 'local_registration'
+    #     elif method_byte == 0x02:
+    #         return '24_bit_prs'
+    #     elif method_byte == 0x03:
+    #         return 'copy_number_and_16_bit_prs'
+    #     elif 0x04 <= method_byte <= 0x0e:
+    #         return 'reserved_undefined'
+    #     elif method_byte == 0x0f:
+    #         return 'live_stream'
+    #     else:
+    #         return 'unrecognized'
+    #
+    # @property
+    # def indicated_length(self) -> str:
+    #     if self.raw_umid[12] == 0x13:
+    #         return 'basic'
+    #     elif self.raw_umid[12] == 0x33:
+    #         return 'extended'
+    #
+    # @property
+    # def instance_number(self) -> bytearray:
+    #     return self.raw_umid[13:3]
+    #
+    # @property
+    # def material_number(self) -> bytearray:
+    #     return self.raw_umid[16:16]
+    #
+    # @property
+    # def source_pack(self) -> Union[bytearray, None]:
+    #     if self.indicated_length == 'extended':
+    #         return self.raw_umid[32:32]
+    #     else:
+    #         return None


@@ -1,21 +1,22 @@
 import struct
 import binascii
+from .umid_parser import UMIDParser

 class WavBextReader:
-    def __init__(self,bext_data,encoding):
+    def __init__(self, bext_data, encoding):
        """
        Read Broadcast-WAV extended metadata.
        :param bext_data: The bytes-like data.
        :param encoding: The encoding to use when decoding the text fields of the
                 BEXT metadata scope. According to EBU Rec 3285 this shall be ASCII.
        """
-        packstring = "<256s"+ "32s" + "32s" + "10s" + "8s" + "QH" + "64s" + "hhhhh" + "180s"
+        packstring = "<256s" + "32s" + "32s" + "10s" + "8s" + "QH" + "64s" + "hhhhh" + "180s"

        rest_starts = struct.calcsize(packstring)
        unpacked = struct.unpack(packstring, bext_data[:rest_starts])

        def sanatize_bytes(bytes):
-            first_null = next( (index for index, byte in enumerate(bytes) if byte == 0 ), None )
+            first_null = next((index for index, byte in enumerate(bytes) if byte == 0), None)
            if first_null is not None:
                trimmed = bytes[:first_null]
            else:
@@ -25,68 +26,67 @@ class WavBextReader:
            return decoded

        #: Description. A free-text field up to 256 characters long.
-        self.description     = sanatize_bytes(unpacked[0])
+        self.description = sanatize_bytes(unpacked[0])
        #: Originator. Usually the name of the encoding application, sometimes
        #: an artist name.
-        self.originator      = sanatize_bytes(unpacked[1])
+        self.originator = sanatize_bytes(unpacked[1])
        #: A unique identifier for the file, a serial number.
-        self.originator_ref  = sanatize_bytes(unpacked[2])
+        self.originator_ref = sanatize_bytes(unpacked[2])
        #: Date of the recording, in the format YYYY-MM-DD
        self.originator_date = sanatize_bytes(unpacked[3])
        #: Time of the recording, in the format HH:MM:SS.
        self.originator_time = sanatize_bytes(unpacked[4])
        #: The sample offset of the start of the file relative to an
        #: epoch, usually midnight the day of the recording. 
-        self.time_reference  = unpacked[5]
+        self.time_reference = unpacked[5]
        #: A variable-length text field containing a list of processes and
        #: conversions performed on the file.
-        self.coding_history  = sanatize_bytes(bext_data[rest_starts:])
+        self.coding_history = sanatize_bytes(bext_data[rest_starts:])
        #: BEXT version. 
-        self.version         = unpacked[6]
+        self.version = unpacked[6]
        #: SMPTE 330M UMID of this audio file, 64 bytes are allocated though the UMID
        #: may only be 32 bytes long.
-        self.umid            = None
+        self.umid = None
        #: EBU R128 Integrated loudness, in LUFS.
-        self.loudness_value          = None
+        self.loudness_value = None
        #: EBU R128 Loudness range, in LUFS.
-        self.loudness_range          = None
+        self.loudness_range = None
        #: True peak level, in dBFS TP
-        self.max_true_peak           = None
+        self.max_true_peak = None
        #: EBU R128 Maximum momentary loudness, in LUFS
-        self.max_momentary_loudness  = None
+        self.max_momentary_loudness = None
        #: EBU R128 Maximum short-term loudness, in LUFS.
-        self.max_shortterm_loudness  = None
+        self.max_shortterm_loudness = None

        if self.version > 0:
            self.umid = unpacked[7]

        if self.version > 1:
-            self.loudness_value          = unpacked[8] / 100.0
-            self.loudness_range          = unpacked[9] / 100.0
-            self.max_true_peak           = unpacked[10] / 100.0
-            self.max_momentary_loudness  = unpacked[11] / 100.0
-            self.max_shortterm_loudness  = unpacked[12] / 100.0
-
-    def umid_to_str(self):
-        if self.umid:
-            return str(binascii.hexlify(self.umid), encoding='ascii')
-        else:
-            return None
+            self.loudness_value = unpacked[8] / 100.0
+            self.loudness_range = unpacked[9] / 100.0
+            self.max_true_peak = unpacked[10] / 100.0
+            self.max_momentary_loudness = unpacked[11] / 100.0
+            self.max_shortterm_loudness = unpacked[12] / 100.0

    def to_dict(self):
-        return {'description':      self.description,
-                'originator':       self.originator,
-                'originator_ref':   self.originator_ref,
-                'originator_date':  self.originator_date,
-                'originator_time':  self.originator_time,
-                'time_reference':   self.time_reference,
-                'version':          self.version,
-                'umid':             self.umid_to_str(),
-                'coding_history':   self.coding_history,
-                'loudness_value':   self.loudness_value,
-                'loudness_range':   self.loudness_range,
-                'max_true_peak':    self.max_true_peak,
-                'max_momentary_loudness':   self.max_momentary_loudness,
-                'max_shortterm_loudness':   self.max_shortterm_loudness
-                }
+        if self.umid is not None:
+            umid_parsed = UMIDParser(self.umid)
+            umid_str = umid_parsed.basic_umid_to_str()
+        else:
+            umid_str = None

+        return {'description': self.description,
+                'originator': self.originator,
+                'originator_ref': self.originator_ref,
+                'originator_date': self.originator_date,
+                'originator_time': self.originator_time,
+                'time_reference': self.time_reference,
+                'version': self.version,
+                'umid': umid_str,
+                'coding_history': self.coding_history,
+                'loudness_value': self.loudness_value,
+                'loudness_range': self.loudness_range,
+                'max_true_peak': self.max_true_peak,
+                'max_momentary_loudness': self.max_momentary_loudness,
+                'max_shortterm_loudness': self.max_shortterm_loudness
+                }
@@ -1,6 +1,6 @@
-
 from .riff_parser import parse_chunk, ListChunkDescriptor

+
 class WavInfoChunkReader:

    def __init__(self, f, encoding):
@@ -9,53 +9,48 @@ class WavInfoChunkReader:
        f.seek(0)
        parsed_chunks = parse_chunk(f)

-        list_chunks = [chunk for chunk in parsed_chunks.children \
-                if type(chunk) is ListChunkDescriptor]
+        list_chunks = [chunk for chunk in parsed_chunks.children if type(chunk) is ListChunkDescriptor]
+
+        self.info_chunk = next((chunk for chunk in list_chunks if chunk.signature == b'INFO'), None)

-        self.info_chunk  = next((chunk for chunk in list_chunks \
-                if chunk.signature == b'INFO'), None)
-        
        #: 'ICOP' Copyright
-        self.copyright      = self._get_field(f,b'ICOP')
+        self.copyright = self._get_field(f, b'ICOP')
        #: 'IPRD' Product
-        self.product        = self._get_field(f,b'IPRD')
+        self.product = self._get_field(f, b'IPRD')
        #: 'IGNR' Genre
-        self.genre          = self._get_field(f,b'IGNR')
+        self.genre = self._get_field(f, b'IGNR')
        #: 'ISBJ' Subject
-        self.subject        = self._get_field(f,b'ISBJ')
+        self.subject = self._get_field(f, b'ISBJ')
        #: 'IART' Artist, composer, author
-        self.artist         = self._get_field(f,b'IART')
+        self.artist = self._get_field(f, b'IART')
        #: 'ICMT' Comment
-        self.comment        = self._get_field(f,b'ICMT')
+        self.comment = self._get_field(f, b'ICMT')
        #: 'ISFT' Software, encoding application
-        self.software       = self._get_field(f,b'ISFT')
+        self.software = self._get_field(f, b'ISFT')
        #: 'ICRD' Created date
-        self.created_date   = self._get_field(f,b'ICRD')
+        self.created_date = self._get_field(f, b'ICRD')
        #: 'IENG' Engineer
-        self.engineer       = self._get_field(f,b'IENG')
+        self.engineer = self._get_field(f, b'IENG')
        #: 'ITCH' Technician
-        self.technician     = self._get_field(f,b'ITCH')
+        self.technician = self._get_field(f, b'ITCH')
        #: 'IKEY' Keywords, keyword list
-        self.keywords       = self._get_field(f,b'IKEY')
+        self.keywords = self._get_field(f, b'IKEY')
        #: 'INAM' Name, title
-        self.title          = self._get_field(f,b'INAM')
+        self.title = self._get_field(f, b'INAM')
        #: 'ISRC' Source
-        self.source         = self._get_field(f,b'ISRC')
+        self.source = self._get_field(f, b'ISRC')
        #: 'TAPE' Tape
-        self.tape           = self._get_field(f,b'TAPE')
+        self.tape = self._get_field(f, b'TAPE')
        #: 'IARL' Archival Location
-        self.archival_location = self._get_field(f,b'IARL')
+        self.archival_location = self._get_field(f, b'IARL')
        #: 'ISFT' Software
-        self.software       = self._get_field(f,b'ISFT')
+        self.software = self._get_field(f, b'ISFT')
        #: 'ICMS' Commissioned
-        self.commissioned   = self._get_field(f,b'ICMS')
-        
-
+        self.commissioned = self._get_field(f, b'ICMS')

    def _get_field(self, f, field_ident):
-
-        search = next( ( (chunk.start, chunk.length) for chunk in self.info_chunk.children \
-                if chunk.ident == field_ident ), None)
+        search = next(((chunk.start, chunk.length) for chunk in self.info_chunk.children if chunk.ident == field_ident),
+                      None)

        if search is not None:
            f.seek(search[0])
@@ -64,32 +59,24 @@ class WavInfoChunkReader:
        else:
            return None

-
    def to_dict(self):
        """
        A dictionary with all of the key/values read from the INFO scope.
        """
-        return {'copyright':    self.copyright,
-                'product':  self.product,
-                'genre':    self.genre,
-                'artist':   self.artist,
-                'comment':  self.comment,
+        return {'copyright': self.copyright,
+                'product': self.product,
+                'genre': self.genre,
+                'artist': self.artist,
+                'comment': self.comment,
                'software': self.software,
                'created_date': self.created_date,
                'engineer': self.engineer,
                'keywords': self.keywords,
-                'title':    self.title,
-                'source':   self.source,
-                'tape':     self.tape,
+                'title': self.title,
+                'source': self.source,
+                'tape': self.tape,
                'commissioned': self.commissioned,
-                'software': self.software,
-                'archival_location':self.archival_location,
-                'subject':  self.subject,
-                'technician':self.technician
+                'archival_location': self.archival_location,
+                'subject': self.subject,
+                'technician': self.technician
                }
-
-
-
-
-
-