From 5a30ce3afc588561ddfb2f38bea843d021459ee4 Mon Sep 17 00:00:00 2001
From: Jamie Hardt <jamiehardt@me.com>
Date: Mon, 6 Jan 2020 08:27:34 -0800
Subject: [PATCH] UMID Implementation

---
 tests/test_files/protools/umid.wav | Bin 0 -> 181504 bytes
 wavinfo/umid_parser.py             | 225 ++++++++++++++---------------
 wavinfo/wave_bext_reader.py        |  82 +++++------
 wavinfo/wave_info_reader.py        |  81 +++++------
 4 files changed, 185 insertions(+), 203 deletions(-)
 create mode 100644 tests/test_files/protools/umid.wav

diff --git a/tests/test_files/protools/umid.wav b/tests/test_files/protools/umid.wav
new file mode 100644
index 0000000000000000000000000000000000000000..db58c85b8f7edf68593fdceb2211325452105f16
GIT binary patch
literal 181504
zcmeI(Jx^0n7y#gN3u@9BBvB$Gqftp(1;1j5O%O$ih+ibcM6b38krsuLKwS6>{s$Kk
zql2>!IJ)BCqyvMKiP6MO@2!dz#7T^j=d``|<UQ|s&pA)q@t*7L>G||L69x;Hj-R@C
z=Je&TzU^9Rx;m6u-!=Y+>Srg$+b)cck4>(z**Ko*?3+G3c)fq%=C4xr<@V*;bNTjM
zXYSyU&I3pCheignwZtfF&hF`mqG(eTZEN11#O5*!i%UzhVgFSB?PqVgepPM*#fHn(
zwq412qU6;*AHs{qOlSzLYo4$?S{ZImMjyZ3^5FHtVibMLERB`(FORM{%>V%c1PBly
zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF
z5FkK+009C7{*XX1uEyz;(Ha2)1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+
z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly
zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF
z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U
zAV7cs0RjXF5FkK+009C72oNAZU?U0i^!1(FduJn^$XN&wAV7cs0RjXF5FkK+009C7
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+
z009C72oNAZfB*pk1pc<bMCnH5a<XXjW8vL%nT5rrrP<|3_xy6V))^Vd{&de4ri!EE
zZTTy?d<a)V$lZU}5^ja?bm7F#wC+@SwAfmkQM;_Jf1<lkZ44oKC;KQ=;<1^@(aH0r
z>g37dL}O@4;%S+>flT<&kOt4j*Kd}p=c^N?xSZrC=}~x98wnwkOsOC5trVwg^*YjJ
zb~Ft&eVp&<?R?bn{@(7pNhn-PK2OqFb?sqY|F^}uRutQu>|$%(<g}ZFmfOl=4WT(1
uO9kq`ir-`%OH;qL`mFA_8b_;+@jcU>z`rc8`j@=&Ka}qN^SAaVPVxn|mzgjC

literal 0
HcmV?d00001

diff --git a/wavinfo/umid_parser.py b/wavinfo/umid_parser.py
index 5eac0e4..d0c6a9c 100644
--- a/wavinfo/umid_parser.py
+++ b/wavinfo/umid_parser.py
@@ -1,5 +1,10 @@
-import struct
 from typing import Union
+import binascii
+
+
+def binary_to_string(binary_value):  # hex-encode a bytes-like value as ASCII text, two hex digits per byte
+    return str(binascii.hexlify(binary_value), encoding='ascii')
+
 
 class UMIDParser:
     """
@@ -9,121 +14,111 @@ class UMIDParser:
     """
     def __init__(self, raw_umid: bytearray):
         self.raw_umid = raw_umid
-
-    @classmethod
-    def binary_to_string(cls, binary_value):
-        result_str = ''
-        for n in range(len(binary_value)):
-            result_str = '{:x}'.format(binary_value[n]) + result_str
-
-        return result_str
-
-    @property
-    def universal_label(self) -> bytearray:
-        return self.raw_umid[0:12]
-
-    @property
-    def basic_umid(self):
-        return self.raw_umid[0:32]
+    #
+    # @property
+    # def universal_label(self) -> bytearray:
+    #     return self.raw_umid[0:12]
+    #
+    # @property
+    # def basic_umid(self):
+    #     return self.raw_umid[0:32]
 
     def basic_umid_to_str(self):
-        return "%024x-%06x-%032x" % (self.binary_to_string(self.universal_label),
-                                     self.binary_to_string(self.instance_number),
-                                     self.binary_to_string(self.material_number))
-
-    @property
-    def universal_label_is_valid(self) -> bool:
-        valid_preamble = b'\x06\x0a\x2b\x34\x01\x01\x01\x05\x01\x01'
-        return self.universal_label[0:len(valid_preamble)] == valid_preamble
-
-    @property
-    def material_type(self) -> str:
-        material_byte = self.raw_umid[10]
-        if material_byte == 0x1:
-            return 'picture'
-        elif material_byte == 0x2:
-            return 'audio'
-        elif material_byte == 0x3:
-            return 'data'
-        elif material_byte == 0x4:
-            return 'other'
-        elif material_byte == 0x5:
-            return 'picture_single_component'
-        elif material_byte == 0x6:
-            return 'picture_multiple_component'
-        elif material_byte == 0x7:
-            return 'audio_single_component'
-        elif material_byte == 0x9:
-            return 'audio_multiple_component'
-        elif material_byte == 0xb:
-            return 'auxiliary_single_component'
-        elif material_byte == 0xc:
-            return 'auxiliary_multiple_component'
-        elif material_byte == 0xd:
-            return 'mixed_components'
-        elif material_byte == 0xf:
-            return 'not_identified'
-        else:
-            return 'not_recognized'
-
-    @property
-    def material_number_creation_method(self) -> str:
-        method_byte = self.raw_umid[11]
-        method_byte = (method_byte << 4) & 0xf
-        if method_byte == 0x0:
-            return 'undefined'
-        elif method_byte == 0x1:
-            return 'smpte'
-        elif method_byte == 0x2:
-            return 'uuid'
-        elif method_byte == 0x3:
-            return 'masked'
-        elif method_byte == 0x4:
-            return 'ieee1394'
-        elif 0x5 <= method_byte <= 0x7:
-            return 'reserved_undefined'
-        else:
-            return 'unrecognized'
-
-    @property
-    def instance_number_creation_method(self) -> str:
-        method_byte = self.raw_umid[11]
-        method_byte = method_byte & 0xf
-        if method_byte == 0x0:
-            return 'undefined'
-        elif method_byte == 0x01:
-            return 'local_registration'
-        elif method_byte == 0x02:
-            return '24_bit_prs'
-        elif method_byte == 0x03:
-            return 'copy_number_and_16_bit_prs'
-        elif 0x04 <= method_byte <= 0x0e:
-            return 'reserved_undefined'
-        elif method_byte == 0x0f:
-            return 'live_stream'
-        else:
-            return 'unrecognized'
-
-    @property
-    def indicated_length(self) -> str:
-        if self.raw_umid[12] == 0x13:
-            return 'basic'
-        elif self.raw_umid[12] == 0x33:
-            return 'extended'
-
-    @property
-    def instance_number(self) -> bytearray:
-        return self.raw_umid[13:3]
-
-    @property
-    def material_number(self) -> bytearray:
-        return self.raw_umid[16:16]
-
-    @property
-    def source_pack(self) -> Union[bytearray, None]:
-        if self.indicated_length == 'extended':
-            return self.raw_umid[32:32]
-        else:
-            return None
+        # Slices are start:stop. Bytes 0-12 hold the label and length byte, bytes 13-15 the instance number.
+        return binary_to_string(self.raw_umid[0:13]) + '-' + binary_to_string(self.raw_umid[13:16])
+    # @property
+    # def universal_label_is_valid(self) -> bool:
+    #     valid_preamble = b'\x06\x0a\x2b\x34\x01\x01\x01\x05\x01\x01'
+    #     return self.universal_label[0:len(valid_preamble)] == valid_preamble
+    #
+    # @property
+    # def material_type(self) -> str:
+    #     material_byte = self.raw_umid[10]
+    #     if material_byte == 0x1:
+    #         return 'picture'
+    #     elif material_byte == 0x2:
+    #         return 'audio'
+    #     elif material_byte == 0x3:
+    #         return 'data'
+    #     elif material_byte == 0x4:
+    #         return 'other'
+    #     elif material_byte == 0x5:
+    #         return 'picture_single_component'
+    #     elif material_byte == 0x6:
+    #         return 'picture_multiple_component'
+    #     elif material_byte == 0x7:
+    #         return 'audio_single_component'
+    #     elif material_byte == 0x9:
+    #         return 'audio_multiple_component'
+    #     elif material_byte == 0xb:
+    #         return 'auxiliary_single_component'
+    #     elif material_byte == 0xc:
+    #         return 'auxiliary_multiple_component'
+    #     elif material_byte == 0xd:
+    #         return 'mixed_components'
+    #     elif material_byte == 0xf:
+    #         return 'not_identified'
+    #     else:
+    #         return 'not_recognized'
+    #
+    # @property
+    # def material_number_creation_method(self) -> str:
+    #     method_byte = self.raw_umid[11]
+    #     method_byte = (method_byte >> 4) & 0xf
+    #     if method_byte == 0x0:
+    #         return 'undefined'
+    #     elif method_byte == 0x1:
+    #         return 'smpte'
+    #     elif method_byte == 0x2:
+    #         return 'uuid'
+    #     elif method_byte == 0x3:
+    #         return 'masked'
+    #     elif method_byte == 0x4:
+    #         return 'ieee1394'
+    #     elif 0x5 <= method_byte <= 0x7:
+    #         return 'reserved_undefined'
+    #     else:
+    #         return 'unrecognized'
+    #
+    # @property
+    # def instance_number_creation_method(self) -> str:
+    #     method_byte = self.raw_umid[11]
+    #     method_byte = method_byte & 0xf
+    #     if method_byte == 0x0:
+    #         return 'undefined'
+    #     elif method_byte == 0x01:
+    #         return 'local_registration'
+    #     elif method_byte == 0x02:
+    #         return '24_bit_prs'
+    #     elif method_byte == 0x03:
+    #         return 'copy_number_and_16_bit_prs'
+    #     elif 0x04 <= method_byte <= 0x0e:
+    #         return 'reserved_undefined'
+    #     elif method_byte == 0x0f:
+    #         return 'live_stream'
+    #     else:
+    #         return 'unrecognized'
+    #
+    # @property
+    # def indicated_length(self) -> str:
+    #     if self.raw_umid[12] == 0x13:
+    #         return 'basic'
+    #     elif self.raw_umid[12] == 0x33:
+    #         return 'extended'
+    #
+    # @property
+    # def instance_number(self) -> bytearray:
+    #     return self.raw_umid[13:16]
+    #
+    # @property
+    # def material_number(self) -> bytearray:
+    #     return self.raw_umid[16:32]
+    #
+    # @property
+    # def source_pack(self) -> Union[bytearray, None]:
+    #     if self.indicated_length == 'extended':
+    #         return self.raw_umid[32:64]
+    #     else:
+    #         return None
 
 
diff --git a/wavinfo/wave_bext_reader.py b/wavinfo/wave_bext_reader.py
index 7496664..cf0f211 100644
--- a/wavinfo/wave_bext_reader.py
+++ b/wavinfo/wave_bext_reader.py
@@ -1,21 +1,22 @@
 import struct
 import binascii
+from .umid_parser import UMIDParser
 
 class WavBextReader:
-    def __init__(self,bext_data,encoding):
+    def __init__(self, bext_data, encoding):
         """
         Read Broadcast-WAV extended metadata.
         :param best_data: The bytes-like data.
         "param encoding: The encoding to use when decoding the text fields of the
                  BEXT metadata scope. According to EBU Rec 3285 this shall be ASCII.
         """
-        packstring = "<256s"+ "32s" + "32s" + "10s" + "8s" + "QH" + "64s" + "hhhhh" + "180s"
+        packstring = "<256s" + "32s" + "32s" + "10s" + "8s" + "QH" + "64s" + "hhhhh" + "180s"
 
         rest_starts = struct.calcsize(packstring)
         unpacked = struct.unpack(packstring, bext_data[:rest_starts])
 
         def sanatize_bytes(bytes):
-            first_null = next( (index for index, byte in enumerate(bytes) if byte == 0 ), None )
+            first_null = next((index for index, byte in enumerate(bytes) if byte == 0), None)
             if first_null is not None:
                 trimmed = bytes[:first_null]
             else:
@@ -25,68 +26,67 @@ class WavBextReader:
             return decoded
 
         #: Description. A free-text field up to 256 characters long.
-        self.description     = sanatize_bytes(unpacked[0])
+        self.description = sanatize_bytes(unpacked[0])
         #: Originator. Usually the name of the encoding application, sometimes
         #: a artist name.
-        self.originator      = sanatize_bytes(unpacked[1])
+        self.originator = sanatize_bytes(unpacked[1])
         #: A unique identifer for the file, a serial number.
-        self.originator_ref  = sanatize_bytes(unpacked[2])
+        self.originator_ref = sanatize_bytes(unpacked[2])
         #: Date of the recording, in the format YYY-MM-DD
         self.originator_date = sanatize_bytes(unpacked[3])
         #: Time of the recording, in the format HH:MM:SS.
         self.originator_time = sanatize_bytes(unpacked[4])
         #: The sample offset of the start of the file relative to an
         #: epoch, usually midnight the day of the recording. 
-        self.time_reference  = unpacked[5]
+        self.time_reference = unpacked[5]
         #: A variable-length text field containing a list of processes and
         #: and conversions performed on the file.
-        self.coding_history  = sanatize_bytes(bext_data[rest_starts:])
+        self.coding_history = sanatize_bytes(bext_data[rest_starts:])
         #: BEXT version. 
-        self.version         = unpacked[6]
+        self.version = unpacked[6]
         #: SMPTE 330M UMID of this audio file, 64 bytes are allocated though the UMID
         #: may only be 32 bytes long.
-        self.umid            = None
+        self.umid = None
         #: EBU R128 Integrated loudness, in LUFS.
-        self.loudness_value          = None
+        self.loudness_value = None
         #: EBU R128 Loudness rante, in LUFS.
-        self.loudness_range          = None
+        self.loudness_range = None
         #: True peak level, in dBFS TP
-        self.max_true_peak           = None
+        self.max_true_peak = None
         #: EBU R128 Maximum momentary loudness, in LUFS
-        self.max_momentary_loudness  = None
+        self.max_momentary_loudness = None
         #: EBU R128 Maximum short-term loudness, in LUFS.
-        self.max_shortterm_loudness  = None
+        self.max_shortterm_loudness = None
 
         if self.version > 0:
             self.umid = unpacked[7]
 
         if self.version > 1:
-            self.loudness_value          = unpacked[8] / 100.0
-            self.loudness_range          = unpacked[9] / 100.0
-            self.max_true_peak           = unpacked[10] / 100.0
-            self.max_momentary_loudness  = unpacked[11] / 100.0
-            self.max_shortterm_loudness  = unpacked[12] / 100.0
-
-    def umid_to_str(self):
-        if self.umid:
-            return str(binascii.hexlify(self.umid), encoding='ascii')
-        else:
-            return None
+            self.loudness_value = unpacked[8] / 100.0
+            self.loudness_range = unpacked[9] / 100.0
+            self.max_true_peak = unpacked[10] / 100.0
+            self.max_momentary_loudness = unpacked[11] / 100.0
+            self.max_shortterm_loudness = unpacked[12] / 100.0
 
     def to_dict(self):
-        return {'description':      self.description,
-                'originator':       self.originator,
-                'originator_ref':   self.originator_ref,
-                'originator_date':  self.originator_date,
-                'originator_time':  self.originator_time,
-                'time_reference':   self.time_reference,
-                'version':          self.version,
-                'umid':             self.umid_to_str(),
-                'coding_history':   self.coding_history,
-                'loudness_value':   self.loudness_value,
-                'loudness_range':   self.loudness_range,
-                'max_true_peak':    self.max_true_peak,
-                'max_momentary_loudness':   self.max_momentary_loudness,
-                'max_shortterm_loudness':   self.max_shortterm_loudness
-                }
+        if self.umid is not None:
+            umid_parsed = UMIDParser(self.umid)
+            umid_str = umid_parsed.basic_umid_to_str()
+        else:
+            umid_str = None
 
+        return {'description': self.description,
+                'originator': self.originator,
+                'originator_ref': self.originator_ref,
+                'originator_date': self.originator_date,
+                'originator_time': self.originator_time,
+                'time_reference': self.time_reference,
+                'version': self.version,
+                'umid': umid_str,
+                'coding_history': self.coding_history,
+                'loudness_value': self.loudness_value,
+                'loudness_range': self.loudness_range,
+                'max_true_peak': self.max_true_peak,
+                'max_momentary_loudness': self.max_momentary_loudness,
+                'max_shortterm_loudness': self.max_shortterm_loudness
+                }
diff --git a/wavinfo/wave_info_reader.py b/wavinfo/wave_info_reader.py
index 5fa78f8..0fc3efc 100644
--- a/wavinfo/wave_info_reader.py
+++ b/wavinfo/wave_info_reader.py
@@ -1,6 +1,6 @@
-
 from .riff_parser import parse_chunk, ListChunkDescriptor
 
+
 class WavInfoChunkReader:
 
     def __init__(self, f, encoding):
@@ -9,53 +9,48 @@ class WavInfoChunkReader:
         f.seek(0)
         parsed_chunks = parse_chunk(f)
 
-        list_chunks = [chunk for chunk in parsed_chunks.children \
-                if type(chunk) is ListChunkDescriptor]
+        list_chunks = [chunk for chunk in parsed_chunks.children if type(chunk) is ListChunkDescriptor]
+
+        self.info_chunk = next((chunk for chunk in list_chunks if chunk.signature == b'INFO'), None)
 
-        self.info_chunk  = next((chunk for chunk in list_chunks \
-                if chunk.signature == b'INFO'), None)
-        
         #: 'ICOP' Copyright
-        self.copyright      = self._get_field(f,b'ICOP')
+        self.copyright = self._get_field(f, b'ICOP')
         #: 'IPRD' Product
-        self.product        = self._get_field(f,b'IPRD')
+        self.product = self._get_field(f, b'IPRD')
         #: 'IGNR' Genre
-        self.genre          = self._get_field(f,b'IGNR')
+        self.genre = self._get_field(f, b'IGNR')
         #: 'ISBJ' Supject
-        self.subject        = self._get_field(f,b'ISBJ')
+        self.subject = self._get_field(f, b'ISBJ')
         #: 'IART' Artist, composer, author
-        self.artist         = self._get_field(f,b'IART')
+        self.artist = self._get_field(f, b'IART')
         #: 'ICMT' Comment
-        self.comment        = self._get_field(f,b'ICMT')
+        self.comment = self._get_field(f, b'ICMT')
         #: 'ISFT' Software, encoding application
-        self.software       = self._get_field(f,b'ISFT')
+        self.software = self._get_field(f, b'ISFT')
         #: 'ICRD' Created date
-        self.created_date   = self._get_field(f,b'ICRD')
+        self.created_date = self._get_field(f, b'ICRD')
         #: 'IENG' Engineer
-        self.engineer       = self._get_field(f,b'IENG')
+        self.engineer = self._get_field(f, b'IENG')
         #: 'ITCH' Technician
-        self.technician     = self._get_field(f,b'ITCH')
+        self.technician = self._get_field(f, b'ITCH')
         #: 'IKEY' Keywords, keyword list
-        self.keywords       = self._get_field(f,b'IKEY')
+        self.keywords = self._get_field(f, b'IKEY')
         #: 'INAM' Name, title
-        self.title          = self._get_field(f,b'INAM')
+        self.title = self._get_field(f, b'INAM')
         #: 'ISRC' Source
-        self.source         = self._get_field(f,b'ISRC')
+        self.source = self._get_field(f, b'ISRC')
         #: 'TAPE' Tape
-        self.tape           = self._get_field(f,b'TAPE')
+        self.tape = self._get_field(f, b'TAPE')
         #: 'IARL' Archival Location
-        self.archival_location = self._get_field(f,b'IARL')
+        self.archival_location = self._get_field(f, b'IARL')
         #: 'ISFT' Software
-        self.software       = self._get_field(f,b'ISFT')
+        self.software = self._get_field(f, b'ISFT')
         #: 'ICSM' Commissioned
-        self.commissioned   = self._get_field(f,b'ICMS')
-        
-
+        self.commissioned = self._get_field(f, b'ICMS')
 
     def _get_field(self, f, field_ident):
-
-        search = next( ( (chunk.start, chunk.length) for chunk in self.info_chunk.children \
-                if chunk.ident == field_ident ), None)
+        search = next(((chunk.start, chunk.length) for chunk in self.info_chunk.children if chunk.ident == field_ident),
+                      None)
 
         if search is not None:
             f.seek(search[0])
@@ -64,32 +59,24 @@ class WavInfoChunkReader:
         else:
             return None
 
-
     def to_dict(self):
         """
         A dictionary with all of the key/values read from the INFO scope.
         """
-        return {'copyright':    self.copyright,
-                'product':  self.product,
-                'genre':    self.genre,
-                'artist':   self.artist,
-                'comment':  self.comment,
+        return {'copyright': self.copyright,
+                'product': self.product,
+                'genre': self.genre,
+                'artist': self.artist,
+                'comment': self.comment,
                 'software': self.software,
                 'created_date': self.created_date,
                 'engineer': self.engineer,
                 'keywords': self.keywords,
-                'title':    self.title,
-                'source':   self.source,
-                'tape':     self.tape,
+                'title': self.title,
+                'source': self.source,
+                'tape': self.tape,
                 'commissioned': self.commissioned,
-                'software': self.software,
-                'archival_location':self.archival_location,
-                'subject':  self.subject,
-                'technician':self.technician
+                'archival_location': self.archival_location,
+                'subject': self.subject,
+                'technician': self.technician
                 }
-
-
-
-
-
-