Fixed bext parsing for Metacorder files

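Metacorder writes malformed bext chunks: the unused tail of its text fields is not zero-filled, so each field is now cut at the first NUL byte instead of only stripping trailing NULs.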
2026-02-15 06:25:38 +00:00 · 2019-01-01 19:30:43 -08:00
parent d37726f090
commit ae09897abf
3 changed files with 81 additions and 133 deletions
--- a/examples/wavinfo.ipynb
+++ b/examples/wavinfo.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -12,118 +12,47 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "pp = pprint.PrettyPrinter(indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "testfile_path = \"../tests/test_files/\"\n",
    "sound_devices_file = testfile_path + \"A101_1.WAV\"\n",
    "\n",
    "info = wavinfo.WavInfoReader(sound_devices_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WavInfoFormat(audio_format=1, channel_count=2, sample_rate=48000, byte_rate=288000, block_align=6, bits_per_sample=24)\n"
     ]
    }
   ],
   "source": [
    "pp.pprint(info.fmt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WavBextFormat(description='sSPEED=023.976-ND\\r\\nsTAKE=1\\r\\nsUBITS=$12311801\\r\\nsSWVER=2.67\\r\\nsPROJECT=BMH\\r\\nsSCENE=A101\\r\\nsFILENAME=A101_1.WAV\\r\\nsTAPE=18Y12M31\\r\\nsTRK1=MKH516 A\\r\\nsTRK2=Boom\\r\\nsNOTE=\\r\\n', originator='Sound Dev: 702T S#GR1112089007', originator_ref='USSDVGR1112089007124001008206301', originator_date='2018-12-31', originator_time='12:40:00', time_reference=2190940753, version=1, umid=b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00', loudness_value=0.0, loudness_range=0.0, max_true_peak=0.0, max_momentary_loudness=0.0, max_shortterm_loudness=0.0, coding_history='A=PCM,F=48000,W=24,M=stereo,R=48000,T=2 Ch\\r\\n')\n"
     ]
    }
   ],
   "source": [
    "pp.pprint(info.bext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('BMH', 'A101', '1', 240239)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "info.ixml.project, info.ixml.scene, info.ixml.take, info.data.frame_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
-   "source": []
+   "source": [
    "metacorder_path = '../tests/test_files/metacorder/Sr001-001-06_01.WAV'\n",
    "\n",
    "info = wavinfo.WavInfoReader(metacorder_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
-   "metadata": {},
+   "metadata": {
-   "outputs": [],
+    "scrolled": true
-   "source": [
+   },
    "pro_tools_file = testfile_path + \"PT A101_4.A1.wav\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[   ChunkDescriptor(ident=b'bext', start=20, length=858),\n",
-      "    ChunkDescriptor(ident=b'iXML', start=886, length=5226),\n",
+      "    ChunkDescriptor(ident=b'fmt ', start=886, length=16),\n",
-      "    ChunkDescriptor(ident=b'fmt ', start=6120, length=16),\n",
+      "    ChunkDescriptor(ident=b'data', start=910, length=1725696),\n",
-      "    ChunkDescriptor(ident=b'data', start=6144, length=864840),\n",
+      "    ChunkDescriptor(ident=b'iXML', start=1726614, length=2852),\n",
-      "    ChunkDescriptor(ident=b'umid', start=870992, length=24),\n",
+      "    ChunkDescriptor(ident=b'iXTC', start=1729474, length=20)]\n"
      "    ChunkDescriptor(ident=b'minf', start=871024, length=16),\n",
      "    ChunkDescriptor(ident=b'regn', start=871048, length=92)]\n"
     ]
    }
   ],
   "source": [
    "import wavinfo.wave_parser\n",
    "\n",
-    "with open(pro_tools_file,'rb') as f:\n",
+    "with open(metacorder_path,'rb') as f:\n",
    "    chunk_tree = wavinfo.wave_parser.parse_chunk(f)\n",
    "\n",
    "pp.pprint(chunk_tree.children)"
@@ -131,39 +60,49 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open(metacorder_path,'rb') as f:\n",
    "#     f.seek(chunk_tree.children[4].start)\n",
    "#     iXTC_data = f.read(chunk_tree.children[4].length)\n",
    "    \n",
    "# print(iXTC_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "WavBextFormat(description='dUBITS=12311804\\r\\ndSCENE=A101\\r\\ndTAKE=4\\r\\ndTAPE=18Y12M31\\r\\ndFRAMERATE=23.976ND\\r\\ndSPEED=023.976-NDF\\r\\ndTRK1=MKH516 A\\r\\ndTRK2=Boom\\r\\n', originator='Sound Dev: 702T S#GR1112089007', originator_ref='aa4CKtcd13Vk', originator_date='2018-12-31', originator_time='12:40:07', time_reference=2191709524, version=0, umid=b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00', loudness_value=0.0, loudness_range=0.0, max_true_peak=0.0, max_momentary_loudness=0.0, max_shortterm_loudness=0.0, coding_history='A=PCM,F=48000,W=24,M=stereo,R=48000,T=2 Ch\\r\\n')\n"
+      "WavBextFormat(description='gSCENE=001\\r\\ngTAKE=06\\r\\ngTAPE=Sr001\\r\\ngUBITS=00000000\\r\\n', originator='Metacorder Demo', originator_ref='', originator_date='2019:01:01', originator_time='13:36:16', time_reference=2350375830, version=0, umid=b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00', loudness_value=0.0, loudness_range=0.0, max_true_peak=0.0, max_momentary_loudness=0.0, max_shortterm_loudness=0.0, coding_history='')\n"
     ]
    }
   ],
   "source": [
-    "ptinfo = wavinfo.WavInfoReader(pro_tools_file)\n",
+    "with open(metacorder_path,'rb') as f:\n",
    "    f.seek(chunk_tree.children[0].start)\n",
    "    bext_raw = f.read(chunk_tree.children[0].length)\n",
    "\n",
-    "print(ptinfo.bext)"
+    "# ptinfo = wavinfo.WavInfoReader(metacorder_path)\n",
    "\n",
    "print(wavinfo.WavInfoReader(metacorder_path).bext)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
-   "metadata": {},
+   "metadata": {
-   "outputs": [
+    "scrolled": false
-    {
+   },
-     "data": {
+   "outputs": [],
      "text/plain": [
       "'<BWFXML><IXML_VERSION>1.61</IXML_VERSION><STEINBERG><ATTR_LIST><ATTR><TYPE>string</TYPE><NAME>MediaLibrary</NAME><VALUE>The Recordist Christmas 2018</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>MediaCategoryPost</NAME><VALUE>Bullets</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>MediaLibraryManufacturerName</NAME><VALUE>Creative Sound Design, LLC</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>AudioSoundEditor</NAME><VALUE>Frank Bry</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>MediaComment</NAME><VALUE>BULLET Impact Plastic LCD TV Screen Shatter Debris 2x</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>MusicalCategory</NAME><VALUE>Bullets</VALUE></ATTR></ATTR_LIST></STEINBERG></BWFXML>'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "library_sound = testfile_path + 'BULLET Impact Plastic LCD TV Screen Shatter Debris 2x.wav'\n",
    "\n",
@@ -174,25 +113,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
-   "metadata": {},
+   "metadata": {
-   "outputs": [
+    "scrolled": true
-    {
+   },
-     "name": "stdout",
+   "outputs": [],
     "output_type": "stream",
     "text": [
      "[   ChunkDescriptor(ident=b'fmt ', start=20, length=40),\n",
      "    ChunkDescriptor(ident=b'bext', start=68, length=604),\n",
      "    ChunkDescriptor(ident=b'data', start=680, length=2833404),\n",
      "    ChunkDescriptor(ident=b'ID3 ', start=2834092, length=2048),\n",
      "    ChunkDescriptor(ident=b'SMED', start=2836148, length=5468),\n",
      "    ListChunkDescriptor(signature=b'INFO', children=[ChunkDescriptor(ident=b'IPRD', start=2841636, length=30), ChunkDescriptor(ident=b'IGNR', start=2841674, length=8), ChunkDescriptor(ident=b'IART', start=2841690, length=10), ChunkDescriptor(ident=b'ICMT', start=2841708, length=54), ChunkDescriptor(ident=b'ICOP', start=2841770, length=84), ChunkDescriptor(ident=b'ISFT', start=2841862, length=12), ChunkDescriptor(ident=b'ICRD', start=2841882, length=12)]),\n",
      "    ChunkDescriptor(ident=b'iXML', start=2841902, length=686),\n",
      "    ChunkDescriptor(ident=b'umid', start=2842596, length=24),\n",
      "    ChunkDescriptor(ident=b'_PMX', start=2842628, length=3560)]\n"
     ]
    }
   ],
   "source": [
    "with open(library_sound,'rb') as f:\n",
    "    chunk_tree = wavinfo.wave_parser.parse_chunk(f)\n",
--- a/tests/test_wave_parsing.py
+++ b/tests/test_wave_parsing.py
@@ -29,7 +29,7 @@ class TestWaveInfo(TestCase):
        for dirpath, dirnames, filenames in os.walk('tests/test_files'):
            for filename in filenames:
                name, ext = os.path.splitext(filename)
-                if ext == '.wav':
+                if ext in ['.wav','.WAV']:
                    yield os.path.join(dirpath, filename)
@@ -68,13 +68,23 @@ class TestWaveInfo(TestCase):
            self.assertEqual( info.bext.description, ffprobe_info['format']['tags']['comment']  )
            self.assertEqual( info.bext.originator, ffprobe_info['format']['tags']['encoded_by']  )
-            self.assertEqual( info.bext.originator_ref, ffprobe_info['format']['tags']['originator_reference']  )
+            if 'originator_reference' in ffprobe_info['format']['tags']:
                self.assertEqual( info.bext.originator_ref, ffprobe_info['format']['tags']['originator_reference']  )
            else:
                self.assertEqual( info.bext.originator_ref, None)
            # these don't always reflect the bext info
            #self.assertEqual( info.bext.originator_date, ffprobe_info['format']['tags']['date']  )
            #self.assertEqual( info.bext.originator_time, ffprobe_info['format']['tags']['creation_time']  )
            self.assertEqual( info.bext.time_reference, int(ffprobe_info['format']['tags']['time_reference'])  )
-            self.assertEqual( info.bext.coding_history, ffprobe_info['format']['tags']['coding_history']  )
+
            if 'coding_history' in ffprobe_info['format']['tags']:
                if len(ffprobe_info['format']['tags']['coding_history']) > 0:
                    self.assertEqual( info.bext.coding_history, ffprobe_info['format']['tags']['coding_history']  )
                else:
                    self.assertEqual( info.bext.coding_history, None )
            else:
                self.assertEqual( info.bext.coding_history, None )
    def test_ixml(self):
        expected = {'A101_4.WAV': {'project' : 'BMH', 'scene': 'A101', 'take': '4',
--- a/wavinfo/wave_reader.py
+++ b/wavinfo/wave_reader.py
@@ -87,7 +87,7 @@ class WavInfoReader():
                    bits_per_sample = unpacked[5]
                    )
-    def _get_bext(self,f):
+    def _get_bext(self,f,encoding='ascii'):
        bext_data = self._find_chunk_data(b'bext',f,default_none=True)
@@ -117,11 +117,24 @@ class WavInfoReader():
        rest_starts = struct.calcsize(packstring)
        unpacked = struct.unpack(packstring, bext_data[:rest_starts])
-        return WavBextFormat(description=unpacked[0].decode('ascii').rstrip('\0'),
+        def sanatize_bytes(bytes):
-                originator      = unpacked[1].decode('ascii').rstrip('\0'),
+            first_null = next( (index for index, byte in enumerate(bytes) if byte == 0 ), None )
-                originator_ref  = unpacked[2].decode('ascii').rstrip('\0'),
+            if first_null is not None:
-                originator_date = unpacked[3].decode('ascii'),
+                trimmed = bytes[:first_null]
-                originator_time = unpacked[4].decode('ascii'),
+            else:
                trimmed = bytes
            decoded = trimmed.decode(encoding)
            if len(decoded) > 0:
                return decoded
            else:
                return None
        return WavBextFormat(description=sanatize_bytes(unpacked[0]),
                originator      = sanatize_bytes(unpacked[1]),
                originator_ref  = sanatize_bytes(unpacked[2]),
                originator_date = sanatize_bytes(unpacked[3]),
                originator_time = sanatize_bytes(unpacked[4]),
                time_reference  = unpacked[5],
                version         = unpacked[6],
                umid            = unpacked[7],
@@ -130,7 +143,7 @@ class WavInfoReader():
                max_true_peak   = unpacked[10] / 100.0,
                max_momentary_loudness = unpacked[11] / 100.0,
                max_shortterm_loudness = unpacked[12] / 100.0,
-                coding_history = bext_data[rest_starts:].decode('ascii').rstrip('\0')
+                coding_history = sanatize_bytes(bext_data[rest_starts:])
                )
    def _get_ixml(self,f):