From add390c0a0e09a5376ff649efbfcfcd4b377a1dc Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Fri, 14 Aug 2020 09:07:56 -0400 Subject: [PATCH 1/7] Formatting, refactoring, __repr__ --- wavinfo/__init__.py | 4 +- wavinfo/__main__.py | 2 + wavinfo/rf64_parser.py | 8 +-- wavinfo/riff_parser.py | 10 ++-- wavinfo/umid_parser.py | 10 +--- wavinfo/wave_bext_reader.py | 29 +++++----- wavinfo/wave_info_reader.py | 5 ++ wavinfo/wave_ixml_reader.py | 9 ++-- wavinfo/wave_reader.py | 105 +++++++++++++++++------------------- 9 files changed, 85 insertions(+), 97 deletions(-) diff --git a/wavinfo/__init__.py b/wavinfo/__init__.py index d7d33f0..571a434 100644 --- a/wavinfo/__init__.py +++ b/wavinfo/__init__.py @@ -7,6 +7,6 @@ Go to the documentation for wavinfo.WavInfoReader for more information. from .wave_reader import WavInfoReader from .riff_parser import WavInfoEOFError -__version__ = '1.5' +__version__ = '1.6' __author__ = 'Jamie Hardt ' -__license__ = "MIT" \ No newline at end of file +__license__ = "MIT" diff --git a/wavinfo/__main__.py b/wavinfo/__main__.py index f676acd..73cb991 100644 --- a/wavinfo/__main__.py +++ b/wavinfo/__main__.py @@ -4,6 +4,7 @@ from . import WavInfoReader import sys import json + def main(): parser = OptionParser() @@ -28,5 +29,6 @@ def main(): except Exception as e: print(e) + if __name__ == "__main__": main() diff --git a/wavinfo/rf64_parser.py b/wavinfo/rf64_parser.py index 852c0d6..8f27a4a 100644 --- a/wavinfo/rf64_parser.py +++ b/wavinfo/rf64_parser.py @@ -6,7 +6,7 @@ RF64Context = namedtuple('RF64Context','sample_count bigchunk_table') def parse_rf64(stream, signature = b'RF64'): - #print("starting parse_rf64") + # print("starting parse_rf64") start = stream.tell() assert( stream.read(4) == b'WAVE' ) @@ -19,13 +19,13 @@ def parse_rf64(stream, signature = b'RF64'): ds64_data = ds64_chunk.read_data(stream) assert(len(ds64_data) >= ds64_fields_size ) - #print("Read ds64 chunk: len()",len(ds64_data)) + # print("Read ds64 chunk: len()",len(ds64_data)) riff_size, data_size, sample_count, length_lookup_table = struct.unpack( ds64_field_spec , ds64_data[0:ds64_fields_size] ) bigchunk_table = {} chunksize64format = "<4sL" chunksize64size = struct.calcsize(chunksize64format) - #print("Found chunks64s:", length_lookup_table) + # print("Found chunks64s:", length_lookup_table) for n in range(length_lookup_table): bigname, bigsize = struct.unpack_from( chunksize64format , ds64_data, offset= ds64_fields_size ) @@ -35,6 +35,6 @@ def parse_rf64(stream, signature = b'RF64'): bigchunk_table[signature] = riff_size stream.seek(start, 0) - #print("returning from parse_rf64, context: ",RF64Context( sample_count=sample_count, bigchunk_table=bigchunk_table ) ) + # print("returning from parse_rf64, context: ", RF64Context(sample_count=sample_count, bigchunk_table=bigchunk_table)) return RF64Context( sample_count=sample_count, bigchunk_table=bigchunk_table ) diff --git a/wavinfo/riff_parser.py b/wavinfo/riff_parser.py index 5a941dd..47ead92 100644 --- a/wavinfo/riff_parser.py +++ b/wavinfo/riff_parser.py @@ -36,7 +36,7 @@ def parse_list_chunk(stream, length, rf64_context=None): signature = stream.read(4) children = [] - while (stream.tell() - start + 8) < length: + while stream.tell() - start + 8 < length: child_chunk = parse_chunk(stream, rf64_context=rf64_context) children.append(child_chunk) @@ -56,16 +56,16 @@ def parse_chunk(stream, rf64_context=None): data_size = struct.unpack(' Date: Fri, 14 Aug 2020 14:34:17 -0400 Subject: [PATCH 2/7] Update wave_reader.py --- wavinfo/wave_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wavinfo/wave_reader.py b/wavinfo/wave_reader.py index dd38187..16cc407 100644 --- a/wavinfo/wave_reader.py +++ b/wavinfo/wave_reader.py @@ -119,7 +119,7 @@ class WavInfoReader: def _get_ixml(self, f): ixml_data = self._find_chunk_data(b'iXML', f, default_none=True) - return None if ixml_data else WavIXMLFormat(ixml_data.rstrip(b'\0')) + return None if ixml_data is None else WavIXMLFormat(ixml_data.rstrip(b'\0')) def walk(self): """ From ba232605db7b765731102c529dce43b17b4b3dca Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Fri, 14 Aug 2020 14:48:49 -0400 Subject: [PATCH 3/7] fix bugs --- .travis.yml | 2 +- README.md | 16 ++++------------ wavinfo/wave_reader.py | 2 +- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index 33f77d2..fd3334a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,8 @@ dist: xenial language: python python: # - "2.7" - - "3.6" - "3.5" + - "3.6" - "3.7" - "3.8" script: diff --git a/README.md b/README.md index 8a47c69..03e88a3 100644 --- a/README.md +++ b/README.md @@ -6,17 +6,16 @@ # wavinfo - -The `wavinfo` package allows you to probe WAVE and [RF64/WAVE files][eburf64] and extract extended metadata, with an emphasis on film, video and professional music production metadata. +The `wavinfo` package allows you to probe WAVE and [RF64/WAVE files][eburf64] and extract extended metadata, with an emphasis on film, video and professional music production metadata. `wavinfo` reads: -* __Broadcast-WAVE__ metadata[1][ebu], including embedded program +* __Broadcast-WAVE__ metadata[1][ebu], including embedded program loudness and coding history, if extant. This also includes the SMPTE UMID[2][smpte_330m2011]. -* __iXML__ production recorder metadata[3][ixml], including project, scene, and take tags, recorder notes +* __iXML__ production recorder metadata[3][ixml], including project, scene, and take tags, recorder notes and file family information. * Most of the common __RIFF INFO__[4][info-tags] metadata fields. -* The __wav format__ is also parsed, so you can access the basic sample rate and channel count +* The __wav format__ is also parsed, so you can access the basic sample rate and channel count information. In progress: @@ -32,8 +31,6 @@ In progress: [eburf64]:https://tech.ebu.ch/docs/tech/tech3306v1_1.pdf [info-tags]:https://exiftool.org/TagNames/RIFF.html#Info - - ## Demonstration The entry point for wavinfo is the WavInfoReader class. @@ -67,11 +64,6 @@ The length of the file in frames (interleaved samples) and bytes is available, a Python 3.5 support is deprecated. - ## Other Resources * For other file formats and ID3 decoding, look at [audio-metadata](https://github.com/thebigmunch/audio-metadata). - - - - diff --git a/wavinfo/wave_reader.py b/wavinfo/wave_reader.py index 16cc407..d2b12a7 100644 --- a/wavinfo/wave_reader.py +++ b/wavinfo/wave_reader.py @@ -132,7 +132,7 @@ class WavInfoReader: scopes = ('fmt', 'data') # 'bext', 'ixml', 'info') for scope in scopes: - attr: WavAudioFormat = self.__getattribute__(scope) + attr = self.__getattribute__(scope) for field in attr._fields: yield scope, field, attr.__getattribute__(field) From 6d8e717f42d50858f703aed048321ddcde9221fd Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Fri, 14 Aug 2020 14:51:31 -0400 Subject: [PATCH 4/7] Update wave_info_reader.py --- wavinfo/wave_info_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wavinfo/wave_info_reader.py b/wavinfo/wave_info_reader.py index 8e187d5..e686e25 100644 --- a/wavinfo/wave_info_reader.py +++ b/wavinfo/wave_info_reader.py @@ -81,5 +81,5 @@ class WavInfoChunkReader: def __repr__(self): return_val = self.to_dict() - return_val.update({'encoding', self.encoding}) + return_val.update({'encoding': self.encoding}) return str(return_val) From f8bf6cb4a0c4e64c12b22e7230c293f553033e0a Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Fri, 14 Aug 2020 15:03:06 -0400 Subject: [PATCH 5/7] Add tests --- tests/test_walk.py | 1 + tests/test_wave_parsing.py | 83 +++++++++++++++++++------------------- tests/test_zoom_f8.py | 1 + tests/utils.py | 9 ++--- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/tests/test_walk.py b/tests/test_walk.py index 8250f7d..63df1ab 100644 --- a/tests/test_walk.py +++ b/tests/test_walk.py @@ -1,6 +1,7 @@ import unittest import wavinfo + class TestWalk(unittest.TestCase): def test_walk_metadata(self): test_file = 'tests/test_files/protools/PT A101_4.A1.wav' diff --git a/tests/test_wave_parsing.py b/tests/test_wave_parsing.py index d821887..85897e1 100644 --- a/tests/test_wave_parsing.py +++ b/tests/test_wave_parsing.py @@ -6,34 +6,35 @@ from unittest import TestCase from .utils import all_files, ffprobe import wavinfo - + + class TestWaveInfo(TestCase): def test_sanity(self): for wav_file in all_files(): info = wavinfo.WavInfoReader(wav_file) - self.assertTrue(info is not None) + self.assertEqual(info.__repr__(), 'WavInfoReader(%s, %s, %s)'.format(wav_file, 'latin_1', 'ascii')) + self.assertIsNotNone(info) def test_fmt_against_ffprobe(self): for wav_file in all_files(): info = wavinfo.WavInfoReader(wav_file) ffprobe_info = ffprobe(wav_file) - self.assertEqual( info.fmt.channel_count , ffprobe_info['streams'][0]['channels'] ) - self.assertEqual( info.fmt.sample_rate , int(ffprobe_info['streams'][0]['sample_rate']) ) - self.assertEqual( info.fmt.bits_per_sample, int(ffprobe_info['streams'][0]['bits_per_raw_sample']) ) + self.assertEqual(info.fmt.channel_count, ffprobe_info['streams'][0]['channels']) + self.assertEqual(info.fmt.sample_rate, int(ffprobe_info['streams'][0]['sample_rate'])) + self.assertEqual(info.fmt.bits_per_sample, int(ffprobe_info['streams'][0]['bits_per_raw_sample'])) if info.fmt.audio_format == 1: - self.assertTrue(ffprobe_info['streams'][0]['codec_name'].startswith('pcm') ) - byte_rate = int(ffprobe_info['streams'][0]['sample_rate']) \ - * ffprobe_info['streams'][0]['channels'] \ - * int(ffprobe_info['streams'][0]['bits_per_raw_sample']) / 8 - self.assertEqual( info.fmt.byte_rate , byte_rate ) + self.assertTrue(ffprobe_info['streams'][0]['codec_name'].startswith('pcm')) + streams = ffprobe_info['streams'][0] + byte_rate = int(streams['sample_rate']) * streams['channels'] * int(streams['bits_per_raw_sample']) / 8 + self.assertEqual(info.fmt.byte_rate, byte_rate) def test_data_against_ffprobe(self): for wav_file in all_files(): info = wavinfo.WavInfoReader(wav_file) ffprobe_info = ffprobe(wav_file) - self.assertEqual( info.data.frame_count, int(ffprobe_info['streams'][0]['duration_ts'] )) + self.assertEqual(info.data.frame_count, int(ffprobe_info['streams'][0]['duration_ts'])) def test_bext_against_ffprobe(self): for wav_file in all_files(): @@ -41,40 +42,40 @@ class TestWaveInfo(TestCase): ffprobe_info = ffprobe(wav_file) if info.bext: if 'comment' in ffprobe_info['format']['tags']: - self.assertEqual( info.bext.description, ffprobe_info['format']['tags']['comment'] ) - else: - self.assertEqual( info.bext.description , '') - + self.assertEqual(info.bext.description, ffprobe_info['format']['tags']['comment']) + else: + self.assertEqual(info.bext.description, '') + if 'encoded_by' in ffprobe_info['format']['tags']: - self.assertEqual( info.bext.originator, ffprobe_info['format']['tags']['encoded_by'] ) + self.assertEqual(info.bext.originator, ffprobe_info['format']['tags']['encoded_by']) else: - self.assertEqual( info.bext.originator, '') - + self.assertEqual(info.bext.originator, '') + if 'originator_reference' in ffprobe_info['format']['tags']: - self.assertEqual( info.bext.originator_ref, ffprobe_info['format']['tags']['originator_reference'] ) + self.assertEqual(info.bext.originator_ref, ffprobe_info['format']['tags']['originator_reference']) else: - self.assertEqual( info.bext.originator_ref, '') + self.assertEqual(info.bext.originator_ref, '') # these don't always reflect the bext info - # self.assertEqual( info.bext.originator_date, ffprobe_info['format']['tags']['date'] ) - # self.assertEqual( info.bext.originator_time, ffprobe_info['format']['tags']['creation_time'] ) - self.assertEqual( info.bext.time_reference, int(ffprobe_info['format']['tags']['time_reference']) ) + # self.assertEqual(info.bext.originator_date, ffprobe_info['format']['tags']['date']) + # self.assertEqual(info.bext.originator_time, ffprobe_info['format']['tags']['creation_time']) + self.assertEqual(info.bext.time_reference, int(ffprobe_info['format']['tags']['time_reference'])) if 'coding_history' in ffprobe_info['format']['tags']: - self.assertEqual( info.bext.coding_history, ffprobe_info['format']['tags']['coding_history'] ) + self.assertEqual(info.bext.coding_history, ffprobe_info['format']['tags']['coding_history']) else: - self.assertEqual( info.bext.coding_history, '' ) + self.assertEqual(info.bext.coding_history, '') def test_ixml(self): - expected = {'A101_4.WAV': {'project' : 'BMH', 'scene': 'A101', 'take': '4', - 'tape': '18Y12M31', 'family_uid': 'USSDVGR1112089007124015008231000'}, - 'A101_3.WAV': {'project' : 'BMH', 'scene': 'A101', 'take': '3', - 'tape': '18Y12M31', 'family_uid': 'USSDVGR1112089007124014008228300'}, - 'A101_2.WAV': {'project' : 'BMH', 'scene': 'A101', 'take': '2', - 'tape': '18Y12M31', 'family_uid': 'USSDVGR1112089007124004008218600'}, - 'A101_1.WAV': {'project' : 'BMH', 'scene': 'A101', 'take': '1', - 'tape': '18Y12M31', 'family_uid': 'USSDVGR1112089007124001008206300'}, - } + expected = {'A101_4.WAV': {'project': 'BMH', 'scene': 'A101', 'take': '4', + 'tape': '18Y12M31', 'family_uid': 'USSDVGR1112089007124015008231000'}, + 'A101_3.WAV': {'project': 'BMH', 'scene': 'A101', 'take': '3', + 'tape': '18Y12M31', 'family_uid': 'USSDVGR1112089007124014008228300'}, + 'A101_2.WAV': {'project': 'BMH', 'scene': 'A101', 'take': '2', + 'tape': '18Y12M31', 'family_uid': 'USSDVGR1112089007124004008218600'}, + 'A101_1.WAV': {'project': 'BMH', 'scene': 'A101', 'take': '1', + 'tape': '18Y12M31', 'family_uid': 'USSDVGR1112089007124001008206300'}, + } for wav_file in all_files(): basename = os.path.basename(wav_file) @@ -82,15 +83,13 @@ class TestWaveInfo(TestCase): info = wavinfo.WavInfoReader(wav_file) e = expected[basename] - self.assertEqual( e['project'], info.ixml.project ) - self.assertEqual( e['scene'], info.ixml.scene ) - self.assertEqual( e['take'], info.ixml.take ) - self.assertEqual( e['tape'], info.ixml.tape ) - self.assertEqual( e['family_uid'], info.ixml.family_uid ) + self.assertEqual(e['project'], info.ixml.project) + self.assertEqual(e['scene'], info.ixml.scene) + self.assertEqual(e['take'], info.ixml.take) + self.assertEqual(e['tape'], info.ixml.tape) + self.assertEqual(e['family_uid'], info.ixml.family_uid) for track in info.ixml.track_list: self.assertIsNotNone(track.channel_index) if basename == 'A101_4.WAV' and track.channel_index == '1': - self.assertTrue(track.name == 'MKH516 A') - - + self.assertEqual(track.name, 'MKH516 A') diff --git a/tests/test_zoom_f8.py b/tests/test_zoom_f8.py index c6ad721..5b09dc0 100644 --- a/tests/test_zoom_f8.py +++ b/tests/test_zoom_f8.py @@ -8,5 +8,6 @@ from unittest import TestCase import wavinfo + class TestZoomF8(TestCase): pass diff --git a/tests/utils.py b/tests/utils.py index bc330f2..4ad7114 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -4,10 +4,11 @@ import subprocess from subprocess import PIPE import json -FFPROBE='ffprobe' +FFPROBE = 'ffprobe' + def ffprobe(path): - arguments = [ FFPROBE , "-of", "json" , "-show_format", "-show_streams", path ] + arguments = [FFPROBE, "-of", "json", "-show_format", "-show_streams", path] if int(sys.version[0]) < 3: process = subprocess.Popen(arguments, stdout=PIPE) process.wait() @@ -27,13 +28,9 @@ def ffprobe(path): return None - def all_files(): for dirpath, _, filenames in os.walk('tests/test_files'): for filename in filenames: _, ext = os.path.splitext(filename) if ext in ['.wav','.WAV']: yield os.path.join(dirpath, filename) - - - From 6014d1d48bf2c7ec570fac95a7429ba1714f3ddb Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Fri, 14 Aug 2020 15:05:32 -0400 Subject: [PATCH 6/7] Update utils.py --- tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index 4ad7114..b802b8e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -32,5 +32,5 @@ def all_files(): for dirpath, _, filenames in os.walk('tests/test_files'): for filename in filenames: _, ext = os.path.splitext(filename) - if ext in ['.wav','.WAV']: + if ext in ['.wav', '.WAV']: yield os.path.join(dirpath, filename) From 7589d5fb82a9fdb70e9b39450dc57c7bfd03805f Mon Sep 17 00:00:00 2001 From: Elijah Lopez Date: Fri, 14 Aug 2020 15:19:28 -0400 Subject: [PATCH 7/7] Add metadata tests --- tests/test_wave_parsing.py | 19 +++++++++++++++++++ wavinfo/wave_info_reader.py | 2 ++ 2 files changed, 21 insertions(+) diff --git a/tests/test_wave_parsing.py b/tests/test_wave_parsing.py index 85897e1..db4b8e2 100644 --- a/tests/test_wave_parsing.py +++ b/tests/test_wave_parsing.py @@ -93,3 +93,22 @@ class TestWaveInfo(TestCase): self.assertIsNotNone(track.channel_index) if basename == 'A101_4.WAV' and track.channel_index == '1': self.assertEqual(track.name, 'MKH516 A') + + def test_metadata(self): + file_with_metadata = 'tests/test_files/sound_grinder_pro/new_camera bumb 1.wav' + self.assertTrue(os.path.exists(file_with_metadata)) + info = wavinfo.WavInfoReader(file_with_metadata).info + self.assertEqual(info.title, 'camera bumb 1') + self.assertEqual(info.artist, 'Jamie Hardt') + self.assertEqual(info.copyright, '© 2010 Jamie Hardt') + self.assertEqual(info.product, 'Test Sounds') # album + self.assertEqual(info.album, info.product) + self.assertEqual(info.comment, 'Comments') + self.assertEqual(info.software, 'Sound Grinder Pro') + self.assertEqual(info.created_date, '2010-12-28') + self.assertEqual(info.engineer, 'JPH') + self.assertEqual(info.keywords, 'Sound Effect, movement, microphone, bump') + self.assertEqual(info.title, 'camera bumb 1') + self.assertEqual(type(info.to_dict()), dict) + self.assertEqual(type(info.__repr__()), str) + diff --git a/wavinfo/wave_info_reader.py b/wavinfo/wave_info_reader.py index e686e25..d15cf23 100644 --- a/wavinfo/wave_info_reader.py +++ b/wavinfo/wave_info_reader.py @@ -17,6 +17,7 @@ class WavInfoChunkReader: self.copyright = self._get_field(f, b'ICOP') #: 'IPRD' Product self.product = self._get_field(f, b'IPRD') + self.album = self.product #: 'IGNR' Genre self.genre = self._get_field(f, b'IGNR') #: 'ISBJ' Supject @@ -63,6 +64,7 @@ class WavInfoChunkReader: """ return {'copyright': self.copyright, 'product': self.product, + 'album': self.album, 'genre': self.genre, 'artist': self.artist, 'comment': self.comment,