Merge pull request #8 from elibroftw/master

Updated Code
This commit is contained in:
Jamie Hardt
2020-08-17 11:15:50 -07:00
committed by GitHub
15 changed files with 155 additions and 156 deletions

View File

@@ -2,8 +2,8 @@ dist: xenial
language: python language: python
python: python:
# - "2.7" # - "2.7"
- "3.6"
- "3.5" - "3.5"
- "3.6"
- "3.7" - "3.7"
- "3.8" - "3.8"
script: script:

View File

@@ -6,7 +6,6 @@
# wavinfo # wavinfo
The `wavinfo` package allows you to probe WAVE and [RF64/WAVE files][eburf64] and extract extended metadata, with an emphasis on film, video and professional music production metadata. The `wavinfo` package allows you to probe WAVE and [RF64/WAVE files][eburf64] and extract extended metadata, with an emphasis on film, video and professional music production metadata.
`wavinfo` reads: `wavinfo` reads:
@@ -32,8 +31,6 @@ In progress:
[eburf64]:https://tech.ebu.ch/docs/tech/tech3306v1_1.pdf [eburf64]:https://tech.ebu.ch/docs/tech/tech3306v1_1.pdf
[info-tags]:https://exiftool.org/TagNames/RIFF.html#Info [info-tags]:https://exiftool.org/TagNames/RIFF.html#Info
## Demonstration ## Demonstration
The entry point for wavinfo is the WavInfoReader class. The entry point for wavinfo is the WavInfoReader class.
@@ -67,11 +64,6 @@ The length of the file in frames (interleaved samples) and bytes is available, a
Python 3.5 support is deprecated. Python 3.5 support is deprecated.
## Other Resources ## Other Resources
* For other file formats and ID3 decoding, look at [audio-metadata](https://github.com/thebigmunch/audio-metadata). * For other file formats and ID3 decoding, look at [audio-metadata](https://github.com/thebigmunch/audio-metadata).

View File

@@ -1,6 +1,7 @@
import unittest import unittest
import wavinfo import wavinfo
class TestWalk(unittest.TestCase): class TestWalk(unittest.TestCase):
def test_walk_metadata(self): def test_walk_metadata(self):
test_file = 'tests/test_files/protools/PT A101_4.A1.wav' test_file = 'tests/test_files/protools/PT A101_4.A1.wav'

View File

@@ -7,11 +7,13 @@ from .utils import all_files, ffprobe
import wavinfo import wavinfo
class TestWaveInfo(TestCase): class TestWaveInfo(TestCase):
def test_sanity(self): def test_sanity(self):
for wav_file in all_files(): for wav_file in all_files():
info = wavinfo.WavInfoReader(wav_file) info = wavinfo.WavInfoReader(wav_file)
self.assertTrue(info is not None) self.assertEqual(info.__repr__(), 'WavInfoReader(%s, %s, %s)'.format(wav_file, 'latin_1', 'ascii'))
self.assertIsNotNone(info)
def test_fmt_against_ffprobe(self): def test_fmt_against_ffprobe(self):
for wav_file in all_files(): for wav_file in all_files():
@@ -24,9 +26,8 @@ class TestWaveInfo(TestCase):
if info.fmt.audio_format == 1: if info.fmt.audio_format == 1:
self.assertTrue(ffprobe_info['streams'][0]['codec_name'].startswith('pcm')) self.assertTrue(ffprobe_info['streams'][0]['codec_name'].startswith('pcm'))
byte_rate = int(ffprobe_info['streams'][0]['sample_rate']) \ streams = ffprobe_info['streams'][0]
* ffprobe_info['streams'][0]['channels'] \ byte_rate = int(streams['sample_rate']) * streams['channels'] * int(streams['bits_per_raw_sample']) / 8
* int(ffprobe_info['streams'][0]['bits_per_raw_sample']) / 8
self.assertEqual(info.fmt.byte_rate, byte_rate) self.assertEqual(info.fmt.byte_rate, byte_rate)
def test_data_against_ffprobe(self): def test_data_against_ffprobe(self):
@@ -91,6 +92,23 @@ class TestWaveInfo(TestCase):
for track in info.ixml.track_list: for track in info.ixml.track_list:
self.assertIsNotNone(track.channel_index) self.assertIsNotNone(track.channel_index)
if basename == 'A101_4.WAV' and track.channel_index == '1': if basename == 'A101_4.WAV' and track.channel_index == '1':
self.assertTrue(track.name == 'MKH516 A') self.assertEqual(track.name, 'MKH516 A')
def test_metadata(self):
file_with_metadata = 'tests/test_files/sound_grinder_pro/new_camera bumb 1.wav'
self.assertTrue(os.path.exists(file_with_metadata))
info = wavinfo.WavInfoReader(file_with_metadata).info
self.assertEqual(info.title, 'camera bumb 1')
self.assertEqual(info.artist, 'Jamie Hardt')
self.assertEqual(info.copyright, '© 2010 Jamie Hardt')
self.assertEqual(info.product, 'Test Sounds') # album
self.assertEqual(info.album, info.product)
self.assertEqual(info.comment, 'Comments')
self.assertEqual(info.software, 'Sound Grinder Pro')
self.assertEqual(info.created_date, '2010-12-28')
self.assertEqual(info.engineer, 'JPH')
self.assertEqual(info.keywords, 'Sound Effect, movement, microphone, bump')
self.assertEqual(info.title, 'camera bumb 1')
self.assertEqual(type(info.to_dict()), dict)
self.assertEqual(type(info.__repr__()), str)

View File

@@ -8,5 +8,6 @@ from unittest import TestCase
import wavinfo import wavinfo
class TestZoomF8(TestCase): class TestZoomF8(TestCase):
pass pass

View File

@@ -6,6 +6,7 @@ import json
FFPROBE = 'ffprobe' FFPROBE = 'ffprobe'
def ffprobe(path): def ffprobe(path):
arguments = [FFPROBE, "-of", "json", "-show_format", "-show_streams", path] arguments = [FFPROBE, "-of", "json", "-show_format", "-show_streams", path]
if int(sys.version[0]) < 3: if int(sys.version[0]) < 3:
@@ -27,13 +28,9 @@ def ffprobe(path):
return None return None
def all_files(): def all_files():
for dirpath, _, filenames in os.walk('tests/test_files'): for dirpath, _, filenames in os.walk('tests/test_files'):
for filename in filenames: for filename in filenames:
_, ext = os.path.splitext(filename) _, ext = os.path.splitext(filename)
if ext in ['.wav', '.WAV']: if ext in ['.wav', '.WAV']:
yield os.path.join(dirpath, filename) yield os.path.join(dirpath, filename)

View File

@@ -7,6 +7,6 @@ Go to the documentation for wavinfo.WavInfoReader for more information.
from .wave_reader import WavInfoReader from .wave_reader import WavInfoReader
from .riff_parser import WavInfoEOFError from .riff_parser import WavInfoEOFError
__version__ = '1.5' __version__ = '1.6'
__author__ = 'Jamie Hardt <jamiehardt@gmail.com>' __author__ = 'Jamie Hardt <jamiehardt@gmail.com>'
__license__ = "MIT" __license__ = "MIT"

View File

@@ -4,6 +4,7 @@ from . import WavInfoReader
import sys import sys
import json import json
def main(): def main():
parser = OptionParser() parser = OptionParser()
@@ -28,5 +29,6 @@ def main():
except Exception as e: except Exception as e:
print(e) print(e)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -36,7 +36,7 @@ def parse_list_chunk(stream, length, rf64_context=None):
signature = stream.read(4) signature = stream.read(4)
children = [] children = []
while (stream.tell() - start + 8) < length: while stream.tell() - start + 8 < length:
child_chunk = parse_chunk(stream, rf64_context=rf64_context) child_chunk = parse_chunk(stream, rf64_context=rf64_context)
children.append(child_chunk) children.append(child_chunk)
@@ -56,16 +56,16 @@ def parse_chunk(stream, rf64_context=None):
data_size = struct.unpack('<I', size_bytes)[0] data_size = struct.unpack('<I', size_bytes)[0]
if data_size == 0xFFFFFFFF: if data_size == 0xFFFFFFFF:
if rf64_context is None and ident in [b'RF64', b'BW64']: if rf64_context is None and ident in {b'RF64', b'BW64'}:
rf64_context = parse_rf64(stream=stream, signature=ident) rf64_context = parse_rf64(stream=stream, signature=ident)
data_size = rf64_context.bigchunk_table[ident] data_size = rf64_context.bigchunk_table[ident]
displacement = data_size displacement = data_size
if (displacement % 2) != 0: if displacement % 2:
displacement = displacement + 1 displacement += 1
if ident in [b'RIFF', b'LIST', b'RF64', b'BW64']: if ident in {b'RIFF', b'LIST', b'RF64', b'BW64'}:
return parse_list_chunk(stream=stream, length=data_size, rf64_context=rf64_context) return parse_list_chunk(stream=stream, length=data_size, rf64_context=rf64_context)
else: else:
data_start = stream.tell() data_start = stream.tell()

View File

@@ -1,14 +1,10 @@
from typing import Union from typing import Union
import binascii import binascii
from functools import reduce
def binary_to_string(binary_value): def binary_to_string(binary_value):
retval = '' return reduce(lambda val, el: val + "{:02x}".format(el), binary_value, '')
for n in range(0, len(binary_value)):
sr = "{:02x}".format(binary_value[n])
retval += sr
return retval
class UMIDParser: class UMIDParser:
@@ -125,5 +121,3 @@ class UMIDParser:
# return self.raw_umid[32:32] # return self.raw_umid[32:32]
# else: # else:
# return None # return None

View File

@@ -2,12 +2,13 @@ import struct
import binascii import binascii
from .umid_parser import UMIDParser from .umid_parser import UMIDParser
class WavBextReader: class WavBextReader:
def __init__(self, bext_data, encoding): def __init__(self, bext_data, encoding):
""" """
Read Broadcast-WAV extended metadata. Read Broadcast-WAV extended metadata.
:param best_data: The bytes-like data. :param bext_data: The bytes-like data.
"param encoding: The encoding to use when decoding the text fields of the :param encoding: The encoding to use when decoding the text fields of the
BEXT metadata scope. According to EBU Rec 3285 this shall be ASCII. BEXT metadata scope. According to EBU Rec 3285 this shall be ASCII.
""" """
packstring = "<256s" + "32s" + "32s" + "10s" + "8s" + "QH" + "64s" + "hhhhh" + "180s" packstring = "<256s" + "32s" + "32s" + "10s" + "8s" + "QH" + "64s" + "hhhhh" + "180s"
@@ -15,33 +16,29 @@ class WavBextReader:
rest_starts = struct.calcsize(packstring) rest_starts = struct.calcsize(packstring)
unpacked = struct.unpack(packstring, bext_data[:rest_starts]) unpacked = struct.unpack(packstring, bext_data[:rest_starts])
def sanatize_bytes(bytes): def sanitize_bytes(b):
first_null = next((index for index, byte in enumerate(bytes) if byte == 0), None) first_null = next((index for index, byte in enumerate(b) if byte == 0), None)
if first_null is not None: trimmed = b if first_null is None else b[:first_null]
trimmed = bytes[:first_null]
else:
trimmed = bytes
decoded = trimmed.decode(encoding) decoded = trimmed.decode(encoding)
return decoded return decoded
#: Description. A free-text field up to 256 characters long. #: Description. A free-text field up to 256 characters long.
self.description = sanatize_bytes(unpacked[0]) self.description = sanitize_bytes(unpacked[0])
#: Originator. Usually the name of the encoding application, sometimes #: Originator. Usually the name of the encoding application, sometimes
#: an artist name. #: an artist name.
self.originator = sanatize_bytes(unpacked[1]) self.originator = sanitize_bytes(unpacked[1])
#: A unique identifer for the file, a serial number. #: A unique identifier for the file, a serial number.
self.originator_ref = sanatize_bytes(unpacked[2]) self.originator_ref = sanitize_bytes(unpacked[2])
#: Date of the recording, in the format YYYY-MM-DD #: Date of the recording, in the format YYYY-MM-DD
self.originator_date = sanatize_bytes(unpacked[3]) self.originator_date = sanitize_bytes(unpacked[3])
#: Time of the recording, in the format HH:MM:SS. #: Time of the recording, in the format HH:MM:SS.
self.originator_time = sanatize_bytes(unpacked[4]) self.originator_time = sanitize_bytes(unpacked[4])
#: The sample offset of the start of the file relative to an #: The sample offset of the start of the file relative to an
#: epoch, usually midnight the day of the recording. #: epoch, usually midnight the day of the recording.
self.time_reference = unpacked[5] self.time_reference = unpacked[5]
#: A variable-length text field containing a list of processes and #: A variable-length text field containing a list of processes and
#: conversions performed on the file. #: conversions performed on the file.
self.coding_history = sanatize_bytes(bext_data[rest_starts:]) self.coding_history = sanitize_bytes(bext_data[rest_starts:])
#: BEXT version. #: BEXT version.
self.version = unpacked[6] self.version = unpacked[6]
#: SMPTE 330M UMID of this audio file, 64 bytes are allocated though the UMID #: SMPTE 330M UMID of this audio file, 64 bytes are allocated though the UMID

View File

@@ -17,6 +17,7 @@ class WavInfoChunkReader:
self.copyright = self._get_field(f, b'ICOP') self.copyright = self._get_field(f, b'ICOP')
#: 'IPRD' Product #: 'IPRD' Product
self.product = self._get_field(f, b'IPRD') self.product = self._get_field(f, b'IPRD')
self.album = self.product
#: 'IGNR' Genre #: 'IGNR' Genre
self.genre = self._get_field(f, b'IGNR') self.genre = self._get_field(f, b'IGNR')
#: 'ISBJ' Subject #: 'ISBJ' Subject
@@ -63,6 +64,7 @@ class WavInfoChunkReader:
""" """
return {'copyright': self.copyright, return {'copyright': self.copyright,
'product': self.product, 'product': self.product,
'album': self.album,
'genre': self.genre, 'genre': self.genre,
'artist': self.artist, 'artist': self.artist,
'comment': self.comment, 'comment': self.comment,
@@ -78,3 +80,8 @@ class WavInfoChunkReader:
'subject': self.subject, 'subject': self.subject,
'technician': self.technician 'technician': self.technician
} }
def __repr__(self):
return_val = self.to_dict()
return_val.update({'encoding': self.encoding})
return str(return_val)

View File

@@ -6,6 +6,7 @@ from collections import namedtuple
IXMLTrack = namedtuple('IXMLTrack', ['channel_index', 'interleave_index', 'name', 'function']) IXMLTrack = namedtuple('IXMLTrack', ['channel_index', 'interleave_index', 'name', 'function'])
class WavIXMLFormat: class WavIXMLFormat:
""" """
iXML recorder metadata. iXML recorder metadata.
@@ -16,9 +17,9 @@ class WavIXMLFormat:
:param xml: A bytes-like object containing the iXML payload. :param xml: A bytes-like object containing the iXML payload.
""" """
self.source = xml self.source = xml
xmlBytes = io.BytesIO(xml) xml_bytes = io.BytesIO(xml)
parser = ET.XMLParser(recover=True) parser = ET.XMLParser(recover=True)
self.parsed = ET.parse(xmlBytes, parser=parser) self.parsed = ET.parse(xml_bytes, parser=parser)
def _get_text_value(self, xpath): def _get_text_value(self, xpath):
e = self.parsed.find("./" + xpath) e = self.parsed.find("./" + xpath)
@@ -87,5 +88,3 @@ class WavIXMLFormat:
The name of this file's file family. The name of this file's file family.
""" """
return self._get_text_value("FILE_SET/FAMILY_NAME") return self._get_text_value("FILE_SET/FAMILY_NAME")

View File

@@ -15,9 +15,11 @@ from .wave_info_reader import WavInfoChunkReader
WavDataDescriptor = namedtuple('WavDataDescriptor', 'byte_count frame_count') WavDataDescriptor = namedtuple('WavDataDescriptor', 'byte_count frame_count')
#: The format of the audio samples. #: The format of the audio samples.
WavAudioFormat = namedtuple('WavAudioFormat','audio_format channel_count sample_rate byte_rate block_align bits_per_sample') WavAudioFormat = namedtuple('WavAudioFormat',
'audio_format channel_count sample_rate byte_rate block_align bits_per_sample')
class WavInfoReader():
class WavInfoReader:
""" """
Parse a WAV audio file for metadata. Parse a WAV audio file for metadata.
""" """
@@ -33,13 +35,18 @@ class WavInfoReader():
:param bext_encoding: The text encoding to use when decoding the string :param bext_encoding: The text encoding to use when decoding the string
fields of the Broadcast-WAV extension. Per EBU 3285 this is ASCII fields of the Broadcast-WAV extension. Per EBU 3285 this is ASCII
but this parameter is available to you if you encounter a werido. but this parameter is available to you if you encounter a weirdo.
""" """
absolute_path = os.path.abspath(path) absolute_path = os.path.abspath(path)
#: `file://` url for the file. #: `file://` url for the file.
self.url = pathlib.Path(absolute_path).as_uri() self.url = pathlib.Path(absolute_path).as_uri()
# for __repr__()
self.path = absolute_path
self.info_encoding = info_encoding
self.bext_encoding = bext_encoding
with open(path, 'rb') as f: with open(path, 'rb') as f:
chunks = parse_chunk(f) chunks = parse_chunk(f)
@@ -57,30 +64,19 @@ class WavInfoReader():
#: :class:`wavinfo.wave_info_reader.WavInfoChunkReader` with RIFF INFO metadata #: :class:`wavinfo.wave_info_reader.WavInfoChunkReader` with RIFF INFO metadata
self.info = self._get_info(f, encoding=info_encoding) self.info = self._get_info(f, encoding=info_encoding)
self.data = self._describe_data(f) self.data = self._describe_data()
def _find_chunk_data(self, ident, from_stream, default_none=False): def _find_chunk_data(self, ident, from_stream, default_none=False):
chunk_descriptor = None top_chunks = (chunk for chunk in self.main_list if type(chunk) is ChunkDescriptor and chunk.ident == ident)
top_chunks = (chunk for chunk in self.main_list if type(chunk) is ChunkDescriptor) chunk_descriptor = next(top_chunks, None) if default_none else next(top_chunks)
return chunk_descriptor.read_data(from_stream) if chunk_descriptor else None
if default_none: def _describe_data(self):
chunk_descriptor = next((chunk for chunk in top_chunks if chunk.ident == ident),None)
else:
chunk_descriptor = next((chunk for chunk in top_chunks if chunk.ident == ident))
if chunk_descriptor:
return chunk_descriptor.read_data(from_stream)
else:
return None
def _describe_data(self,f):
data_chunk = next(c for c in self.main_list if c.ident == b'data') data_chunk = next(c for c in self.main_list if c.ident == b'data')
return WavDataDescriptor(byte_count=data_chunk.length, return WavDataDescriptor(byte_count=data_chunk.length,
frame_count=int(data_chunk.length / self.fmt.block_align)) frame_count=int(data_chunk.length / self.fmt.block_align))
def _get_format(self, f): def _get_format(self, f):
fmt_data = self._find_chunk_data(b'fmt ', f) fmt_data = self._find_chunk_data(b'fmt ', f)
@@ -112,26 +108,18 @@ class WavInfoReader():
) )
def _get_info(self, f, encoding): def _get_info(self, f, encoding):
finder = (chunk.signature for chunk in self.main_list \ finder = (chunk.signature for chunk in self.main_list if type(chunk) is ListChunkDescriptor)
if type(chunk) is ListChunkDescriptor)
if b'INFO' in finder: if b'INFO' in finder:
return WavInfoChunkReader(f, encoding) return WavInfoChunkReader(f, encoding)
def _get_bext(self, f, encoding): def _get_bext(self, f, encoding):
bext_data = self._find_chunk_data(b'bext', f, default_none=True) bext_data = self._find_chunk_data(b'bext', f, default_none=True)
if bext_data: return WavBextReader(bext_data, encoding) if bext_data else None
return WavBextReader(bext_data, encoding)
else:
return None
def _get_ixml(self, f): def _get_ixml(self, f):
ixml_data = self._find_chunk_data(b'iXML', f, default_none=True) ixml_data = self._find_chunk_data(b'iXML', f, default_none=True)
if ixml_data is None: return None if ixml_data is None else WavIXMLFormat(ixml_data.rstrip(b'\0'))
return None
ixml_string = ixml_data.rstrip(b'\0')
return WavIXMLFormat(ixml_string)
def walk(self): def walk(self):
""" """
@@ -157,3 +145,6 @@ class WavInfoReader():
info_dict = self.info.to_dict() info_dict = self.info.to_dict()
for key in info_dict.keys(): for key in info_dict.keys():
yield 'info', key, info_dict[key] yield 'info', key, info_dict[key]
def __repr__(self):
return 'WavInfoReader(%s, %s, %s)'.format(self.path, self.info_encoding, self.bext_encoding)