18 Commits
v0.1 ... v0.3

Author SHA1 Message Date
Jamie Hardt
b150cd6d8e Nudged version, added author 2019-01-01 23:29:20 -08:00
Jamie Hardt
849ade92a4 Merge branch 'master' of https://github.com/iluvcapra/wavinfo 2019-01-01 23:24:36 -08:00
Jamie Hardt
482a3f86d1 Update wavinfo.ipynb 2019-01-01 23:24:23 -08:00
Jamie Hardt
991a12cbb5 Update README.md 2019-01-01 23:22:45 -08:00
Jamie Hardt
37b816045d Update README.md 2019-01-01 23:21:45 -08:00
Jamie Hardt
99aa29c5f3 More work 2019-01-01 23:19:22 -08:00
Jamie Hardt
41b599923a bext Version handling 2019-01-01 20:51:54 -08:00
Jamie Hardt
ae09897abf Fixed bext parsing for metacorder
These have really screwed-up bext chunks that aren't zero-filled.
2019-01-01 19:30:43 -08:00
Jamie Hardt
d37726f090 Reorganized test WAVs into folders 2019-01-01 19:30:06 -08:00
Jamie Hardt
ae52152111 More test WAV files
From Gallery Metacorder
2019-01-01 13:44:56 -08:00
Jamie Hardt
004249773a Update README.md
Fixed ixml URL
2019-01-01 12:36:09 -08:00
Jamie Hardt
ebbdb99c46 Update README.md
Removed Travis badge
2019-01-01 12:25:05 -08:00
Jamie Hardt
32454039bf Trying to get travis build to work 2019-01-01 12:22:23 -08:00
Jamie Hardt
830c702376 Update .travis.yml
Removed other old versions
2019-01-01 12:21:05 -08:00
Jamie Hardt
0723f21e4f Update .travis.yml
Add ffprobe to apt-get (I hope)
2019-01-01 12:20:24 -08:00
Jamie Hardt
08743be3fa Update .travis.yml
Removed unavailable versions
2019-01-01 12:16:16 -08:00
Jamie Hardt
bdb39684c7 Update test_wave_parsing.py
Removed capture_output argument, it's too new
2019-01-01 12:15:17 -08:00
Jamie Hardt
c3c3c12d38 Nudge version 2019-01-01 12:09:03 -08:00
30 changed files with 336 additions and 310 deletions

View File

@@ -3,11 +3,9 @@ python:
- "3.6"
- "3.5"
- "3.4"
- "3.3"
- "3.2"
- "3.1"
- "3.0"
script:
- "python3 setup.py test"
before_install:
- sudo apt-get install -y ffmpeg
install:
- "pip3 install setuptools"

View File

@@ -1,4 +1,3 @@
[![Build Status](https://travis-ci.com/iluvcapra/wavinfo.svg?branch=master)](https://travis-ci.com/iluvcapra/wavinfo)
[![Documentation Status](https://readthedocs.org/projects/wavinfo/badge/?version=latest)](https://wavinfo.readthedocs.io/en/latest/?badge=latest) ![](https://img.shields.io/github/license/iluvcapra/wavinfo.svg) ![](https://img.shields.io/pypi/pyversions/wavinfo.svg) [![](https://img.shields.io/pypi/v/wavinfo.svg)](https://pypi.org/project/wavinfo/) ![](https://img.shields.io/pypi/wheel/wavinfo.svg)
@@ -10,21 +9,24 @@ production metadata.
`wavinfo` reads:
* __Broadcast-WAVE__ metadata, compliant with [EBU Tech 3285v2 (2011)][ebu], including embedded program loudness and coding history, if extant.
* [__iXML__ production recorder metadata][ixml], including project, scene, and take tags, recorder notes and file family information.
* The __wav format__ is also parsed, so you can access the basic sample rate and channel count information.
* __Broadcast-WAVE__ metadata, compliant with [EBU Tech 3285v2 (2011)][ebu], including embedded program
loudness and coding history, if extant. This also includes the [SMPTE 330M __UMID__][smpte_330m2011]
Unique Materials Identifier.
* [__iXML__ production recorder metadata][ixml], including project, scene, and take tags, recorder notes
and file family information.
* Most of the common __RIFF INFO__ metadata fields.
* The __wav format__ is also parsed, so you can access the basic sample rate and channel count
information.
In progress:
* [SMPTE 330M __UMID__][smpte_330m2011] Unique Materials Identifier.
* iXML `STEINBERG` sound library attributes.
* Most of the common __RIFF INFO__ metadata fields.
* Pro Tools __embedded regions__.
This module is presently under construction and not suitable for production at this time.
[ebu]:https://tech.ebu.ch/docs/tech/tech3285.pdf
[smpte_330m2011]:http://standards.smpte.org/content/978-1-61482-678-1/st-330-2011/SEC1.abstract
[ixml]:http://www.ixml.infoi
[ixml]:http://www.ixml.info

104
demo.md
View File

@@ -1,104 +0,0 @@
# `wavinfo` Demonstration
The entry point for wavinfo is the WavInfoReader class.
```python
from wavinfo import WavInfoReader
path = '../tests/test_files/A101_1.WAV'
info = WavInfoReader(path)
```
## Basic WAV Data
The length of the file in frames (interleaved samples) and bytes is available, as are the contents of the format chunk.
```python
(info.data.frame_count, info.data.byte_count)
```
(240239, 1441434)
```python
(info.fmt.sample_rate, info.fmt.channel_count, info.fmt.block_align, info.fmt.bits_per_sample)
```
(48000, 2, 6, 24)
## Broadcast WAV Extension
```python
print(info.bext.description)
print("----------")
print("Originator:", info.bext.originator)
print("Originator Ref:", info.bext.originator_ref)
print("Originator Date:", info.bext.originator_date)
print("Originator Time:", info.bext.originator_time)
print("Time Reference:", info.bext.time_reference)
print(info.bext.coding_history)
```
sSPEED=023.976-ND
sTAKE=1
sUBITS=$12311801
sSWVER=2.67
sPROJECT=BMH
sSCENE=A101
sFILENAME=A101_1.WAV
sTAPE=18Y12M31
sTRK1=MKH516 A
sTRK2=Boom
sNOTE=
----------
Originator: Sound Dev: 702T S#GR1112089007
Originator Ref: USSDVGR1112089007124001008206301
Originator Date: 2018-12-31
Originator Time: 12:40:00
Time Reference: 2190940753
A=PCM,F=48000,W=24,M=stereo,R=48000,T=2 Ch
## iXML Production Recorder Metadata
```python
print("iXML Project:", info.ixml.project)
print("iXML Scene:", info.ixml.scene)
print("iXML Take:", info.ixml.take)
print("iXML Tape:", info.ixml.tape)
print("iXML File Family Name:", info.ixml.family_name)
print("iXML File Family UID:", info.ixml.family_uid)
```
iXML Project: BMH
iXML Scene: A101
iXML Take: 1
iXML Tape: 18Y12M31
iXML File Family Name: None
iXML File Family UID: USSDVGR1112089007124001008206300
A=PCM,F=48000,W=24,M=stereo,R=48000,T=2 Ch
```python
```

View File

@@ -25,86 +25,17 @@
"metadata": {},
"outputs": [],
"source": [
"testfile_path = \"../tests/test_files/\"\n",
"sound_devices_file = testfile_path + \"A101_1.WAV\"\n",
"path = '../tests/test_files/protools/PT A101_4.A1.wav'\n",
"\n",
"info = wavinfo.WavInfoReader(sound_devices_file)"
"info = wavinfo.WavInfoReader(path)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WavInfoFormat(audio_format=1, channel_count=2, sample_rate=48000, byte_rate=288000, block_align=6, bits_per_sample=24)\n"
]
}
],
"source": [
"pp.pprint(info.fmt)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WavBextFormat(description='sSPEED=023.976-ND\\r\\nsTAKE=1\\r\\nsUBITS=$12311801\\r\\nsSWVER=2.67\\r\\nsPROJECT=BMH\\r\\nsSCENE=A101\\r\\nsFILENAME=A101_1.WAV\\r\\nsTAPE=18Y12M31\\r\\nsTRK1=MKH516 A\\r\\nsTRK2=Boom\\r\\nsNOTE=\\r\\n', originator='Sound Dev: 702T S#GR1112089007', originator_ref='USSDVGR1112089007124001008206301', originator_date='2018-12-31', originator_time='12:40:00', time_reference=2190940753, version=1, umid=b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00', loudness_value=0.0, loudness_range=0.0, max_true_peak=0.0, max_momentary_loudness=0.0, max_shortterm_loudness=0.0, coding_history='A=PCM,F=48000,W=24,M=stereo,R=48000,T=2 Ch\\r\\n')\n"
]
}
],
"source": [
"pp.pprint(info.bext)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('BMH', 'A101', '1', 240239)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"info.ixml.project, info.ixml.scene, info.ixml.take, info.data.frame_count"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"pro_tools_file = testfile_path + \"PT A101_4.A1.wav\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
@@ -123,7 +54,7 @@
"source": [
"import wavinfo.wave_parser\n",
"\n",
"with open(pro_tools_file,'rb') as f:\n",
"with open(path,'rb') as f:\n",
" chunk_tree = wavinfo.wave_parser.parse_chunk(f)\n",
"\n",
"pp.pprint(chunk_tree.children)"
@@ -131,75 +62,127 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WavBextFormat(description='dUBITS=12311804\\r\\ndSCENE=A101\\r\\ndTAKE=4\\r\\ndTAPE=18Y12M31\\r\\ndFRAMERATE=23.976ND\\r\\ndSPEED=023.976-NDF\\r\\ndTRK1=MKH516 A\\r\\ndTRK2=Boom\\r\\n', originator='Sound Dev: 702T S#GR1112089007', originator_ref='aa4CKtcd13Vk', originator_date='2018-12-31', originator_time='12:40:07', time_reference=2191709524, version=0, umid=b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00', loudness_value=0.0, loudness_range=0.0, max_true_peak=0.0, max_momentary_loudness=0.0, max_shortterm_loudness=0.0, coding_history='A=PCM,F=48000,W=24,M=stereo,R=48000,T=2 Ch\\r\\n')\n"
"b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00*\\xfd\\xf5\\x0c$\\xe4s\\x80\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n",
"000000000000002afdf50c24e47380000000000000000000\n",
"24\n"
]
}
],
"source": [
"ptinfo = wavinfo.WavInfoReader(pro_tools_file)\n",
"\n",
"print(ptinfo.bext)"
"with open(path,'rb') as f:\n",
" f.seek( chunk_tree.children[4].start )\n",
" umid_bin = f.read(chunk_tree.children[4].length)\n",
" f.seek( chunk_tree.children[6].start )\n",
" regn_bin = f.read(chunk_tree.children[6].length)\n",
" \n",
"print(umid_bin)\n",
"print(umid_bin.hex())\n",
"print(len(umid_bin))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'<BWFXML><IXML_VERSION>1.61</IXML_VERSION><STEINBERG><ATTR_LIST><ATTR><TYPE>string</TYPE><NAME>MediaLibrary</NAME><VALUE>The Recordist Christmas 2018</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>MediaCategoryPost</NAME><VALUE>Bullets</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>MediaLibraryManufacturerName</NAME><VALUE>Creative Sound Design, LLC</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>AudioSoundEditor</NAME><VALUE>Frank Bry</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>MediaComment</NAME><VALUE>BULLET Impact Plastic LCD TV Screen Shatter Debris 2x</VALUE></ATTR><ATTR><TYPE>string</TYPE><NAME>MusicalCategory</NAME><VALUE>Bullets</VALUE></ATTR></ATTR_LIST></STEINBERG></BWFXML>'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"library_sound = testfile_path + 'BULLET Impact Plastic LCD TV Screen Shatter Debris 2x.wav'\n",
"\n",
"recinfo = wavinfo.WavInfoReader(library_sound)\n",
"\n",
"recinfo.ixml.source"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ ChunkDescriptor(ident=b'fmt ', start=20, length=40),\n",
" ChunkDescriptor(ident=b'bext', start=68, length=604),\n",
" ChunkDescriptor(ident=b'data', start=680, length=2833404),\n",
" ChunkDescriptor(ident=b'ID3 ', start=2834092, length=2048),\n",
" ChunkDescriptor(ident=b'SMED', start=2836148, length=5468),\n",
" ListChunkDescriptor(signature=b'INFO', children=[ChunkDescriptor(ident=b'IPRD', start=2841636, length=30), ChunkDescriptor(ident=b'IGNR', start=2841674, length=8), ChunkDescriptor(ident=b'IART', start=2841690, length=10), ChunkDescriptor(ident=b'ICMT', start=2841708, length=54), ChunkDescriptor(ident=b'ICOP', start=2841770, length=84), ChunkDescriptor(ident=b'ISFT', start=2841862, length=12), ChunkDescriptor(ident=b'ICRD', start=2841882, length=12)]),\n",
" ChunkDescriptor(ident=b'iXML', start=2841902, length=686),\n",
" ChunkDescriptor(ident=b'umid', start=2842596, length=24),\n",
" ChunkDescriptor(ident=b'_PMX', start=2842628, length=3560)]\n"
"<wavinfo.wave_bext_reader.WavBextReader object at 0x10d5f8ac8>\n"
]
}
],
"source": [
"with open(library_sound,'rb') as f:\n",
"print(info.bext)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"b'\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00*\\xfd\\xf5\\x0c$\\xe4s\\x80\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0c3\\x02\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00T\\xd5\\xa2\\x82\\x00\\x00\\x00\\x00\\x10PT A101_4.A1.wavGK\\xaa\\xaf\\x7f\\x00\\x00@ }\\x06\\x00`\\x00\\x00'\n",
"01000000000000000000002afdf50c24e473800000000000000000000c330200000000000000000000000000000000000000000054d5a2820000000010505420413130315f342e41312e776176474baaaf7f000040207d0600600000\n",
"92\n"
]
}
],
"source": [
"\n",
"print(regn_bin)\n",
"print(regn_bin.hex())\n",
"print(len(regn_bin))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{ 'artist': 'Frank Bry',\n",
" 'comment': 'BULLET Impact Plastic LCD TV Screen Shatter Debris 2x',\n",
" 'copyright': '2018 Creative Sound Design, LLC (The Recordist Christmas '\n",
" '2018) www.therecordist.com',\n",
" 'created_date': '2018-11-15',\n",
" 'engineer': None,\n",
" 'genre': 'Bullets',\n",
" 'keywords': None,\n",
" 'product': 'The Recordist Christmas 2018',\n",
" 'software': 'Soundminer',\n",
" 'source': None,\n",
" 'tape': None,\n",
" 'title': None}\n",
"{ 'coding_history': '',\n",
" 'description': 'BULLET Impact Plastic LCD TV Screen Shatter Debris 2x',\n",
" 'loudness_range': None,\n",
" 'loudness_value': None,\n",
" 'max_momentary_loudness': None,\n",
" 'max_shortterm_loudness': None,\n",
" 'max_true_peak': None,\n",
" 'originator': 'TheRecordist',\n",
" 'originator_date': '2018-12-20',\n",
" 'originator_ref': 'aaiAKt3fCGTk',\n",
" 'originator_time': '12:15:37',\n",
" 'time_reference': 57882,\n",
" 'version': 0}\n"
]
}
],
"source": [
"path = '../tests/test_files/BULLET Impact Plastic LCD TV Screen Shatter Debris 2x.wav'\n",
"\n",
"info = wavinfo.WavInfoReader(path)\n",
"\n",
"with open(path,'rb') as f:\n",
" chunk_tree = wavinfo.wave_parser.parse_chunk(f)\n",
"\n",
"pp.pprint(chunk_tree.children)"
" \n",
"pp.pprint(info.info.to_dict())\n",
"pp.pprint(info.bext.to_dict())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,

2
pypi_upload.sh Executable file
View File

@@ -0,0 +1,2 @@
#!/bin/bash
# Upload every file under dist/ to the PyPI legacy upload endpoint via twine.
# Run from the repository root so that the dist/* glob resolves.
python3 -m twine upload --repository-url https://upload.pypi.org/legacy/ dist/*

View File

@@ -4,15 +4,18 @@ with open("README.md", "r") as fh:
long_description = fh.read()
setup(name='wavinfo',
version='0.1',
version='0.3',
author='Jamie Hardt',
author_email='jamiehardt@me.com',
description='WAVE sound file metadata parser.',
long_description_content_type="text/markdown",
long_description=long_description,
url='https://github.com/iluvcapra/wavinfo',
classifiers=['Development Status :: 2 - Pre-Alpha',
classifiers=['Development Status :: 4 - Beta',
'License :: OSI Approved :: MIT License',
'Topic :: Multimedia',
'Topic :: Multimedia :: Sound/Audio'],
'Topic :: Multimedia :: Sound/Audio',
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6"],
packages=['wavinfo'])

View File

@@ -1 +1 @@
SOUND REPORT
SOUND REPORT
Can't render this file because it contains an unexpected character in line 1 and column 53.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,6 +1,7 @@
import os.path
import json
import subprocess
from subprocess import PIPE
from unittest import TestCase
@@ -13,7 +14,7 @@ def ffprobe(path):
arguments = [ FFPROBE , "-of", "json" , "-show_format", "-show_streams", path ]
process = subprocess.run(arguments, stdin=None, capture_output=True)
process = subprocess.run(arguments, stdin=None, stdout=PIPE, stderr=PIPE)
if process.returncode == 0:
return json.loads(process.stdout)
@@ -28,7 +29,7 @@ class TestWaveInfo(TestCase):
for dirpath, dirnames, filenames in os.walk('tests/test_files'):
for filename in filenames:
name, ext = os.path.splitext(filename)
if ext == '.wav':
if ext in ['.wav','.WAV']:
yield os.path.join(dirpath, filename)
@@ -67,13 +68,20 @@ class TestWaveInfo(TestCase):
self.assertEqual( info.bext.description, ffprobe_info['format']['tags']['comment'] )
self.assertEqual( info.bext.originator, ffprobe_info['format']['tags']['encoded_by'] )
self.assertEqual( info.bext.originator_ref, ffprobe_info['format']['tags']['originator_reference'] )
if 'originator_reference' in ffprobe_info['format']['tags']:
self.assertEqual( info.bext.originator_ref, ffprobe_info['format']['tags']['originator_reference'] )
else:
self.assertEqual( info.bext.originator_ref, '')
# these don't always reflect the bext info
#self.assertEqual( info.bext.originator_date, ffprobe_info['format']['tags']['date'] )
#self.assertEqual( info.bext.originator_time, ffprobe_info['format']['tags']['creation_time'] )
# self.assertEqual( info.bext.originator_date, ffprobe_info['format']['tags']['date'] )
# self.assertEqual( info.bext.originator_time, ffprobe_info['format']['tags']['creation_time'] )
self.assertEqual( info.bext.time_reference, int(ffprobe_info['format']['tags']['time_reference']) )
self.assertEqual( info.bext.coding_history, ffprobe_info['format']['tags']['coding_history'] )
if 'coding_history' in ffprobe_info['format']['tags']:
self.assertEqual( info.bext.coding_history, ffprobe_info['format']['tags']['coding_history'] )
else:
self.assertEqual( info.bext.coding_history, '' )
def test_ixml(self):
expected = {'A101_4.WAV': {'project' : 'BMH', 'scene': 'A101', 'take': '4',

View File

@@ -1 +1,4 @@
from .wave_reader import WavInfoReader
# Package version. Kept as a string (matching the `version` passed to
# setup()) because a float cannot represent versions such as "0.10" or "0.3.1".
__version__ = '0.3'
__author__ = 'Jamie Hardt'

View File

@@ -0,0 +1,86 @@
import struct
class WavBextReader:
    """
    Decoded contents of a Broadcast-WAVE `bext` chunk.

    String fields are cut at their first NUL byte before decoding, so
    fields that are not zero-filled after the terminator still decode
    cleanly.
    """

    def __init__(self, bext_data, encoding):
        """
        Unpack the raw payload of a `bext` chunk.

        * `bext_data`: The bytes of the chunk payload, or `None`. When it is
          `None` every attribute is left as `None`.
        * `encoding`: The text encoding used to decode the string fields.

        Raises `struct.error` if `bext_data` is shorter than the fixed-size
        portion of the chunk.
        """
        # Fixed-size layout, in order:
        #   description[256]
        #   originator[32]
        #   originatorref[32]
        #   originatordate[10]   "YYYY-MM-DD"
        #   originatortime[8]    "HH:MM:SS"
        #   lowtimeref  U32  \  read together as one
        #   hightimeref U32  /  little-endian U64
        #   version U16
        #   umid[64]
        #
        # EBU 3285 fields
        #   loudnessvalue         S16 (in LUFS*100)
        #   loudnessrange         S16 (in LUFS*100)
        #   maxtruepeak           S16 (in dbTB*100)
        #   maxmomentaryloudness  S16 (LUFS*100)
        #   maxshorttermloudness  S16 (LUFS*100)
        #   reserved[180]
        #   codinghistory []      (everything after the fixed part)

        # Give every attribute a value up front so that the object (and
        # to_dict) stays usable even when there is no data to parse.
        self.description = None
        self.originator = None
        self.originator_ref = None
        self.originator_date = None
        self.originator_time = None
        self.time_reference = None
        self.version = None
        self.umid = None
        self.loudness_value = None
        self.loudness_range = None
        self.max_true_peak = None
        self.max_momentary_loudness = None
        self.max_shortterm_loudness = None
        self.coding_history = None

        if bext_data is None:
            return

        packstring = "<256s" + "32s" + "32s" + "10s" + "8s" + "QH" + "64s" + "hhhhh" + "180s"
        rest_starts = struct.calcsize(packstring)
        unpacked = struct.unpack(packstring, bext_data[:rest_starts])

        def sanitize(raw):
            # Keep only what precedes the first NUL; bytes after it may be
            # garbage rather than zero fill.
            return raw.split(b'\0', 1)[0].decode(encoding)

        self.description = sanitize(unpacked[0])
        self.originator = sanitize(unpacked[1])
        self.originator_ref = sanitize(unpacked[2])
        self.originator_date = sanitize(unpacked[3])
        self.originator_time = sanitize(unpacked[4])
        self.time_reference = unpacked[5]
        self.version = unpacked[6]

        # The UMID is only read from version 1 onward. It is field 7 of the
        # unpacked tuple (field 6 is the version number itself).
        if self.version > 0:
            self.umid = unpacked[7]

        # The loudness fields are only read from version 2 onward; they are
        # stored as integers scaled by 100.
        if self.version > 1:
            self.loudness_value = unpacked[8] / 100.0
            self.loudness_range = unpacked[9] / 100.0
            self.max_true_peak = unpacked[10] / 100.0
            self.max_momentary_loudness = unpacked[11] / 100.0
            self.max_shortterm_loudness = unpacked[12] / 100.0

        self.coding_history = sanitize(bext_data[rest_starts:])

    def to_dict(self):
        """Return the decoded text and numeric fields as a plain dict (the UMID is not included)."""
        return {'description': self.description,
                'originator': self.originator,
                'originator_ref': self.originator_ref,
                'originator_date': self.originator_date,
                'originator_time': self.originator_time,
                'time_reference': self.time_reference,
                'version': self.version,
                'coding_history': self.coding_history,
                'loudness_value': self.loudness_value,
                'loudness_range': self.loudness_range,
                'max_true_peak': self.max_true_peak,
                'max_momentary_loudness': self.max_momentary_loudness,
                'max_shortterm_loudness': self.max_shortterm_loudness
                }

View File

@@ -0,0 +1,64 @@
from .wave_parser import parse_chunk, ListChunkDescriptor
class WavInfoChunkReader:
    """
    Reader for the text fields stored in a RIFF `LIST` chunk with the
    `INFO` signature.

    Each supported field becomes an attribute holding the decoded string,
    or `None` when the field (or the whole INFO list) is absent.
    """

    def __init__(self, f, encoding):
        """
        * `f`: A seekable binary stream positioned anywhere in the file; it is
          rewound to the start before parsing.
        * `encoding`: The text encoding used to decode the INFO fields.
        """
        self.encoding = encoding

        f.seek(0)
        parsed_chunks = parse_chunk(f)

        # The first top-level list chunk signed b'INFO', or None if the file
        # has no such list.
        self.info_chunk = next(
            (chunk for chunk in parsed_chunks.children
             if isinstance(chunk, ListChunkDescriptor) and chunk.signature == b'INFO'),
            None)

        self.copyright = self._get_field(f, b'ICOP')
        self.product = self._get_field(f, b'IPRD')
        self.genre = self._get_field(f, b'IGNR')
        self.artist = self._get_field(f, b'IART')
        self.comment = self._get_field(f, b'ICMT')
        self.software = self._get_field(f, b'ISFT')
        self.created_date = self._get_field(f, b'ICRD')
        self.engineer = self._get_field(f, b'IENG')
        self.keywords = self._get_field(f, b'IKEY')
        self.title = self._get_field(f, b'INAM')
        self.source = self._get_field(f, b'ISRC')
        self.tape = self._get_field(f, b'TAPE')

    def _get_field(self, f, field_ident):
        """
        Read and decode the first child of the INFO list whose ident equals
        `field_ident`. Returns `None` if there is no INFO list or no such
        child. Trailing NUL bytes are stripped from the decoded text.
        """
        if self.info_chunk is None:
            # No INFO list in this file: every field is simply absent.
            return None

        for chunk in self.info_chunk.children:
            if chunk.ident == field_ident:
                f.seek(chunk.start)
                data = f.read(chunk.length)
                return data.decode(self.encoding).rstrip('\0')

        return None

    def to_dict(self):
        """Return all supported INFO fields as a plain dict."""
        return {'copyright': self.copyright,
                'product': self.product,
                'genre': self.genre,
                'artist': self.artist,
                'comment': self.comment,
                'software': self.software,
                'created_date': self.created_date,
                'engineer': self.engineer,
                'keywords': self.keywords,
                'title': self.title,
                'source': self.source,
                'tape': self.tape
                }

View File

@@ -3,30 +3,40 @@ import struct
from collections import namedtuple
ListChunkDescriptor = namedtuple('ListChunkDescriptor' , 'signature children')
class ListChunkDescriptor(namedtuple('ListChunkDescriptor', 'signature children')):
    """A list-type chunk: its four-byte signature and its parsed child chunks."""

    def find(self, chunk_path):
        """
        Locate a data chunk by path.

        `chunk_path` is a sequence of four-byte identifiers: every element
        but the last names a nested list chunk signature to descend into,
        and the last names the `ident` of the `ChunkDescriptor` wanted.
        Returns the first matching `ChunkDescriptor`, or `None` if the path
        is empty or nothing matches.
        """
        if not chunk_path:
            return None

        head, rest = chunk_path[0], chunk_path[1:]
        for chunk in self.children:
            if rest:
                # Identifiers are bytes objects, so they must be compared by
                # value; identity comparison only matches by accident.
                if isinstance(chunk, ListChunkDescriptor) and chunk.signature == head:
                    found = chunk.find(rest)
                    if found is not None:
                        return found
            elif isinstance(chunk, ChunkDescriptor) and chunk.ident == head:
                return chunk

        return None


class ChunkDescriptor(namedtuple('ChunkDescriptor', 'ident start length')):
    """A data chunk: its four-byte ident plus the offset and length of its payload."""

    def read_data(self, from_stream):
        """Seek `from_stream` to this chunk's payload and return its bytes."""
        from_stream.seek(self.start)
        return from_stream.read(self.length)
def parse_list_chunk(stream, length):
    """
    Parse the body of a list chunk from `stream`.

    Reads the four-byte list signature at the current position, then parses
    child chunks until `length` bytes (counted from the position on entry,
    so including the signature) have been consumed. Returns a
    `ListChunkDescriptor` holding the signature and the children.
    """
    start = stream.tell()
    signature = stream.read(4)

    children = []
    while (stream.tell() - start) < length:
        child = parse_chunk(stream)
        if child is None:
            # parse_chunk gives up with None when it cannot read a full
            # four-byte ident (truncated file). The stream will not advance
            # any further, so stop instead of looping forever.
            break
        children.append(child)

    return ListChunkDescriptor(signature=signature, children=children)
def parse_chunk(stream):
ident = stream.read(4)
if len(ident) != 4:
if len(ident) != 4:
return
sizeb = stream.read(4)
@@ -47,11 +57,3 @@ def parse_chunk(stream):

View File

@@ -4,6 +4,8 @@ from collections import namedtuple
from .wave_parser import parse_chunk, ChunkDescriptor, ListChunkDescriptor
from .wave_ixml_reader import WavIXMLFormat
from .wave_bext_reader import WavBextReader
from .wave_info_reader import WavInfoChunkReader
WavDataDescriptor = namedtuple('WavDataDescriptor','byte_count frame_count')
@@ -22,7 +24,20 @@ class WavInfoReader():
"""
def __init__(self, path):
def __init__(self, path, info_encoding='latin_1', bext_encoding='ascii'):
"""
Parse a WAV audio file for metadata.
* `path`: A filesystem path to the wav file you wish to probe.
* `info_encoding`: The text encoding of the INFO metadata fields.
`latin_1`/Win CP1252 has always been a pretty good guess for this.
* `bext_encoding`: The text encoding to use when decoding the string
fields of the Broadcast-WAV extension. Per EBU 3285 this is ASCII
but this parameter is available to you if you encounter a weirdo.
"""
with open(path, 'rb') as f:
chunks = parse_chunk(f)
@@ -30,9 +45,9 @@ class WavInfoReader():
f.seek(0)
self.fmt = self._get_format(f)
self.bext = self._get_bext(f)
self.bext = self._get_bext(f, encoding=bext_encoding)
self.ixml = self._get_ixml(f)
self.info = self._get_info(f, encoding=info_encoding)
self.data = self._describe_data(f)
def _find_chunk_data(self, ident, from_stream, default_none=False):
@@ -57,7 +72,6 @@ class WavInfoReader():
frame_count= int(data_chunk.length / self.fmt.block_align))
def _get_format(self,f):
fmt_data = self._find_chunk_data(b'fmt ',f)
@@ -78,63 +92,28 @@ class WavInfoReader():
#0x0006 WAVE_FORMAT_ALAW 8-bit ITU-T G.711 A-law
#0x0007 WAVE_FORMAT_MULAW 8-bit ITU-T G.711 µ-law
#0xFFFE WAVE_FORMAT_EXTENSIBLE Determined by SubFormat
if unpacked[0] == 0x0001:
return WavInfoFormat(audio_format = unpacked[0],
channel_count = unpacked[1],
sample_rate = unpacked[2],
byte_rate = unpacked[3],
block_align = unpacked[4],
#https://sno.phy.queensu.ca/~phil/exiftool/TagNames/RIFF.html
return WavInfoFormat(audio_format = unpacked[0],
channel_count = unpacked[1],
sample_rate = unpacked[2],
byte_rate = unpacked[3],
block_align = unpacked[4],
bits_per_sample = unpacked[5]
)
def _get_bext(self,f):
def _get_info(self, f, encoding):
    """
    Build a `WavInfoChunkReader` for `f` when one of the chunks in
    `self.main_list` is a list chunk signed b'INFO'; otherwise return None.
    """
    for chunk in self.main_list:
        if type(chunk) is not ListChunkDescriptor:
            continue
        if chunk.signature == b'INFO':
            return WavInfoChunkReader(f, encoding)

    return None
def _get_bext(self, f, encoding):
bext_data = self._find_chunk_data(b'bext',f,default_none=True)
# description[256]
# originator[32]
# originatorref[32]
# originatordate[10] "YYYY:MM:DD"
# originatortime[8] "HH:MM:SS"
# lowtimeref U32
# hightimeref U32
# version U16
# umid[64]
#
# EBU 3285 fields
# loudnessvalue S16 (in LUFS*100)
# loudnessrange S16 (in LUFS*100)
# maxtruepeak S16 (in dbTB*100)
# maxmomentaryloudness S16 (LUFS*100)
# maxshorttermloudness S16 (LUFS*100)
# reserved[180]
# codinghistory []
if bext_data is None:
return None
packstring = "<256s"+ "32s" + "32s" + "10s" + "8s" + "QH" + "64s" + "hhhhh" + "180s"
rest_starts = struct.calcsize(packstring)
unpacked = struct.unpack(packstring, bext_data[:rest_starts])
return WavBextFormat(description=unpacked[0].decode('ascii').rstrip('\0'),
originator = unpacked[1].decode('ascii').rstrip('\0'),
originator_ref = unpacked[2].decode('ascii').rstrip('\0'),
originator_date = unpacked[3].decode('ascii'),
originator_time = unpacked[4].decode('ascii'),
time_reference = unpacked[5],
version = unpacked[6],
umid = unpacked[7],
loudness_value = unpacked[8] / 100.0,
loudness_range = unpacked[9] / 100.0,
max_true_peak = unpacked[10] / 100.0,
max_momentary_loudness = unpacked[11] / 100.0,
max_shortterm_loudness = unpacked[12] / 100.0,
coding_history = bext_data[rest_starts:].decode('ascii').rstrip('\0')
)
return WavBextReader(bext_data, encoding)
def _get_ixml(self,f):
ixml_data = self._find_chunk_data(b'iXML',f,default_none=True)
if ixml_data is None:
return None