Files
wavinfo/wavinfo/wave_cues_reader.py
2023-11-06 23:09:06 -08:00

242 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Cues metadata
For reference on implementation of cues and related metadata see:
August 1991, "Multimedia Programming Interface and Data Specifications 1.0",
IBM Corporation and Microsoft Corporation
https://www.aelius.com/njh/wavemetatools/doc/riffmci.pdf
"""
from dataclasses import dataclass
import encodings
from .riff_parser import ChunkDescriptor
from struct import unpack, calcsize
from typing import Optional, Tuple, NamedTuple, List, Dict, Any, Generator
#: Country Codes used in the RIFF standard to resolve locale. These codes
#: appear in CSET and LTXT metadata.
CountryCodes = """000 None Indicated
001,USA
002,Canada
003,Latin America
030,Greece
031,Netherlands
032,Belgium
033,France
034,Spain
039,Italy
041,Switzerland
043,Austria
044,United Kingdom
045,Denmark
046,Sweden
047,Norway
049,West Germany
052,Mexico
055,Brazil
061,Australia
064,New Zealand
081,Japan
082,Korea
086,Peoples Republic of China
088,Taiwan
090,Turkey
351,Portugal
352,Luxembourg
354,Iceland
358,Finland"""
#: Language and Dialect codes used in the RIFF standard to resolve native
#: language of text fields. These codes appear in CSET and LTXT metadata.
LanguageDialectCodes = """0 0 None Indicated
1,1,Arabic
2,1,Bulgarian
3,1,Catalan
4,1,Traditional Chinese
4,2,Simplified Chinese
5,1,Czech
6,1,Danish
7,1,German
7,2,Swiss German
8,1,Greek
9,1,US English
9,2,UK English
10,1,Spanish
10,2,Spanish Mexican
11,1,Finnish
12,1,French
12,2,Belgian French
12,3,Canadian French
12,4,Swiss French
13,1,Hebrew
14,1,Hungarian
15,1,Icelandic
16,1,Italian
16,2,Swiss Italian
17,1,Japanese
18,1,Korean
19,1,Dutch
19,2,Belgian Dutch
20,1,Norwegian - Bokmal
20,2,Norwegian - Nynorsk
21,1,Polish
22,1,Brazilian Portuguese
22,2,Portuguese
23,1,Rhaeto-Romanic
24,1,Romanian
25,1,Russian
26,1,Serbo-Croatian (Latin)
26,2,Serbo-Croatian (Cyrillic)
27,1,Slovak
28,1,Albanian
29,1,Swedish
30,1,Thai
31,1,Turkish
32,1,Urdu
33,1,Bahasa"""
class CueEntry(NamedTuple):
name: int
position: int
chunk_id: bytes
chunk_start: int
block_start: int
sample_offset: int
Format = "<II4sIII"
@classmethod
def format_size(cls) -> int:
return calcsize(cls.Format)
@classmethod
def read(cls, data: bytes) -> 'CueEntry':
assert len(data) == cls.format_size(), \
f"cue data size incorrect, expected {calcsize(cls.Format)} found {len(data)}"
parsed = unpack(cls.Format, data)
return cls(name=parsed[0], position=parsed[1], chunk_id=parsed[2],
chunk_start=parsed[3], block_start=parsed[4],
sample_offset=parsed[5])
class LabelEntry(NamedTuple):
name: int
text: str
@classmethod
def read(cls, data: bytes, encoding: str):
return cls(name=unpack("<I", data[0:4])[0],
text=data[4:].decode(encoding).rstrip("\0"))
NoteEntry = LabelEntry
class RangeLabel(NamedTuple):
name: int
length: int
purpose: str
country: int
language: int
dialect: int
codepage: int
text: str
@classmethod
def read(cls, data: bytes, fallback_encoding: str):
leader_struct_fmt = "<II4sHHHH"
parsed = unpack(leader_struct_fmt, data[0:calcsize(leader_struct_fmt)])
text_data = data[calcsize(leader_struct_fmt):]
if data[6] != 0:
fallback_encoding = f"cp{data[6]}"
return cls(name=parsed[0], length=parsed[1], purpose=parsed[2],
country=parsed[3], language=parsed[4],
dialect=parsed[5], codepage=parsed[6],
text=text_data.decode(fallback_encoding))
@dataclass
class WavCuesReader:
cues: List[CueEntry]
labels: List[LabelEntry]
ranges: List[RangeLabel]
notes: List[NoteEntry]
@classmethod
def read_all(cls, f,
cues: Optional[ChunkDescriptor],
labls: List[ChunkDescriptor],
ltxts: List[ChunkDescriptor],
notes: List[ChunkDescriptor],
fallback_encoding: str) -> 'WavCuesReader':
cue_list = []
if cues is not None:
cues_data = cues.read_data(f)
assert len(cues_data) >= 4, "cue metadata too short"
offset = calcsize("<I")
cues_count = unpack("<I", cues_data[0:offset])
for _ in range(cues_count[0]):
cue_bytes = cues_data[offset: offset + CueEntry.format_size()]
cue_list.append(CueEntry.read(cue_bytes))
offset += CueEntry.format_size()
label_list = []
for labl in labls:
label_list.append(
LabelEntry.read(labl.read_data(f),
encoding=fallback_encoding)
)
range_list = []
for r in ltxts:
range_list.append(
RangeLabel.read(r.read_data(f),
fallback_encoding=fallback_encoding)
)
note_list = []
for note in notes:
note_list.append(
NoteEntry.read(note.read_data(f),
encoding=fallback_encoding)
)
return WavCuesReader(cues=cue_list, labels=label_list,
ranges=range_list, notes=note_list)
def each_cue(self) -> Generator[Tuple[int, int], None, None]:
"""
Iterate through each cue.
:yields: the cue's ``name`` and ``sample_offset``
"""
for cue in self.cues:
yield (cue.name, cue.sample_offset)
def label_and_note(self, cue_ident: int) -> Tuple[Optional[str], Optional[str]]:
"""
Get the label and note (extended comment) for a cue.
:param cue_ident: the cue's name, it's unique identifying number
:returns: a tuple of the the cue's label (if present) and note (if
present)
"""
label = next((l.text for l in self.labels if l.name == cue_ident), None)
note = next((n.text for n in self.notes if n.name == cue_ident), None)
return (label, note)
def to_dict(self) -> Dict[str, Any]:
return dict(cues=[c.__dict__ for c in self.cues],
labels=[l.__dict__ for l in self.labels],
ranges=[r.__dict__ for r in self.ranges],
notes=[n.__dict__ for n in self.notes])