Much refactoring

This seems to work, needs more testing
This commit is contained in:
Jamie Hardt
2020-09-20 13:46:03 -07:00
parent 8e695140fb
commit b4b3f071ae
3 changed files with 330 additions and 280 deletions

View File

@@ -1,18 +1,7 @@
-import sys
 import bpy
 import os
-from ear.fileio.utils import openBw64
-from ear.fileio.bw64.utils import interleave
-from ear.fileio.bw64.chunks import (FormatInfoChunk, ChnaChunk)
-from ear.fileio.adm import chna as adm_chna
-from ear.fileio.adm.xml import adm_to_xml
-from ear.fileio.adm.elements.block_formats import (AudioBlockFormatObjects, JumpPosition)
-from ear.fileio.adm.elements.geom import ObjectCartesianPosition
-from ear.fileio.adm.builder import (ADMBuilder, TypeDefinition)
-from ear.fileio.adm.generate_ids import generate_ids
+from contextlib import contextmanager
 import lxml
 import uuid
@@ -20,108 +9,166 @@ from fractions import Fraction
 import struct
 import numpy
-from numpy.linalg import norm
-from mathutils import Quaternion, Vector
 from time import strftime
-from math import sqrt
-from dataclasses import dataclass
-from typing import List, Tuple
+from typing import List
+from ear.fileio.utils import openBw64
+from ear.fileio.bw64 import Bw64Reader
+from ear.fileio.bw64.chunks import (FormatInfoChunk, ChnaChunk)
+from ear.fileio.adm import chna as adm_chna
+from ear.fileio.adm.xml import adm_to_xml
+from ear.fileio.adm.elements.block_formats import (AudioBlockFormatObjects, JumpPosition)
+from ear.fileio.adm.elements.geom import ObjectCartesianPosition
+from ear.fileio.adm.builder import (ADMBuilder)
+from ear.fileio.adm.generate_ids import generate_ids
+from sound_objects.intern.geom_utils import (compute_relative_vector,
+                                             room_norm_vector,
+                                             speaker_active_time_range,
+                                             speakers_by_min_distance,
+                                             speakers_by_start_time)
+from sound_objects.intern.speaker_utils import (all_speakers, solo_speakers, unmute_all_speakers)
 
-class FrameInterval:
-    def __init__(self, start_frame, end_frame):
-        self.start_frame = int(start_frame)
-        self.end_frame = int(end_frame)
-
-    def overlaps(self, other : 'FrameInterval') -> bool:
-        return self.start_frame <= other.start_frame <= self.end_frame or \
-            other.start_frame <= self.start_frame <= other.end_frame
-
-
-def compute_relative_vector(camera: bpy.types.Camera, target: bpy.types.Object):
-    """
-    Return a vector from `camera` to `target` in the camera's coordinate space.
-
-    The camera's lens is assumed to be norm to the ZX plane.
-    """
-    cam_loc, cam_rot, _ = camera.matrix_world.decompose()
-    target_loc, _, _ = target.matrix_world.decompose()
-    relative_vector = target_loc - cam_loc
-    rotation = cam_rot.to_matrix().transposed()
-    relative_vector.rotate(rotation)
-
-    # The camera's worldvector is norm to the horizon, we want a vector
-    # down the barrel.
-    camera_correction = Quaternion( ( sqrt(2.) / 2. , sqrt(2.) / 2. , 0. , 0.) )
-    relative_vector.rotate(camera_correction)
-    return relative_vector
-
-
-def room_norm_vector(vec, room_size=1.) -> Vector:
-    """
-    The Room is tearing me apart, Lisa.
-
-    The room is a cube with the camera at its center. We use a chebyshev normalization
-    to convert a vector in world or camera space into a vector the represents the projection
-    of that vector onto the room's walls.
-
-    The Pro Tools/Dolby Atmos workflow I am targeting uses "Room Centric" panner coordinates
-    ("cartesian allocentric coordinates" in ADM speak) and this process seems to yield good
-    results.
-    """
-    chebyshev = norm(vec, ord=numpy.inf)
-    if chebyshev < room_size:
-        return vec / room_size
-    else:
-        return vec / chebyshev
-
-
-def closest_approach_to_camera(scene, speaker_object) -> (float, int):
-    max_dist = sys.float_info.max
-    at_time = scene.frame_start
-    for frame in range(scene.frame_start, scene.frame_end + 1):
-        scene.frame_set(frame)
-        rel = speaker_object.matrix_world.to_translation() - scene.camera.matrix_world.to_translation()
-        dist = norm(rel)
-        if dist < max_dist:
-            max_dist = dist
-            at_time = frame
-    return (max_dist, at_time)
-
-
-def speaker_active_time_range(speaker) -> FrameInterval:
-    """
-    The time range this speaker must control in order to sound right.
-    At this time this is assuming the str
-    """
-    start, end = 0xffffffff, -0xffffffff
-    for track in speaker.animation_data.nla_tracks:
-        for strip in track.strips:
-            if strip.frame_start < start:
-                start = strip.frame_start
-            if strip.frame_end > end:
-                end = strip.frame_end
-    return FrameInterval(start_frame=start, end_frame=end)
-
-
-def speakers_by_min_distance(scene, speakers):
-    def min_distance(speaker):
-        return closest_approach_to_camera(scene, speaker)[0]
-    return sorted(speakers, key=(lambda spk: min_distance(spk)))
-
-
-def speakers_by_start_time(speaker_objs):
-    return sorted(speaker_objs, key=(lambda spk: speaker_active_time_range(spk).start_frame))
+
+@contextmanager
+def adm_object_rendering_context(scene: bpy.types.Scene):
+    old_ff = scene.render.image_settings.file_format
+    old_codec = scene.render.ffmpeg.audio_codec
+    old_chans = scene.render.ffmpeg.audio_channels
+
+    scene.render.image_settings.file_format = 'FFMPEG'
+    scene.render.ffmpeg.audio_codec = 'PCM'
+    scene.render.ffmpeg.audio_channels = 'MONO'
+    try:
+        yield scene
+    finally:
+        scene.render.image_settings.file_format = old_ff
+        scene.render.ffmpeg.audio_codec = old_codec
+        scene.render.ffmpeg.audio_channels = old_chans
+
+
+class ObjectMix:
+    def __init__(self, sources: List[bpy.types.Speaker],
+                 scene: bpy.types.Scene, base_dir: str):
+        self.sources = sources
+        self.intermediate_filename = None
+        self.base_dir = base_dir
+        self.scene = scene
+        self._mixdown_file_handle = None
+        self._mixdown_reader = None
+
+    @property
+    def frame_start(self):
+        return self.scene.frame_start
+
+    @property
+    def frame_end(self):
+        return self.scene.frame_end
+
+    @property
+    def mixdown_reader(self) -> Bw64Reader:
+        if self._mixdown_reader is None:
+            self._mixdown_reader = Bw64Reader(self.mixdown_file_handle)
+        return self._mixdown_reader
+
+    @property
+    def mixdown_file_handle(self):
+        if self._mixdown_file_handle is None:
+            self._mixdown_file_handle = open(self.mixdown_filename, 'rb')
+        return self._mixdown_file_handle
+
+    @property
+    def mixdown_filename(self):
+        if self.intermediate_filename is None:
+            self.mixdown()
+        return self.intermediate_filename
+
+    @property
+    def object_name(self):
+        return self.sources[0].name
+
+    def mixdown(self):
+        with adm_object_rendering_context(self.scene) as scene:
+            solo_speakers(scene, self.sources)
+            scene_name = bpy.path.clean_name(scene.name)
+            speaker_name = bpy.path.clean_name(self.object_name)
+            self.intermediate_filename = os.path.join(self.base_dir, "%s_%s.wav" % (scene_name, speaker_name))
+            bpy.ops.sound.mixdown(filepath=self.intermediate_filename,
+                                  container='WAV', codec='PCM', format='S24')
+            print("Created mixdown named {}".format(self.intermediate_filename))
+            unmute_all_speakers(scene)
+
+    def adm_block_formats(self, room_size=1.):
+        fps = self.scene.render.fps
+        block_formats = []
+        for speaker_obj in self.sources:
+            speaker_interval = speaker_active_time_range(speaker_obj)
+            for frame in range(speaker_interval.start_frame, speaker_interval.end_frame + 1):
+                self.scene.frame_set(frame)
+                relative_vector = compute_relative_vector(camera=self.scene.camera, target=speaker_obj)
+                norm_vec = room_norm_vector(relative_vector, room_size=room_size)
+                pos = ObjectCartesianPosition(X=norm_vec.x, Y=norm_vec.y, Z=norm_vec.z)
+                if len(block_formats) == 0 or pos != block_formats[-1].position:
+                    jp = JumpPosition(flag=True, interpolationLength=Fraction(1, fps * 2))
+                    block = AudioBlockFormatObjects(position=pos,
+                                                    rtime=Fraction(frame, fps),
+                                                    duration=Fraction(1, fps),
+                                                    cartesian=True,
+                                                    jumpPosition=jp)
+                    block_formats.append(block)
+                else:
+                    block_formats[-1].duration = block_formats[-1].duration + Fraction(1, fps)
+        return block_formats
+
+    def rm_mixdown(self):
+        if self._mixdown_reader is not None:
+            self._mixdown_reader = None
+        if self._mixdown_file_handle is not None:
+            self._mixdown_file_handle.close()
+            self._mixdown_file_handle = None
+        if self.intermediate_filename is not None:
+            os.remove(self.intermediate_filename)
+            self.intermediate_filename = None
+
+
+class ObjectMixPool:
+    def __init__(self, object_mixes: List[ObjectMix]):
+        self.object_mixes = object_mixes
+
+    def __enter__(self):
+        return self
+
+    @property
+    def shortest_file_length(self):
+        lengths = map(lambda f: len(f.mixdown_reader), self.object_mixes)
+        return min(lengths)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        for mix in self.object_mixes:
+            mix.rm_mixdown()
 
 def group_speakers(speakers, scene) -> List[List[bpy.types.Object]]:
@@ -157,42 +204,14 @@ def group_speakers(speakers, scene) -> List[List[bpy.types.Object]]:
     return ret_val
 
 
-def adm_block_formats_for_speakers(scene, speaker_objs, room_size=1.):
-    fps = scene.render.fps
-    block_formats = []
-
-    for speaker_obj in speakers_by_start_time(speaker_objs):
-        speaker_interval = speaker_active_time_range(speaker_obj)
-        for frame in range(speaker_interval.start_frame, speaker_interval.end_frame + 1):
-            scene.frame_set(frame)
-            relative_vector = compute_relative_vector(camera=scene.camera, target=speaker_obj)
-            norm_vec = room_norm_vector(relative_vector, room_size=room_size)
-            pos = ObjectCartesianPosition(X=norm_vec.x , Y=norm_vec.y , Z=norm_vec.z)
-            if len(block_formats) == 0 or pos != block_formats[-1].position:
-                jp = JumpPosition(flag=True, interpolationLength=Fraction(1,fps * 2) )
-                block = AudioBlockFormatObjects(position= pos,
-                                                rtime=Fraction(frame,fps),
-                                                duration=Fraction(1,fps) ,
-                                                cartesian=True,
-                                                jumpPosition=jp)
-                block_formats.append(block)
-            else:
-                block_formats[-1].duration = block_formats[-1].duration + Fraction(1,fps)
-    return block_formats
-
-
-def adm_for_object(scene, speakers_this_mixdown, room_size, b, i, frame_start, fps, frame_end, wav_format):
-    block_formats = adm_block_formats_for_speakers(scene=scene,
-                                                   speaker_objs=speakers_this_mixdown,
-                                                   room_size=room_size)
-    created = b.create_item_objects(track_index=i,
-                                    name=speakers_this_mixdown[0].name,
+def adm_for_object(scene, sound_object: ObjectMix, room_size, adm_builder, object_index, wav_format):
+    fps = scene.render.fps
+    frame_start = scene.frame_start
+    frame_end = scene.frame_end
+    block_formats = sound_object.adm_block_formats(room_size=room_size)
+    created = adm_builder.create_item_objects(track_index=object_index,
+                                              name=sound_object.object_name,
                                               block_formats=block_formats)
     created.audio_object.start = Fraction(frame_start, fps)
@@ -201,24 +220,23 @@ def adm_for_object(scene, speakers_this_mixdown, room_size, b, i, frame_start, f
     created.track_uid.bitDepth = wav_format.bitsPerSample
 
 
-def adm_for_scene(scene, speaker_groups, wav_format, room_size):
-    b = ADMBuilder()
+def adm_for_scene(scene, sound_objects: List['ObjectMix'], wav_format, room_size):
+    adm_builder = ADMBuilder()
 
     frame_start = scene.frame_start
     frame_end = scene.frame_end
     fps = scene.render.fps
 
-    b.create_programme(audioProgrammeName=scene.name,
-                       start=Fraction(frame_start ,fps),
-                       end=Fraction(frame_end, fps) )
-    b.create_content(audioContentName="Objects")
+    adm_builder.create_programme(audioProgrammeName=scene.name,
+                                 start=Fraction(frame_start, fps),
+                                 end=Fraction(frame_end, fps))
+    adm_builder.create_content(audioContentName="Objects")
 
-    for i, speakers_this_mixdown in enumerate(speaker_groups):
-        adm_for_object(scene, speakers_this_mixdown, room_size, b, i, frame_start, fps, frame_end, wav_format)
+    for object_index, sound_object in enumerate(sound_objects):
+        adm_for_object(scene, sound_object, room_size, adm_builder, object_index, wav_format)
 
-    adm = b.adm
+    adm = adm_builder.adm
     generate_ids(adm)
     chna = ChnaChunk()
@@ -231,7 +249,7 @@ def adm_for_scene(scene, speaker_groups, wav_format, room_size):
 # File writing functions below
 
-def bext_data(scene, speaker_obj, sample_rate, room_size):
+def bext_data(scene, sample_rate, room_size):
     description = "SCENE={};ROOM_SIZE={}\n".format(scene.name, room_size).encode("ascii")
     originator_name = "Blender {}".format(bpy.app.version_string).encode("ascii")
     originator_ref = uuid.uuid1().hex.encode("ascii")
@@ -248,27 +266,15 @@ def bext_data(scene, speaker_obj, sample_rate, room_size):
     return data
 
 
-def load_infiles_for_muxing(mixdowns):
-    infiles = []
-    shortest_file = 0xFFFFFFFFFFFF
-    for elem in mixdowns:
-        infile = openBw64(elem[0], 'r')
-        infiles.append(infile)
-        if len(infile) < shortest_file:
-            shortest_file = len(infile)
-    return infiles, shortest_file
-
-
-def write_muxed_wav(mixdowns, scene, out_format, room_size, outfile, shortest_file, object_count, infiles):
-    #print("write_muxed_wav entered")
-    READ_BLOCK=1024
-    speaker_groups = list(map(lambda x: x[1], mixdowns))
-    adm, chna = adm_for_scene(scene, speaker_groups, out_format, room_size=room_size)
+def write_muxed_wav(mix_pool: ObjectMixPool, scene, out_format, room_size, outfile, shortest_file):
+    READ_BLOCK = 1024
+
+    sound_objects = mix_pool.object_mixes
+    adm, chna = adm_for_scene(scene, sound_objects, out_format, room_size=room_size)
 
     outfile.axml = lxml.etree.tostring(adm, pretty_print=True)
     outfile.chna = chna
-    outfile.bext = bext_data(scene, None, out_format.sampleRate, room_size=room_size)
+    outfile.bext = bext_data(scene, out_format.sampleRate, room_size=room_size)
 
     cursor = 0
     while True:
@@ -277,106 +283,43 @@ def write_muxed_wav(mixdowns, scene, out_format, room_size, outfile, shortest_fi
         if to_read == 0:
             break
 
-        buffer = numpy.zeros((to_read, object_count))
-        for i, infile in enumerate(infiles):
-            buffer[: , i] = infile.read(to_read)[: , 0]
+        buffer = numpy.zeros((to_read, len(sound_objects)))
+        for i, sound_object in enumerate(sound_objects):
+            buffer[:, i] = sound_object.mixdown_reader.read(to_read)[:, 0]
 
         outfile.write(buffer)
         cursor = cursor + to_read
 
 
-def mux_adm_from_object_mixdowns(scene, mixdowns_spk_list_tuple, output_filename=None, room_size=1.):
+def mux_adm_from_object_mixdowns(scene, sound_objects: List['ObjectMix'], output_filename, room_size=1.):
     """
-    mixdowns are a tuple of wave filename, and corresponding speaker object
+    sound_objects are the ObjectMix instances to mux, one per ADM audio object
     """
-    #print("mux_adm_from_object_mixdowns entered")
-    object_count = len(mixdowns_spk_list_tuple)
+    object_count = len(sound_objects)
     assert object_count > 0
 
-    infiles, shortest_file = load_infiles_for_muxing(mixdowns_spk_list_tuple)
-
-    out_file = output_filename or os.path.join(os.path.dirname(mixdowns_spk_list_tuple[0][0]),
-                                               bpy.path.clean_name(scene.name) + ".wav")
-
     out_format = FormatInfoChunk(channelCount=object_count,
-                                 sampleRate=infiles[0].sampleRate,
-                                 bitsPerSample=infiles[0].bitdepth)
+                                 sampleRate=scene.render.ffmpeg.audio_mixrate,
+                                 bitsPerSample=24)
 
-    with openBw64(out_file, 'w', formatInfo=out_format) as outfile:
-        write_muxed_wav(mixdowns_spk_list_tuple, scene, out_format, room_size, outfile, shortest_file, object_count, infiles)
-
-    for infile in infiles:
-        infile._buffer.close()
-
-
-def rm_object_mixes(mixdowns):
-    #print("rm_object_mixes entered")
-    for elem in mixdowns:
-        os.remove(elem[0])
-
-
-def all_speakers(scene):
-    return [obj for obj in scene.objects if obj.type == 'SPEAKER']
-
-
-def solo_speakers(scene, solo_group):
-    for speaker in all_speakers(scene):
-        if speaker in solo_group:
-            speaker.data.muted = False
-        else:
-            speaker.data.muted = True
-        speaker.data.update_tag()
-
-
-def unmute_all_speakers(scene):
-    for speaker in all_speakers(scene):
-        speaker.data.muted = False
-        speaker.data.update_tag()
-
-
-def create_mixdown_for_object(scene, speaker_group, basedir):
-    solo_speakers(scene, speaker_group)
-    scene_name = bpy.path.clean_name(scene.name)
-    speaker_name = bpy.path.clean_name(speaker_group[0].name)
-    fn = os.path.join(basedir, "%s_%s.wav" % (scene_name, speaker_name) )
-    bpy.ops.sound.mixdown(filepath=fn, container='WAV', codec='PCM', format='S24')
-    print("Created mixdown named {}".format(fn))
-    unmute_all_speakers(scene)
-    return fn
-
-
-def generate_speaker_mixdowns(scene, speaker_groups, filepath):
-    basedir = os.path.dirname(filepath)
-    for speaker_group in speaker_groups:
-        fn = create_mixdown_for_object(scene, speaker_group, basedir)
-        yield (fn, speaker_group)
-
-
-def save_output_state(context):
-    """
-    save render settings that we change to produce object WAV files
-    """
-    ff = context.scene.render.image_settings.file_format
-    codec = context.scene.render.ffmpeg.audio_codec
-    chans = context.scene.render.ffmpeg.audio_channels
-    return (ff, codec, chans)
-
-
-def restore_output_state(ctx, context):
-    context.scene.render.image_settings.file_format = ctx[0]
-    context.scene.render.ffmpeg.audio_codec = ctx[1]
-    context.scene.render.ffmpeg.audio_channels = ctx[2]
-
-
-def group_sounds(sound_sources, scene, max_objects):
+    with ObjectMixPool(sound_objects) as mix_pool:
+        with openBw64(output_filename, 'w', formatInfo=out_format) as outfile:
+            write_muxed_wav(mix_pool, scene, out_format, room_size,
+                            outfile, mix_pool.shortest_file_length)
+
+
+def partition_sounds_to_objects(scene, max_objects):
+    sound_sources = all_speakers(scene)
+
+    if len(sound_sources) == 0:
+        return []
+
     object_groups = group_speakers(sound_sources, scene)
     too_far_speakers = []
     if len(object_groups) > max_objects:
         too_far_speakers = object_groups[max_objects:]
         object_groups = object_groups[0:max_objects]
@@ -385,37 +328,27 @@ def group_sounds(sound_sources, scene, max_objects):
           len(object_groups), len(sound_sources), len(too_far_speakers)))
     for i, group in enumerate(object_groups):
-        print("Object Group %i"%i)
+        print("Object Group %i" % i)
         for source in group:
             print(" - %s" % source.name)
 
     return object_groups, too_far_speakers
 
 
 def generate_adm(context, filepath, room_size, max_objects):
-    ctx = save_output_state(context)
-    scene = bpy.context.scene
-
-    scene.render.image_settings.file_format = 'FFMPEG'
-    scene.render.ffmpeg.audio_codec = 'PCM'
-    scene.render.ffmpeg.audio_channels = 'MONO'
-
-    sound_sources = all_speakers(scene)
-    object_groups, _ = group_sounds(sound_sources, scene, max_objects)
-    mixdowns_spk_list_tuple = list(generate_speaker_mixdowns(scene, object_groups, filepath))
-    mixdown_count = len(mixdowns_spk_list_tuple)
-
-    if mixdown_count == 0:
+    scene = context.scene
+    object_groups, _ = partition_sounds_to_objects(scene, max_objects)
+
+    if len(object_groups) == 0:
         return {'FINISHED'}
-    else:
-        mux_adm_from_object_mixdowns(scene, mixdowns_spk_list_tuple,
-                                     output_filename= filepath,
-                                     room_size=room_size)
-
-        #cleanup
-        #print("Will delete {} input object files".format(len(mixdowns_spk_list_tuple)))
-        rm_object_mixes(mixdowns_spk_list_tuple)
-
-    restore_output_state(ctx, context)
+
+    base_dir = os.path.dirname(filepath)
+    sound_objects = [ObjectMix(sources=objects, scene=scene, base_dir=base_dir)
+                     for objects in object_groups]
+
+    mux_adm_from_object_mixdowns(scene, sound_objects,
+                                 output_filename=filepath,
+                                 room_size=room_size)
+
+    for o in sound_objects:
+        o.rm_mixdown()
+
     return {'FINISHED'}
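
For orientation, a minimal sketch of the ObjectMix lifecycle this file now implements. This example is not part of the commit: it only runs inside Blender's Python, ObjectMix is assumed to be importable from this module, and the scene is assumed to contain at least one speaker.

import bpy
from sound_objects.intern.speaker_utils import all_speakers

scene = bpy.context.scene
speakers = all_speakers(scene)
if speakers:
    # One ObjectMix per ADM audio object; the mono WAV lands in base_dir.
    mix = ObjectMix(sources=speakers[:1], scene=scene, base_dir="/tmp")
    blocks = mix.adm_block_formats(room_size=1.0)  # one block per panner position change
    samples = mix.mixdown_reader.read(1024)        # first read lazily renders the mixdown
    print(mix.object_name, len(blocks), samples.shape)
    mix.rm_mixdown()                               # close the reader and delete the WAV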

View File: sound_objects/intern/geom_utils.py

@@ -0,0 +1,99 @@
+import sys
+from math import sqrt
+
+import bpy
+import numpy
+from numpy.linalg import norm
+from mathutils import Quaternion, Vector
+
+
+class FrameInterval:
+    def __init__(self, start_frame, end_frame):
+        self.start_frame = int(start_frame)
+        self.end_frame = int(end_frame)
+
+    def overlaps(self, other: 'FrameInterval') -> bool:
+        return self.start_frame <= other.start_frame <= self.end_frame or \
+            other.start_frame <= self.start_frame <= other.end_frame
+
+
+def compute_relative_vector(camera: bpy.types.Camera, target: bpy.types.Object):
+    """
+    Return a vector from `camera` to `target` in the camera's coordinate space.
+
+    The camera's lens is assumed to be normal to the ZX plane.
+    """
+    cam_loc, cam_rot, _ = camera.matrix_world.decompose()
+    target_loc, _, _ = target.matrix_world.decompose()
+    relative_vector = target_loc - cam_loc
+    rotation = cam_rot.to_matrix().transposed()
+    relative_vector.rotate(rotation)
+
+    # The camera's world vector is normal to the horizon; we want a vector
+    # down the barrel, so rotate 90 degrees about X.
+    camera_correction = Quaternion((sqrt(2.) / 2., sqrt(2.) / 2., 0., 0.))
+    relative_vector.rotate(camera_correction)
+    return relative_vector
+
+
+def room_norm_vector(vec, room_size=1.) -> Vector:
+    """
+    The Room is tearing me apart, Lisa.
+
+    The room is a cube with the camera at its center. We use a Chebyshev
+    normalization to convert a vector in world or camera space into a vector
+    that represents the projection of that vector onto the room's walls.
+
+    The Pro Tools/Dolby Atmos workflow I am targeting uses "Room Centric"
+    panner coordinates ("cartesian allocentric coordinates" in ADM speak) and
+    this process seems to yield good results.
+    """
+    chebyshev = norm(vec, ord=numpy.inf)
+    if chebyshev < room_size:
+        return vec / room_size
+    else:
+        return vec / chebyshev
+
+
+def closest_approach_to_camera(scene, speaker_object) -> (float, int):
+    min_dist = sys.float_info.max
+    at_time = scene.frame_start
+    for frame in range(scene.frame_start, scene.frame_end + 1):
+        scene.frame_set(frame)
+        rel = speaker_object.matrix_world.to_translation() - \
+            scene.camera.matrix_world.to_translation()
+        dist = norm(rel)
+        if dist < min_dist:
+            min_dist = dist
+            at_time = frame
+    return (min_dist, at_time)
+
+
+def speaker_active_time_range(speaker) -> FrameInterval:
+    """
+    The time range this speaker must control in order to sound right.
+
+    At this time this is assuming the strips on the speaker's NLA tracks
+    bound its active range.
+    """
+    start, end = 0xffffffff, -0xffffffff
+    for track in speaker.animation_data.nla_tracks:
+        for strip in track.strips:
+            if strip.frame_start < start:
+                start = strip.frame_start
+            if strip.frame_end > end:
+                end = strip.frame_end
+    return FrameInterval(start_frame=start, end_frame=end)
+
+
+def speakers_by_min_distance(scene, speakers):
+    def min_distance(speaker):
+        return closest_approach_to_camera(scene, speaker)[0]
+
+    return sorted(speakers, key=(lambda spk: min_distance(spk)))
+
+
+def speakers_by_start_time(speaker_objs):
+    return sorted(speaker_objs, key=(lambda spk: speaker_active_time_range(spk).start_frame))
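
The Chebyshev normalization in room_norm_vector is the one subtle step in this file. A self-contained sketch with plain numpy arrays (no Blender required; the name room_norm is hypothetical) shows the intended clamping behavior:

import numpy
from numpy.linalg import norm

def room_norm(vec, room_size=1.0):
    # Same rule as room_norm_vector: scale by room_size inside the room,
    # otherwise divide by the largest axis magnitude so the vector lands
    # exactly on a wall of the unit cube.
    chebyshev = norm(vec, ord=numpy.inf)
    if chebyshev < room_size:
        return vec / room_size
    return vec / chebyshev

print(room_norm(numpy.array([0.25, 0.1, 0.0])))   # inside the room: unchanged
print(room_norm(numpy.array([2.0, 0.5, -1.0])))   # outside: [ 1.    0.25 -0.5 ], X pinned to the wall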

View File: sound_objects/intern/speaker_utils.py

@@ -0,0 +1,18 @@
+def all_speakers(scene):
+    return [obj for obj in scene.objects if obj.type == 'SPEAKER']
+
+
+def solo_speakers(scene, solo_group):
+    for speaker in all_speakers(scene):
+        if speaker in solo_group:
+            speaker.data.muted = False
+        else:
+            speaker.data.muted = True
+        speaker.data.update_tag()
+
+
+def unmute_all_speakers(scene):
+    for speaker in all_speakers(scene):
+        speaker.data.muted = False
+        speaker.data.update_tag()
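
A brief usage sketch for these helpers, assuming it runs in Blender's Python with this add-on on the path:

import bpy
from sound_objects.intern.speaker_utils import (all_speakers, solo_speakers, unmute_all_speakers)

scene = bpy.context.scene
speakers = all_speakers(scene)
if speakers:
    solo_speakers(scene, speakers[:1])  # mute every speaker except the first
    # ... a bpy.ops.sound.mixdown(...) call here would render only that object ...
    unmute_all_speakers(scene)          # then unmute everything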