Much refactoring

This seems to work, needs more testing
Jamie Hardt
2020-09-20 13:46:03 -07:00
parent 8e695140fb
commit b4b3f071ae
3 changed files with 330 additions and 280 deletions

View File

import sys
import bpy
import os
from contextlib import contextmanager
import lxml
import uuid
from fractions import Fraction
import struct
import numpy
from numpy.linalg import norm
from dataclasses import dataclass
from typing import List

from ear.fileio.utils import openBw64
from ear.fileio.bw64 import Bw64Reader
from ear.fileio.bw64.chunks import (FormatInfoChunk, ChnaChunk)
from ear.fileio.adm import chna as adm_chna
from ear.fileio.adm.xml import adm_to_xml
from ear.fileio.adm.elements.block_formats import (AudioBlockFormatObjects, JumpPosition)
from ear.fileio.adm.elements.geom import ObjectCartesianPosition
from ear.fileio.adm.builder import ADMBuilder
from ear.fileio.adm.generate_ids import generate_ids

from sound_objects.intern.geom_utils import (compute_relative_vector,
                                             room_norm_vector,
                                             speaker_active_time_range,
                                             speakers_by_min_distance,
                                             speakers_by_start_time)
from sound_objects.intern.speaker_utils import (all_speakers, solo_speakers, unmute_all_speakers)

@contextmanager
def adm_object_rendering_context(scene: bpy.types.Scene):
    """
    Switch the scene's render settings to the FFMPEG/PCM/MONO configuration
    required for object mixdowns, restoring the previous settings on exit.
    """
    old_ff = scene.render.image_settings.file_format
    old_codec = scene.render.ffmpeg.audio_codec
    old_chans = scene.render.ffmpeg.audio_channels

    scene.render.image_settings.file_format = 'FFMPEG'
    scene.render.ffmpeg.audio_codec = 'PCM'
    scene.render.ffmpeg.audio_channels = 'MONO'

    yield scene

    scene.render.image_settings.file_format = old_ff
    scene.render.ffmpeg.audio_codec = old_codec
    scene.render.ffmpeg.audio_channels = old_chans

class ObjectMix:
    """
    A mono mixdown of one or more Speaker objects, rendered lazily to an
    intermediate WAV file, together with its ADM position metadata.
    """
    def __init__(self, sources: List[bpy.types.Speaker],
                 scene: bpy.types.Scene, base_dir: str):
        self.sources = sources
        self.intermediate_filename = None
        self.base_dir = base_dir
        self.scene = scene
        self._mixdown_file_handle = None
        self._mixdown_reader = None

    @property
    def frame_start(self):
        return self.scene.frame_start

    @property
    def frame_end(self):
        return self.scene.frame_end

    @property
    def mixdown_reader(self) -> Bw64Reader:
        if self._mixdown_reader is None:
            self._mixdown_reader = Bw64Reader(self.mixdown_file_handle)
        return self._mixdown_reader

    @property
    def mixdown_file_handle(self):
        if self._mixdown_file_handle is None:
            self._mixdown_file_handle = open(self.mixdown_filename, 'rb')
        return self._mixdown_file_handle

    @property
    def mixdown_filename(self):
        if self.intermediate_filename is None:
            self.mixdown()
        return self.intermediate_filename

    @property
    def object_name(self):
        return self.sources[0].name

    def mixdown(self):
        with adm_object_rendering_context(self.scene) as scene:
            solo_speakers(scene, self.sources)
            scene_name = bpy.path.clean_name(scene.name)
            speaker_name = bpy.path.clean_name(self.object_name)
            self.intermediate_filename = os.path.join(self.base_dir, "%s_%s.wav" % (scene_name, speaker_name))
            bpy.ops.sound.mixdown(filepath=self.intermediate_filename,
                                  container='WAV', codec='PCM', format='S24')
            print("Created mixdown named {}".format(self.intermediate_filename))
            unmute_all_speakers(scene)

    def adm_block_formats(self, room_size=1.):
        fps = self.scene.render.fps
        block_formats = []
        for speaker_obj in self.sources:
            speaker_interval = speaker_active_time_range(speaker_obj)
            for frame in range(speaker_interval.start_frame, speaker_interval.end_frame + 1):
                self.scene.frame_set(frame)
                relative_vector = compute_relative_vector(camera=self.scene.camera, target=speaker_obj)
                norm_vec = room_norm_vector(relative_vector, room_size=room_size)
                pos = ObjectCartesianPosition(X=norm_vec.x, Y=norm_vec.y, Z=norm_vec.z)
                if len(block_formats) == 0 or pos != block_formats[-1].position:
                    jp = JumpPosition(flag=True, interpolationLength=Fraction(1, fps * 2))
                    block = AudioBlockFormatObjects(position=pos,
                                                    rtime=Fraction(frame, fps),
                                                    duration=Fraction(1, fps),
                                                    cartesian=True,
                                                    jumpPosition=jp)
                    block_formats.append(block)
                else:
                    block_formats[-1].duration = block_formats[-1].duration + Fraction(1, fps)
        return block_formats

    def rm_mixdown(self):
        if self._mixdown_reader is not None:
            self._mixdown_reader = None
        if self._mixdown_file_handle is not None:
            self._mixdown_file_handle.close()
            self._mixdown_file_handle = None
        if self.intermediate_filename is not None:
            os.remove(self.intermediate_filename)
            self.intermediate_filename = None
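
# How the coalescing in adm_block_formats behaves (illustrative, assuming
# fps = 24): a new AudioBlockFormatObjects is only emitted when the
# room-normalized position changes between frames, so a speaker that sits
# still from frame 1 through frame 48 yields a single block with
# rtime = 1/24 s and duration = 48/24 = 2 s, while a continuously moving
# speaker yields one 1/24 s block per frame, each with a JumpPosition
# interpolationLength of 1/48 s.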

class ObjectMixPool:
    """
    Context manager owning a set of ObjectMix intermediates; deletes their
    mixdown files on exit.
    """
    def __init__(self, object_mixes: List[ObjectMix]):
        self.object_mixes = object_mixes

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for mix in self.object_mixes:
            mix.rm_mixdown()

    @property
    def shortest_file_length(self):
        lengths = map(lambda f: len(f.mixdown_reader), self.object_mixes)
        return min(lengths)
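
# A minimal sketch of the intended lifecycle (the scene handle and the
# base_dir value here are assumptions, not part of this commit):
#
#     scene = bpy.context.scene
#     mixes = [ObjectMix(sources=[spk], scene=scene, base_dir="/tmp")
#              for spk in all_speakers(scene)]
#     with ObjectMixPool(mixes) as pool:
#         length = pool.shortest_file_length  # first access renders lazily
#     # on exit, every intermediate WAV is removed via rm_mixdown()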

def group_speakers(speakers, scene) -> List[List[bpy.types.Object]]:
    # ... (unchanged lines collapsed in the diff view)
    return ret_val

def adm_for_object(scene, sound_object: ObjectMix, room_size, adm_builder, object_index, wav_format):
    fps = scene.render.fps
    frame_start = scene.frame_start
    frame_end = scene.frame_end

    block_formats = sound_object.adm_block_formats(room_size=room_size)
    created = adm_builder.create_item_objects(track_index=object_index,
                                              name=sound_object.object_name,
                                              block_formats=block_formats)
    created.audio_object.start = Fraction(frame_start, fps)
    # ... (unchanged lines collapsed in the diff view)
    created.track_uid.bitDepth = wav_format.bitsPerSample

def adm_for_scene(scene, sound_objects: List['ObjectMix'], wav_format, room_size):
    adm_builder = ADMBuilder()
    frame_start = scene.frame_start
    frame_end = scene.frame_end
    fps = scene.render.fps

    adm_builder.create_programme(audioProgrammeName=scene.name,
                                 start=Fraction(frame_start, fps),
                                 end=Fraction(frame_end, fps))
    adm_builder.create_content(audioContentName="Objects")

    for object_index, sound_object in enumerate(sound_objects):
        adm_for_object(scene, sound_object, room_size, adm_builder, object_index, wav_format)

    adm = adm_builder.adm
    generate_ids(adm)

    chna = ChnaChunk()
    # ... (unchanged lines collapsed in the diff view; the populated ADM
    # document and CHNA chunk are returned to the caller)

# File writing functions below

def bext_data(scene, sample_rate, room_size):
    description = "SCENE={};ROOM_SIZE={}\n".format(scene.name, room_size).encode("ascii")
    originator_name = "Blender {}".format(bpy.app.version_string).encode("ascii")
    originator_ref = uuid.uuid1().hex.encode("ascii")
    # ... (unchanged lines collapsed in the diff view)
    return data

def write_muxed_wav(mix_pool: ObjectMixPool, scene, out_format, room_size, outfile, shortest_file):
    READ_BLOCK = 1024

    sound_objects = mix_pool.object_mixes
    adm, chna = adm_for_scene(scene, sound_objects, out_format, room_size=room_size)

    outfile.axml = lxml.etree.tostring(adm, pretty_print=True)
    outfile.chna = chna
    outfile.bext = bext_data(scene, out_format.sampleRate, room_size=room_size)

    cursor = 0
    while True:
        to_read = min(READ_BLOCK, shortest_file - cursor)
        if to_read == 0:
            break

        buffer = numpy.zeros((to_read, len(sound_objects)))
        for i, sound_object in enumerate(sound_objects):
            buffer[:, i] = sound_object.mixdown_reader.read(to_read)[:, 0]

        outfile.write(buffer)
        cursor = cursor + to_read
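
# Behavior worth noting: every stem is truncated to the shortest mixdown so
# all channels share one length. For example (hypothetical lengths), three
# ObjectMixes of 48000, 50000 and 52000 frames produce a 3-channel,
# 48000-frame BW64 file, with ObjectMix i on channel i in the order they
# appear in mix_pool.object_mixes.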

def mux_adm_from_object_mixdowns(scene, sound_objects: List['ObjectMix'], output_filename, room_size=1.):
    """
    Mux the given ObjectMix stems into a single multichannel BW64 file with
    ADM metadata, one channel per object.
    """
    object_count = len(sound_objects)
    assert object_count > 0

    out_format = FormatInfoChunk(channelCount=object_count,
                                 sampleRate=scene.render.ffmpeg.audio_mixrate,
                                 bitsPerSample=24)

    with ObjectMixPool(sound_objects) as mix_pool:
        with openBw64(output_filename, 'w', formatInfo=out_format) as outfile:
            write_muxed_wav(mix_pool, scene, out_format, room_size,
                            outfile, mix_pool.shortest_file_length)

def partition_sounds_to_objects(scene, max_objects):
    sound_sources = all_speakers(scene)
    if len(sound_sources) == 0:
        return [], []

    object_groups = group_speakers(sound_sources, scene)
    too_far_speakers = []
    if len(object_groups) > max_objects:
        too_far_speakers = object_groups[max_objects:]
        object_groups = object_groups[0:max_objects]

    # ... (unchanged lines collapsed in the diff view, including a summary
    # print of len(object_groups), len(sound_sources), len(too_far_speakers))
    for i, group in enumerate(object_groups):
        print("Object Group %i" % i)
        for source in group:
            print(" - %s" % source.name)

    return object_groups, too_far_speakers

def generate_adm(context, filepath, room_size, max_objects):
    scene = bpy.context.scene

    object_groups, _ = partition_sounds_to_objects(scene, max_objects)
    if len(object_groups) == 0:
        return {'FINISHED'}

    sound_objects = [ObjectMix(sources=objects, scene=scene,
                               base_dir=os.path.dirname(filepath))
                     for objects in object_groups]
    mux_adm_from_object_mixdowns(scene, sound_objects,
                                 output_filename=filepath,
                                 room_size=room_size)

    for o in sound_objects:
        o.rm_mixdown()

    return {'FINISHED'}

View File: sound_objects/intern/geom_utils.py (new file)

import sys
from math import sqrt
from typing import Tuple

import bpy
import numpy
from numpy.linalg import norm
from mathutils import Quaternion, Vector


class FrameInterval:
    def __init__(self, start_frame, end_frame):
        self.start_frame = int(start_frame)
        self.end_frame = int(end_frame)

    def overlaps(self, other: 'FrameInterval') -> bool:
        return self.start_frame <= other.start_frame <= self.end_frame or \
            other.start_frame <= self.start_frame <= other.end_frame

def compute_relative_vector(camera: bpy.types.Camera, target: bpy.types.Object):
    """
    Return a vector from `camera` to `target` in the camera's coordinate space.

    The camera's lens is assumed to be normal to the ZX plane.
    """
    cam_loc, cam_rot, _ = camera.matrix_world.decompose()
    target_loc, _, _ = target.matrix_world.decompose()
    relative_vector = target_loc - cam_loc

    rotation = cam_rot.to_matrix().transposed()
    relative_vector.rotate(rotation)

    # The camera's world vector is normal to the horizon; we want a vector
    # down the barrel, so rotate 90 degrees about the X axis.
    camera_correction = Quaternion((sqrt(2.) / 2., sqrt(2.) / 2., 0., 0.))
    relative_vector.rotate(camera_correction)

    return relative_vector
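
# Sanity check (a sketch, relying on Blender's convention that a camera
# looks down its local -Z axis): a target 3 m directly in front of the
# lens should land on the +Y axis after the correction rotation, i.e.
# compute_relative_vector(camera, target) comes back close to
# Vector((0.0, 3.0, 0.0)).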

def room_norm_vector(vec, room_size=1.) -> Vector:
    """
    The Room is tearing me apart, Lisa.

    The room is a cube with the camera at its center. We use a Chebyshev
    normalization to convert a vector in world or camera space into a vector
    that represents the projection of that vector onto the room's walls.

    The Pro Tools/Dolby Atmos workflow I am targeting uses "Room Centric"
    panner coordinates ("cartesian allocentric coordinates" in ADM speak)
    and this process seems to yield good results.
    """
    chebyshev = norm(vec, ord=numpy.inf)
    if chebyshev < room_size:
        return vec / room_size
    else:
        return vec / chebyshev
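
# Worked example of the normalization (illustrative values, default
# room_size of 1.0):
#
#     room_norm_vector(Vector((-2.0, 4.0, 0.0)))
#     # Chebyshev norm is 4.0 -> Vector((-0.5, 1.0, 0.0)); the largest
#     # component is pinned to a wall at +/-1, the rest scale with it.
#
#     room_norm_vector(Vector((0.25, 0.5, 0.0)))
#     # norm 0.5 < room_size -> returned unchanged (divided by 1.0).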

def closest_approach_to_camera(scene, speaker_object) -> Tuple[float, int]:
    """
    Return the minimum distance from the speaker to the scene's camera over
    the scene's frame range, and the frame at which it occurs.
    """
    min_dist = sys.float_info.max
    at_time = scene.frame_start
    for frame in range(scene.frame_start, scene.frame_end + 1):
        scene.frame_set(frame)
        rel = speaker_object.matrix_world.to_translation() - scene.camera.matrix_world.to_translation()
        dist = norm(rel)
        if dist < min_dist:
            min_dist = dist
            at_time = frame
    return (min_dist, at_time)

def speaker_active_time_range(speaker) -> FrameInterval:
    """
    The time range this speaker must control in order to sound right,
    taken here as the full extent of the strips on the speaker's NLA tracks.
    """
    start, end = 0xffffffff, -0xffffffff
    for track in speaker.animation_data.nla_tracks:
        for strip in track.strips:
            if strip.frame_start < start:
                start = strip.frame_start
            if strip.frame_end > end:
                end = strip.frame_end
    return FrameInterval(start_frame=start, end_frame=end)

def speakers_by_min_distance(scene, speakers):
    def min_distance(speaker):
        return closest_approach_to_camera(scene, speaker)[0]
    return sorted(speakers, key=min_distance)


def speakers_by_start_time(speaker_objs):
    return sorted(speaker_objs, key=(lambda spk: speaker_active_time_range(spk).start_frame))

View File: sound_objects/intern/speaker_utils.py (new file)

def all_speakers(scene):
    return [obj for obj in scene.objects if obj.type == 'SPEAKER']


def solo_speakers(scene, solo_group):
    for speaker in all_speakers(scene):
        if speaker in solo_group:
            speaker.data.muted = False
        else:
            speaker.data.muted = True
        speaker.data.update_tag()


def unmute_all_speakers(scene):
    for speaker in all_speakers(scene):
        speaker.data.muted = False
        speaker.data.update_tag()
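
# A sketch of how these helpers combine for a solo render (the mixdown
# arguments mirror ObjectMix.mixdown; the speaker selection and output
# path are hypothetical):
#
#     scene = bpy.context.scene
#     solo_speakers(scene, [scene.objects["Speaker"]])
#     bpy.ops.sound.mixdown(filepath="/tmp/solo.wav", container='WAV',
#                           codec='PCM', format='S24')
#     unmute_all_speakers(scene)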