Files
soundobjects_blender_addon/operator_adm_export.py
2020-09-16 21:11:59 -07:00

475 lines
15 KiB
Python

import sys
import bpy
import os
from ear.fileio.utils import openBw64
from ear.fileio.bw64.utils import interleave
from ear.fileio.bw64.chunks import (FormatInfoChunk, ChnaChunk)
from ear.fileio.adm import chna as adm_chna
from ear.fileio.adm.xml import adm_to_xml
from ear.fileio.adm.elements.block_formats import (AudioBlockFormatObjects, JumpPosition)
from ear.fileio.adm.elements.geom import ObjectCartesianPosition
from ear.fileio.adm.builder import (ADMBuilder, TypeDefinition)
from ear.fileio.adm.generate_ids import generate_ids
import lxml
import uuid
from fractions import Fraction
import struct
import numpy
from numpy.linalg import norm
from mathutils import Quaternion, Vector
from time import strftime
from math import sqrt
bl_info = {
"name": "Export ADM Broadcast-WAV File",
"description": "Export a Broadcast-WAV with each speaker as an ADM object",
"author": "Jamie Hardt",
"version": (0, 22),
"warning": "Requires `ear` EBU ADM Renderer package to be installed",
"blender": (2, 90, 0),
"category": "Import-Export",
}
def compute_relative_vector(camera: bpy.types.Camera, target: bpy.types.Object):
"""
Return a vector from `camera` to `target` in the camera's coordinate space.
The camera's lens is assumed to be norm to the ZX plane.
"""
cam_loc, cam_rot, _ = camera.matrix_world.decompose()
target_loc, _, _ = target.matrix_world.decompose()
relative_vector = target_loc - cam_loc
rotation = cam_rot.to_matrix().transposed()
relative_vector.rotate(rotation)
# The camera's worldvector is norm to the horizon, we want a vector
# down the barrel.
camera_correction = Quaternion( ( sqrt(2.) / 2. , sqrt(2.) / 2. , 0. , 0.) )
relative_vector.rotate(camera_correction)
return relative_vector
def room_norm_vector(vec, room_size=1.):
"""
The Room is tearing me apart, Lisa.
The room is a cube with the camera at its center. We use a chebyshev normalization
to convert a vector in world or camera space into a vector the represents the projection
of that vector onto the room's walls.
The Pro Tools/Dolby Atmos workflow I am targeting uses "Room Centric" panner coordinates
("cartesian allocentric coordinates" in ADM speak) and this process seems to yield good
results.
"""
chebyshev = norm(vec, ord=numpy.inf)
if chebyshev < room_size:
return vec / room_size
else:
return vec / chebyshev
def closest_approach_to_camera(scene, speaker_object):
max_dist = sys.float_info.max
at_time = scene.frame_start
for frame in range(scene.frame_start, scene.frame_end + 1):
scene.frame_set(frame)
rel = speaker_object.matrix_world.to_translation() - scene.camera.matrix_world.to_translation()
dist = norm(rel)
if dist < max_dist:
max_dist = dist
at_time = frame
return (max_dist, at_time)
def speaker_active_time_range(speaker):
"""
The time range this speaker must control in order to sound right.
At this time this is assuming the str
"""
start, end = 0xffffffff, -0xffffffff
for track in speaker.animation_data.nla_tracks:
for strip in track.strips:
if strip.frame_start < start:
start = strip.frame_start
if strip.frame_end > end:
end = strip.frame_end
return int(start), int(end)
def speakers_by_min_distance(scene, speakers):
def min_distance(speaker):
return closest_approach_to_camera(scene, speaker)[0]
return sorted(speakers, key=(lambda spk: min_distance(spk)))
def speakers_by_start_time(speaker_objs):
return sorted(speaker_objs, key=(lambda spk: speaker_active_time_range(spk)[0]))
def group_speakers(speaker_objs):
def group_speakers_impl1(bag):
"Returns a useable group and the remainder"
leftover = []
this_group = []
boundary = -0xffffffff
for speaker in bag:
start, end = speaker_active_time_range(speaker)
if start > boundary:
this_group.append(speaker)
boundary = end
else:
leftover.append(speaker)
return (this_group, leftover)
groups = []
remaining = speaker_objs
while len(remaining) > 0:
results = group_speakers_impl1(remaining)
groups.append(results[0])
remaining = results[1]
print("Will group {} sources into {} objects".format(len(speaker_objs), len(groups)))
return groups
def adm_block_formats_for_speakers(scene, speaker_objs, room_size=1.):
fps = scene.render.fps
block_formats = []
for speaker_obj in speakers_by_start_time(speaker_objs):
speaker_start, speaker_end = speaker_active_time_range(speaker_obj)
for frame in range(speaker_start, speaker_end + 1):
scene.frame_set(frame)
relative_vector = compute_relative_vector(camera=scene.camera, target=speaker_obj)
norm_vec = room_norm_vector(relative_vector, room_size=room_size)
pos = ObjectCartesianPosition(X=norm_vec.x , Y=norm_vec.y , Z=norm_vec.z)
if len(block_formats) == 0 or pos != block_formats[-1].position:
jp = JumpPosition(flag=True, interpolationLength=Fraction(1,fps * 2) )
block = AudioBlockFormatObjects(position= pos,
rtime=Fraction(frame,fps),
duration=Fraction(1,fps) ,
cartesian=True,
jumpPosition=jp)
block_formats.append(block)
else:
block_formats[-1].duration = block_formats[-1].duration + Fraction(1,fps)
return block_formats
def adm_for_object(scene, speakers_this_mixdown, room_size, b, i, frame_start, fps, frame_end, wav_format):
block_formats = adm_block_formats_for_speakers(scene=scene,
speaker_objs=speakers_this_mixdown,
room_size=room_size)
created = b.create_item_objects(track_index=i,
name=speakers_this_mixdown[0].name,
block_formats=block_formats)
created.audio_object.start = Fraction(frame_start, fps)
created.audio_object.duration = Fraction(frame_end - frame_start, fps)
created.track_uid.sampleRate = wav_format.sampleRate
created.track_uid.bitDepth = wav_format.bitsPerSample
def adm_for_scene(scene, speaker_groups, wav_format, room_size):
b = ADMBuilder()
frame_start = scene.frame_start
frame_end = scene.frame_end
fps = scene.render.fps
b.create_programme(audioProgrammeName=scene.name,
start=Fraction(frame_start ,fps),
end=Fraction(frame_end, fps) )
b.create_content(audioContentName="Objects")
for i, speakers_this_mixdown in enumerate(speaker_groups):
adm_for_object(scene, speakers_this_mixdown, room_size, b, i, frame_start, fps, frame_end, wav_format)
adm = b.adm
generate_ids(adm)
chna = ChnaChunk()
adm_chna.populate_chna_chunk(chna, adm)
return adm_to_xml(adm), chna
########################################################################
# File writing functions below
def bext_data(scene, speaker_obj, sample_rate, room_size):
description = "SCENE={};ROOM_SIZE={}\n".format(scene.name, room_size).encode("ascii")
originator_name = "Blender {}".format(bpy.app.version_string).encode("ascii")
originator_ref = uuid.uuid1().hex.encode("ascii")
date10 = strftime("%Y-%m-%d").encode("ascii")
time8 = strftime("%H:%M:%S").encode("ascii")
timeref = int(float(scene.frame_start) * sample_rate / float(scene.render.fps))
version = 0
umid = b"\0" * 64
pad = b"\0" * 190
data = struct.pack("<256s32s32s10s8sQH64s190s", description, originator_name,
originator_ref, date10, time8, timeref, version, umid, pad)
return data
def load_infiles_for_muxing(mixdowns):
infiles = []
shortest_file = 0xFFFFFFFFFFFF
for elem in mixdowns:
infile = openBw64(elem[0], 'r')
infiles.append(infile)
if len(infile) < shortest_file:
shortest_file = len(infile)
return infiles, shortest_file
def rm_object_mixes(mixdowns):
for elem in mixdowns:
os.unlink(elem[0])
def write_muxed_wav(mixdowns, scene, out_format, room_size, outfile, shortest_file, object_count, infiles):
READ_BLOCK=1024
speaker_groups = list(map(lambda x: x[1], mixdowns))
adm, chna = adm_for_scene(scene, speaker_groups, out_format, room_size=room_size)
outfile.axml = lxml.etree.tostring(adm, pretty_print=True)
outfile.chna = chna
outfile.bext = bext_data(scene, None, out_format.sampleRate, room_size=room_size)
cursor = 0
while True:
remainder = shortest_file - cursor
to_read = min(READ_BLOCK, remainder)
if to_read == 0:
break
buffer = numpy.zeros((to_read, object_count))
for i, infile in enumerate(infiles):
buffer[: , i] = infile.read(to_read)[: , 0]
outfile.write(buffer)
cursor = cursor + to_read
def mux_adm_from_object_mixdowns(scene, mixdowns_spk_list_tuple, output_filename=None, room_size=1.):
"""
mixdowns are a tuple of wave filename, and corresponding speaker object
"""
object_count = len(mixdowns_spk_list_tuple)
assert object_count > 0
infiles, shortest_file = load_infiles_for_muxing(mixdowns_spk_list_tuple)
out_file = output_filename or os.path.join(os.path.dirname(mixdowns_spk_list_tuple[0][0]),
bpy.path.clean_name(scene.name) + ".wav")
out_format = FormatInfoChunk(channelCount=object_count,
sampleRate=infiles[0].sampleRate,
bitsPerSample=infiles[0].bitdepth)
with openBw64(out_file, 'w', formatInfo=out_format) as outfile:
write_muxed_wav(mixdowns_spk_list_tuple, scene, out_format, room_size, outfile, shortest_file, object_count, infiles)
for infile in infiles:
infile._buffer.close()
rm_object_mixes(mixdowns_spk_list_tuple)
def all_speakers(scene):
return [obj for obj in scene.objects if obj.type == 'SPEAKER']
def solo_speakers(scene, solo_group):
for speaker in all_speakers(scene):
if speaker in solo_group:
speaker.data.muted = False
else:
speaker.data.muted = True
speaker.data.update_tag()
def unmute_all_speakers(scene):
for speaker in all_speakers(scene):
speaker.data.muted = False
speaker.data.update_tag()
def create_mixdown_for_object(scene, speaker_group, basedir):
solo_speakers(scene, speaker_group)
scene_name = bpy.path.clean_name(scene.name)
speaker_name = bpy.path.clean_name(speaker_group[0].name)
fn = os.path.join(basedir, "%s_%s.wav" % (scene_name, speaker_name) )
bpy.ops.sound.mixdown(filepath=fn, container='WAV', codec='PCM', format='S24')
return fn
def generate_speaker_mixdowns(scene, speaker_groups, filepath):
basedir = os.path.dirname(filepath)
for speaker_group in speaker_groups:
fn = create_mixdown_for_object(scene, speaker_group, basedir)
yield (fn, speaker_group)
def save_output_state(context):
"""
save render settings that we change to produce object WAV files
"""
ff = context.scene.render.image_settings.file_format
codec = context.scene.render.ffmpeg.audio_codec
chans = context.scene.render.ffmpeg.audio_channels
return (ff, codec, chans)
def restore_output_state(ctx, context):
context.scene.render.image_settings.file_format = ctx[0]
context.scene.render.ffmpeg.audio_codec = ctx[1]
context.scene.render.ffmpeg.audio_channels = ctx[2]
def write_some_data(context, filepath, room_size, max_objects):
ctx = save_output_state(context)
scene = bpy.context.scene
scene.render.image_settings.file_format = 'FFMPEG'
scene.render.ffmpeg.audio_codec = 'PCM'
scene.render.ffmpeg.audio_channels = 'MONO'
sound_sources = all_speakers(scene)
sorted_speakers = speakers_by_start_time(sound_sources)
object_groups = group_speakers(sorted_speakers)
closest_speakers = speakers_by_min_distance(scene, sound_sources)
too_far_speakers = []
n = len(closest_speakers) - 1
while len(object_groups) > max_objects:
sorted_speakers = speakers_by_start_time(closest_speakers[0:n])
too_far_speakers = closest_speakers[n:]
object_groups = group_speakers(sorted_speakers)
n = n - 1
print("Will create {} objects for {} sources, ignoring {} sources".format(
len(object_groups), len(sorted_speakers), len(too_far_speakers)))
mixdowns_spk_list_tuple = list(generate_speaker_mixdowns(scene, object_groups, filepath))
mixdown_count = len(mixdowns_spk_list_tuple)
if mixdown_count == 0:
return {'FINISHED'}
else:
mux_adm_from_object_mixdowns(scene, mixdowns_spk_list_tuple,
output_filename= filepath,
room_size=room_size)
#cleanup
unmute_all_speakers(scene)
restore_output_state(ctx, context)
return {'FINISHED'}
#########################################################################
### BOILERPLATE EXPORTER CODE BELOW
# ExportHelper is a helper class, defines filename and
# invoke() function which calls the file selector.
from bpy_extras.io_utils import ExportHelper
from bpy.props import StringProperty, BoolProperty, EnumProperty, FloatProperty, IntProperty
from bpy.types import Operator
class ADMWaveExport(Operator, ExportHelper):
"""Export a Broadcast-WAV audio file with each speaker encoded as an ADM object"""
bl_idname = "export.adm_wave_file" # important since its how bpy.ops.import_test.some_data is constructed
bl_label = "Export ADM Wave File"
# ExportHelper mixin class uses this
filename_ext = ".wav"
filter_glob: StringProperty(
default="*.wav",
options={'HIDDEN'},
maxlen=255, # Max internal buffer length, longer would be clamped.
)
room_size: FloatProperty(
default=1.0,
name="Room Size",
description="Distance from the lens to the front room boundary",
min=0.001,
step=1.,
unit='LENGTH'
)
max_objects: IntProperty(
name="Max Objects",
description="Maximum number of objects to create",
default=24,
min=0,
max=118
)
def execute(self, context):
return write_some_data(context, self.filepath, self.room_size, self.max_objects)
# Only needed if you want to add into a dynamic menu
def menu_func_export(self, context):
self.layout.operator(ADMWaveExport.bl_idname, text="ADM Broadcast-WAVE (.wav)")
def register():
bpy.utils.register_class(ADMWaveExport)
bpy.types.TOPBAR_MT_file_export.append(menu_func_export)
def unregister():
bpy.utils.unregister_class(ADMWaveExport)
bpy.types.TOPBAR_MT_file_export.remove(menu_func_export)
if __name__ == "__main__":
register()