diff --git a/intern/add_sound_to_meshes.py b/intern/add_sound_to_meshes.py
index 897ce69..f619482 100644
--- a/intern/add_sound_to_meshes.py
+++ b/intern/add_sound_to_meshes.py
@@ -1,8 +1,10 @@
 import bpy
 from numpy.linalg import norm
+from numpy.typing import ArrayLike
 from random import uniform, gauss
 from math import floor
 from enum import Enum
+from typing import cast
 from dataclasses import dataclass
 
 
@@ -34,10 +36,9 @@ def sound_camera_spatial_envelope(scene: bpy.types.Scene, speaker_obj,
     enters_range_frame = None
     exits_range_frame = None
 
-    assert scene.camera
-
     in_range = False
     for frame in range(scene.frame_start, scene.frame_end + 1):
+        assert scene.camera
         scene.frame_set(frame)
         rel = speaker_obj.matrix_world.to_translation() \
             - scene.camera.matrix_world.to_translation()
@@ -66,14 +67,20 @@ def sound_camera_spatial_envelope(scene: bpy.types.Scene, speaker_obj,
                 min_distance=min_dist)
 
 
-def closest_approach_to_camera(scene, speaker_object):
+def closest_approach_to_camera(scene: bpy.types.Scene,
+                               speaker_object: bpy.types.Object) -> tuple[float, int]:
+    """
+    Steps through the scene frame-by-frame and returns a tuple of
+    (minimum_distance, at_frame_index)
+    """
     max_dist = sys.float_info.max
     at_time = scene.frame_start
     for frame in range(scene.frame_start, scene.frame_end + 1):
+        assert scene.camera
         scene.frame_set(frame)
-        rel = speaker_object.matrix_world.to_translation(
-        ) - scene.camera.matrix_world.to_translation()
-        dist = norm(rel)
+        rel = speaker_object.matrix_world.to_translation() - \
+            scene.camera.matrix_world.to_translation()
+        dist = float(norm(cast(ArrayLike, rel)))
 
         if dist < max_dist:
             max_dist = dist
@@ -175,6 +182,4 @@ def add_speakers_to_meshes(meshes, context, sound=None,
             gaussian_stddev=gaussian_stddev,
             sound_bank=sound_bank,
             envelope=envelope)
 
-        apply_gain_envelope(speaker_obj, envelope)
-        speaker_obj.data.update_tag()
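Review note, not part of the patch: the `cast(ArrayLike, rel)` in
`closest_approach_to_camera` exists only to satisfy the type checker, since
numpy's stubs for `norm` do not accept a `mathutils.Vector`. A minimal sketch
of an alternative that sidesteps numpy for this distance entirely (hypothetical
helper name; `Vector.length` is mathutils' built-in Euclidean norm):

    import bpy

    def camera_distance_at_frame(scene: bpy.types.Scene,
                                 obj: bpy.types.Object, frame: int) -> float:
        # Evaluate the scene at this frame, then measure the straight-line
        # distance from the object to the active camera.
        assert scene.camera
        scene.frame_set(frame)
        rel = obj.matrix_world.to_translation() - \
            scene.camera.matrix_world.to_translation()
        return rel.length  # mathutils' built-in Euclidean norm
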
diff --git a/intern/generate_adm.py b/intern/generate_adm.py
index b43a212..6a4a14f 100644
--- a/intern/generate_adm.py
+++ b/intern/generate_adm.py
@@ -25,7 +25,8 @@ from .geom_utils import (speaker_active_time_range,
                          speakers_by_min_distance,
                          speakers_by_start_time)
 
-from .object_mix import (ObjectMix, ObjectMixPool, object_mixes_from_source_groups)
+from .object_mix import (ObjectMix, ObjectMixPool,
+                         object_mixes_from_source_groups)
 
 from .speaker_utils import (all_speakers)
 
@@ -71,7 +72,8 @@ def group_speakers(speakers, scene) -> List[List[bpy.types.Object]]:
     return ret_val
 
 
-def adm_for_object(scene: bpy.types.Scene, sound_object: ObjectMix, room_size, adm_builder, object_index):
+def adm_for_object(scene: bpy.types.Scene, sound_object: ObjectMix, room_size,
+                   adm_builder, object_index):
     fps = scene.render.fps
     frame_start = scene.frame_start
     frame_end = scene.frame_end
@@ -88,7 +90,8 @@ def adm_for_object(scene: bpy.types.Scene, sound_object: ObjectMix, room_size, a
     created.track_uid.bitDepth = sound_object.bits_per_sample
 
 
-def adm_for_scene(scene: bpy.types.Scene, sound_object_mixes: List[ObjectMix], room_size):
+def adm_for_scene(scene: bpy.types.Scene, sound_object_mixes: List[ObjectMix],
+                  room_size):
     adm_builder = ADMBuilder()
 
     frame_start = scene.frame_start
@@ -102,7 +105,8 @@ def adm_for_scene(scene: bpy.types.Scene, sound_object_mixes: List[ObjectMix], r
     adm_builder.create_content(audioContentName="Objects")
 
     for object_index, sound_object in enumerate(sound_object_mixes):
-        adm_for_object(scene, sound_object, room_size, adm_builder, object_index)
+        adm_for_object(scene, sound_object, room_size,
+                       adm_builder, object_index)
 
     adm = adm_builder.adm
 
@@ -114,23 +118,28 @@ def adm_for_scene(scene: bpy.types.Scene, sound_object_mixes: List[ObjectMix], r
 
 
 def bext_data(scene, sample_rate, room_size):
-    description = "SCENE={};ROOM_SIZE={}\n".format(scene.name, room_size).encode("ascii")
-    originator_name = "Blender {}".format(bpy.app.version_string).encode("ascii")
+    description = "SCENE={};ROOM_SIZE={}\n".format(
+        scene.name, room_size).encode("ascii")
+    originator_name = "Blender {}".format(
+        bpy.app.version_string).encode("ascii")
     originator_ref = uuid.uuid1().hex.encode("ascii")
     date10 = strftime("%Y-%m-%d").encode("ascii")
     time8 = strftime("%H:%M:%S").encode("ascii")
-    timeref = int(float(scene.frame_start) * sample_rate / float(scene.render.fps))
+    timeref = int(float(scene.frame_start) *
+                  sample_rate / float(scene.render.fps))
     version = 0
     umid = b"\0" * 64
     pad = b"\0" * 190
-    data = struct.pack("<256s32s32s10s8sQH64s190s", description, originator_name,
-                       originator_ref, date10, time8, timeref, version, umid, pad)
+    data = struct.pack("<256s32s32s10s8sQH64s190s", description,
+                       originator_name, originator_ref, date10, time8, timeref,
+                       version, umid, pad)
 
     return data
 
 
-def attach_outfile_metadata(out_format, outfile, room_size, scene, sound_objects):
+def attach_outfile_metadata(out_format, outfile, room_size, scene,
+                            sound_objects):
     adm, chna = adm_for_scene(scene, sound_objects, room_size=room_size)
     outfile.axml = lxml.etree.tostring(adm, pretty_print=True)
     outfile.chna = chna
 
@@ -159,13 +168,16 @@ def write_outfile_audio_data(outfile, shortest_file, sound_objects):
             cursor = cursor + to_read
 
 
-def write_muxed_wav(mix_pool: ObjectMixPool, scene, out_format, room_size, outfile, shortest_file):
+def write_muxed_wav(mix_pool: ObjectMixPool, scene, out_format, room_size,
+                    outfile, shortest_file):
     sound_objects = mix_pool.object_mixes
-    attach_outfile_metadata(out_format, outfile, room_size, scene, sound_objects)
+    attach_outfile_metadata(out_format, outfile,
+                            room_size, scene, sound_objects)
     write_outfile_audio_data(outfile, shortest_file, sound_objects)
 
 
-def mux_adm_from_object_mix_pool(scene, mix_pool: ObjectMixPool, output_filename, room_size=1.):
+def mux_adm_from_object_mix_pool(scene, mix_pool: ObjectMixPool,
+                                 output_filename, room_size=1.):
     object_count = len(mix_pool.object_mixes)
     assert object_count > 0
 
@@ -188,7 +200,7 @@ def print_partition_results(object_groups, sound_sources, too_far_speakers):
 
 
 def partition_sounds_to_objects(scene, max_objects) -> \
-        tuple[list[list[bpy.types.Speaker]], list[bpy.types.Speaker]]:
+        tuple[list[list[bpy.types.Object]], list[bpy.types.Object]]:
     """
     Allocates sounds in the scene into non-overlapping lists of sounds.
     The second return value is the list of sounds that could not be allocated
@@ -213,8 +225,8 @@
     return object_groups, too_far_speakers
 
 
-def generate_adm(context: bpy.types.Context, filepath: str, room_size: float, 
-                 max_objects: int) -> dict:
+def generate_adm(context: bpy.types.Context, filepath: str, room_size: float,
+                 max_objects: int) -> set[str]:
     scene = context.scene
 
     object_groups, _ = partition_sounds_to_objects(scene, max_objects)
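Review note, not part of the patch: the `struct` format in `bext_data` follows
the fixed part of an EBU Tech 3285 (BWF) bext chunk: Description (256),
Originator (32), OriginatorReference (32), OriginationDate (10),
OriginationTime (8), a 64-bit TimeReference, a 16-bit Version, the 64-byte
UMID, and 190 bytes covering the loudness fields plus reserved space -- 602
bytes in total. A quick self-check that could live next to the function:

    import struct

    # The fixed-size portion of a bext chunk is 602 bytes (EBU Tech 3285);
    # "<" disables alignment padding, so Q and H stay 8 and 2 bytes.
    assert struct.calcsize("<256s32s32s10s8sQH64s190s") == 602
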
diff --git a/intern/geom_utils.py b/intern/geom_utils.py
index e35e7d2..5b2c9fd 100644
--- a/intern/geom_utils.py
+++ b/intern/geom_utils.py
@@ -8,12 +8,13 @@
 from numpy.linalg import norm
 
 from mathutils import Vector, Quaternion
 
+
 class FrameInterval:
     def __init__(self, start_frame, end_frame):
         self.start_frame = int(start_frame)
         self.end_frame = int(end_frame)
 
-    def overlaps(self, other : 'FrameInterval') -> bool:
+    def overlaps(self, other: 'FrameInterval') -> bool:
         return self.start_frame <= other.start_frame <= self.end_frame or \
             other.start_frame <= self.start_frame <= other.end_frame
@@ -33,7 +34,7 @@ def compute_relative_vector(camera: bpy.types.Camera,
                             target: bpy.types.Object):
     # The camera's worldvector is norm to the horizon, we want a vector
     # down the barrel.
-    camera_correction = Quaternion( ( sqrt(2.) / 2. , sqrt(2.) / 2. , 0. , 0.) )
+    camera_correction = Quaternion((sqrt(2.) / 2., sqrt(2.) / 2., 0., 0.))
     relative_vector.rotate(camera_correction)
 
     return relative_vector
@@ -51,11 +52,11 @@ def room_norm_vector(vec, room_size=1.) -> Vector:
     The Pro Tools/Dolby Atmos workflow I am targeting uses "Room Centric"
     panner coordinates ("cartesian allocentric coordinates" in ADM speak) and
     this process seems to yield good results.
-    
+
     I also experimented with using normalized camera frame coordinates from the
     bpy_extras.object_utils.world_to_camera_view method and this gives very good
     results as long as the object is on-screen; coordinates for objects off the screen are unusable.
-    
+
     In the future it would be worth exploring wether there's a way to produce
     ADM coordinates that are "Screen-accurate" while the object is on-screen,
     but still gives sensible results when the object is off-screen as well.
@@ -67,19 +68,20 @@ def room_norm_vector(vec, room_size=1.) -> Vector:
     return vec / chebyshev
 
 
-def closest_approach_to_camera(scene, speaker_object) -> (float, int):
+def closest_approach_to_camera(scene, speaker_object) -> tuple[float, int]:
     """
     The distance and frame number of `speaker_object`s closest point to the
     scene's camera.
-    
+
     (Works for any object, not just speakers.)
     """
     max_dist = sys.float_info.max
     at_time = scene.frame_start
     for frame in range(scene.frame_start, scene.frame_end + 1):
         scene.frame_set(frame)
-        rel = speaker_object.matrix_world.to_translation() - scene.camera.matrix_world.to_translation()
-        dist = norm(rel)
+        rel = speaker_object.matrix_world.to_translation() - \
+            scene.camera.matrix_world.to_translation()
+        dist = float(norm(rel))
 
         if dist < max_dist:
             max_dist = dist
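Review note, not part of the patch: `room_norm_vector` divides the relative
vector by its Chebyshev norm (the largest absolute component), which projects
it onto the surface of the unit cube rather than the unit sphere -- a good
match for ADM's cube-shaped room-centric coordinate space. A minimal sketch of
the idea (hypothetical function, ignoring the `room_size` scaling):

    from mathutils import Vector

    def chebyshev_norm(vec: Vector) -> float:
        # L-infinity norm: the largest absolute component.
        return max(abs(vec.x), abs(vec.y), abs(vec.z))

    v = Vector((3.0, -1.0, 2.0))
    on_cube = v / chebyshev_norm(v)
    # The largest component is now exactly 1, i.e. the vector lies on the
    # surface of the unit cube.
    assert max(abs(c) for c in on_cube) == 1.0
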
""" max_dist = sys.float_info.max at_time = scene.frame_start for frame in range(scene.frame_start, scene.frame_end + 1): scene.frame_set(frame) - rel = speaker_object.matrix_world.to_translation() - scene.camera.matrix_world.to_translation() - dist = norm(rel) + rel = speaker_object.matrix_world.to_translation() - \ + scene.camera.matrix_world.to_translation() + dist = float(norm(rel)) if dist < max_dist: max_dist = dist diff --git a/intern/object_mix.py b/intern/object_mix.py index 6e188b6..643b822 100644 --- a/intern/object_mix.py +++ b/intern/object_mix.py @@ -31,7 +31,7 @@ def adm_object_rendering_context(scene: bpy.types.Scene): class ObjectMix: - def __init__(self, sources: List[bpy.types.Speaker], + def __init__(self, sources: List[bpy.types.Object], scene: bpy.types.Scene, base_dir: str): self.sources = sources self.intermediate_filename = None @@ -65,6 +65,7 @@ class ObjectMix: @property def mixdown_file_handle(self): + assert self.mixdown_filename if self._mixdown_file_handle is None: self._mixdown_file_handle = open(self.mixdown_filename, 'rb') @@ -146,7 +147,7 @@ class ObjectMixPool: def __enter__(self): return self - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__(self, _exc_type, _exc_val, _exc_tb): for mix in self.object_mixes: mix.rm_mixdown() @@ -156,7 +157,8 @@ class ObjectMixPool: return min(lengths) -def object_mixes_from_source_groups(groups: List[List[bpy.types.Speaker]], scene, base_dir): +def object_mixes_from_source_groups(groups: List[List[bpy.types.Object]], + scene: bpy.types.Scene, base_dir: str): mixes = [] for group in groups: mixes.append(ObjectMix(sources=group, scene=scene, base_dir=base_dir)) diff --git a/intern/speaker_utils.py b/intern/speaker_utils.py index c56a19b..9580b68 100644 --- a/intern/speaker_utils.py +++ b/intern/speaker_utils.py @@ -4,8 +4,9 @@ def all_speakers(scene: bpy.types.Scene) -> list[bpy.types.Object]: return [obj for obj in scene.objects if obj.type == 'SPEAKER'] -def solo_speakers(scene, solo_group): +def solo_speakers(scene: bpy.types.Scene, solo_group: list[bpy.types.Object]): for speaker in all_speakers(scene): + assert type(speaker.data) is bpy.types.Speaker if speaker in solo_group: speaker.data.muted = False else: @@ -16,5 +17,6 @@ def solo_speakers(scene, solo_group): def unmute_all_speakers(scene): for speaker in all_speakers(scene): + assert type(speaker.data) is bpy.types.Speaker speaker.data.muted = False speaker.data.update_tag()