{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ucs-dataset-intro",
   "metadata": {},
   "source": [
    "# UCS category / description dataset\n",
    "\n",
    "Scans a sound-effects library for `.wav` and `.flac` files whose names start with a CatID from the UCS category list, reads each file's `comment` tag with `ffprobe`, and writes the resulting (category, description) pairs to a CSV file.\n",
    "\n",
    "Requires `ffprobe` on the `PATH` and the UCS category list at `ucs-community/json/en.json`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24915fba-1f44-46af-9233-66b896d7fa41",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import json\n",
    "import os\n",
    "import subprocess\n",
    "from typing import Optional, Tuple\n",
    "\n",
    "# Location of the UCS category list; every entry is read for its 'CatID' key.\n",
    "UCS_JSON_PATH = \"ucs-community/json/en.json\"\n",
    "\n",
    "# Filename suffixes (compared case-insensitively) that are treated as audio files.\n",
    "AUDIO_EXTENSIONS = (\".wav\", \".flac\")\n",
    "\n",
    "with open(UCS_JSON_PATH, encoding=\"utf-8\") as f:\n",
    "    ucs = json.load(f)\n",
    "\n",
    "# All category IDs known to the loaded UCS list, in file order.\n",
    "cat_ids = [x['CatID'] for x in ucs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a4b887d-5e72-4136-a7d9-650667619b12",
   "metadata": {},
   "outputs": [],
   "source": [
    "def ucs_catid(path: str) -> Optional[str]:\n",
    "    \"\"\"Return the UCS CatID that prefixes the filename at `path`, if any.\n",
    "\n",
    "    The text before the first underscore of the basename is looked up in\n",
    "    the module-level `cat_ids` list.\n",
    "\n",
    "    Args:\n",
    "        path: Path to a file; only its basename is inspected.\n",
    "\n",
    "    Returns:\n",
    "        The matching CatID, or None when the first filename component is\n",
    "        not a known CatID.\n",
    "    \"\"\"\n",
    "    basename = os.path.basename(path)\n",
    "    first_component = basename.split(\"_\")[0]\n",
    "\n",
    "    # Return None (not False) on a miss so callers can test `is None`.\n",
    "    return first_component if first_component in cat_ids else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "faedeeb7-8c4d-4e60-ab7a-9baf9add008a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def description(path: str) -> Optional[str]:\n",
    "    \"\"\"Return the `comment` format tag that ffprobe reports for `path`.\n",
    "\n",
    "    Runs `ffprobe -show_format -of json <path>` and reads\n",
    "    `format.tags.comment` from its JSON output.\n",
    "\n",
    "    Args:\n",
    "        path: Path to the media file to probe.\n",
    "\n",
    "    Returns:\n",
    "        The comment string, or None when ffprobe exits with a non-zero\n",
    "        status or the output carries no such tag.\n",
    "\n",
    "    A missing `ffprobe` executable is not handled here and surfaces as the\n",
    "    exception raised by `subprocess.run`.\n",
    "    \"\"\"\n",
    "    result = subprocess.run(\n",
    "        ['ffprobe', '-show_format', '-of', 'json', path],\n",
    "        capture_output=True,\n",
    "    )\n",
    "    if result.returncode != 0:\n",
    "        # ffprobe could not read the file; treat it as having no description.\n",
    "        return None\n",
    "\n",
    "    probe = json.loads(result.stdout)\n",
    "    # `or {}` also covers keys that are present but empty or null.\n",
    "    fmt = probe.get(\"format\") or {}\n",
    "    tags = fmt.get(\"tags\") or {}\n",
    "    return tags.get(\"comment\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f57c7c75-8cda-441a-bfb3-179e0afde861",
   "metadata": {},
   "outputs": [],
   "source": [
    "def test_data_for_file(path: str) -> Optional[Tuple[str, str]]:\n",
    "    \"\"\"Return `(catid, description)` for `path` when both are present.\n",
    "\n",
    "    The CatID is checked first, so ffprobe is only run for files whose\n",
    "    name starts with a known CatID.\n",
    "\n",
    "    Returns:\n",
    "        A `(catid, description)` tuple, or None when either part is missing.\n",
    "    \"\"\"\n",
    "    catid = ucs_catid(path)\n",
    "    if catid is None:\n",
    "        return None\n",
    "\n",
    "    desc = description(path)\n",
    "    if desc is None:\n",
    "        return None\n",
    "\n",
    "    return (catid, desc)\n",
    "\n",
    "\n",
    "def collect_dataset(scan_root: str, set_name: str) -> None:\n",
    "    \"\"\"Scan `scan_root` recursively and write every catid/description pair found.\n",
    "\n",
    "    Files whose names end in one of `AUDIO_EXTENSIONS` (in any letter case)\n",
    "    are passed to `test_data_for_file`; each non-None result becomes one\n",
    "    row of the output.\n",
    "\n",
    "    Args:\n",
    "        scan_root: Directory to walk.\n",
    "        set_name: Output path without extension; `.csv` is appended and any\n",
    "            existing file of that name is overwritten.\n",
    "    \"\"\"\n",
    "    test_data = []\n",
    "    for root, _, files in os.walk(scan_root):\n",
    "        for file in files:\n",
    "            if not file.lower().endswith(AUDIO_EXTENSIONS):\n",
    "                continue\n",
    "            test_datum = test_data_for_file(os.path.join(root, file))\n",
    "            if test_datum is not None:\n",
    "                test_data.append(test_datum)\n",
    "\n",
    "    # newline='' lets the csv module control line endings itself.\n",
    "    with open(set_name + '.csv', 'w', newline='', encoding='utf-8') as f:\n",
    "        writer = csv.writer(f)\n",
    "        writer.writerow(['Category', 'Description'])\n",
    "        writer.writerows(test_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e05629d-15a5-406b-8064-879900e4b3c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Library folder to scan and the base name of the CSV file to write.\n",
    "SCAN_ROOT = \"/Volumes/NAS SFX Library/JAMIELIB Libraries by Studio/_Designers/Jamie Hardt\"\n",
    "SET_NAME = \"jamie_files\"\n",
    "\n",
    "collect_dataset(SCAN_ROOT, SET_NAME)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}