In [3]:
import json

# Load the UCS (Universal Category System) category list and collect its CatIDs
with open("ucs-community/json/en.json") as f:
    ucs = json.load(f)

cat_ids = [x['CatID'] for x in ucs]
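A quick way to sanity-check the loaded list is to query it directly. The exact contents depend on the UCS release shipped in ucs-community/json/en.json, so the CatID below is illustrative only:
In [ ]:
len(cat_ids)              # number of categories in this UCS release
"METLImpt" in cat_ids     # True only if this (illustrative) CatID is in the loaded list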
In [4]:
from typing import Optional

def ucs_catid(path: str) -> Optional[str]:
    'The UCS CatID if the file at `path` has a valid UCS filename, otherwise None'
    import os.path
    basename = os.path.basename(path)
    first_component = basename.split("_")[0]
    if first_component in cat_ids:
        return first_component
    else:
        return None
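As a sanity check, ucs_catid can be tried on a couple of hypothetical filenames; the first only returns a CatID if its prefix actually appears in cat_ids:
In [ ]:
# Hypothetical paths, for illustration only
ucs_catid("/sfx/METLImpt_Steel Door Slam_JH.wav")   # "METLImpt" if that CatID is in cat_ids
ucs_catid("/sfx/door slam take 3.wav")              # None -- no UCS CatID prefix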
In [5]:
from typing import Optional

def description(path: str) -> Optional[str]:
    'The embedded "comment" metadata tag of the file at `path`, if present'
    import json, subprocess
    result = subprocess.run(['ffprobe', '-show_format', '-of', 'json', path],
                            capture_output=True)
    try:
        result.check_returncode()
    except subprocess.CalledProcessError:
        return None
    stream = json.loads(result.stdout)
    fmt = stream.get("format", None)
    if fmt:
        tags = fmt.get("tags", None)
        if tags:
            return tags.get("comment", None)
    return None
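Since description shells out to ffprobe, it assumes ffprobe is available on the PATH. A minimal sketch of a call on a hypothetical file:
In [ ]:
# Hypothetical path; returns the embedded comment tag, or None if ffprobe
# fails or the file carries no "comment" metadata
description("/sfx/METLImpt_Steel Door Slam_JH.wav")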
In [15]:
from typing import Optional, Tuple

def test_data_for_file(path: str) -> Optional[Tuple[str, str]]:
    'CatID and description if both are present'
    catid = ucs_catid(path)
    if catid is None:
        return None
    desc = description(path)
    if desc is not None:
        return (catid, desc)
    else:
        return None

def collect_dataset(scan_root: str, set_name: str):
    """
    Scans scan_root recursively and collects all CatID/description pairs
    it can find, writing them to `<set_name>.csv`.
    """
    import os, csv
    test_data = []
    for root, _, files in os.walk(scan_root):
        for file in files:
            if file.endswith(".wav") or file.endswith(".flac"):
                if test_datum := test_data_for_file(os.path.join(root, file)):
                    test_data.append(test_datum)
    with open(set_name + '.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Category', 'Description'])
        for row in test_data:
            writer.writerow(row)
In [17]:
collect_dataset("/Volumes/NAS SFX Library/JAMIELIB Libraries by Studio/_Designers/Jamie Hardt","jamie_files")
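Once collect_dataset has run, the resulting CSV can be read back for a quick look; this sketch assumes the jamie_files.csv written by the call above and uses the same standard-library csv module:
In [ ]:
import csv

# Read back the dataset written above for inspection
with open("jamie_files.csv", newline='') as f:
    rows = list(csv.reader(f))

rows[0]      # header: ['Category', 'Description']
rows[1:4]    # first few (CatID, description) pairs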
In [ ]: