# ucsinfer/ucsinfer/util.py

import subprocess
import json
import os
from typing import NamedTuple, Optional
from re import match


def ffmpeg_description(path: str) -> Optional[str]:
    """
    Read the `comment` tag from a file's format metadata using `ffprobe`.

    Runs `ffprobe -show_format -of json <path>` and looks up
    `format -> tags -> comment` in the JSON it prints.

    :param path: path of the file to inspect
    :returns: the value of the `comment` tag, or `None` if `ffprobe` exits
        with a non-zero status or the output carries no such tag
    """
    result = subprocess.run(['ffprobe', '-show_format', '-of',
                             'json', path], capture_output=True)

    # A non-zero exit status means ffprobe could not read the file. Checking
    # the status directly avoids a bare `except`, which would also swallow
    # KeyboardInterrupt and SystemExit.
    if result.returncode != 0:
        return None

    probe = json.loads(result.stdout)

    # Each level is optional in the output; a missing or empty level means
    # there is no comment to report.
    fmt = probe.get("format") or {}
    tags = fmt.get("tags") or {}
    return tags.get("comment")


class UcsNameComponents(NamedTuple):
    """
    Components of a UCS filename
    """
    cat_id: str
    user_cat: str | None
    vendor_cat: str | None
    fx_name: str
    creator: str | None
    source: str | None
    user_data: str | None

    def validate(self):
        """
        Check if fields do not contain invalid characters
        """
        if not match(r"[A-Z]+[a-z]+", self.cat_id):
            return False

        if self.user_cat and not match(r"[^\-_]+", self.user_cat):
            return False

        if self.vendor_cat and not match(r"[^\-_]+", self.vendor_cat):
            return False

        if not match(r"[^\-_]+", self.fx_name):
            return False

        if self.creator and not match(r"[^_]+", self.creator):
            return False

        if self.source and not match(r"[^_]+", self.source):
            return False

        if self.user_data and not match(r"[^.]+", self.user_data):
            return False


def normalize_ucs(basename: str, catid_list: list[str]):
    """
    Take any filename and normalize it into the UCS system

    NOTE: the normalization itself is not implemented yet. The filename is
    parsed, but the result is not used, and a placeholder root name `aaa` is
    returned with the extension of `basename` preserved.

    :param basename: filename including its extension
    :param catid_list: a list of all UCS CatIDs, passed on to `parse_ucs`
    :returns: the placeholder name `aaa` followed by the original extension
    """
    root, ext = os.path.splitext(basename)

    # TODO: derive the normalized name from `parsed`, handling both the case
    # where `basename` already is a UCS name and the case where it is not
    # (`parsed` is None).
    parsed = parse_ucs(root, catid_list)  # noqa: F841

    # `splitext` keeps the leading dot on `ext`, so no separator is added
    # here; adding one would produce names like "aaa..wav".
    return f"aaa{ext}"


def build_ucs(components: UcsNameComponents, extension: str) -> str:
    """
    Assemble a UCS filename from its components.

    The result has the form
    `cat_id[-user_cat]_[vendor_cat-]fx_name[_creator[_source[_user_data]]].extension`.

    :param components: the name components; they must pass `validate()`
    :param extension: file extension, appended after a "."
    :returns: the assembled filename
    """
    assert components.validate(), \
            "UcsNameComponents contains invalid characters"

    category = components.cat_id
    if components.user_cat:
        category = f"{category}-{components.user_cat}"

    if components.vendor_cat:
        name = f"{components.vendor_cat}-{components.fx_name}"
    else:
        name = components.fx_name

    segments = [category, name]

    # The trailing fields are positional: each one is only emitted when all
    # fields before it are present, so stop at the first one that is unset.
    for field in ("creator", "source", "user_data"):
        value = getattr(components, field)
        if not value:
            break
        segments.append(value)

    return "_".join(segments) + '.' + extension


def parse_ucs(rootname: str,
              catid_list: list[str]) -> Optional[UcsNameComponents]:
    """
    Parse the UCS components from a file name root.

    The expected layout is
    `CatID[-UserCat]_[VendorCat-]FXName[_CreatorID[_SourceID[_UserData]]]`.
    Optional parts that are absent are returned as `None`.

    :param rootname: filename root, the basename of the file without extension
    :param catid_list: a list of all UCS CatIDs
    :returns: the components, or `None` if the filename is not in UCS format
    """

    pattern = (
        # CatID (letters only), optional "-UserCat", then the "_" separator.
        # The class must be [A-Za-z]: the range [A-z] also contains "_", which
        # lets the greedy CatID group swallow the following fields.
        r"^(?P<CatID>[A-Za-z]+)(-(?P<UserCat>[^_]+))?_"
        # Optional "VendorCat-" prefix, then the FX name. VendorCat must not
        # cross a "_" separator, or it would absorb the fields that follow.
        r"((?P<VendorCat>[^-_]+)-)?(?P<FXName>[^_]+)"
        # Optional positional tail: "_CreatorID[_SourceID[_UserData]]".
        r"(_(?P<CreatorID>[^_]+)(_(?P<SourceID>[^_]+)"
        r"(_(?P<UserData>[^.]+))?)?)?"
    )

    found = match(pattern, rootname)
    if found is None:
        return None

    cat_id = found.group('CatID')
    # A name that merely has the right shape is not UCS unless its CatID is
    # one of the known ones.
    if cat_id not in catid_list:
        return None

    return UcsNameComponents(cat_id=cat_id,
                             user_cat=found.group('UserCat'),
                             vendor_cat=found.group('VendorCat'),
                             fx_name=found.group('FXName'),
                             creator=found.group('CreatorID'),
                             source=found.group('SourceID'),
                             user_data=found.group('UserData'))