diff --git a/ucsinfer/util.py b/ucsinfer/util.py
index 8a3d0ed..fed9e55 100644
--- a/ucsinfer/util.py
+++ b/ucsinfer/util.py
@@ -24,6 +24,9 @@ def ffmpeg_description(path: str) -> Optional[str]:
 
 
 class UcsNameComponents(NamedTuple):
+    """
+    Components of a UCS filename
+    """
     cat_id: str
     user_cat: str | None
     vendor_cat: str | None
@@ -32,16 +35,67 @@ class UcsNameComponents(NamedTuple):
     source: str | None
     user_data: str | None
 
+    def validate(self) -> bool:
+        """
+        Check that no field contains invalid characters.
+
+        Optional fields that are `None` or empty are skipped.
+
+        :returns: `True` if every checked field is valid, `False` otherwise
+        """
+        # `match` anchors only at the start of the string, so every pattern
+        # ends in `\Z` to make sure the whole field is checked, not just
+        # its first character.
+        if not match(r"[A-Z]+[a-z]+\Z", self.cat_id):
+            return False
 
-def parse_ucs(basename: str, catid_list: list[str]) -> Optional[UcsNameComponents]:
+        if self.user_cat and not match(r"[^\-_]+\Z", self.user_cat):
+            return False
+
+        if self.vendor_cat and not match(r"[^\-_]+\Z", self.vendor_cat):
+            return False
+
+        if not match(r"[^\-_]+\Z", self.fx_name):
+            return False
+
+        if self.creator and not match(r"[^_]+\Z", self.creator):
+            return False
+
+        if self.source and not match(r"[^_]+\Z", self.source):
+            return False
+
+        if self.user_data and not match(r"[^.]+\Z", self.user_data):
+            return False
+
+        return True
+
+
+def build_ucs(components: UcsNameComponents, extension: str) -> str:
+    """
+    Build a UCS filename
+    """
+    assert components.validate(), "UcsNameComponents contains invalid characters"
+
+    # TODO: assemble the filename; this is currently a stub.
+    return ""
+
+
+def parse_ucs(rootname: str, catid_list: list[str]) -> Optional[UcsNameComponents]:
+    """
+    Parse the UCS components from a file name root.
+    :param rootname: filename root, the basename of the file without extension
+    :param catid_list: a list of all UCS CatIDs
+    :returns: the components, or `None` if the filename is not in UCS format
+    """
+
     regexp1 = r"^(?P[A-z]+)(-(?P[^_]+))?_((?P[^-]+)-)?(?P[^_]+)"
     regexp2 = r"(_(?P[^_]+)(_(?P[^_]+)(_(?P[^.]+))?)?)?"
     regexp = regexp1 + regexp2
 
-    matches = match(regexp, basename)
+    matches = match(regexp, rootname)
     if matches is None:
         return None