Compare commits
2 Commits
3d67623d77
...
fb56ca1dd4
Author | SHA1 | Date | |
---|---|---|---|
![]() |
fb56ca1dd4 | ||
![]() |
5ea64d089f |
@@ -107,7 +107,8 @@ def evaluate(dataset, offset, limit):
|
||||
|
||||
miss_counts = []
|
||||
for cat in cats:
|
||||
miss_counts.append((cat, len([x for x in results \
|
||||
miss_counts.append(
|
||||
(cat, len([x for x in results
|
||||
if x['catid'] == cat and x['result'] == 'MISS'])))
|
||||
|
||||
miss_counts = sorted(miss_counts, key=lambda x: x[1])
|
||||
|
@@ -11,6 +11,7 @@ import platformdirs
|
||||
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
|
||||
def classify_text_ranked(text, embeddings_list, model, limit=5):
|
||||
text_embedding = model.encode(text, convert_to_numpy=True)
|
||||
embeddings = np.array([info['Embedding'] for info in embeddings_list])
|
||||
@@ -32,6 +33,7 @@ class Ucs(NamedTuple):
|
||||
subcategory=d['SubCategory'],
|
||||
explanations=d['Explanations'], synonymns=d['Synonyms'])
|
||||
|
||||
|
||||
def load_ucs() -> list[Ucs]:
|
||||
FILE_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
cats = []
|
||||
@@ -43,6 +45,7 @@ def load_ucs() -> list[Ucs]:
|
||||
|
||||
return [Ucs.from_dict(cat) for cat in cats]
|
||||
|
||||
|
||||
class InferenceContext:
|
||||
"""
|
||||
Maintains caches and resources for UCS category inference.
|
||||
@@ -108,9 +111,8 @@ class InferenceContext:
|
||||
:raises: StopIteration if CatId is not on the schedule
|
||||
"""
|
||||
i = (
|
||||
(x.category, x.subcategory, x.explanations) \
|
||||
(x.category, x.subcategory, x.explanations)
|
||||
for x in self.catlist if x.catid == catid
|
||||
)
|
||||
|
||||
return next(i)
|
||||
|
||||
|
@@ -6,6 +6,7 @@ from re import match
|
||||
|
||||
from .inference import Ucs
|
||||
|
||||
|
||||
def ffmpeg_description(path: str) -> Optional[str]:
|
||||
result = subprocess.run(['ffprobe', '-show_format', '-of',
|
||||
'json', path], capture_output=True)
|
||||
@@ -65,12 +66,14 @@ def build_ucs(components: UcsNameComponents, extension: str) -> str:
|
||||
"""
|
||||
Build a UCS filename
|
||||
"""
|
||||
assert components.validate(), "UcsNameComponents contains invalid characters"
|
||||
assert components.validate(), \
|
||||
"UcsNameComponents contains invalid characters"
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def parse_ucs(rootname: str, catid_list: list[str]) -> Optional[UcsNameComponents]:
|
||||
def parse_ucs(rootname: str,
|
||||
catid_list: list[str]) -> Optional[UcsNameComponents]:
|
||||
"""
|
||||
Parse the UCS components from a file name root.
|
||||
|
||||
@@ -79,11 +82,12 @@ def parse_ucs(rootname: str, catid_list: list[str]) -> Optional[UcsNameComponent
|
||||
:returns: the components, or `None` if the filename is not in UCS format
|
||||
"""
|
||||
|
||||
regexp1 = r"^(?P<CatID>[A-z]+)(-(?P<UserCat>[^_]+))?_((?P<VendorCat>[^-]+)-)?(?P<FXName>[^_]+)"
|
||||
regexp1 = r"^(?P<CatID>[A-z]+)(-(?P<UserCat>[^_]+))?_"
|
||||
regexp2 = r"((?P<VendorCat>[^-]+)-)?(?P<FXName>[^_]+)"
|
||||
regexp3 = r"(_(?P<CreatorID>[^_]+)(_(?P<SourceID>[^_]+)"
|
||||
regexp4 = r"(_(?P<UserData>[^.]+))?)?)?"
|
||||
|
||||
regexp2 = r"(_(?P<CreatorID>[^_]+)(_(?P<SourceID>[^_]+)(_(?P<UserData>[^.]+))?)?)?"
|
||||
|
||||
regexp = regexp1 + regexp2
|
||||
regexp = regexp1 + regexp2 + regexp3 + regexp4
|
||||
|
||||
matches = match(regexp, rootname)
|
||||
|
||||
|
Reference in New Issue
Block a user