Compare commits

...

2 Commits

Author SHA1 Message Date
e5698fec7b Plumbing for CSV import from online logs 2025-09-26 21:08:41 -07:00
c75365b856 TODO 2025-09-26 20:52:08 -07:00
2 changed files with 23 additions and 7 deletions

View File

@@ -18,9 +18,8 @@
- Use (anchor, positive) pairs to train a new model - Use (anchor, positive) pairs to train a new model
- Use (sentence) + class labels to train a new model - Use (sentence) + class labels to train a new model
- Implement BatchAllTripletLoss - Implement BatchAllTripletLoss
- Implement a two-phase training regime - Train with anchored definitions and/or...
1. Train with anchored definitions then... - Train with class labels
2. Train with class labels
## Evaluate ## Evaluate

View File

@@ -1,11 +1,8 @@
import os import os
# import csv
import logging import logging
from itertools import chain from itertools import chain
import tqdm
import click import click
# from tabulate import tabulate, SEPARATING_LINE
from .inference import InferenceContext, load_ucs from .inference import InferenceContext, load_ucs
from .gather import (build_sentence_class_dataset, print_dataset_stats, from .gather import (build_sentence_class_dataset, print_dataset_stats,
@@ -136,6 +133,26 @@ def recommend(ctx, text, paths, interactive, skip_ucs):
os.rename(path, new_path) os.rename(path, new_path)
break break
@ucsinfer.command('csv')
@click.option('--filename-col', default="FileName",
              help="Heading or index of the column containing filenames",
              show_default=True)
@click.option('--description-col', default="TrackDescription",
              help="Heading or index of the column containing descriptions",
              show_default=True)
@click.option('--out', default='dataset/', show_default=True)
@click.argument('paths', nargs=-1)
@click.pass_context
def csv(ctx, paths, out, filename_col, description_col):
    """
    Scan training data from CSV files

    `csv` is used to build a training dataset for finetuning the selected
    model. It works like the `gather` command, except that instead of
    scanning the file system it builds the dataset from descriptions and
    UCS filenames found in columns of a CSV file.
    """
    # Parameters (all supplied by click from the decorators above):
    #   ctx             - the click context passed in by @click.pass_context
    #   paths           - tuple of zero or more positional PATHS arguments
    #   out             - value of --out (defaults to 'dataset/')
    #   filename_col    - value of --filename-col (defaults to "FileName")
    #   description_col - value of --description-col
    #                     (defaults to "TrackDescription")
    #
    # NOTE(review): this function is named `csv`, so a module-level
    # `import csv` would be rebound by this definition. When the body is
    # implemented, import the standard-library module locally or under an
    # alias.
    #
    # TODO: plumbing only -- the command currently accepts its options and
    # arguments but does nothing with them and returns without output.
    pass
@ucsinfer.command('gather') @ucsinfer.command('gather')
@click.option('--out', default='dataset/', show_default=True) @click.option('--out', default='dataset/', show_default=True)
@@ -146,7 +163,7 @@ def recommend(ctx, text, paths, interactive, skip_ucs):
@click.pass_context @click.pass_context
def gather(ctx, paths, out, ucs_data): def gather(ctx, paths, out, ucs_data):
""" """
Scan files to build a training dataset Scan training data from audio files
`gather` is used to build a training dataset for finetuning the selected `gather` is used to build a training dataset for finetuning the selected
model. Description sentences and UCS categories are collected from '.wav' model. Description sentences and UCS categories are collected from '.wav'