diff --git a/ucsinfer/__main__.py b/ucsinfer/__main__.py index 54fdd2d..85f4f13 100644 --- a/ucsinfer/__main__.py +++ b/ucsinfer/__main__.py @@ -1,11 +1,8 @@ import os -# import csv import logging from itertools import chain -import tqdm import click -# from tabulate import tabulate, SEPARATING_LINE from .inference import InferenceContext, load_ucs from .gather import (build_sentence_class_dataset, print_dataset_stats, @@ -136,6 +133,26 @@ def recommend(ctx, text, paths, interactive, skip_ucs): os.rename(path, new_path) break +@ucsinfer.command('csv') +@click.option('--filename-col', default="FileName", + help="Heading or index of the column containing filenames", + show_default=True) +@click.option('--description-col', default="TrackDescription", + help="Heading or index of the column containing descriptions", + show_default=True) +@click.option('--out', default='dataset/', show_default=True) +@click.argument('paths', nargs=-1) +@click.pass_context +def csv(ctx, paths, out, filename_col, description_col): + """ + Scan training data from CSV files + + `csv` is used to build a training dataset for finetuning the selected + model, like the `gather` command, except instead of scanning the + file system it builds a dataset from descriptions and UCS filenames in + columns of a CSV file. + """ + pass @ucsinfer.command('gather') @click.option('--out', default='dataset/', show_default=True) @@ -146,7 +163,7 @@ def recommend(ctx, text, paths, interactive, skip_ucs): @click.pass_context def gather(ctx, paths, out, ucs_data): """ - Scan files to build a training dataset + Scan training data from audio files `gather` is used to build a training dataset for finetuning the selected model. Description sentences and UCS categories are collected from '.wav'