Added another function to recommend

This commit is contained in:
2025-09-04 10:43:31 -07:00
parent 10519f9c1a
commit 103fffe0a4
4 changed files with 34 additions and 140 deletions

View File

@@ -1,12 +1,24 @@
from datasets import Dataset, Features, Value, ClassLabel, DatasetInfo
from datasets.dataset_dict import DatasetDict
from typing import Generator, Any
from typing import Iterator
from tabulate import tabulate
def print_dataset_stats(dataset: DatasetDict, catlist: list[str]):
    """
    Print a small table of record counts for `dataset`.

    Two rows are printed via `tabulate`: the number of records summed over
    every split in `dataset`, and the number of records in the `train` split.

    Args:
        dataset: Mapping of split name to split; must contain a `train` key.
        catlist: Not used by this function; kept so existing callers keep
            working.

    Raises:
        KeyError: If `dataset` has no `train` split.
    """
    # `len()` of a DatasetDict counts its splits (it is a dict), not its
    # rows, so the per-split lengths are summed for the combined total.
    total_records = sum(len(split) for split in dataset.values())
    # Each row is a flat [label, value] list so tabulate renders two columns
    # instead of a single cell holding the repr of a nested list.
    data_table = [
        ["Total records in combined dataset:", total_records],
        ["Total records in `train`:", len(dataset['train'])],
    ]
    print(tabulate(data_table))
# https://www.sbert.net/docs/sentence_transformer/loss_overview.html
def build_sentence_class_dataset(
records: Generator[tuple[str, str], Any, None],
records: Iterator[tuple[str, str]],
catlist: list[str]) -> DatasetDict:
"""
Create a new dataset for `records` which contains (sentence, class) pairs.