Added another function to recommend
This commit is contained in:
@@ -1,12 +1,24 @@
|
||||
from datasets import Dataset, Features, Value, ClassLabel, DatasetInfo
|
||||
from datasets.dataset_dict import DatasetDict
|
||||
|
||||
from typing import Generator, Any
|
||||
from typing import Iterator
|
||||
|
||||
from tabulate import tabulate
|
||||
|
||||
def print_dataset_stats(dataset: DatasetDict, catlist: list[str]):
    """
    Print a small table of record counts for `dataset`.

    The table has two rows: the number of records summed over every split,
    and the number of records in the `train` split.

    :param dataset: Mapping of split name to dataset. It must contain a
        `train` split.
    :param catlist: Category names. Not used by this function at present;
        kept in the signature so existing callers keep working.
    :raises KeyError: If `dataset` has no `train` split.
    """
    # `len()` of a DatasetDict is the number of splits (it is a dict keyed by
    # split name), so the per-split lengths are summed to get the real
    # record count that the label promises.
    total_records = sum(len(split) for split in dataset.values())

    # Each row must be a flat [label, value] list; wrapping it in another
    # list makes tabulate render the whole pair as one stringified cell.
    data_table = [
        ["Total records in combined dataset:", total_records],
        ["Total records in `train`:", len(dataset['train'])],
    ]

    print(tabulate(data_table))
|
||||
|
||||
# https://www.sbert.net/docs/sentence_transformer/loss_overview.html
|
||||
|
||||
def build_sentence_class_dataset(
|
||||
records: Generator[tuple[str, str], Any, None],
|
||||
records: Iterator[tuple[str, str]],
|
||||
catlist: list[str]) -> DatasetDict:
|
||||
"""
|
||||
Create a new dataset for `records` which contains (sentence, class) pairs.
|
||||
|
Reference in New Issue
Block a user