Dataset metadata

This commit is contained in:
2025-09-03 16:31:05 -07:00
parent 6cd0415a26
commit e419f698c9

View File

@@ -1,4 +1,4 @@
from datasets import Dataset, Features, Value, ClassLabel
from datasets import Dataset, Features, Value, ClassLabel, DatasetInfo
from typing import Generator, Any
@@ -16,13 +16,19 @@ def build_sentence_class_dataset(
labels = ClassLabel(names=catlist)
info = DatasetInfo(
description=f"(sentence, UCS CatID) pairs gathered by the "
"ucsinfer tool on {}")
items: list[dict] = []
for obj in records:
items += [{'sentence': obj[0], 'class': obj[1]}]
return Dataset.from_list(items, features=Features({'sentence': Value('string'),
'class': labels}))
'class': labels}),
info=info)
# def build_sentence_anchor_dataset() -> Dataset: