Dataset metadata
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
from datasets import Dataset, Features, Value, ClassLabel
|
||||
from datasets import Dataset, Features, Value, ClassLabel, DatasetInfo
|
||||
|
||||
from typing import Generator, Any
|
||||
|
||||
@@ -16,13 +16,19 @@ def build_sentence_class_dataset(
|
||||
|
||||
labels = ClassLabel(names=catlist)
|
||||
|
||||
info = DatasetInfo(
|
||||
description=f"(sentence, UCS CatID) pairs gathered by the "
|
||||
"ucsinfer tool on {}")
|
||||
|
||||
|
||||
items: list[dict] = []
|
||||
for obj in records:
|
||||
items += [{'sentence': obj[0], 'class': obj[1]}]
|
||||
|
||||
|
||||
return Dataset.from_list(items, features=Features({'sentence': Value('string'),
|
||||
'class': labels}))
|
||||
'class': labels}),
|
||||
info=info)
|
||||
|
||||
|
||||
# def build_sentence_anchor_dataset() -> Dataset:
|
||||
|
Reference in New Issue
Block a user