Dataset metadata
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
from datasets import Dataset, Features, Value, ClassLabel
|
from datasets import Dataset, Features, Value, ClassLabel, DatasetInfo
|
||||||
|
|
||||||
from typing import Generator, Any
|
from typing import Generator, Any
|
||||||
|
|
||||||
@@ -16,13 +16,19 @@ def build_sentence_class_dataset(
|
|||||||
|
|
||||||
labels = ClassLabel(names=catlist)
|
labels = ClassLabel(names=catlist)
|
||||||
|
|
||||||
|
info = DatasetInfo(
|
||||||
|
description=f"(sentence, UCS CatID) pairs gathered by the "
|
||||||
|
"ucsinfer tool on {}")
|
||||||
|
|
||||||
|
|
||||||
items: list[dict] = []
|
items: list[dict] = []
|
||||||
for obj in records:
|
for obj in records:
|
||||||
items += [{'sentence': obj[0], 'class': obj[1]}]
|
items += [{'sentence': obj[0], 'class': obj[1]}]
|
||||||
|
|
||||||
|
|
||||||
return Dataset.from_list(items, features=Features({'sentence': Value('string'),
|
return Dataset.from_list(items, features=Features({'sentence': Value('string'),
|
||||||
'class': labels}))
|
'class': labels}),
|
||||||
|
info=info)
|
||||||
|
|
||||||
|
|
||||||
# def build_sentence_anchor_dataset() -> Dataset:
|
# def build_sentence_anchor_dataset() -> Dataset:
|
||||||
|
Reference in New Issue
Block a user