curiosity_dialogs

参考:

curiosity_dialogs

使用以下命令在 TFDS 中加载此数据集:

ds = tfds.load('huggingface:curiosity_dialogs/curiosity_dialogs')
  • 说明
This dataset contains 14K dialogs (181K utterances) where users and assistants converse about geographic topics like
geopolitical entities and locations. This dataset is annotated with pre-existing user knowledge, message-level dialog
acts, grounding to Wikipedia, and user reactions to messages.
| 拆分 | 样本 |
| --- | --- |
| 'test' | 1287 |
| 'test_zero' | 1187 |
| 'train' | 10287 |
| 'val' | 1287 |
  • 特征
{
    "messages": {
        "feature": {
            "message": {
                "dtype": "string",
                "id": null,
                "_type": "Value"
            },
            "liked": {
                "num_classes": 2,
                "names": [
                    "False",
                    "True"
                ],
                "names_file": null,
                "id": null,
                "_type": "ClassLabel"
            },
            "sender": {
                "num_classes": 2,
                "names": [
                    "user",
                    "assistant"
                ],
                "names_file": null,
                "id": null,
                "_type": "ClassLabel"
            },
            "facts": {
                "feature": {
                    "fid": {
                        "dtype": "int32",
                        "id": null,
                        "_type": "Value"
                    },
                    "used": {
                        "num_classes": 2,
                        "names": [
                            "False",
                            "True"
                        ],
                        "names_file": null,
                        "id": null,
                        "_type": "ClassLabel"
                    },
                    "source": {
                        "num_classes": 3,
                        "names": [
                            "section",
                            "known",
                            "random"
                        ],
                        "names_file": null,
                        "id": null,
                        "_type": "ClassLabel"
                    }
                },
                "length": -1,
                "id": null,
                "_type": "Sequence"
            },
            "message_id": {
                "dtype": "string",
                "id": null,
                "_type": "Value"
            },
            "dialog_acts": {
                "feature": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "length": -1,
                "id": null,
                "_type": "Sequence"
            }
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    },
    "known_entities": {
        "feature": {
            "dtype": "string",
            "id": null,
            "_type": "Value"
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    },
    "focus_entity": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "dialog_id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "inferred_steps": {
        "num_classes": 2,
        "names": [
            "False",
            "True"
        ],
        "names_file": null,
        "id": null,
        "_type": "ClassLabel"
    },
    "created_time": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "aspects": {
        "feature": {
            "dtype": "string",
            "id": null,
            "_type": "Value"
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    },
    "first_aspect": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "second_aspect": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "shuffle_facts": {
        "num_classes": 2,
        "names": [
            "False",
            "True"
        ],
        "names_file": null,
        "id": null,
        "_type": "ClassLabel"
    },
    "related_entities": {
        "feature": {
            "dtype": "string",
            "id": null,
            "_type": "Value"
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    },
    "tag": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "user_id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "assistant_id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "is_annotated": {
        "num_classes": 2,
        "names": [
            "False",
            "True"
        ],
        "names_file": null,
        "id": null,
        "_type": "ClassLabel"
    },
    "user_dialog_rating": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "user_other_agent_rating": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "assistant_dialog_rating": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "assistant_other_agent_rating": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "reported": {
        "num_classes": 2,
        "names": [
            "False",
            "True"
        ],
        "names_file": null,
        "id": null,
        "_type": "ClassLabel"
    },
    "annotated": {
        "num_classes": 2,
        "names": [
            "False",
            "True"
        ],
        "names_file": null,
        "id": null,
        "_type": "ClassLabel"
    }
}