Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion flexeval/core/chat_dataset/openai_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class OpenAIMessagesDataset(ChatDataset):
tool_definitions_key (str | None): Key used to extract the list of tool definitions from each JSON object.
Set to `None` (default) for data without tool_calls.
drop_if_last_from_assistant (bool): If true, drop the last utterance when it is given by the assistant.
The dropped assistant utterance is then used as the reference answer if `references_key` is not given.
references_key (str | None): Key used to extract the reference answers from each JSON object.

In Jsonl, each line must have a following structure:
```json
Expand Down Expand Up @@ -77,6 +79,36 @@ class OpenAIMessagesDataset(ChatDataset):
]
}
```

Example with reference answers:
```json
{
'<message_key>': [
{
'role': 'user',
'content': 'こんにちは。元気が出る言葉を教えて下さい。'
},
],
'<references_key>': [
'こんなのはどうでしょう。どんどんやってください!',
'こんなのはどうでしょう。頑張ってください!',
],
}
```

If there is only one reference answer for each conversation,
it can also be directly given as a string instead of a list:
```json
{
'<message_key>': [
{
'role': 'user',
'content': 'こんにちは。元気が出る言葉を教えて下さい。'
},
],
'<references_key>': 'こんなのはどうでしょう。どんどんやってください!',
}
```
"""

def __init__(
Expand All @@ -85,6 +117,7 @@ def __init__(
message_key: str = "messages",
tool_definitions_key: str | None = None,
drop_if_last_from_assistant: bool = False,
references_key: str | None = None,
) -> None:
self.conversations: list[ChatInstance] = []
with open(file_path) as f:
Expand All @@ -95,9 +128,28 @@ def __init__(
tool_dicts = sample.get(tool_definitions_key, None)

messages: list[dict[str, Any]] = sample.pop(message_key)
last_assistant_content: str | None = None
if drop_if_last_from_assistant and messages[-1]["role"] == "assistant":
last_assistant_content = messages[-1].get("content", None)
messages = messages[:-1]
self.conversations.append(ChatInstance(messages=messages, tools=tool_dicts, extra_info=sample))

if references_key:
references = sample.pop(references_key, None)
if isinstance(references, str):
references = [references]
elif isinstance(references, list) and all(isinstance(ref, str) for ref in references):
pass
else:
msg = "Invalid format for references."
raise ValueError(msg)
elif references_key is None and last_assistant_content:
references = [last_assistant_content]
else:
references = []

self.conversations.append(
ChatInstance(messages=messages, tools=tool_dicts, references=references, extra_info=sample)
)

def __len__(self) -> int:
    """Return the number of chat instances loaded from the dataset file."""
    instance_count = len(self.conversations)
    return instance_count
Expand Down
76 changes: 72 additions & 4 deletions tests/core/chat_dataset/test_openai_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,21 @@
@pytest.fixture
def jsonl_data_factory(tmp_path) -> Callable: # noqa: ANN001
def _create(
message_key: str, messages_list: list[dict], num_samples: int = 10, extra_info: dict | None = None
message_key: str,
messages_list: list[dict],
num_samples: int = 10,
extra_info: dict | None = None,
references_key: str | None = None,
references_list: list[list[str]] | None = None,
) -> str:
file_path = tmp_path / f"mock_data_{message_key}.jsonl"
with open(file_path, "w") as f:
for messages in messages_list * num_samples:
for i, messages in enumerate(messages_list * num_samples):
sample = {message_key: messages}
if extra_info is not None:
sample = {**extra_info, **sample}
if references_key is not None and references_list is not None:
sample[references_key] = references_list[i % len(references_list)]
f.write(json.dumps(sample) + "\n")
return str(file_path)

Expand Down Expand Up @@ -68,7 +75,8 @@ def test_load_dataset_with_drop_if_last_from_assistant(jsonl_data_factory) -> No
{"role": TEST_CHAT_MESSAGES[0][0]["role"], "content": TEST_CHAT_MESSAGES[0][0]["content"]},
{"role": TEST_CHAT_MESSAGES[0][1]["role"], "content": TEST_CHAT_MESSAGES[0][1]["content"]},
{"role": TEST_CHAT_MESSAGES[0][2]["role"], "content": TEST_CHAT_MESSAGES[0][2]["content"]},
]
],
references=["You're welcome!"],
)

test_chat_messages_with_last_user = deepcopy(TEST_CHAT_MESSAGES)
Expand All @@ -83,7 +91,7 @@ def test_load_dataset_with_drop_if_last_from_assistant(jsonl_data_factory) -> No
{"role": TEST_CHAT_MESSAGES[0][0]["role"], "content": TEST_CHAT_MESSAGES[0][0]["content"]},
{"role": TEST_CHAT_MESSAGES[0][1]["role"], "content": TEST_CHAT_MESSAGES[0][1]["content"]},
{"role": TEST_CHAT_MESSAGES[0][2]["role"], "content": TEST_CHAT_MESSAGES[0][2]["content"]},
]
],
)


Expand All @@ -101,6 +109,7 @@ def test_load_dataset_with_extra_info(jsonl_data_factory) -> None: # noqa: ANN0
{"role": TEST_CHAT_MESSAGES[0][2]["role"], "content": TEST_CHAT_MESSAGES[0][2]["content"]},
],
extra_info={"extra_info": "some_info"},
references=["You're welcome!"],
)


Expand Down Expand Up @@ -207,3 +216,62 @@ def test_load_dataset_with_tools(mock_chat_messages_with_tools_data_path: str) -
assert processed_tool_response_2 == input_tool_response_2["content"]
# assistant response turn
assert chat_messages[4] == {"role": "assistant", "content": messages_dicts[4]["content"]}


def test_load_dataset_with_references(jsonl_data_factory) -> None:  # noqa: ANN001
    """References given as a list of strings are loaded verbatim into each ChatInstance."""
    data_path = jsonl_data_factory(
        message_key="messages",
        messages_list=[[{"role": "user", "content": "This is a user message."}]],
        references_key="references",
        references_list=[["This is a reference answer.", "This is another reference answer."]],
    )

    dataset = OpenAIMessagesDataset(file_path=data_path, message_key="messages", references_key="references")

    expected_instance = ChatInstance(
        messages=[{"role": "user", "content": "This is a user message."}],
        references=["This is a reference answer.", "This is another reference answer."],
    )
    assert len(dataset) == 10
    assert dataset[0] == expected_instance


def test_load_dataset_with_references_as_string(jsonl_data_factory) -> None:  # noqa: ANN001
    """A single reference given as a bare string is wrapped into a one-element list."""
    data_path = jsonl_data_factory(
        message_key="messages",
        messages_list=[[{"role": "user", "content": "This is a user message."}]],
        references_key="references",
        references_list=["This is a reference answer."],
    )

    dataset = OpenAIMessagesDataset(file_path=data_path, message_key="messages", references_key="references")

    expected_instance = ChatInstance(
        messages=[{"role": "user", "content": "This is a user message."}],
        references=["This is a reference answer."],
    )
    assert len(dataset) == 10
    assert dataset[0] == expected_instance


def test_load_dataset_with_references_and_drop_if_last_from_assistant(jsonl_data_factory) -> None:  # noqa: ANN001
    """An explicit references_key takes precedence over the dropped trailing assistant message."""
    data_path = jsonl_data_factory(
        message_key="messages",
        messages_list=TEST_CHAT_MESSAGES,
        references_key="references",
        references_list=["This is a reference answer."],
    )

    dataset = OpenAIMessagesDataset(
        file_path=data_path, message_key="messages", references_key="references", drop_if_last_from_assistant=True
    )

    assert len(dataset) == 10
    # When both drop_if_last_from_assistant and references_key are specified,
    # the reference is always taken from the references_key field, not from the dropped assistant message.
    expected_messages = [
        {"role": TEST_CHAT_MESSAGES[0][turn]["role"], "content": TEST_CHAT_MESSAGES[0][turn]["content"]}
        for turn in range(3)
    ]
    assert dataset[0] == ChatInstance(messages=expected_messages, references=["This is a reference answer."])
Loading