# Source code for data_juicer_sandbox.helper_funcs

from data_juicer.utils.registry import Registry

ALL_FUNCS = Registry("all_helper_funcs")


# LLM inference funcs
@ALL_FUNCS.register_module("build_messages")
def build_messages(item: dict, **kwargs):
    """
    Assemble a chat-style message list from a single sample.

    :param item: the sample; must contain the query field and may contain
        a system-prompt field.
    :param kwargs: optional overrides. ``system_key`` (default
        ``"system"``) and ``query_key`` (default ``"query"``) name the
        fields that are read from ``item``.
    :return: a list of ``{"role": ..., "content": ...}`` dicts. A system
        message comes first when the system prompt is truthy, followed by
        the user message holding the query.
    :raises KeyError: if ``item`` has no entry under the query key.
    """
    sys_field = kwargs.get("system_key", "system")
    user_field = kwargs.get("query_key", "query")

    sys_text = item.get(sys_field, "")
    user_turn = {"role": "user", "content": item[user_field]}

    # An absent or empty system prompt means the conversation starts
    # directly with the user turn.
    if not sys_text:
        return [user_turn]
    return [{"role": "system", "content": sys_text}, user_turn]


@ALL_FUNCS.register_module("parse_output")
def parse_output(output: str, item: dict, **kwargs):
    """
    Pass-through parser: hand the raw output back unchanged.

    :param output: the output text to parse.
    :param item: the sample the output belongs to; not used here.
    :param kwargs: extra options; not used here.
    :return: ``output`` exactly as received.
    """
    parsed = output
    return parsed


# Math QA grader
@ALL_FUNCS.register_module("build_messages_for_math_qa")
def build_messages_for_math_qa(item: dict, **kwargs):
    """
    Assemble the chat messages handed to the math QA grader.

    The user message packs the question and the answer under review into
    one text of the form ``"Question: ...\\nAnswer: ..."``.

    :param item: the sample; must contain the query and response fields
        and may contain a system-prompt field.
    :param kwargs: optional overrides. ``system_key`` (default
        ``"system"``), ``query_key`` (default ``"query"``) and
        ``response_key`` (default ``"response"``) name the fields that are
        read from ``item``.
    :return: a list of ``{"role": ..., "content": ...}`` dicts. A system
        message comes first when the system prompt is truthy, followed by
        the user message.
    :raises KeyError: if ``item`` has no entry under the query key or the
        response key.
    """
    sys_field = kwargs.get("system_key", "system")
    question_field = kwargs.get("query_key", "query")
    answer_field = kwargs.get("response_key", "response")

    sys_text = item.get(sys_field, "")
    question = item[question_field]
    answer = item[answer_field]

    user_turn = {
        "role": "user",
        "content": f"Question: {question}\nAnswer: {answer}",
    }

    # An absent or empty system prompt means only the user turn is sent.
    if not sys_text:
        return [user_turn]
    return [{"role": "system", "content": sys_text}, user_turn]