# Source code for data_juicer.ops.mapper.agent_tool_relevance_mapper
# Copyright 2025 The Data-Juicer Authors. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""LLM-scored fit between tools/capabilities and the user task (1–5).

Inspired by the OpenJudge tool-selection grader. The Data-Juicer (DJ) variant
works from meta tool tags or from text alone, with no schema.

Reference:
    https://agentscope-ai.github.io/OpenJudge/built_in_graders/agent_graders/
    #toolselectiongrader
"""
from __future__ import annotations
from data_juicer.ops.base_op import OPERATORS, TAGGING_OPS
from data_juicer.ops.mapper.dialog_quality_llm_base import _DialogQualityLLMMapperBase
from data_juicer.ops.mapper.dialog_quality_llm_utils import (
build_agent_tool_fit_user_content,
)
from data_juicer.utils.constant import MetaKeys
# Registry name for this operator: passed to both ``register_module``
# decorators and mirrored on the class as its ``OP_NAME`` attribute.
OP_NAME = "agent_tool_relevance_mapper"
# [docs]
@TAGGING_OPS.register_module(OP_NAME)
@OPERATORS.register_module(OP_NAME)
class AgentToolRelevanceMapper(_DialogQualityLLMMapperBase):
    """Judge how well the tools/capabilities used fit the user task.

    The operator is registered under ``OP_NAME`` in both ``OPERATORS`` and
    ``TAGGING_OPS``. This class only supplies the system prompt and the
    user-side prompt content; everything else is inherited from
    ``_DialogQualityLLMMapperBase``.

    NOTE(review): presumably the base class performs the LLM call and stores
    the resulting score under ``META_KEY`` -- confirm against the base class.
    """

    # Class-level configuration; how the base class consumes these is not
    # visible from this file.
    OP_NAME = OP_NAME
    META_KEY = MetaKeys.agent_tool_relevance
    EVAL_KIND = "agent_tool"

    def _system_prompt(self) -> str:
        """Return the grading instructions sent as the system prompt.

        The text asks the model to rate tool/capability choices on a scale
        where 1 is a severe mismatch and 5 is apt and efficient.
        """
        # What to judge, and from which inputs.
        task_paragraph = (
            "Using the **User request**, **Assistant reply** (including any tool "
            "trace embedded in that text), and the inferred tool list, judge "
            "whether capability choices are roughly sound: when tools are "
            "needed, were relevant ones used?\n"
        )
        # Keep low scores for clear contradictions only.
        evidence_paragraph = (
            "If the meta tool list matches tool names visible in the assistant "
            "text, treat that as weak evidence of intent—reserve score 1–2 for "
            "**clear** task/tool contradictions, not for underspecified user "
            "wording alone.\n"
        )
        # A missing tool list is not, by itself, a reason for a bad score.
        empty_list_paragraph = (
            "If no tool list is present, infer only from the text; an empty "
            "list does not automatically imply a bad score.\n"
        )
        scale_line = "1 = severe mismatch; 5 = apt and efficient."
        return "".join(
            (
                task_paragraph,
                evidence_paragraph,
                empty_list_paragraph,
                scale_line,
            )
        )

    def _build_user_content(self, sample: dict) -> str:
        """Return the user-side prompt content for ``sample``.

        Delegates to ``build_agent_tool_fit_user_content``, forwarding the
        key names and character limits configured on this instance.

        Args:
            sample: The sample to build the prompt content from.

        Returns:
            The string produced by ``build_agent_tool_fit_user_content``.
        """
        prompt_options = {
            "query_key": self.query_key,
            "response_key": self.response_key,
            "tool_types_key": self.tool_types_key,
            "primary_tool_key": self.primary_tool_key,
            "max_query_chars": self.max_query_chars_for_prompt,
            "max_response_chars": self.max_response_chars_for_prompt,
        }
        return build_agent_tool_fit_user_content(sample, **prompt_options)