# Source code for data_juicer.ops.mapper.agent_tool_relevance_mapper

# Copyright 2025 The Data-Juicer Authors. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""LLM: tool/capability fit vs user task (1–5).

Inspired by the OpenJudge tool-selection grader. The Data-Juicer (DJ) variant
works from meta tool tags or from the text alone, with no schema.

Reference:
https://agentscope-ai.github.io/OpenJudge/built_in_graders/agent_graders/
#toolselectiongrader
"""

from __future__ import annotations

from data_juicer.ops.base_op import OPERATORS, TAGGING_OPS
from data_juicer.ops.mapper.dialog_quality_llm_base import _DialogQualityLLMMapperBase
from data_juicer.ops.mapper.dialog_quality_llm_utils import (
    build_agent_tool_fit_user_content,
)
from data_juicer.utils.constant import MetaKeys

# Name under which AgentToolRelevanceMapper is registered in both the
# OPERATORS and TAGGING_OPS registries (and re-exposed as its class attribute).
OP_NAME = "agent_tool_relevance_mapper"


@TAGGING_OPS.register_module(OP_NAME)
@OPERATORS.register_module(OP_NAME)
class AgentToolRelevanceMapper(_DialogQualityLLMMapperBase):
    """LLM-judged fit between the tools/capabilities used and the user task.

    The operator is registered under ``OP_NAME`` in both ``OPERATORS`` and
    ``TAGGING_OPS``. It only supplies the system prompt and the user-message
    content; everything else comes from ``_DialogQualityLLMMapperBase``.
    The prompt asks for a score where 1 is a severe mismatch and 5 is an apt,
    efficient tool choice, and it relies on the tool tags found in the
    sample's meta when they are available.
    """

    OP_NAME = OP_NAME
    # NOTE(review): presumably the meta field that receives the result and the
    # evaluation-kind label consumed by the base class — confirm against
    # _DialogQualityLLMMapperBase.
    META_KEY = MetaKeys.agent_tool_relevance
    EVAL_KIND = "agent_tool"

    def _system_prompt(self) -> str:
        """Return the system prompt that instructs the judge model.

        The prompt consists of four newline-separated paragraphs: the judging
        task, how to weigh a meta tool list that matches the assistant text,
        how to treat a missing/empty tool list, and the score anchors.
        """
        judging_task = (
            "Using the **User request**, **Assistant reply** (including any tool "
            "trace embedded in that text), and the inferred tool list, judge "
            "whether capability choices are roughly sound: when tools are "
            "needed, were relevant ones used?"
        )
        matching_list_rule = (
            "If the meta tool list matches tool names visible in the assistant "
            "text, treat that as weak evidence of intent—reserve score 1–2 for "
            "**clear** task/tool contradictions, not for underspecified user "
            "wording alone."
        )
        missing_list_rule = (
            "If no tool list is present, infer only from the text; an empty "
            "list does not automatically imply a bad score."
        )
        score_anchors = "1 = severe mismatch; 5 = apt and efficient."

        paragraphs = (
            judging_task,
            matching_list_rule,
            missing_list_rule,
            score_anchors,
        )
        return "\n".join(paragraphs)

    def _build_user_content(self, sample: dict) -> str:
        """Build the user-message text for one sample.

        Delegates to ``build_agent_tool_fit_user_content``, forwarding the
        sample together with this instance's field keys and prompt length
        limits.

        Args:
            sample: The sample to turn into prompt content.

        Returns:
            The string produced by ``build_agent_tool_fit_user_content``.
        """
        # These attributes are not defined in this class; presumably they are
        # set by the base class constructor — verify there.
        helper_kwargs = {
            "query_key": self.query_key,
            "response_key": self.response_key,
            "tool_types_key": self.tool_types_key,
            "primary_tool_key": self.primary_tool_key,
            "max_query_chars": self.max_query_chars_for_prompt,
            "max_response_chars": self.max_response_chars_for_prompt,
        }
        return build_agent_tool_fit_user_content(sample, **helper_kwargs)