# Source code for data_juicer.ops.mapper.dialog_non_repetition_mapper
# Copyright 2025 The Data-Juicer Authors. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""LLM: new info vs prior assistant turns (1–5).
Inspired by OpenJudge-style repetition checks; DJ: same prompt window only.
Reference:
https://agentscope-ai.github.io/OpenJudge/built_in_graders/multi_turn/
#responserepetitiongrader
"""
from __future__ import annotations
from data_juicer.ops.base_op import OPERATORS, TAGGING_OPS
from data_juicer.ops.mapper.dialog_quality_llm_base import _DialogTurnQualityMapper
from data_juicer.utils.constant import MetaKeys
# Name under which this operator is registered; passed to both
# ``register_module`` decorators and mirrored on the class as ``OP_NAME``.
OP_NAME = "dialog_non_repetition_mapper"
# [docs]
@TAGGING_OPS.register_module(OP_NAME)
@OPERATORS.register_module(OP_NAME)
class DialogNonRepetitionMapper(_DialogTurnQualityMapper):
    """Rate how much new information a reply adds over earlier assistant turns.

    Only assistant content from the same prompt window is compared against.
    The class is registered under ``OP_NAME`` in both the ``OPERATORS`` and
    ``TAGGING_OPS`` registries.
    """

    # Registry name, mirrored from the module-level constant.
    OP_NAME = OP_NAME
    # Meta key associated with this operator's result.
    META_KEY = MetaKeys.dialog_non_repetition

    def _system_prompt(self) -> str:
        """Build the system-prompt text holding the repetition rubric.

        Returns:
            A three-part instruction string: the comparison question, the
            1-to-5 scale (1 = near-duplicate, 5 = substantive new content),
            and a note to disregard minor polite boilerplate.
        """
        comparison_question = (
            "Compare with assistant content in **Earlier turns**: does the "
            "**Assistant reply to score** mostly repeat prior messages without "
            "new information?\n"
        )
        scoring_scale = (
            "1 = near-duplicate of earlier assistant text; 5 = substantive "
            "new facts, conclusions, or progress.\n"
        )
        boilerplate_note = "Ignore small amounts of polite boilerplate repetition."
        return comparison_question + scoring_scale + boilerplate_note