data_juicer_agents.cli 源代码

# -*- coding: utf-8 -*-
"""CLI entrypoint for Data-Juicer-Agents v0.1."""

from __future__ import annotations

import argparse
import sys

from data_juicer_agents.commands.apply_cmd import run_apply
from data_juicer_agents.commands.dev_cmd import run_dev
from data_juicer_agents.commands.plan_cmd import run_plan
from data_juicer_agents.commands.retrieve_cmd import run_retrieve


def _add_output_level_args(
    parser: argparse.ArgumentParser,
    *,
    set_default: bool,
) -> None:
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--quiet",
        dest="output_level",
        action="store_const",
        const="quiet",
        default=argparse.SUPPRESS,
        help="Summary output (default)",
    )
    group.add_argument(
        "--verbose",
        dest="output_level",
        action="store_const",
        const="verbose",
        default=argparse.SUPPRESS,
        help="Expand tool execution output",
    )
    group.add_argument(
        "--debug",
        dest="output_level",
        action="store_const",
        const="debug",
        default=argparse.SUPPRESS,
        help="Include raw call details for debugging",
    )
    if set_default:
        parser.set_defaults(output_level="quiet")


def build_parser() -> argparse.ArgumentParser:
    """Construct the ``djx`` command-line parser.

    The root parser carries the output-level switches with ``"quiet"`` as the
    default. A help-less parent parser carries the same switches without a
    default and is passed as ``parents=`` to every sub-command, so the
    switches are also accepted after the sub-command name.
    NOTE(review): the parent presumably omits the default so that a switch
    given before the sub-command is not overwritten -- confirm.

    Sub-commands registered (``dest="command"``, required): ``plan``,
    ``apply``, ``retrieve`` and ``dev``. Each one stores its entry function
    under the ``handler`` default.

    Returns:
        The fully configured root parser.
    """
    root = argparse.ArgumentParser(
        prog="djx",
        description="Agentic CLI for Data-Juicer workflows (v0.1)",
    )
    _add_output_level_args(root, set_default=True)

    # Shared switches for sub-commands; add_help=False is required for a
    # parser that is used through ``parents=``.
    shared_flags = argparse.ArgumentParser(add_help=False)
    _add_output_level_args(shared_flags, set_default=False)

    commands = root.add_subparsers(dest="command", required=True)

    def new_command(name: str, summary: str) -> argparse.ArgumentParser:
        # Every sub-command inherits the shared output-level switches.
        return commands.add_parser(name, help=summary, parents=[shared_flags])

    # ---- plan ---------------------------------------------------------
    plan_parser = new_command("plan", "Generate a structured execution plan")
    plan_parser.add_argument(
        "intent",
        type=str,
        help="Natural language task intent",
    )
    plan_parser.add_argument(
        "--dataset",
        default=None,
        help="Input dataset path",
    )
    plan_parser.add_argument(
        "--export",
        default=None,
        help="Output jsonl path",
    )
    plan_parser.add_argument(
        "--output",
        default=None,
        help="Output plan yaml path",
    )
    plan_parser.add_argument(
        "--custom-operator-paths",
        nargs="+",
        default=None,
        help="Optional custom operator directories/files for validation/execution",
    )
    plan_parser.set_defaults(handler=run_plan)

    # ---- apply --------------------------------------------------------
    apply_parser = new_command("apply", "Apply a generated plan")
    apply_parser.add_argument(
        "--plan",
        required=True,
        help="Plan yaml path",
    )
    apply_parser.add_argument(
        "--yes",
        action="store_true",
        help="Skip confirmation",
    )
    apply_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Do not execute dj-process",
    )
    apply_parser.add_argument(
        "--timeout",
        type=int,
        default=300,
        help="Execution timeout in seconds",
    )
    apply_parser.set_defaults(handler=run_apply)

    # ---- retrieve -----------------------------------------------------
    retrieve_parser = new_command(
        "retrieve",
        "Retrieve relevant Data-Juicer operators from natural language intent",
    )
    retrieve_parser.add_argument(
        "intent",
        type=str,
        help="Natural language operator need",
    )
    retrieve_parser.add_argument(
        "--top-k",
        type=int,
        default=10,
        help="Maximum candidate operators to return",
    )
    retrieve_parser.add_argument(
        "--mode",
        choices=["auto", "llm", "vector"],
        default="auto",
        help="Retrieval backend mode",
    )
    retrieve_parser.add_argument(
        "--dataset",
        default=None,
        help="Optional dataset path for schema/modality probing",
    )
    retrieve_parser.add_argument(
        "--json",
        action="store_true",
        help="Print machine-readable JSON payload",
    )
    retrieve_parser.set_defaults(handler=run_retrieve)

    # ---- dev ----------------------------------------------------------
    dev_parser = new_command(
        "dev",
        "Generate a non-invasive custom Data-Juicer operator scaffold",
    )
    dev_parser.add_argument(
        "intent",
        type=str,
        help="Natural language operator requirement",
    )
    dev_parser.add_argument(
        "--operator-name",
        required=True,
        help="Target operator name (snake_case; suffix inferred if omitted)",
    )
    dev_parser.add_argument(
        "--output-dir",
        required=True,
        help="Directory to write generated operator scaffold files",
    )
    dev_parser.add_argument(
        "--type",
        choices=["mapper", "filter"],
        default=None,
        help="Optional operator type (mapper/filter)",
    )
    dev_parser.add_argument(
        "--from-retrieve",
        default=None,
        help="Optional path to djx retrieve JSON output for design context",
    )
    dev_parser.add_argument(
        "--smoke-check",
        action="store_true",
        help="Run an optional local dj-process smoke check using custom_operator_paths",
    )
    dev_parser.set_defaults(handler=run_dev)

    return root


def main(argv=None) -> int:
    """Parse *argv* and run the selected sub-command.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            read ``sys.argv[1:]``.

    Returns:
        The sub-command handler's result converted with ``int``.
    """
    namespace = build_parser().parse_args(argv)
    # Each sub-command stores its entry function under ``handler``.
    run_command = namespace.handler
    status = run_command(namespace)
    return int(status)


if __name__ == "__main__":
    # Script entry point: use main()'s return value as the process exit status.
    sys.exit(main())