# Source code for data_juicer_agents.commands.retrieve_cmd
# -*- coding: utf-8 -*-
"""Implementation for `djx retrieve`."""
from __future__ import annotations
import json
from data_juicer_agents.tools.retrieve import retrieve_operator_candidates
def _print_human_readable(payload: dict) -> None:
    """Print a plain-text summary of a retrieve payload to stdout.

    The keys read from *payload* are ``intent``, ``mode``,
    ``retrieval_source``, ``candidate_count``, ``candidates`` and ``notes``.
    All of them are optional; ``candidates`` and ``notes`` are also accepted
    as an explicit ``None`` and then treated as empty.

    Each entry of ``candidates`` is read with ``.get`` for the keys ``rank``,
    ``operator_name``, ``operator_type``, ``relevance_score`` and
    ``description``.
    """
    print("Retrieve Summary:")
    print(f"Intent: {payload.get('intent', '')}")
    print(f"Mode: {payload.get('mode', '')}")
    print(f"Source: {payload.get('retrieval_source', '')}")
    print(f"Candidates: {payload.get('candidate_count', 0)}")

    candidates = payload.get("candidates") or []
    if not candidates:
        print("No candidate operators found.")
    else:
        print("Top operator candidates:")
        for item in candidates:
            rank = item.get("rank")
            name = item.get("operator_name")
            op_type = item.get("operator_type", "unknown")
            score = item.get("relevance_score", 0)
            desc = str(item.get("description", "")).strip()
            print(f"{rank}. {name} ({op_type}) score={score}")
            if desc:
                print(f" {desc}")

    # ``.get("notes", [])`` would still hand back None for an explicit
    # ``"notes": None`` and crash the loop; ``or []`` covers that case too.
    for note in payload.get("notes") or []:
        print(f"Note: {note}")
# [docs]
def run_retrieve(args) -> int:
    """Run the `djx retrieve` command and return a process exit code.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``top_k``, ``intent``, ``mode``, ``dataset`` and ``json``.

    Returns:
        ``0`` after the result has been printed; ``2`` when ``top_k`` is not
        a positive integer or when the retrieval call raises.
    """
    # A non-numeric or missing top_k is a usage error, not a crash: report it
    # with the same exit code as the range check below.
    try:
        top_k = int(args.top_k)
    except (TypeError, ValueError):
        print(f"top-k must be an integer > 0, got {args.top_k!r}")
        return 2
    if top_k <= 0:
        print("top-k must be > 0")
        return 2

    # Command boundary: any retrieval failure becomes a message plus a
    # non-zero exit code rather than a traceback.
    try:
        payload = retrieve_operator_candidates(
            intent=args.intent,
            top_k=top_k,
            mode=args.mode,
            dataset_path=args.dataset,
        )
    except Exception as exc:
        print(f"Retrieve failed: {exc}")
        return 2

    if args.json:
        print(json.dumps(payload, ensure_ascii=False, indent=2))
    else:
        _print_human_readable(payload)
    return 0