data_juicer_agents#

Data-Juicer-Agents package.

class data_juicer_agents.PlanValidator[source]#

Bases: object

Validate plan schema and local filesystem preconditions.

static validate(plan: PlanModel) -> List[str][source]#
class data_juicer_agents.ApplyUseCase[source]#

Bases: object

Execute validated plans and return execution summaries.

execute(plan_payload: Dict[str, Any], runtime_dir: Path, dry_run: bool = False, timeout_seconds: int = 300, command_override: str | Iterable[str] | None = None, cancel_check: Callable[[], bool] | None = None) -> Tuple[ApplyResult, int, str, str][source]#
class data_juicer_agents.PlanModel(plan_id: str, user_intent: str, modality: str = 'unknown', operator_names: List[str] = <factory>, recipe: Dict[str, Any] = <factory>, risk_notes: List[str] = <factory>, estimation: Dict[str, Any] = <factory>, warnings: List[str] = <factory>, approval_required: bool = True, created_at: str = <factory>)[source]#

Bases: object

Execution plan: plan metadata + embedded DJ-native recipe.

The recipe field is a plain dict that maps 1-to-1 with a Data-Juicer YAML config file. All dataset, system, and process settings live inside recipe; PlanModel itself only owns plan-level metadata.

Downstream code should access recipe fields via plan.recipe[key] or plan.recipe.get(key) to keep the boundary between plan metadata and DJ config clear.

plan_id: str#
user_intent: str#
modality: str = 'unknown'#
operator_names: List[str]#
recipe: Dict[str, Any]#
risk_notes: List[str]#
estimation: Dict[str, Any]#
warnings: List[str]#
approval_required: bool = True#
created_at: str#
static new_id() -> str[source]#
classmethod from_dict(data: Dict[str, Any]) -> PlanModel[source]#
to_dict() -> Dict[str, Any][source]#
__init__(plan_id: str, user_intent: str, modality: str = 'unknown', operator_names: List[str] = <factory>, recipe: Dict[str, Any] = <factory>, risk_notes: List[str] = <factory>, estimation: Dict[str, Any] = <factory>, warnings: List[str] = <factory>, approval_required: bool = True, created_at: str = <factory>) -> None#
data_juicer_agents.validate_plan_schema(plan: PlanModel) -> List[str][source]#