data_juicer_agents#

Data-Juicer-Agents package (v0.2).

class data_juicer_agents.PlanValidator[source]#

Bases: object

Validate plan schema and local filesystem preconditions.

static validate(plan: PlanModel) -> List[str][source]#
class data_juicer_agents.ApplyUseCase[source]#

Bases: object

Execute validated plans and return execution summaries.

execute(plan_payload: Dict[str, Any], runtime_dir: Path, dry_run: bool = False, timeout_seconds: int = 300, command_override: str | Iterable[str] | None = None, cancel_check: Callable[[], bool] | None = None) -> Tuple[ApplyResult, int, str, str][source]#
class data_juicer_agents.PlanModel(plan_id: str, user_intent: str, dataset_path: str, export_path: str, dataset: Dict[str, Any] | None = None, generated_dataset_config: Dict[str, Any] | None = None, modality: str = 'unknown', text_keys: List[str] = <factory>, image_key: str | None = None, audio_key: str | None = None, video_key: str | None = None, image_bytes_key: str | None = None, operators: List[OperatorStep] = <factory>, risk_notes: List[str] = <factory>, estimation: Dict[str, Any] = <factory>, executor_type: str = 'default', np: int = 1, open_tracer: bool = False, open_monitor: bool | None = None, use_cache: bool | None = None, skip_op_error: bool = False, custom_operator_paths: List[str] = <factory>, warnings: List[str] = <factory>, approval_required: bool = True, created_at: str = <factory>)[source]#

Bases: object

Final flattened execution plan representation.

plan_id: str#
user_intent: str#
dataset_path: str#
export_path: str#
dataset: Dict[str, Any] | None = None#
generated_dataset_config: Dict[str, Any] | None = None#
modality: str = 'unknown'#
text_keys: List[str]#
image_key: str | None = None#
audio_key: str | None = None#
video_key: str | None = None#
image_bytes_key: str | None = None#
operators: List[OperatorStep]#
risk_notes: List[str]#
estimation: Dict[str, Any]#
executor_type: str = 'default'#
np: int = 1#
open_tracer: bool = False#
open_monitor: bool | None = None#
use_cache: bool | None = None#
skip_op_error: bool = False#
custom_operator_paths: List[str]#
warnings: List[str]#
approval_required: bool = True#
created_at: str#
static new_id() -> str[source]#
classmethod from_dict(data: Dict[str, Any]) -> PlanModel[source]#
to_dict() -> Dict[str, Any][source]#
__init__(plan_id: str, user_intent: str, dataset_path: str, export_path: str, dataset: Dict[str, Any] | None = None, generated_dataset_config: Dict[str, Any] | None = None, modality: str = 'unknown', text_keys: List[str] = <factory>, image_key: str | None = None, audio_key: str | None = None, video_key: str | None = None, image_bytes_key: str | None = None, operators: List[OperatorStep] = <factory>, risk_notes: List[str] = <factory>, estimation: Dict[str, Any] = <factory>, executor_type: str = 'default', np: int = 1, open_tracer: bool = False, open_monitor: bool | None = None, use_cache: bool | None = None, skip_op_error: bool = False, custom_operator_paths: List[str] = <factory>, warnings: List[str] = <factory>, approval_required: bool = True, created_at: str = <factory>) -> None#
data_juicer_agents.validate_plan_schema(plan: PlanModel) -> List[str][source]#