# Source code for data_juicer_agents.tools.plan.build_system_spec.input
# -*- coding: utf-8 -*-
"""Input models for build_system_spec."""
from __future__ import annotations
from typing import List, Optional
from pydantic import BaseModel, ConfigDict, Field
# [docs]
class BuildSystemSpecInput(BaseModel):
    """Input for building system spec.
    Core parameters are exposed directly for common use cases.
    All other system parameters can be passed as additional kwargs.
    Use list_system_config tool to discover all available options.
    """

    # extra="allow": keys that are not declared below are accepted on the
    # model rather than rejected, so callers may pass any additional fields.
    model_config = ConfigDict(extra="allow")

    # ------------------------------------------------------------------
    # Core parameters (most commonly used)
    # ------------------------------------------------------------------

    # Process count. The declared default here is None; the "Default is 4"
    # in the description is presumably applied downstream when the value is
    # left unset -- NOTE(review): confirm against the consumer of this model.
    np: Optional[int] = Field(
        default=None,
        description="Number of processes to use for dataset processing. Default is 4.",
    )

    # Executor selection. Typed as a free-form optional string; the accepted
    # values are only listed in the description, not enforced by this model.
    executor_type: Optional[str] = Field(
        default=None,
        description=(
            'Executor type: "default" (single machine), "ray" (distributed), '
            'or "ray_partitioned". Default is "default".'
        ),
    )

    # Each instance gets its own fresh empty list via default_factory.
    custom_operator_paths: List[str] = Field(
        default_factory=list,
        description="Paths to custom operator modules or packages.",
    )

    # All other system parameters (open_tracer, use_cache, checkpoint, etc.)
    # can be passed directly as kwargs - they will be validated by DJ bridge