data_juicer_agents.core.tool#

Core runtime-agnostic tool contracts and registry.

class data_juicer_agents.core.tool.DatasetSource(*, path: str = '', config: Dict[str, Any] | None = None, generated: Dict[str, Any] | None = None)[源代码]#

基类：BaseModel

Unified dataset source envelope.

Exactly one of path, config, or generated must be provided. Providing zero or more than one raises a validation error.

Examples:

# Simple local file (shortcut)
DatasetSource(path="/data/train.jsonl")

# Structured load config (remote, multi-source, max_sample_num …)
DatasetSource(config={
    "configs": [
        {"type": "local", "path": "/data/a.jsonl", "weight": 0.7},
        {"type": "local", "path": "/data/b.jsonl", "weight": 0.3},
    ],
    "max_sample_num": 50000,
})

# Dynamic generation via Data-Juicer FORMATTERS
DatasetSource(generated={"type": "text_formatter", ...})

path: str#

config: Dict[str, Any] | None#

generated: Dict[str, Any] | None#

to_legacy_args() → Dict[str, Any][源代码]#

Convert to the legacy (dataset_path, dataset, generated_dataset_config) dict.

Returns a dict with exactly the three legacy keys so callers can unpack with **source.to_legacy_args().

classmethod from_legacy(dataset_path: str = '', dataset: Dict[str, Any] | None = None, generated_dataset_config: Dict[str, Any] | None = None) → DatasetSource[源代码]#

Create a DatasetSource from the legacy triple.

This is the primary migration bridge: CLI argument parsers and existing callers can keep their three-parameter interface and convert to the unified envelope at the boundary.

model_config = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

class data_juicer_agents.core.tool.ToolArtifact(path: str, description: str = '', kind: str = 'file', label: str = '')[源代码]#

基类：object

Named artifact produced by a tool.

path: str#

description: str = ''#

kind: str = 'file'#

label: str = ''#

to_dict() → Dict[str, Any][源代码]#

__init__(path: str, description: str = '', kind: str = 'file', label: str = '') → None#

class data_juicer_agents.core.tool.ToolContext(working_dir: str = './.djx', env: Dict[str, str] = <factory>, artifacts_dir: str | None = None, runtime_values: Dict[str, Any] = <factory>)[源代码]#

基类：object

Execution context shared by all tool runtimes.

working_dir: str = './.djx'#

env: Dict[str, str]#

artifacts_dir: str | None = None#

runtime_values: Dict[str, Any]#

resolve_artifacts_dir() → Path[源代码]#

__init__(working_dir: str = './.djx', env: Dict[str, str] = <factory>, artifacts_dir: str | None = None, runtime_values: Dict[str, Any] = <factory>) → None#

class data_juicer_agents.core.tool.ToolRegistry(_tools: Dict[str, ToolSpec] = <factory>)[源代码]#

基类：object

Container of tool definitions.

register(spec: ToolSpec) → None[源代码]#

get(name: str) → ToolSpec[源代码]#

list(*, tags: Sequence[str] | None = None) → List[ToolSpec][源代码]#

list_tools(*, tags: Sequence[str] | None = None) → List[ToolSpec][源代码]#

names() → List[str][源代码]#

__init__(_tools: Dict[str, ToolSpec] = <factory>) → None#

class data_juicer_agents.core.tool.ToolResult(ok: bool, summary: str = '', data: Dict[str, Any] = <factory>, artifacts: List[ToolArtifact] = <factory>, error_type: str = '', error_message: str = '', next_actions: List[str] = <factory>)[源代码]#

基类：object

Normalized tool execution result.

ok: bool#

summary: str = ''#

data: Dict[str, Any]#

artifacts: List[ToolArtifact]#

error_type: str = ''#

error_message: str = ''#

next_actions: List[str]#

classmethod success(*, summary: str = '', data: Dict[str, Any] | None = None, artifacts: Iterable[ToolArtifact] | None = None) → ToolResult[源代码]#

classmethod failure(*, summary: str, error_type: str, error_message: str = '', data: Dict[str, Any] | None = None, next_actions: Iterable[str] | None = None) → ToolResult[源代码]#

to_payload(*, action: str | None = None) → Dict[str, Any][源代码]#

__init__(ok: bool, summary: str = '', data: Dict[str, Any] = <factory>, artifacts: List[ToolArtifact] = <factory>, error_type: str = '', error_message: str = '', next_actions: List[str] = <factory>) → None#

class data_juicer_agents.core.tool.ToolSpec(name: str, description: str, input_model: Type[BaseModel], output_model: Type[BaseModel] | None, executor: Callable[[ToolContext, BaseModel], ToolResult], tags: Tuple[str, ...] = (), effects: Literal['read', 'write', 'execute', 'external'] = 'read', confirmation: Literal['none', 'recommended', 'required'] = 'none')[源代码]#

基类：object

Definition of one atomic tool.

name: str#

description: str#

input_model: Type[BaseModel]#

output_model: Type[BaseModel] | None#

executor: Callable[[ToolContext, BaseModel], ToolResult]#

tags: Tuple[str, ...] = ()#

effects: Literal['read', 'write', 'execute', 'external'] = 'read'#

confirmation: Literal['none', 'recommended', 'required'] = 'none'#

execute(ctx: ToolContext, raw_input: BaseModel | Dict[str, Any]) → ToolResult[源代码]#

__init__(name: str, description: str, input_model: Type[BaseModel], output_model: Type[BaseModel] | None, executor: Callable[[ToolContext, BaseModel], ToolResult], tags: Tuple[str, ...] = (), effects: Literal['read', 'write', 'execute', 'external'] = 'read', confirmation: Literal['none', 'recommended', 'required'] = 'none') → None#

data_juicer_agents.core.tool.build_default_tool_registry(*, profile: str | None = None, groups: Sequence[str] | None = None) → ToolRegistry[源代码]#

data_juicer_agents.core.tool.get_active_tool_profile() → str[源代码]#

data_juicer_agents.core.tool.get_tool_spec(name: str, *, profile: str | None = None) → ToolSpec[源代码]#

data_juicer_agents.core.tool.groups_for_tool_profile(profile: str | None) → Tuple[str, ...] | None[源代码]#

data_juicer_agents.core.tool.list_tool_specs(*, tags: Sequence[str] | None = None, profile: str | None = None) → List[ToolSpec][源代码]#

data_juicer_agents.core.tool.normalize_tool_profile(profile: str | None) → str[源代码]#

data_juicer_agents.core.tool.tool_is_excluded_from_profile(tool_name: str, profile: str | None) → bool[源代码]#

data_juicer_agents.core.tool#

本页