data_juicer_agents.tools.dj_dev_helpers 源代码

# -*- coding: utf-8 -*-
"""
DataJuicer Development Tools

Tools for developing DataJuicer operators, including access to basic documentation
and example code for different operator types.
"""

import os
from agentscope.message import TextBlock
from agentscope.tool import ToolResponse

# Root directory of the local DataJuicer checkout. It is seeded from the
# DATA_JUICER_PATH environment variable and stays None when that variable is
# unset, in which case the tools below ask for the path to be configured.
DATA_JUICER_PATH = os.environ.get("DATA_JUICER_PATH")

# Files (relative to DATA_JUICER_PATH) that get_basic_files() bundles together:
# the operator base class plus the English and Chinese developer guides.
BASIC_LIST_RELATIVE = [
    "data_juicer/ops/base_op.py",
    "docs/DeveloperGuide.md",
    "docs/DeveloperGuide_ZH.md",
]


def get_basic_files() -> ToolResponse:
    """Get basic DataJuicer development files content.

    Reads every file listed in ``BASIC_LIST_RELATIVE`` (resolved against
    ``DATA_JUICER_PATH``) and returns them as a single markdown document,
    one fenced section per file:

    - base_op.py: Base operator class
    - DeveloperGuide.md: English developer guide
    - DeveloperGuide_ZH.md: Chinese developer guide

    A file that is missing or cannot be read does not abort the call; a
    short "(Not Found)" or "(Read Failed)" section is emitted in its place
    so the caller can see which file is unavailable.

    Returns:
        ToolResponse: A single text block with the combined content of all
        basic development files, or a message asking for
        ``DATA_JUICER_PATH`` to be configured when it is not set.
    """
    if DATA_JUICER_PATH is None:
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text="DATA_JUICER_PATH is not configured. Please ask the user to provide the DATA_JUICER_PATH",
                ),
            ],
        )

    try:
        # Collect fragments and join once instead of growing a string.
        parts = [
            f"# DataJuicer Path: {DATA_JUICER_PATH}\n",
            "# DataJuicer Operator Development Basic Files\n\n",
        ]

        for relative_path in BASIC_LIST_RELATIVE:
            file_path = os.path.join(DATA_JUICER_PATH, relative_path)
            filename = os.path.basename(file_path)

            if not os.path.exists(file_path):
                # Report the gap explicitly rather than skipping silently.
                parts.append(f"## {filename} (Not Found)\n")
                parts.append(f"Expected at: {file_path}\n\n")
                continue

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                parts.append(f"## {filename} (Read Failed)\n")
                parts.append(f"Error: {str(e)}\n\n")
                continue

            fence_lang = "python" if filename.endswith(".py") else "markdown"
            parts.append(f"## {filename}\n\n")
            parts.append(f"```{fence_lang}\n")
            parts.append(content)
            parts.append("\n```\n\n")

        return ToolResponse(
            content=[TextBlock(type="text", text="".join(parts))],
        )

    except Exception as e:
        # Tool boundary: always answer with a ToolResponse instead of raising.
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"Error occurred while getting basic files: {str(e)}",
                ),
            ],
        )
def _format_operator_file(
    base_dir: str,
    relative_path: str,
    heading: str,
    missing_note: str,
    show_path: bool = False,
) -> str:
    """Render one operator file as a markdown section, or a note if unavailable."""
    full_path = os.path.join(base_dir, relative_path)
    if not os.path.isfile(full_path):
        return missing_note

    try:
        with open(full_path, "r", encoding="utf-8") as f:
            code = f.read()
    except (OSError, UnicodeError) as e:
        # One unreadable file must not discard the other examples.
        return f"**Note:** Failed to read `{relative_path}`: {e}\n\n"

    section = f"### {heading}\n"
    if show_path:
        section += f"**File Path:** `{relative_path}`\n\n"
    return section + "```python\n" + code + "\n```\n\n"


async def get_operator_example(
    operator_names: list,
) -> ToolResponse:
    """Get example operators based on a list of operator names.

    For each name the operator type is taken from the last ``_``-separated
    token (e.g. ``text_length_filter`` -> ``filter``). The source file
    ``data_juicer/ops/<type>/<name>.py`` and the test file
    ``tests/ops/<type>/test_<name>.py`` are then looked up under
    ``DATA_JUICER_PATH``. Missing or unreadable files are reported with a
    note inside the section of the affected operator.

    Args:
        operator_names (list): List of operator names to get examples for.
            A single name passed as a plain string is treated as a
            one-element list. Names may only contain letters, digits and
            underscores; anything else is reported as invalid and skipped,
            so a name cannot be used to reach files outside the operator
            directories.

    Returns:
        ToolResponse: Example operator code and test files for the
        specified operators, or an explanatory message when the path is
        not configured, no names were given, or an error occurred.
    """
    if DATA_JUICER_PATH is None:
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text="DATA_JUICER_PATH is not configured. Please ask the user to provide the DATA_JUICER_PATH",
                ),
            ],
        )

    try:
        if not operator_names:
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text="No operator names provided. Please provide a list of operator names to get examples.",
                    ),
                ],
            )

        # A bare string would otherwise be iterated character by character.
        if isinstance(operator_names, str):
            operator_names = [operator_names]

        parts = [
            "# Operator Examples\n\n",
            f"Requested operators: {', '.join(operator_names)}\n\n",
        ]

        for index, tool_name in enumerate(operator_names, start=1):
            parts.append(f"## {index}. {tool_name}\n\n")

            # Names are interpolated into file paths below, so reject
            # anything that is not a plain snake_case-style identifier
            # (path separators, "..", empty strings, ...).
            if not tool_name.replace("_", "").isalnum():
                parts.append(
                    f"**Note:** `{tool_name}` is not a valid operator name; skipped.\n\n"
                )
                parts.append("---\n\n")
                continue

            op_type = tool_name.split("_")[-1]

            parts.append(
                _format_operator_file(
                    DATA_JUICER_PATH,
                    f"data_juicer/ops/{op_type}/{tool_name}.py",
                    "Source Code",
                    f"**Note:** Source code file not found for `{tool_name}`.\n\n",
                )
            )
            parts.append(
                _format_operator_file(
                    DATA_JUICER_PATH,
                    f"tests/ops/{op_type}/test_{tool_name}.py",
                    "Test Code",
                    f"**Note:** Test file not found for `{tool_name}`.\n\n",
                    show_path=True,
                )
            )
            parts.append("---\n\n")

        return ToolResponse(
            content=[TextBlock(type="text", text="".join(parts))],
        )

    except Exception as e:
        # Tool boundary: always answer with a ToolResponse instead of raising.
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"Error occurred while getting operator examples: {str(e)}\n"
                    f"Please check the operator names and try again.",
                ),
            ],
        )
def configure_data_juicer_path(data_juicer_path: str) -> ToolResponse:
    """Configure DataJuicer path.

    If the user provides the data_juicer_path, please use this method to
    configure it. A leading ``~`` is expanded to the user's home directory.
    The module-level ``DATA_JUICER_PATH`` is only updated when the expanded
    path exists and is a directory.

    Args:
        data_juicer_path (str): Path to DataJuicer installation

    Returns:
        ToolResponse: Configuration result (success message, or the reason
        the path was rejected).
    """
    global DATA_JUICER_PATH

    try:
        # Kept inside the try so a non-string argument yields an error
        # response instead of an uncaught exception.
        data_juicer_path = os.path.expanduser(data_juicer_path)

        if not os.path.exists(data_juicer_path):
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text=f"Specified DataJuicer path does not exist: {data_juicer_path}",
                    ),
                ],
            )

        # The other tools join relative paths onto this value, so a regular
        # file can never be a usable DataJuicer root.
        if not os.path.isdir(data_juicer_path):
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text=f"Specified DataJuicer path is not a directory: {data_juicer_path}",
                    ),
                ],
            )

        # Update global DATA_JUICER_PATH
        DATA_JUICER_PATH = data_juicer_path

        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"DataJuicer path has been updated to: {DATA_JUICER_PATH}",
                ),
            ],
        )

    except Exception as e:
        # Tool boundary: always answer with a ToolResponse instead of raising.
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"Error occurred while configuring DataJuicer path: {str(e)}",
                ),
            ],
        )