data_juicer.ops.load 源代码
from .base_op import OPERATORS
[文档]
def load_ops(process_list, op_env_manager=None):
"""
Load op list according to the process list from config file.
:param process_list: A process list. Each item is an op name and its
arguments.
:param op_env_manager: The OPEnvManager to try to merge environment specs of different OPs that have common
dependencies. Only available when min_common_dep_num_to_combine >= 0.
:return: The op instance list.
"""
ops = []
new_process_list = []
for process in process_list:
op_name, args = list(process.items())[0]
ops.append(OPERATORS.modules[op_name](**args))
new_process_list.append(process)
# store the OP configs into each OP
for op_cfg, op in zip(new_process_list, ops):
op._op_cfg = op_cfg
# update op runtime environment if OPEnvManager is enabled
if op_env_manager:
# first round: record and merge possible common env specs
for op in ops:
op_name = op._name
op_env_spec = op.get_env_spec()
op_env_manager.record_op_env_spec(op_name, op_env_spec)
# second round: update op runtime environment
for op in ops:
op_name = op._name
op_env_spec = op_env_manager.get_op_env_spec(op_name)
op._requirements = op_env_spec.pip_pkgs
# if the runtime_env is not set for this OP, update the runtime_env as well
if op.runtime_env is None:
op.runtime_env = op_env_spec.to_dict()
return ops