Source code for data_juicer.core.tracer
from data_juicer.utils.ray_utils import is_ray_mode
from .tracer import Tracer
[docs]
def check_tracer_collect_complete(tracer_instance, op_name):
    """Ask the tracer whether collection for ``op_name`` is complete.

    When ``is_ray_mode()`` is true, the tracer method is invoked through
    ``.remote(...)`` and the returned reference is resolved with
    ``ray.get``; otherwise the method is called directly.

    :param tracer_instance: tracer object exposing
        ``is_collection_complete``. In Ray mode it must support
        ``.remote(...)`` on that method (presumably a Ray actor handle --
        confirm against the caller).
    :param op_name: name of the operator to query.
    :return: whatever ``is_collection_complete`` returns for ``op_name``.
    """
    # Local execution: plain method call, no Ray import needed.
    if not is_ray_mode():
        return tracer_instance.is_collection_complete(op_name)

    # Imported lazily so that Ray is only required when Ray mode is active.
    import ray

    pending = tracer_instance.is_collection_complete.remote(op_name)
    return ray.get(pending)
[docs]
def should_trace_op(tracer_instance, op_name):
    """Ask the tracer whether the operator ``op_name`` should be traced.

    When ``is_ray_mode()`` is true, the tracer method is invoked through
    ``.remote(...)`` and the returned reference is resolved with
    ``ray.get``; otherwise the method is called directly.

    :param tracer_instance: tracer object exposing ``should_trace_op``.
        In Ray mode it must support ``.remote(...)`` on that method
        (presumably a Ray actor handle -- confirm against the caller).
    :param op_name: name of the operator to query.
    :return: whatever the tracer's ``should_trace_op`` returns.
    """
    # Local execution: plain method call, no Ray import needed.
    if not is_ray_mode():
        return tracer_instance.should_trace_op(op_name)

    # Imported lazily so that Ray is only required when Ray mode is active.
    import ray

    decision_ref = tracer_instance.should_trace_op.remote(op_name)
    return ray.get(decision_ref)
[docs]
def collect_for_mapper(tracer_instance, op_name, original_sample_dict, processed_sample_dict, text_key):
    """Forward one mapper before/after sample pair to the tracer.

    All four trailing arguments are passed, unchanged and in order, to the
    tracer's ``collect_mapper_sample``. When ``is_ray_mode()`` is true the
    call goes through ``.remote(...)`` and is resolved with ``ray.get``;
    otherwise the method is called directly. The tracer's result is
    discarded in both cases.

    :param tracer_instance: tracer object exposing
        ``collect_mapper_sample``. In Ray mode it must support
        ``.remote(...)`` on that method (presumably a Ray actor handle --
        confirm against the caller).
    :param op_name: name of the operator the sample belongs to.
    :param original_sample_dict: the sample before the mapper ran.
    :param processed_sample_dict: the sample after the mapper ran.
    :param text_key: key passed through to the tracer (presumably the
        field holding the text to compare -- confirm against ``Tracer``).
    :return: None.
    """
    payload = (op_name, original_sample_dict, processed_sample_dict, text_key)

    # Local execution: plain method call, no Ray import needed.
    if not is_ray_mode():
        tracer_instance.collect_mapper_sample(*payload)
        return

    # Imported lazily so that Ray is only required when Ray mode is active.
    import ray

    pending = tracer_instance.collect_mapper_sample.remote(*payload)
    # Resolve the reference before returning; the value itself is unused.
    ray.get(pending)
[docs]
def collect_for_filter(tracer_instance, op_name, sample, should_keep):
    """Forward one filter decision to the tracer.

    The arguments are passed, unchanged and in order, to the tracer's
    ``collect_filter_sample``. When ``is_ray_mode()`` is true the call goes
    through ``.remote(...)`` and is resolved with ``ray.get``; otherwise
    the method is called directly. The tracer's result is discarded in
    both cases.

    :param tracer_instance: tracer object exposing
        ``collect_filter_sample``. In Ray mode it must support
        ``.remote(...)`` on that method (presumably a Ray actor handle --
        confirm against the caller).
    :param op_name: name of the operator the sample belongs to.
    :param sample: the sample the filter evaluated.
    :param should_keep: the filter's keep/drop decision for ``sample``.
    :return: None.
    """
    # Local execution: plain method call, no Ray import needed.
    if not is_ray_mode():
        tracer_instance.collect_filter_sample(op_name, sample, should_keep)
        return

    # Imported lazily so that Ray is only required when Ray mode is active.
    import ray

    pending = tracer_instance.collect_filter_sample.remote(op_name, sample, should_keep)
    # Resolve the reference before returning; the value itself is unused.
    ray.get(pending)