data_juicer.ops.filter.image_face_ratio_filter 源代码

import os

import numpy as np
from loguru import logger

from data_juicer.utils.constant import Fields, StatsKeys
from data_juicer.utils.lazy_loader import LazyLoader
from data_juicer.utils.mm_utils import detect_faces, load_data_with_context, load_image
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import OPERATORS, UNFORKABLE, Filter
from ..op_fusion import LOADED_IMAGES

# OpenCV is bound through LazyLoader under the module name "cv2" (pip package
# "opencv-python"); presumably the real import is deferred until first
# attribute access -- confirm against LazyLoader's implementation.
cv2 = LazyLoader("cv2", "opencv-python")

# Name passed to every `register_module` decorator applied to the operator class.
OP_NAME = "image_face_ratio_filter"


@UNFORKABLE.register_module(OP_NAME)
@OPERATORS.register_module(OP_NAME)
@LOADED_IMAGES.register_module(OP_NAME)
class ImageFaceRatioFilter(Filter):
    """Filter to keep samples with face area ratios within a specific range.

    This operator filters samples based on the ratio of the largest face
    area to the total image area. It uses an OpenCV classifier for face
    detection. The key metric, 'face_ratios', is computed for each image in
    the sample. Samples are kept if the face area ratios fall within the
    specified min and max ratio range. The filtering strategy can be set to
    'any' (keep if any image meets the condition) or 'all' (keep only if all
    images meet the condition). If no images are present in the sample, the
    sample is retained.
    """

    # Default keyword arguments forwarded to ``detect_faces``. This dict is
    # shared by the whole class and must never be mutated; each instance
    # works on its own copy (see ``__init__``).
    _default_kwargs = {
        "scaleFactor": 1.1,
        "minNeighbors": 3,
        "minSize": None,
        "maxSize": None,
    }

    def __init__(
        self,
        cv_classifier: str = "",
        min_ratio: float = 0.0,
        max_ratio: float = 0.4,
        any_or_all: str = "any",
        *args,
        **kwargs,
    ):
        """
        Initialization method.

        :param cv_classifier: OpenCV classifier path for face detection.
            By default, we will use 'haarcascade_frontalface_alt.xml'.
        :param min_ratio: Min ratio for the largest face area in an image.
        :param max_ratio: Max ratio for the largest face area in an image.
        :param any_or_all: Keep this sample with 'any' or 'all' strategy of
            all images. 'any': keep this sample if any images meet the
            condition. 'all': keep this sample only if all images meet the
            condition.
        :param args: Extra positional arguments.
        :param kwargs: Extra keyword arguments. Entries whose key is one of
            'scaleFactor', 'minNeighbors', 'minSize' or 'maxSize' override
            the corresponding default passed to the face detector.
        :raises ValueError: If ``any_or_all`` is neither 'any' nor 'all'.
        """
        super().__init__(*args, **kwargs)

        if cv_classifier == "":
            # Fall back to the frontal-face Haar cascade shipped with OpenCV.
            cv_classifier = os.path.join(cv2.data.haarcascades, "haarcascade_frontalface_alt.xml")

        self.min_ratio = min_ratio
        self.max_ratio = max_ratio

        # Copy the class-level defaults before applying overrides. Assigning
        # the class dict directly would make every override leak into the
        # defaults of all other instances of this operator.
        self.extra_kwargs = dict(self._default_kwargs)
        for key in kwargs:
            if key in self.extra_kwargs:
                self.extra_kwargs[key] = kwargs[key]

        if any_or_all not in ["any", "all"]:
            raise ValueError(f"Keep strategy [{any_or_all}] is not supported. " f'Can only be one of ["any", "all"].')
        self.any = any_or_all == "any"

        self.model_key = prepare_model(model_type="opencv_classifier", model_path=cv_classifier)

    def compute_stats_single(self, sample, context=False):
        """Compute the largest-face area ratio of every image in ``sample``.

        The result is stored under ``sample[Fields.stats][StatsKeys.face_ratios]``
        with one entry per image key, in the order the keys appear in the
        sample. Nothing is recomputed if that stat is already present.

        :param sample: The sample to compute stats for.
        :param context: Forwarded to ``load_data_with_context``.
        :return: The sample with the 'face_ratios' stat filled in.
        """
        # check if it's computed already
        if StatsKeys.face_ratios in sample[Fields.stats]:
            return sample

        # there is no image in this sample
        if self.image_key not in sample or not sample[self.image_key]:
            sample[Fields.stats][StatsKeys.face_ratios] = np.array([], dtype=np.float64)
            return sample

        # load images
        loaded_image_keys = sample[self.image_key]
        sample, images = load_data_with_context(
            sample, context, loaded_image_keys, load_image, mm_bytes_key=self.image_bytes_key
        )

        model = get_model(self.model_key)

        # detect faces
        face_detections = {}
        for key, image in images.items():
            face_detections[key] = detect_faces(image, model, **self.extra_kwargs)
        logger.debug(f"detections: {face_detections}")

        # compute face area ratios for each image considering the largest face
        face_area_ratios = {}
        for key, dets in face_detections.items():
            image_area = images[key].width * images[key].height
            # `default=0.0` yields a ratio of 0 when no face was detected.
            face_area_ratios[key] = max([w * h for _, _, w, h in dets], default=0.0) / image_area
        logger.debug(f"ratios: {face_area_ratios}")

        # Index by the original key list so the stat has one entry per listed
        # image, in sample order.
        sample[Fields.stats][StatsKeys.face_ratios] = [face_area_ratios[key] for key in loaded_image_keys]
        return sample

    def process_single(self, sample):
        """Decide whether ``sample`` is kept, based on its 'face_ratios' stat.

        :param sample: A sample whose 'face_ratios' stat was already computed.
        :return: True if there are no ratios at all; otherwise whether any
            (strategy 'any') or all (strategy 'all') ratios pass
            ``get_keep_boolean`` for ``[min_ratio, max_ratio]``.
        """
        face_ratios = sample[Fields.stats][StatsKeys.face_ratios]
        # Samples without images are retained. `len` is used instead of
        # truthiness because the stat may be a numpy array.
        if len(face_ratios) <= 0:
            return True

        keep_bools = np.array(
            [self.get_keep_boolean(face_ratio, self.min_ratio, self.max_ratio) for face_ratio in face_ratios]
        )

        # different strategies
        if self.any:
            return keep_bools.any()
        else:
            return keep_bools.all()