data_juicer.utils.video_utils module#

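class data_juicer.utils.video_utils.VideoMetadata(height: int | None = None, width: int | None = None, fps: float | None = None, num_frames: int | None = None, duration: float | None = None)[源代码]#

基类：object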

Metadata for video content.

This class stores essential video properties such as resolution, frame rate, and duration.

height: int | None = None#

width: int | None = None#

fps: float | None = None#

num_frames: int | None = None#

duration: float | None = None#

__init__(height: int | None = None, width: int | None = None, fps: float | None = None, num_frames: int | None = None, duration: float | None = None) → None#

class data_juicer.utils.video_utils.Frames(frames: List[ndarray[tuple[Any, ...], dtype[uint8]]], indices: List[int] | None = None, pts_time: List[float] | None = None)[源代码]#

基类：object

frames: List[ndarray[tuple[Any, ...], dtype[uint8]]]#

indices: List[int] | None#

pts_time: List[float] | None#

__init__(frames: List[ndarray[tuple[Any, ...], dtype[uint8]]], indices: List[int] | None = None, pts_time: List[float] | None = None) → None#: Method generated by attrs for class Frames.

class data_juicer.utils.video_utils.Clip(source_video: str, span: tuple[float, float], id: str | None = None, path: str | None = None, encoded_data: bytes | None = None, frames: List[ndarray[tuple[Any, ...], dtype[uint8]]] | None = None)[源代码]#

基类：object

Container for video clip data including metadata, frames, and processing results.

This class stores information about a video segment, including its source, span, frames and so on.

source_video: str#

span: tuple[float, float]#

id: str | None#

path: str | None#

encoded_data: bytes | None#

frames: List[ndarray[tuple[Any, ...], dtype[uint8]]] | None#

__init__(source_video: str, span: tuple[float, float], id: str | None = None, path: str | None = None, encoded_data: bytes | None = None, frames: List[ndarray[tuple[Any, ...], dtype[uint8]]] | None = None) → None#: Method generated by attrs for class Clip.

class data_juicer.utils.video_utils.VideoReader(video_source: str | Path | bytes | IO[bytes])[源代码]#

基类：ABC

Abstract class for video processing.

This class provides an interface for video processing tasks such as extracting frames, key frames, and clipping.

__init__(video_source: str | Path | bytes | IO[bytes])[源代码]#

Initialize video reader.

参数:

video_source -- Path, URL, bytes, or file-like object.

property metadata#

abstractmethod get_metadata() → VideoMetadata[源代码]#: Get video metadata.

abstractmethod extract_frames(start_time: float = 0, end_time: float | None = None) → Iterator[ndarray][源代码]#

Yield frames between [start_time, end_time) as numpy arrays.

参数:

start_time -- Start time in seconds (inclusive)
end_time -- End time in seconds (exclusive). If None, extract to end of video.

abstractmethod extract_keyframes(start_time: float = 0, end_time: float | None = None) → Frames[源代码]#

Extract keyframes and return them in a Frames object.

参数:

start_time -- Start time in seconds (inclusive)
end_time -- End time in seconds (exclusive). If None, extract to end of video.

abstractmethod extract_clip(start_time: float = 0, end_time: float | None = None, output_path: str = None, to_numpy: bool = True) → Clip | None[源代码]#

Extract a subclip.

参数:

start_time -- Start time in seconds
end_time -- End time in seconds. If None, extract to end of video.
output_path -- The path to save the output video clip. If provided, the clip is saved to a file.
to_numpy -- Whether to return frames as a list of numpy arrays.

返回:

A Clip object on success, or None on failure.

check_time_span(start_time: float | None = 0.0, end_time: float | None = None) → None[源代码]#

abstractmethod close() → None[源代码]#: Release any held resources.

abstractmethod classmethod is_available() → bool[源代码]#: Check if the backend is available.

class data_juicer.utils.video_utils.AVReader(video_source: str | Path | bytes | IO[bytes], video_stream_index: int = 0, frame_format: str = 'rgb24')[源代码]#

基类：VideoReader

Video reader using the AV library.

__init__(video_source: str | Path | bytes | IO[bytes], video_stream_index: int = 0, frame_format: str = 'rgb24')[源代码]#

Initialize AVReader.

参数:

video_source -- Path, URL, bytes, or file-like object.
video_stream_index -- Video stream index to decode, default set to 0.
frame_format -- Frame format to decode, default set to "rgb24".

get_metadata() → VideoMetadata[源代码]#: Get video metadata.

extract_frames(start_time: float | None = 0.0, end_time: float | None = None) → Iterator[ndarray][源代码]#

Get the video's frames from the container within a specified time range.

参数:

start_time -- Start time in seconds (default: 0.0).
end_time -- End time in seconds (exclusive). If None, decode until end.

返回:

Iterator of numpy arrays within the specified time range.

extract_keyframes(start_time: float = 0, end_time: float | None = None, return_meta_only: bool = False)[源代码]#

Extract key frames.

参数:

start_time -- Start time in seconds (default: 0.0).
end_time -- End time in seconds (exclusive). If None, decode until end.
return_meta_only -- If True, only return timestamps and indices of keyframes.

返回:

Return a Frames object.

extract_clip(start_time, end_time, output_path: str = None, to_numpy: bool = True)[源代码]#

Extract a clip from the video based on the start and end time.

参数:

start_time -- the start time in seconds.
end_time -- the end time in seconds. If it's None, this function will cut the video from start_time to the end of the video.
output_path -- the path to output video.
to_numpy -- whether to return clip data as numpy array and save to Clip.frames.

返回:

Clip object. If output_path is not None, it will save the clip to output_path. If to_numpy is True, it will return clip data as numpy array and save to Clip.frames. If to_numpy is False, it will return clip data as bytes and save to Clip.encoded_data.

classmethod is_available()[源代码]#: Check if the backend is available.

close()[源代码]#: Release any held resources.

class data_juicer.utils.video_utils.FFmpegReader(video_source: str | Path | bytes | IO[bytes], video_stream_index: int = 0, frame_format: str = 'rgb24')[源代码]#

基类：VideoReader

Video reader using FFmpeg.

__init__(video_source: str | Path | bytes | IO[bytes], video_stream_index: int = 0, frame_format: str = 'rgb24')[源代码]#

Initialize FFmpegReader.

参数:

video_source -- Path, URL, bytes, or file-like object.
video_stream_index -- Video stream index to decode, default set to 0.
frame_format -- Frame format, default set to "rgb24".

get_metadata() → VideoMetadata[源代码]#: Get video metadata.

extract_frames(start_time: float | None = 0.0, end_time: float | None = None) → Iterator[ndarray][源代码]#

Get the video's frames within a specified time range.

参数:

start_time -- Start time in seconds (default: 0.0).
end_time -- End time in seconds (exclusive). If None, decode until end.
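duration -- Duration from start_time. Mutually exclusive with end_time. Note: the signature shown above accepts only start_time and end_time, so this parameter is not currently available.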

返回:

Iterator of numpy arrays within the specified time range.

extract_keyframes(start_time: float = 0, end_time: float | None = None, return_meta_only: bool = False)[源代码]#

Extract only true keyframes (I-frames) from video.

参数:

start_time -- Start time in seconds (default: 0.0).
end_time -- End time in seconds (exclusive). If None, decode until end.
return_meta_only -- If True, only return timestamps and indices of keyframes.

返回:

Return a Frames object.

extract_clip(start_time, end_time, output_path: str = None, to_numpy=True, **kwargs)[源代码]#

Extract a clip from the video based on the start and end time.

参数:

start_time -- the start time in seconds.
end_time -- the end time in seconds. If it's None, this function will cut the video from start_time to the end of the video.
output_path -- the path to output video.
to_numpy -- whether to return clip data as numpy array and save to Clip.frames.

返回:

Clip object. If output_path is not None, it will save the clip to output_path. If to_numpy is True, it will return clip data as numpy array and save to Clip.frames. If to_numpy is False, it will return clip data as bytes and save to Clip.encoded_data.

close()[源代码]#: Clean up resources, including temporary files.

classmethod is_available()[源代码]#: Check if the backend is available.

class data_juicer.utils.video_utils.DecordReader(video_source: str | Path | bytes | IO[bytes])[源代码]#

基类：VideoReader

Video reader using Decord.

__init__(video_source: str | Path | bytes | IO[bytes])[源代码]#

Initialize the video reader.

参数:

video_source -- Path, URL, bytes, or file-like object.

get_metadata() → VideoMetadata[源代码]#: Get video metadata.

extract_frames(start_time: float | None = 0.0, end_time: float | None = None) → Iterator[ndarray][源代码]#

Get the video's frames within a specified time range using decord.

参数:

start_time -- Start time in seconds (default: 0.0).
end_time -- End time in seconds (exclusive). If None, decode until end.

返回:

Numpy array of frames in shape (num_frames, height, width, channels).

extract_keyframes(start_time: float = 0, end_time: float | None = None, return_meta_only: bool = False)[源代码]#

Extract keyframes from video.

参数:

start_time -- Start time in seconds (default: 0.0).
end_time -- End time in seconds (exclusive). If None, decode until end.
return_meta_only -- If True, only return timestamps and indices of keyframes.

返回:

Return a Frames object.

extract_clip(start_time, end_time, output_path: str = None, to_numpy=True)[源代码]#

Extract a clip from the video based on the start and end time.

参数:

start_time -- the start time in seconds.
end_time -- the end time in seconds. If it's None, this function will cut the video from start_time to the end of the video.
output_path -- the path to output video.
to_numpy -- whether to return clip data as numpy array and save to Clip.frames.

返回:

Clip object.

close()[源代码]#: Release any held resources.

classmethod is_available()[源代码]#: Check if the backend is available.

data_juicer.utils.video_utils.create_video_reader(video_source: str, backend: str = 'auto', **kwargs) → VideoReader[源代码]#

data_juicer.utils.video_utils module#

本页