Shortcuts

Source code for torchcodec.decoders._core._metadata

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import dataclasses
import json
import pathlib
from dataclasses import dataclass
from typing import List, Optional, Union

import torch

from torchcodec.decoders._core.video_decoder_ops import (
    _get_container_json_metadata,
    _get_stream_json_metadata,
    create_from_file,
)


@dataclass
class VideoStreamMetadata:
    """Metadata of a single video stream."""

    duration_seconds_from_header: Optional[float]
    """Duration of the stream, in seconds, obtained from the header (float or
    None). This could be inaccurate."""
    # NOTE(review): the previous text read "in seconds", which is not a unit
    # for a bit rate. Presumably this is bits per second as reported by the
    # underlying demuxer -- confirm before documenting the unit.
    bit_rate: Optional[float]
    """Bit rate of the stream (float or None)."""
    num_frames_from_header: Optional[int]
    """Number of frames, from the stream's metadata. This is potentially
    inaccurate. We recommend using the ``num_frames`` attribute instead.
    (int or None)."""
    num_frames_from_content: Optional[int]
    """Number of frames computed by TorchCodec by scanning the stream's
    content (the scan doesn't involve decoding). This is more accurate
    than ``num_frames_from_header``. We recommend using the
    ``num_frames`` attribute instead. (int or None)."""
    begin_stream_seconds_from_content: Optional[float]
    """Beginning of the stream, in seconds (float or None).
    Conceptually, this corresponds to the first frame's :term:`pts`. It is
    computed as min(frame.pts) across all frames in the stream. Usually, this is
    equal to 0."""
    end_stream_seconds_from_content: Optional[float]
    """End of the stream, in seconds (float or None).
    Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
    is computed as max(frame.pts + frame.duration) across all frames in the
    stream. Note that no frame is played at this time value, so calling
    :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with
    this value would result in an error. Retrieving the last frame is best done
    by simply indexing the :class:`~torchcodec.decoders.VideoDecoder`
    object with ``[-1]``.
    """
    codec: Optional[str]
    """Codec (str or None)."""
    width: Optional[int]
    """Width of the frames (int or None)."""
    height: Optional[int]
    """Height of the frames (int or None)."""
    average_fps_from_header: Optional[float]
    """Average fps of the stream, obtained from the header (float or None).
    We recommend using the ``average_fps`` attribute instead."""
    stream_index: int
    """Index of the stream within the video (int)."""

    @property
    def num_frames(self) -> Optional[int]:
        """Number of frames in the stream. This corresponds to
        ``num_frames_from_content`` if a :term:`scan` was made, otherwise it
        corresponds to ``num_frames_from_header``.
        """
        if self.num_frames_from_content is not None:
            return self.num_frames_from_content
        return self.num_frames_from_header

    @property
    def duration_seconds(self) -> Optional[float]:
        """Duration of the stream in seconds. We try to calculate the duration
        from the actual frames if a :term:`scan` was performed. Otherwise we
        fall back to ``duration_seconds_from_header``.
        """
        if (
            self.end_stream_seconds_from_content is None
            or self.begin_stream_seconds_from_content is None
        ):
            return self.duration_seconds_from_header
        return (
            self.end_stream_seconds_from_content
            - self.begin_stream_seconds_from_content
        )

    @property
    def average_fps(self) -> Optional[float]:
        """Average fps of the stream. If a :term:`scan` was performed, this is
        computed from the number of frames and the duration of the stream.
        Otherwise we fall back to ``average_fps_from_header``. The header value
        is also returned when the scanned duration is zero, since no rate can
        be computed from it.
        """
        if (
            self.end_stream_seconds_from_content is None
            or self.begin_stream_seconds_from_content is None
            or self.num_frames is None
        ):
            return self.average_fps_from_header
        scanned_duration = (
            self.end_stream_seconds_from_content
            - self.begin_stream_seconds_from_content
        )
        if scanned_duration == 0:
            # A zero-length stream would otherwise raise ZeroDivisionError.
            return self.average_fps_from_header
        return self.num_frames / scanned_duration

    @property
    def begin_stream_seconds(self) -> float:
        """Beginning of the stream, in seconds (float). Conceptually, this
        corresponds to the first frame's :term:`pts`. If
        ``begin_stream_seconds_from_content`` is not None, then it is returned.
        Otherwise, this value is 0.
        """
        if self.begin_stream_seconds_from_content is None:
            # Float literal so the returned type matches the annotation.
            return 0.0
        return self.begin_stream_seconds_from_content

    @property
    def end_stream_seconds(self) -> Optional[float]:
        """End of the stream, in seconds (float or None).
        Conceptually, this corresponds to last_frame.pts + last_frame.duration.
        If ``end_stream_seconds_from_content`` is not None, then that value is
        returned. Otherwise, returns ``duration_seconds``.
        """
        if self.end_stream_seconds_from_content is None:
            return self.duration_seconds
        return self.end_stream_seconds_from_content

    def __repr__(self):
        # Overridden because properties are not printed by default.
        spaces = " "
        lines = [
            self.__class__.__name__ + ":",
            f"{spaces}num_frames: {self.num_frames}",
            f"{spaces}duration_seconds: {self.duration_seconds}",
            f"{spaces}average_fps: {self.average_fps}",
        ]
        lines.extend(
            f"{spaces}{field.name}: {getattr(self, field.name)}"
            for field in dataclasses.fields(self)
        )
        # Every line, including the last one, is newline-terminated.
        return "\n".join(lines) + "\n"
@dataclass
class VideoMetadata:
    """Container-level metadata of a video together with per-stream metadata.

    The ``*_from_header`` and ``best_*_stream_index`` fields are filled in by
    :func:`get_video_metadata` from the container's JSON metadata; any of them
    may be None when the corresponding key is absent.
    """

    duration_seconds_from_header: Optional[float]
    bit_rate_from_header: Optional[float]
    best_video_stream_index: Optional[int]
    best_audio_stream_index: Optional[int]
    streams: List[VideoStreamMetadata]

    @property
    def duration_seconds(self) -> Optional[float]:
        """Not implemented yet: always raises ``NotImplementedError``."""
        raise NotImplementedError("Decide on logic and implement this!")

    @property
    def bit_rate(self) -> Optional[float]:
        """Not implemented yet: always raises ``NotImplementedError``."""
        raise NotImplementedError("Decide on logic and implement this!")

    @property
    def best_video_stream(self) -> VideoStreamMetadata:
        """Entry of ``streams`` located at ``best_video_stream_index``.

        Raises:
            ValueError: if ``best_video_stream_index`` is None.
        """
        best_index = self.best_video_stream_index
        if best_index is not None:
            return self.streams[best_index]
        raise ValueError("The best video stream is unknown.")


def get_video_metadata(decoder: torch.Tensor) -> VideoMetadata:
    """Build a :class:`VideoMetadata` from a video decoder.

    How accurate the metadata is, and whether some of the returned fields are
    available at all, depends on whether the decoder performed a full scan.
    """
    container = json.loads(_get_container_json_metadata(decoder))

    def _load_stream(index: int) -> VideoStreamMetadata:
        # One JSON document per stream; keys missing from it become None.
        raw = json.loads(_get_stream_json_metadata(decoder, index))
        lookup = raw.get
        return VideoStreamMetadata(
            duration_seconds_from_header=lookup("durationSeconds"),
            bit_rate=lookup("bitRate"),
            num_frames_from_header=lookup("numFrames"),
            num_frames_from_content=lookup("numFramesFromScan"),
            begin_stream_seconds_from_content=lookup("minPtsSecondsFromScan"),
            end_stream_seconds_from_content=lookup("maxPtsSecondsFromScan"),
            codec=lookup("codec"),
            width=lookup("width"),
            height=lookup("height"),
            average_fps_from_header=lookup("averageFps"),
            stream_index=index,
        )

    # "numStreams" is the only key that is required to be present.
    all_streams = [_load_stream(i) for i in range(container["numStreams"])]

    return VideoMetadata(
        duration_seconds_from_header=container.get("durationSeconds"),
        bit_rate_from_header=container.get("bitRate"),
        best_video_stream_index=container.get("bestVideoStreamIndex"),
        best_audio_stream_index=container.get("bestAudioStreamIndex"),
        streams=all_streams,
    )


def get_video_metadata_from_header(filename: Union[str, pathlib.Path]) -> VideoMetadata:
    """Return the metadata of the video file at ``filename``.

    The file is opened with ``seek_mode="approximate"``.
    """
    # NOTE(review): presumably "approximate" mode skips the content scan, so
    # only header-derived fields are populated -- confirm against the decoder.
    decoder = create_from_file(str(filename), seek_mode="approximate")
    return get_video_metadata(decoder)

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources