Source code for cutcutcodec.core.analysis.video.complexity

"""Video complexity metrics."""

import torch

from cutcutcodec.core.opti.parallel.threading import TorchThreads

from .dct import spatial_dct, temporal_dct
from .utils import batched_frames

# Public names of this module: ``rms_sobel`` and ``rms_time_diff`` are defined in this file,
# while ``spatial_dct`` and ``temporal_dct`` are imported from ``.dct`` and re-exported here.
__all__ = ["rms_sobel", "rms_time_diff", "spatial_dct", "temporal_dct"]


@batched_frames
def rms_sobel(img: torch.Tensor, threads: int = 0) -> torch.Tensor:
    r"""Compute the spatial root mean square sobel gradient complexity for the image.

    .. note::
        It comes from ``ENCODING TIME AND ENERGY MODEL FOR SVT-AV1 BASED ON VIDEO COMPLEXITY``.

    This function implements the following formula:

    .. math::

        \begin{cases}
            C_{sob} = \sqrt{
                \frac{1}{(h-2)(w-2)}
                \sum\limits_{\boldsymbol{i} \in [\![1, h-2]\!] \times [\![1, w-2]\!]}\left(
                    \boldsymbol{G_x}^2(\boldsymbol{i}) + \boldsymbol{G_y}^2(\boldsymbol{i})
                \right)
            } \\
            \boldsymbol{G_x} = \boldsymbol{S} \star \boldsymbol{Y} \\
            \boldsymbol{G_y} = \boldsymbol{S}^\intercal \star \boldsymbol{Y} \\
            \boldsymbol{S} = \begin{pmatrix}
                -1 & 0 & 1 \\
                -2 & 0 & 2 \\
                -1 & 0 & 1 \\
            \end{pmatrix}
        \end{cases}

    With:

    * :math:`\star` the correlation product along the axis ``height`` and ``width``.
    * :math:`\boldsymbol{Y}` the Y layer of the image as a 2d matrix.

    Parameters
    ----------
    img : arraylike
        The Y[UV] images, of shape ([*batch], [1], height, width, [channels]).
        Only the Y component is used. It has to be in range [0, 1].
        As there is no padding on the edges, the image must be at least 3x3 pixels.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    rms_sobel : arraylike
        The :math:`C_{sob} \in \mathbb{R}^+` scalar for each image (of shape batch).

    Raises
    ------
    AssertionError
        If the height or the width of the image is smaller than 3 pixels.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.complexity import rms_sobel
    >>> np.random.seed(0)
    >>> img = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> rms_sobel(img).round(1)
    np.float64(1.4)
    >>>
    """
    # the unpacking requires a 5-d tensor: (batch, frame, height, width, channels)
    _, _, height, width, _ = img.shape
    # Each dimension is checked on its own: comparing the tuples ``(height, width) >= (3, 3)``
    # is lexicographic, so a 4x1 image would be accepted and the result would silently be NaN.
    assert height >= 3 and width >= 3, f"the image {img.shape} is to small"
    with TorchThreads(threads):
        # The 3x3 sobel kernel is separable: a central difference [-1, 0, 1] along one axis
        # followed by a smoothing [1, 2, 1] along the other axis.
        g_x = img[:, 0, :, 2:, 0] - img[:, 0, :, :-2, 0]  # shape (batch, height, width-2)
        g_x = g_x[:, :-2, :] + 2.0*g_x[:, 1:-1, :] + g_x[:, 2:, :]
        g_y = img[:, 0, 2:, :, 0] - img[:, 0, :-2, :, 0]  # shape (batch, height-2, width)
        g_y = g_y[:, :, :-2] + 2.0*g_y[:, :, 1:-1] + g_y[:, :, 2:]
        sobel_square = g_x*g_x + g_y*g_y  # shape (batch, height-2, width-2)
        # the mean over the valid (unpadded) area, then the root: one scalar per image
        return torch.sqrt(sobel_square.mean(dim=(1, 2)))
@batched_frames
def rms_time_diff(imgs: torch.Tensor, threads: int = 0) -> torch.Tensor:
    r"""Evaluate the temporal complexity as the RMS difference between 2 consecutive frames.

    .. note::
        It comes from ``ENCODING TIME AND ENERGY MODEL FOR SVT-AV1 BASED ON VIDEO COMPLEXITY``.

    The value returned is given by:

    .. math::

        C_{td} = \sqrt{
            \frac{1}{hw}
            \sum\limits_{\boldsymbol{i} \in [\![1, h]\!] \times [\![1, w]\!]}\left(
                \boldsymbol{Y_t}(\boldsymbol{i}) - \boldsymbol{Y_{t+1}}(\boldsymbol{i})
            \right)^2
        }

    With:

    * :math:`\boldsymbol{Y}` the Y layer of the image as a 2d matrix.
    * :math:`t+1` the frame just after :math:`t`.

    Parameters
    ----------
    imgs : arraylike
        The Y[UV] images, of shape ([*batch], 2, height, width, [channels]).
        Only the Y component is used. It has to be in range [0, 1].
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    rms_time_diff : arraylike
        The :math:`C_{td} \in \mathbb{R}^+` scalar for each couple of image (of shape batch).

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.complexity import rms_time_diff
    >>> np.random.seed(0)
    >>> imgs = np.random.random((2, 720, 1080, 3))  # It could also be a torch array list...
    >>> rms_time_diff(imgs).round(1)
    np.float64(0.4)
    >>>
    """
    # the unpacking requires a 5-d tensor: (batch, frame, height, width, channels)
    _batch, nb_frames, _height, _width, _channels = imgs.shape
    assert nb_frames == 2, f"this temporal metric requires 2 images, {imgs.shape} is wrong"
    with TorchThreads(threads):
        luma = imgs[..., 0]  # first channel only, shape (batch, 2, height, width)
        delta = luma[:, 0] - luma[:, 1]  # shape (batch, height, width)
        # the square is computed out of place, to keep the gradient valid
        mean_square = torch.mean(delta * delta, dim=(1, 2))
        return mean_square.sqrt()