"""Video complexity metrics."""
import torch
from cutcutcodec.core.opti.parallel.threading import TorchThreads
from .dct import spatial_dct, temporal_dct
from .utils import batched_frames
__all__ = ["rms_sobel", "rms_time_diff", "spatial_dct", "temporal_dct"]
[docs]
@batched_frames
def rms_sobel(img: torch.Tensor, threads: int = 0) -> torch.Tensor:
    r"""Compute the spatial root mean square sobel gradient complexity for the image.

    .. note::

        It comes from
        ``ENCODING TIME AND ENERGY MODEL FOR SVT-AV1 BASED ON VIDEO COMPLEXITY``.

    This function implements the following formula:

    .. math::

        \begin{cases}
            C_{sob} = \sqrt{
                \frac{1}{(h-2)(w-2)}
                \sum\limits_{\boldsymbol{i} \in [\![1, h-2]\!] \times [\![1, w-2]\!]}\left(
                    \boldsymbol{G_x}^2(\boldsymbol{i}) + \boldsymbol{G_y}^2(\boldsymbol{i})
                \right)
            } \\
            \boldsymbol{G_x} = \boldsymbol{S} \star \boldsymbol{Y} \\
            \boldsymbol{G_y} = \boldsymbol{S}^\intercal \star \boldsymbol{Y} \\
            \boldsymbol{S} =
            \begin{pmatrix}
                -1 & 0 & 1 \\
                -2 & 0 & 2 \\
                -1 & 0 & 1 \\
            \end{pmatrix}
        \end{cases}

    With:

    * :math:`\star` the correlation product along the axis ``height`` and ``width``.
    * :math:`\boldsymbol{Y}` the Y layer of the image as a 2d matrix.

    Parameters
    ----------
    img : arraylike
        The Y[UV] images, of shape ([*batch], [1], height, width, [channels]).
        Only the Y component is used. It has to be in range [0, 1].
        As there is no padding on the edges, the image must be at least 3x3 pixels.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    rms_sobel : arraylike
        The :math:`C_{sob} \in \mathbb{R}^+` scalar for each image (of shape batch).

    Raises
    ------
    AssertionError
        If the height or the width of the image is smaller than 3 pixels.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.complexity import rms_sobel
    >>> np.random.seed(0)
    >>> img = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> rms_sobel(img).round(1)
    np.float64(1.4)
    >>>
    """
    _, _, height, width, _ = img.shape
    # Each dimension is checked on its own: a tuple comparison is lexicographic,
    # so ``(height, width) >= (3, 3)`` would accept e.g. a 4x1 image and the
    # slicing below would then average an empty tensor (NaN result).
    assert height >= 3 and width >= 3, f"the image {img.shape} is to small"
    with TorchThreads(threads):
        # The 3x3 sobel kernel is separable: a centered difference [-1, 0, 1]
        # along one axis followed by a smoothing [1, 2, 1] along the other one.
        g_x = img[:, 0, :, 2:, 0] - img[:, 0, :, :-2, 0]  # shape (batch, height, width-2)
        g_x = g_x[:, :-2, :] + 2.0*g_x[:, 1:-1, :] + g_x[:, 2:, :]
        g_y = img[:, 0, 2:, :, 0] - img[:, 0, :-2, :, 0]  # shape (batch, height-2, width)
        g_y = g_y[:, :, :-2] + 2.0*g_y[:, :, 1:-1] + g_y[:, :, 2:]
        sobel_square = g_x*g_x + g_y*g_y  # shape (batch, height-2, width-2)
        # Mean over the two spatial axes, one value per batch item.
        return torch.sqrt(sobel_square.mean(dim=(1, 2)))
[docs]
@batched_frames
def rms_time_diff(imgs: torch.Tensor, threads: int = 0) -> torch.Tensor:
    r"""Measure the temporal complexity as the rms difference between 2 consecutive images.

    .. note::

        The metric is taken from
        ``ENCODING TIME AND ENERGY MODEL FOR SVT-AV1 BASED ON VIDEO COMPLEXITY``.

    The implemented formula is:

    .. math::

        C_{td} = \sqrt{
            \frac{1}{hw}
            \sum\limits_{\boldsymbol{i} \in [\![1, h]\!] \times [\![1, w]\!]}\left(
                \boldsymbol{Y_t}(\boldsymbol{i}) - \boldsymbol{Y_{t+1}}(\boldsymbol{i})
            \right)^2
        }

    Where:

    * :math:`\boldsymbol{Y}` is the Y layer of the image, seen as a 2d matrix.
    * :math:`t+1` is the frame that directly follows the frame :math:`t`.

    Parameters
    ----------
    imgs : arraylike
        The Y[UV] images, of shape ([*batch], 2, height, width, [channels]).
        Only the Y component is used. It has to be in range [0, 1].
    threads : int, optional
        The number of calculation threads.
        With -1, the function uses as many threads as there are cores.
        With 0 (the default), it behaves like -1 when called from the main thread,
        and like 1 otherwise, in order to avoid nested threads.
        Any other positive value is the number of threads used.

    Returns
    -------
    rms_time_diff : arraylike
        The :math:`C_{td} \in \mathbb{R}^+` scalar for each couple of image (of shape batch).

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.complexity import rms_time_diff
    >>> np.random.seed(0)
    >>> imgs = np.random.random((2, 720, 1080, 3))  # It could also be a torch array list...
    >>> rms_time_diff(imgs).round(1)
    np.float64(0.4)
    >>>
    """
    _batch, nbr_frames, _height, _width, _channels = imgs.shape
    assert nbr_frames == 2, f"this temporal metric requires 2 images, {imgs.shape} is wrong"
    with TorchThreads(threads):
        # Keep only the first channel (Y) of each of the two frames.
        luma_t = imgs[:, 0, :, :, 0]  # shape (batch, height, width)
        luma_next = imgs[:, 1, :, :, 0]  # shape (batch, height, width)
        delta = luma_t - luma_next
        # Out-of-place product, so that the gradient can flow through it.
        mean_square = torch.mean(delta * delta, dim=(1, 2))  # shape (batch,)
        result = torch.sqrt(mean_square)
    return result