Source code for cutcutcodec.core.analysis.video.complexity

"""Video complexity metrics."""

import torch

from cutcutcodec.core.opti.parallel.threading import TorchThreads

from .dct import spatial_dct, temporal_dct
from .utils import batched_frames

# Public API of this module: the two RMS metrics defined below, plus the
# DCT-based metrics imported from ``.dct`` and re-exported here.
__all__ = ["rms_sobel", "rms_time_diff", "spatial_dct", "temporal_dct"]



[docs]
@batched_frames
def rms_sobel(img: torch.Tensor, threads: int = 0) -> torch.Tensor:
    r"""Compute the spatial root mean square sobel gradient complexity for the image.

    .. note::

        It comes from
        ``ENCODING TIME AND ENERGY MODEL FOR SVT-AV1 BASED ON VIDEO COMPLEXITY``.

    This function implements the following formula:

    .. math::

        \begin{cases}
            C_{sob} = \sqrt{
                \frac{1}{(h-2)(w-2)}
                \sum\limits_{\boldsymbol{i} \in [\![1, h-2]\!] \times [\![1, w-2]\!]}\left(
                    \boldsymbol{G_x}^2(\boldsymbol{i}) + \boldsymbol{G_y}^2(\boldsymbol{i})
                \right)
            } \\
            \boldsymbol{G_x} = \boldsymbol{S} \star \boldsymbol{Y} \\
            \boldsymbol{G_y} = \boldsymbol{S}^\intercal \star \boldsymbol{Y} \\
            \boldsymbol{S} =
                \begin{pmatrix}
                    -1 & 0 & 1 \\
                    -2 & 0 & 2 \\
                    -1 & 0 & 1 \\
                \end{pmatrix}
        \end{cases}

    With:
        * :math:`\star` the correlation product along the axis ``height`` and ``width``.
        * :math:`\boldsymbol{Y}` the Y layer of the image as a 2d matrix.

    Parameters
    ----------
    img : arraylike
        The Y[UV] images, of shape ([*batch], [1], height, width, [channels]).
        Only the Y component is used. It has to be in range [0, 1].
        As there is no padding on the edges, the image must be at least 3x3 pixels.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    rms_sobel : arraylike
        The :math:`C_{sob} \in \mathbb{R}^+` scalar for each image (of shape batch).

    Raises
    ------
    AssertionError
        If the height or the width of the image is smaller than 3 pixels.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.complexity import rms_sobel
    >>> np.random.seed(0)
    >>> img = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> rms_sobel(img).round(1)
    np.float64(1.4)
    >>>

    """
    _, _, height, width, _ = img.shape
    # Each dimension is tested on its own: comparing the tuples
    # ``(height, width) >= (3, 3)`` is lexicographic, so it would accept
    # a 4x1 image and the result would silently be NaN.
    assert height >= 3 and width >= 3, f"the image {img.shape} is to small"
    with TorchThreads(threads):
        luma = img[:, 0, :, :, 0]  # Y layer only, shape (batch, height, width)

        # The 3x3 sobel kernel is separable: a centered difference along one axis
        # followed by a (1, 2, 1) smoothing along the other axis.
        g_x = luma[:, :, 2:] - luma[:, :, :-2]  # shape (batch, height, width-2)
        g_x = g_x[:, :-2, :] + 2.0*g_x[:, 1:-1, :] + g_x[:, 2:, :]
        g_y = luma[:, 2:, :] - luma[:, :-2, :]  # shape (batch, height-2, width)
        g_y = g_y[:, :, :-2] + 2.0*g_y[:, :, 1:-1] + g_y[:, :, 2:]

        sobel_square = g_x*g_x + g_y*g_y  # shape (batch, height-2, width-2)
        # The reduction is done inside the thread context as well,
        # so that the whole computation runs under the same thread setting.
        rms = torch.sqrt(sobel_square.mean(dim=(1, 2)))  # not inplace for gradient
    return rms




[docs]
@batched_frames
def rms_time_diff(imgs: torch.Tensor, threads: int = 0) -> torch.Tensor:
    r"""Measure the temporal complexity as the RMS difference between 2 consecutive frames.

    .. note::

        It comes from
        ``ENCODING TIME AND ENERGY MODEL FOR SVT-AV1 BASED ON VIDEO COMPLEXITY``.

    The returned quantity is defined by:

    .. math::

        C_{td} = \sqrt{
            \frac{1}{hw}
            \sum\limits_{\boldsymbol{i} \in [\![1, h]\!] \times [\![1, w]\!]}\left(
                \boldsymbol{Y_t}(\boldsymbol{i}) - \boldsymbol{Y_{t+1}}(\boldsymbol{i})
            \right)^2
        }

    With:
        * :math:`\boldsymbol{Y}` the Y layer of the image as a 2d matrix.
        * :math:`t+1` the frame just after :math:`t`.

    Parameters
    ----------
    imgs : arraylike
        The Y[UV] images, of shape ([*batch], 2, height, width, [channels]).
        Only the Y component is used. It has to be in range [0, 1].
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    rms_time_diff : arraylike
        The :math:`C_{td} \in \mathbb{R}^+` scalar for each couple of image (of shape batch).

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.complexity import rms_time_diff
    >>> np.random.seed(0)
    >>> imgs = np.random.random((2, 720, 1080, 3))  # It could also be a torch array list...
    >>> rms_time_diff(imgs).round(1)
    np.float64(0.4)
    >>>

    """
    # Unpacking exactly 5 values also checks that the tensor has 5 dimensions.
    _, frame_count, _, _, _ = imgs.shape
    assert frame_count == 2, f"this temporal metric requires 2 images, {imgs.shape} is wrong"
    with TorchThreads(threads):
        luma_now = imgs[:, 0, :, :, 0]  # shape (batch, height, width)
        luma_next = imgs[:, 1, :, :, 0]  # shape (batch, height, width)
        delta = luma_now - luma_next
        # Out-of-place operations only, to keep the autograd graph intact.
        mean_square = torch.mean(delta * delta, dim=(1, 2))  # shape (batch,)
        complexity = torch.sqrt(mean_square)
    return complexity