Source code for cutcutcodec.core.analysis.video.quality

"""Video quality metrics."""

import numbers

import torch

from .utils import batched_comparative_frames, batched_single_frames

# Public API of this module: one entry per metric function defined below.
__all__ = ["lpips", "psnr", "ssim", "uvq", "vif", "vmaf"]



[docs]
@batched_comparative_frames
def lpips(dis: torch.Tensor, ref: torch.Tensor, *args, **kwargs) -> torch.Tensor:
    """Compute the Learned Perceptual Image Patch Similarity (LPIPS).

    The computation is delegated to a torch backend relying on the ``lpips`` package
    (``pip install lpips``). Extra positional and keyword arguments are forwarded
    unchanged to that backend.

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images, of shape ([*batch], height, width, channels=3).
        The frames are assumed to be in RGB (r'g'b') in range [0, 1].
        Gamut and EOTF must be standard rgb.
    net : str, default="alex"
        The neural network used, "alex" or "vgg".
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    lpips : arraylike
        The learned perceptual image patch similarity of each image,
        cast back to the dtype of ``ref``.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import lpips
    >>> np.random.seed(0)
    >>> ref = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> dis = 0.8 * ref + 0.2 * np.random.random((720, 1080, 3))
    >>> lpips(dis, ref).round(1)
    np.float64(0.0)
    >>>

    """
    from .lpips_torch import lpips_torch

    # Remember the caller's precision: the inputs are cast to float32 before
    # being handed to the backend, and the score is converted back afterwards.
    out_dtype = ref.dtype
    dis_f32 = dis.to(torch.float32)
    ref_f32 = ref.to(torch.float32)
    score = lpips_torch(ref_f32, dis_f32, *args, **kwargs)
    return score.to(out_dtype)




[docs]
@batched_comparative_frames
def psnr(dis: torch.Tensor, ref: torch.Tensor, *args, **kwargs) -> torch.Tensor:
    """Compute the peak signal to noise ratio of 2 images.

    Extra positional and keyword arguments are forwarded unchanged to the selected backend.

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images, of shape ([*batch], height, width, channels).
        Supported types are float32 and float64.
    weights : iterable[float], optional
        The relative weight of each channel. By default, all channels have the same weight.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    psnr : arraylike
        The global peak signal to noise ratio,
        as a weighted combination of the mean square error of each channel.
        It is batched and clamped in [0, 100] dB.

    Notes
    -----
    * It is optimized for C contiguous tensors.
    * If device is cpu and gradient is not required, a fast C code is used instead of torch code.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import psnr
    >>> np.random.seed(0)
    >>> ref = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> dis = 0.8 * ref + 0.2 * np.random.random((720, 1080, 3))
    >>> psnr(dis, ref).round(1)
    np.float64(21.8)
    >>>

    """
    tracks_grad = ref.requires_grad or dis.requires_grad
    on_cpu = ref.device.type == "cpu" and dis.device.type == "cpu"

    # The torch implementation is the only one used when a gradient is
    # requested or when a tensor does not live on the cpu.
    if tracks_grad or not on_cpu:
        from .psnr_torch import psnr_torch
        return psnr_torch(ref, dis, *args, **kwargs)

    # Otherwise, the C implementation is called once per pair of frames of the batch.
    from .metric import psnr as psnr_c
    frame_pairs = zip(ref.numpy(), dis.numpy())
    scores = [
        psnr_c(ref_frame, dis_frame, *args, **kwargs)
        for ref_frame, dis_frame in frame_pairs
    ]
    return torch.asarray(scores, dtype=ref.dtype)




[docs]
@batched_comparative_frames
def ssim(dis: torch.Tensor, ref: torch.Tensor, *args, stride: int = 1, **kwargs) -> torch.Tensor:
    """Compute the structural similarity index measure of 2 images.

    Extra positional and keyword arguments are forwarded unchanged to the selected backend.

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images, of shape ([*batch], height, width, channels).
        Supported types are float32 and float64.
    data_range : float, default=1.0
        The data range of the input image (difference between maximum and minimum possible values).
    weights : iterable[float], optional
        The relative weight of each channel. By default, all channels have the same weight.
    sigma : float, default=1.5
        The standard deviation of the gaussian. It has to be strictly positive.
    stride : int, default=1
        The stride of the convolving kernel.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    ssim : arraylike
        The channel-weighted structural similarity index measure of each pair of images.

    Notes
    -----
    * It is optimized for C contiguous tensors.
    * If device is cpu, gradient is not required and stride != 1, a fast C code is used.
    * If stride == 1, the fft based torch implementation is always used.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import ssim
    >>> np.random.seed(0)
    >>> ref = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> dis = 0.8 * ref + 0.2 * np.random.random((720, 1080, 3))
    >>> ssim(dis, ref).round(2)
    np.float64(0.95)
    >>>

    """
    assert isinstance(stride, numbers.Integral), stride.__class__.__name__

    # Unit stride: the fft version is used, the stride itself is not forwarded.
    if stride == 1:
        from .ssim_torch import ssim_fft_torch
        return ssim_fft_torch(ref, dis, *args, **kwargs)

    tracks_grad = ref.requires_grad or dis.requires_grad
    on_cpu = ref.device.type == "cpu" and dis.device.type == "cpu"

    # Strided case with a gradient or a non-cpu tensor: torch convolution version.
    if tracks_grad or not on_cpu:
        from .ssim_torch import ssim_conv_torch
        return ssim_conv_torch(ref, dis, *args, stride=stride, **kwargs)

    # Strided case on cpu without gradient: C version, called once per pair of frames.
    from .metric import ssim as ssim_c
    frame_pairs = zip(ref.numpy(), dis.numpy())
    scores = [
        ssim_c(ref_frame, dis_frame, *args, stride=stride, **kwargs)
        for ref_frame, dis_frame in frame_pairs
    ]
    return torch.asarray(scores, dtype=ref.dtype)




[docs]
@batched_single_frames
def uvq(dis: torch.Tensor, *, _model=None) -> torch.Tensor:
    """Compute the Perceptual Video Quality.

    Parameters
    ----------
    dis : arraylike
        The frames to be evaluated, of shape ([*batch], fps=5, height, width, channels=3).
        The framerate is assumed to be 5 Hz.
        The frames are assumed to be in RGB in range [0, 1].
        Gamut and EOTF must be standard rgb.
    _model : object, optional
        Private hook, an already built model exposing a ``forward`` method.
        When it is not provided, a new ``UVQInference`` is instantiated for this call.

    Returns
    -------
    uvq : arraylike
        The perceptual video quality measure for each group of 5 images.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import uvq
    >>> np.random.seed(0)
    >>> dis = np.random.random((5, 720, 1080, 3))  # It could also be a torch array list...
    >>> uvq(dis).round(1)
    np.float32(3.3)
    >>>

    """
    model = _model
    if model is None:
        # Lazy import: the inference module is only loaded when no model is supplied.
        from .uvq_google.inference import UVQInference
        model = UVQInference()
    return model.forward(dis)




[docs]
@batched_comparative_frames
def vif(dis: torch.Tensor, ref: torch.Tensor) -> torch.Tensor:
    """Compute the visual information fidelity of 2 images.

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images,
        of shape ([*batch], height, width, channels=[1, 3]).
        The frames are assumed to be in Y or YUV (y'pbpr) in range [0, 1].
        Only the y' component (first channel) is used.

    Returns
    -------
    vif : arraylike
        The visual information fidelity of each image.

    Notes
    -----
    This metric isn't symmetric, so make sure to place arguments in correct order.

    """
    from .vif_torch import vif_conv_torch

    # Keep the first channel only, the remaining ones are ignored.
    dis_luma = dis[:, :, :, 0]
    ref_luma = ref[:, :, :, 0]
    # NOTE(review): the backend receives (dis, ref) here, whereas the sibling metrics of
    # this module pass (ref, dis); confirm this matches the signature of vif_conv_torch.
    return vif_conv_torch(dis_luma, ref_luma)




[docs]
@batched_comparative_frames
def vmaf(dis: torch.Tensor, ref: torch.Tensor, *, _model=None, **kwargs) -> torch.Tensor:
    """Compute the Video Multi-Method Assessment Fusion of 2 images.

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images, of shape ([*batch], height, width, channels=3).
        The frames are assumed to be in YUV (y'pbpr) in range [0, 1].
        Gamut and EOTF must be standard rgb.
        Only the y' component (first channel) is given to the model.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.
    _model : object, optional
        Private hook, an already built model exposing a ``compute_vmaf_score`` method.
        When it is not provided, a new ``VMAF`` is instantiated for this call.

    Returns
    -------
    vmaf : arraylike
        The video multi-method assessment fusion score of each image.

    Notes
    -----
    This static function does not require the installation of vmaf.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import vmaf
    >>> np.random.seed(0)
    >>> ref = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> ref[..., 1:3] -= 0.5  # because pbpr in [-0.5, 0.5]
    >>> dis = 0.8 * ref + 0.2 * np.random.randn(720, 1080, 3)
    >>> vmaf(dis, ref).round(1)
    np.float32(15.4)
    >>>

    """
    # NOTE(review): the extra keyword arguments (``threads`` for instance) are never read
    # in this body; confirm whether the decorator consumes them.
    model = _model
    if model is None:
        # Lazy import: the vmaf module is only loaded when no model is supplied.
        from .vmaf_torch.vmaf import VMAF
        model = VMAF()

    # thanks batched_comparative_frames, we have shape = (batch, height, width, channels)
    for frames in (dis, ref):
        assert frames.shape[3] == 3

    # Only Y is kept, with a new singleton axis: (batch, 1, height, width).
    dis_luma = dis[:, None, :, :, 0]
    ref_luma = ref[:, None, :, :, 0]

    # Rescale from [0, 1] to [0, 255] before the conversion into float32.
    dis_luma = dis_luma * 255.0
    ref_luma = ref_luma * 255.0
    return model.compute_vmaf_score(ref_luma.to(torch.float32), dis_luma.to(torch.float32))