Source code for cutcutcodec.core.analysis.video.quality

"""Video quality metrics."""

import numbers

import torch

from .utils import batched_comparative_frames, batched_single_frames

__all__ = ["lpips", "psnr", "ssim", "uvq", "vif", "vmaf"]


@batched_comparative_frames
def lpips(dis: torch.Tensor, ref: torch.Tensor, *args, **kwargs) -> torch.Tensor:
    """Compute the Learned Perceptual Image Patch Similarity.

    The computation is delegated to a torch backend
    built on the ``lpips`` module (``pip install lpips``).

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images,
        of shape ([*batch], height, width, channels=3).
        The frames are assumed to be in RGB (r'g'b') in range [0, 1].
        Gamut and EOTF must be standard rgb.
    net : str, default="alex"
        The neuronal network used, "alex" or "vgg".
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    lpips : arraylike
        The learned perceptual image patch similarity of each image,
        given in the dtype of ``ref``.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import lpips
    >>> np.random.seed(0)
    >>> ref = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> dis = 0.8 * ref + 0.2 * np.random.random((720, 1080, 3))
    >>> lpips(dis, ref).round(1)
    np.float64(0.0)
    >>>
    """
    from .lpips_torch import lpips_torch

    # the backend is fed with float32, the result is given back in the caller's dtype
    out_dtype = ref.dtype
    dis_f32 = dis.to(torch.float32)
    ref_f32 = ref.to(torch.float32)
    score = lpips_torch(ref_f32, dis_f32, *args, **kwargs)
    return score.to(out_dtype)
@batched_comparative_frames
def psnr(dis: torch.Tensor, ref: torch.Tensor, *args, **kwargs) -> torch.Tensor:
    """Compute the peak signal to noise ratio of 2 images.

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images,
        of shape ([*batch], height, width, channels).
        Supported types are float32 and float64.
    weights : iterable[float], optional
        The relative weight of each channel. By default, all channels have the same weight.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    psnr : arraylike
        The global peak signal to noise ratio,
        as a ponderation of the mean square error of each channel.
        It is batched and clamped in [0, 100] db.

    Notes
    -----
    * It is optimized for C contiguous tensors.
    * If device is cpu and gradient is not required, a fast C code is used instead of torch code.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import psnr
    >>> np.random.seed(0)
    >>> ref = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> dis = 0.8 * ref + 0.2 * np.random.random((720, 1080, 3))
    >>> psnr(dis, ref).round(1)
    np.float64(21.8)
    >>>
    """
    # the C implementation only handles cpu tensors detached from autograd
    torch_required = (
        ref.requires_grad
        or dis.requires_grad
        or ref.device.type != "cpu"
        or dis.device.type != "cpu"
    )
    if torch_required:
        from .psnr_torch import psnr_torch

        return psnr_torch(ref, dis, *args, **kwargs)

    from .metric import psnr as psnr_c

    # the C function handles one frame at a time, so iterate over the batch dimension
    ref_frames, dis_frames = ref.numpy(), dis.numpy()
    scores = []
    for ref_frame, dis_frame in zip(ref_frames, dis_frames):
        scores.append(psnr_c(ref_frame, dis_frame, *args, **kwargs))
    return torch.asarray(scores, dtype=ref.dtype)
@batched_comparative_frames
def ssim(
    dis: torch.Tensor, ref: torch.Tensor, *args, stride: int = 1, **kwargs
) -> torch.Tensor:
    """Compute the structural similarity index measure of 2 images.

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images,
        of shape ([*batch], height, width, channels).
        Supported types are float32 and float64.
    data_range : float, default=1.0
        The data range of the input image (difference between maximum and minimum possible values).
    weights : iterable[float], optional
        The relative weight of each channel. By default, all channels have the same weight.
    sigma : float, default=1.5
        The standard deviation of the gaussian. It has to be strictly positive.
    stride : int, default=1
        The stride of the convolving kernel.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    ssim : arraylike
        The ponderated structural similarity index measure of each layers.

    Notes
    -----
    * It is optimized for C contiguous tensors.
    * If device is cpu, gradient is not required and stride != 1, a fast C code is used.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import ssim
    >>> np.random.seed(0)
    >>> ref = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> dis = 0.8 * ref + 0.2 * np.random.random((720, 1080, 3))
    >>> ssim(dis, ref).round(2)
    np.float64(0.95)
    >>>
    """
    assert isinstance(stride, numbers.Integral), stride.__class__.__name__

    # a unit stride is always handled by the fft based torch implementation
    if stride == 1:
        from .ssim_torch import ssim_fft_torch

        return ssim_fft_torch(ref, dis, *args, **kwargs)

    # the C implementation only handles cpu tensors detached from autograd
    torch_required = (
        ref.requires_grad
        or dis.requires_grad
        or ref.device.type != "cpu"
        or dis.device.type != "cpu"
    )
    if torch_required:
        from .ssim_torch import ssim_conv_torch

        return ssim_conv_torch(ref, dis, *args, stride=stride, **kwargs)

    from .metric import ssim as ssim_c

    # the C function handles one frame at a time, so iterate over the batch dimension
    ref_frames, dis_frames = ref.numpy(), dis.numpy()
    scores = []
    for ref_frame, dis_frame in zip(ref_frames, dis_frames):
        scores.append(ssim_c(ref_frame, dis_frame, *args, stride=stride, **kwargs))
    return torch.asarray(scores, dtype=ref.dtype)
@batched_single_frames
def uvq(dis: torch.Tensor, *, _model=None) -> torch.Tensor:
    """Compute the Perceptual Video Quality.

    Parameters
    ----------
    dis : arraylike
        The frames to be evaluated, of shape ([*batch], fps=5, height, width, channels=3).
        The framerate is assumed to be 5 Hz.
        The frames are assumed to be in RGB in range [0, 1].
        Gamut and EOTF must be standard rgb.

    Returns
    -------
    uvq : arraylike
        The perceptual video quality measure for each group of 5 images.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import uvq
    >>> np.random.seed(0)
    >>> dis = np.random.random((5, 720, 1080, 3))  # It could also be a torch array list...
    >>> uvq(dis).round(1)
    np.float32(3.3)
    >>>
    """
    model = _model
    if model is None:
        # the inference model is only imported and built when the caller provides none
        from .uvq_google.inference import UVQInference

        model = UVQInference()
    return model.forward(dis)
@batched_comparative_frames
def vif(dis: torch.Tensor, ref: torch.Tensor) -> torch.Tensor:
    """Compute the visual information fidelity of 2 images.

    Parameters
    ----------
    dis, ref : arraylike
        The distorted and the reference images,
        of shape ([*batch], height, width, channels=[1, 3]).
        The frames are assumed to be in Y or YUV (y'pbpr) in range [0, 1].
        Only the y' component is used.

    Returns
    -------
    vif : arraylike
        The visual information fidelity of each image.

    Notes
    -----
    This metric isn't symmetric, so make sure to place arguments in correct order.
    """
    from .vif_torch import vif_conv_torch

    # keep the first channel only (y'), the other channels are ignored
    dis_luma = dis[:, :, :, 0]
    ref_luma = ref[:, :, :, 0]
    return vif_conv_torch(dis_luma, ref_luma)
@batched_comparative_frames
def vmaf(dis: torch.Tensor, ref: torch.Tensor, *, _model=None, **kwargs) -> torch.Tensor:
    """Compute the Video Multi-Method Assessment Fusion of 2 images.

    Parameters
    ----------
    dis, ref : arraylike
        The 2 images to be compared, of shape ([*batch], height, width, channels=3).
        The frames are assumed to be in YUV (y'pbpr) in range [0, 1].
        Gamut and EOTF must be standard rgb.
    threads : int, optional
        Defines the number of threads.
        The value -1 means that the function uses as many calculation threads as there are cores.
        The default value (0) allows the same behavior as (-1) if the function
        is called in the main thread, otherwise (1) to avoid nested threads.
        Any other positive value corresponds to the number of threads used.

    Returns
    -------
    vmaf : arraylike
        The video multi-method assessment fusion score of each image.

    Notes
    -----
    This static function does not require the installation of vmaf.

    Examples
    --------
    >>> import numpy as np
    >>> from cutcutcodec.core.analysis.video.quality import vmaf
    >>> np.random.seed(0)
    >>> ref = np.random.random((720, 1080, 3))  # It could also be a torch array list...
    >>> ref[..., 1:3] -= 0.5  # because pbpr in [-0.5, 0.5]
    >>> dis = 0.8 * ref + 0.2 * np.random.randn(720, 1080, 3)
    >>> vmaf(dis, ref).round(1)
    np.float32(15.4)
    >>>
    """
    if _model is None:
        # the model is only imported and built when the caller provides none
        from .vmaf_torch.vmaf import VMAF

        _model = VMAF()

    # thanks batched_comparative_frames, we have shape = (batch, height, width, channels)
    assert dis.shape[3] == 3
    assert ref.shape[3] == 3
    dis = dis[:, None, :, :, 0]  # only Y
    ref = ref[:, None, :, :, 0]  # (batch, 1, height, width)

    # rescale from [0, 1] to [0, 255] before giving the planes to the model
    dis = dis * 255.0
    ref = ref * 255.0
    return _model.compute_vmaf_score(ref.to(torch.float32), dis.to(torch.float32))