Source code for cutcutcodec.core.analysis.video.quality.uvq_google.contentnet

"""A modified version of the Google UVQ source file.

As the original file is under the Apache license,
I should mention that this is a modified version of the source file:

https://github.com/google/uvq/blob/main/uvq_pytorch/utils/contentnet.py
"""

import numpy as np
import torch

from cutcutcodec.core.nn.start import load

from . import custom_nn_layers

# Output feature size: (height, width, channels) of the per-frame feature map
# that ``ContentNetInference.get_features_for_all_frames`` allocates and fills.
DIM_HEIGHT_FEATURE = 16
DIM_WIDTH_FEATURE = 16
DIM_CHANNEL_FEATURE = 100

# ContentNet specs: number of outputs of the final ``Linear`` layer
# of the ``ContentNet`` classifier head.
DIM_LABEL_CONTENT = 3862


class ContentNet(torch.nn.Module):
    """Model to find the features.

    The network is a stack of ``MBConvSamePadding`` blocks followed by a
    bilinear resize to 16x16 and a final convolution with 100 output channels.
    A pooling layer and a classifier head are also registered so that the
    parameter names match the weights given to ``load``, but they are not
    applied by :meth:`forward`.

    Parameters
    ----------
    dropout : float, default=0.2
        Probability given to the ``Dropout`` layer of the classifier head.
    **kwargs
        Only the ``"weights"`` entry is read; it is forwarded to ``load``
        (``None`` when absent).
    """

    def __init__(self, dropout: float = 0.2, **kwargs):
        super().__init__()

        stochastic_depth_prob_step = 0.0125

        # Positional arguments of each MBConvSamePadding block, in network
        # order. Presumably (in_channels, expand_ratio, out_channels,
        # kernel_size, stride) -- TODO confirm against custom_nn_layers.
        # The order must not change: the position of a block in the
        # Sequential is part of its parameter names.
        mbconv_specs = (
            (32, 1, 16, 3, 1),
            (16, 6, 24, 3, 2),
            (24, 6, 24, 3, 1),
            (24, 6, 40, 5, 2),
            (40, 6, 40, 5, 1),
            (40, 6, 80, 3, 2),
            (80, 6, 80, 3, 1),
            (80, 6, 80, 3, 1),
            (80, 6, 112, 5, 1),
            (112, 6, 112, 5, 1),
            (112, 6, 112, 5, 1),
            (112, 6, 192, 5, 2),
            (192, 6, 192, 5, 1),
            (192, 6, 192, 5, 1),
            (192, 6, 192, 5, 1),
            (192, 6, 320, 3, 1),
        )

        self.features = torch.nn.Sequential(
            custom_nn_layers.Conv2dNormActivationSamePadding(
                3,
                32,
                kernel_size=3,
                stride=2,
                activation_layer=torch.nn.SiLU,
            ),
            # The last argument grows linearly with the block index,
            # from 0 for the first block to 15 * step for the last one.
            *[
                custom_nn_layers.MBConvSamePadding(
                    *spec, index * stochastic_depth_prob_step
                )
                for index, spec in enumerate(mbconv_specs)
            ],
            custom_nn_layers.Interpolate(size=(16, 16), mode="bilinear", align_corners=False),
            custom_nn_layers.Conv2dSamePadding(320, 100, kernel_size=16, stride=1),
        )

        # Not used by ``forward``; kept so that the module still exposes
        # the same submodules and parameter names.
        self.avgpool = torch.nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = torch.nn.Sequential(
            torch.nn.Dropout(dropout),
            torch.nn.Flatten(),
            torch.nn.Linear(100, DIM_LABEL_CONTENT),
            torch.nn.Sigmoid(),
        )

        load(self, kwargs.get("weights"))  # 85c8865f2c0a2a2b2eb942fa5d2be795

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Eval the model.

        Parameters
        ----------
        x : torch.Tensor
            The input batch, given as is to the ``features`` stack.

        Returns
        -------
        features : torch.Tensor
            The output of the ``features`` stack. The pooling layer and the
            classifier head are not applied. The shape is expected to be
            (batch, 100, 16, 16), assuming the final same-padding convolution
            preserves the 16x16 size -- TODO confirm.
        """
        # The pooled tensor was computed and then discarded before;
        # only the feature map is returned, so the pooling is skipped.
        return self.features(x)
class ContentNetInference:
    """Find the features in the image.

    Thin numpy wrapper around a ``ContentNet`` model.

    Parameters
    ----------
    eval_mode : bool, default=True
        If true, the model is switched to evaluation mode with ``eval()``.
    **kwargs
        Forwarded unchanged to the ``ContentNet`` constructor.
    """

    def __init__(
        self,
        eval_mode=True,
        **kwargs,
    ):
        self.model = ContentNet(**kwargs)
        if eval_mode:
            self.model.eval()
        # Axis permutation applied to the model output before returning it:
        # axis 1 is moved to the last position.
        self.features_transpose = (0, 2, 3, 1)

    def predict_and_get_features(self, frame) -> np.ndarray:
        """Eval the model and get the features.

        Parameters
        ----------
        frame : array-like
            A single frame. A leading axis of size 1 is added before the
            model is called. Presumably laid out as (channels, height, width)
            -- TODO confirm against the caller.

        Returns
        -------
        features : np.ndarray
            The model output, with axes permuted by ``self.features_transpose``.
        """
        batch = torch.Tensor(np.expand_dims(frame, 0))
        with torch.no_grad():
            features = self.model(batch)
        return features.detach().numpy().transpose(*self.features_transpose)

    def get_features_for_all_frames(
        self,
        video: np.ndarray,
    ) -> np.ndarray:
        """Eval the model on each frame of the video.

        Parameters
        ----------
        video : np.ndarray
            A 5 dimensional array, iterated along the first axis. Only the
            index 0 of the second axis is used. Presumably laid out as
            (frames, 1, channels, height, width) -- TODO confirm.

        Returns
        -------
        features : np.ndarray
            The float32 array of shape (frames, DIM_HEIGHT_FEATURE,
            DIM_WIDTH_FEATURE, DIM_CHANNEL_FEATURE) with the features
            of each frame.
        """
        features = np.empty(
            (
                video.shape[0],
                DIM_HEIGHT_FEATURE,
                DIM_WIDTH_FEATURE,
                DIM_CHANNEL_FEATURE,
            ),
            dtype=np.float32,
        )
        for index, clip in enumerate(video):
            # The result keeps its leading axis of size 1,
            # numpy drops it during the assignment.
            features[index, :, :, :] = self.predict_and_get_features(clip[0, :, :, :])
        return features