Source code for cutcutcodec.core.classes.frame_audio

"""Defines the structure an audio frame."""

import numbers
import re
import typing
from fractions import Fraction

import numpy as np
import torch

from cutcutcodec.core.classes.frame import Frame
from cutcutcodec.core.classes.layout import Layout


class FrameAudio(Frame):
    """A packet of audio samples carrying its own timing information.

    It behaves like a torch tensor of shape (nb_channels, samples).
    This shape convention is the same as in pyav and torchaudio.
    Sample values are expected to lie between -1 and 1, but this is not verified.

    Attributes
    ----------
    channels : int
        The number of channels (readonly).
        For more information about each channel, see ``self.layout``.
    layout : cutcutcodec.core.classes.layout.Layout
        The meaning of each channel (readonly).
    rate : int
        The sampling frequency in Hz (readonly).
    samples : int
        The number of samples per channel (readonly).
    time : Fraction
        The time of the first sample of the frame, in seconds (readonly).
    """

    def __new__(  # pylint: disable=W0222
        cls,
        time: Fraction | numbers.Real | str,
        rate: numbers.Integral,
        layout: Layout | str | numbers.Integral,
        data: torch.Tensor | np.ndarray | typing.Container,
        **kwargs,
    ):
        """Create the frame and validate it.

        Parameters
        ----------
        time : Fraction
            The time of the first sample of the frame, in seconds.
        rate : int
            The sampling frequency in Hz.
        layout : cutcutcodec.core.classes.layout.Layout or str or numbers.Integral
            The canonical name of the layout,
            see ``cutcutcodec.core.classes.layout.Layout`` for the available layouts.
        data : arraylike
            Forwarded to the ``cutcutcodec.core.classes.frame.Frame`` initialiser.
        **kwargs : dict
            Forwarded to the ``cutcutcodec.core.classes.frame.Frame`` initialiser.
        """
        # time, rate and layout travel in the ``context`` list, in that order
        instance = super().__new__(cls, data, context=[time, rate, layout], **kwargs)
        instance.check_state()
        return instance

    def __repr__(self) -> str:
        """Give a compact, evaluable-looking representation of the audio frame.

        Examples
        --------
        >>> import torch
        >>> from cutcutcodec.core.classes.frame_audio import FrameAudio
        >>>
        >>> FrameAudio("2/4", 48000, "stereo", torch.zeros(2, 1024))
        FrameAudio('1/2', 48000, 'stereo', [[0., 0., 0., ..., 0., 0., 0.],
                                            [0., 0., 0., ..., 0., 0., 0.]])
        >>> _.to(torch.float16)
        FrameAudio('1/2', 48000, 'stereo', [[0., 0., 0., ..., 0., 0., 0.],
                                            [0., 0., 0., ..., 0., 0., 0.]],
                                           dtype=torch.float16)
        >>>
        """
        time = self.time
        # whole-number times are printed bare, any other value is quoted
        shown_time = f"{time}" if int(time) == time else f"'{time}'"
        prefix = f"{self.__class__.__name__}({shown_time}, {self.rate}, {self.layout.name!r}, "
        values = np.array2string(
            self.numpy(force=True), separator=", ", prefix=prefix, suffix=" "
        )
        # collect the ``key=value`` items printed by the plain tensor repr
        extras = re.findall(r"\w+=[a-zA-Z0-9_\-.\"']+", repr(torch.Tensor(self)))
        if not extras:
            return f"{prefix}{values})"
        pad = " " * len(prefix)  # aligns the extra items under the data
        joined = f",\n{pad}".join(extras)
        return f"{prefix}{values},\n{pad}{joined})"

    def check_state(self) -> None:
        """Validate the frame and normalise its context in place.

        The context entries are converted to their canonical types:
        time to ``Fraction``, rate to ``int`` and layout to ``Layout``.

        Raises
        ------
        AssertionError
            If something is wrong in this frame.
        """
        ctx = self.context

        # time attribute
        time = ctx[0]
        assert isinstance(time, (Fraction, numbers.Real, str)), time.__class__.__name__
        ctx[0] = Fraction(time)

        # rate attribute
        rate = ctx[1]
        assert isinstance(rate, numbers.Integral), rate.__class__.__name__
        rate = ctx[1] = int(rate)
        assert rate > 0, rate

        # layout attribute
        layout = ctx[2]
        assert isinstance(layout, (Layout, str, numbers.Integral)), layout.__class__.__name__
        if isinstance(layout, (str, numbers.Integral)):
            layout = ctx[2] = Layout(layout)

        # tensor content
        assert self.ndim == 2, self.shape
        assert self.shape[0] == len(layout), self.shape  # nb_channels
        assert self.dtype.is_floating_point, self.dtype

    @property
    def channels(self) -> int:
        """Give the number of channels, the size of the first dimension.

        Examples
        --------
        >>> import torch
        >>> from cutcutcodec.core.classes.frame_audio import FrameAudio
        >>> FrameAudio(0, 48000, "stereo", torch.empty(2, 1024)).channels
        2
        >>>
        """
        return self.shape[0]

    @property
    def layout(self) -> Layout:
        """Give the meaning of each channel.

        Examples
        --------
        >>> import torch
        >>> from cutcutcodec.core.classes.frame_audio import FrameAudio
        >>> FrameAudio(0, 48000, "stereo", torch.empty(2, 1024)).layout
        Layout('stereo')
        >>>
        """
        return self.context[2]

    @property
    def rate(self) -> int:
        """Give the sampling frequency in Hz.

        Examples
        --------
        >>> import torch
        >>> from cutcutcodec.core.classes.frame_audio import FrameAudio
        >>> FrameAudio(0, 48000, "stereo", torch.empty(2, 1024)).rate
        48000
        >>>
        """
        return self.context[1]

    @property
    def samples(self) -> int:
        """Give the number of samples per channel, the size of the second dimension.

        Examples
        --------
        >>> import torch
        >>> from cutcutcodec.core.classes.frame_audio import FrameAudio
        >>> FrameAudio(0, 48000, "stereo", torch.empty(2, 1024)).samples
        1024
        >>>
        """
        return self.shape[1]

    @property
    def time(self) -> Fraction:
        """Give the time of the first sample of the frame, in seconds.

        Examples
        --------
        >>> import torch
        >>> from cutcutcodec.core.classes.frame_audio import FrameAudio
        >>> FrameAudio(0, 48000, "stereo", torch.empty(2, 1024)).time
        Fraction(0, 1)
        >>>
        """
        return self.context[0]

    @property
    def timestamps(self) -> torch.Tensor:
        """Give the time value of each sample of the frame.

        The vector has the same dtype and lives on the same device as the samples.
        It is a 1d vector of shape (self.samples,).

        Examples
        --------
        >>> import torch
        >>> from cutcutcodec.core.classes.frame_audio import FrameAudio
        >>> FrameAudio(1, 48000, "stereo", torch.empty(2, 1024)).timestamps
        tensor([1.0000, 1.0000, 1.0000,  ..., 1.0213, 1.0213, 1.0213])
        >>>
        """
        # sample index -> seconds since frame start -> absolute time, all in place
        return (
            torch.arange(self.samples, dtype=self.dtype, device=self.device)
            .div_(float(self.rate))
            .add_(float(self.time))
        )