cutcutcodec.core.analysis.ffprobe

Extract the properties of different streams of a multimedia file.

Functions

get_metadata(filename[, ignore_errors])

Call ffprobe and parse the result as a dictionary.

get_slices_metadata(filename[, slice_type])

Get the packet information for all streams.

get_streams_type(filename[, ignore_errors])

Retrieve in order the stream types present in the file.

parse_duration(duration)

Try to convert duration information into a fraction expressed in seconds.

Details

cutcutcodec.core.analysis.ffprobe.get_metadata(filename: Path | str | bytes, ignore_errors=False) → dict[str] [source]

Call ffprobe and parse the result as a dictionary.

Parameters

filename : pathlike

The pathlike of the file containing streams.

ignore_errors : boolean, default=False

If True, returns an empty dict rather than throwing an exception if invalid data are detected.

Returns

metadata : dict

All the metadata contained in the container and in each stream.

Examples

>>> from pprint import pprint
>>> from cutcutcodec.core.analysis.ffprobe import get_metadata
>>> from cutcutcodec.utils import get_project_root
>>> media = get_project_root().parent / "media" / "video" / "intro.webm"
>>> pprint(get_metadata(media))  
{'format': {'bit_rate': '401541',
            'duration': '9.891000',
            'filename': ...,
            'format_long_name': 'Matroska / WebM',
            'format_name': 'matroska,webm',
            'nb_programs': 0,
            'nb_stream_groups': 0,
            'nb_streams': 4,
            'probe_score': 100,
            'size': '496456',
            'start_time': '0.000000',
            'tags': ...,
 'streams': [{'avg_frame_rate': '30000/1001',
              'chroma_location': 'left',
              'codec_long_name': 'Google VP9',
              'codec_name': 'vp9',
              'codec_tag': '0x0000',
              'codec_tag_string': '[0][0][0][0]',
              'codec_type': 'video',
              'coded_height': 720,
              'coded_width': 1280,
              'color_primaries': 'bt709',
              'color_range': 'tv',
              'color_space': 'bt709',
              'color_transfer': 'bt709',
              'display_aspect_ratio': '16:9',
              'disposition': {'attached_pic': 0,
                              ...
                              'visual_impaired': 0},
              'field_order': 'progressive',
              'has_b_frames': 0,
              'height': 720,
              'index': 0,
              'level': -99,
              'pix_fmt': 'yuv420p',
              'profile': 'Profile 0',
              'r_frame_rate': '30000/1001',
              'refs': 1,
              'sample_aspect_ratio': '1:1',
              'start_pts': 0,
              'start_time': '0.000000',
              'tags': ...,
              'time_base': '1/1000',
              'width': 1280},
             ...
             {'avg_frame_rate': '0/0',
              'bits_per_sample': 0,
              'channel_layout': 'mono',
              'channels': 1,
              'codec_long_name': 'Vorbis',
              'codec_name': 'vorbis',
              'codec_tag': '0x0000',
              'codec_tag_string': '[0][0][0][0]',
              'codec_type': 'audio',
              'disposition': {'attached_pic': 0,
                              ...
                              'visual_impaired': 0},
              'extradata_size': 3340,
              'index': 3,
              'initial_padding': 0,
              'r_frame_rate': '0/0',
              'sample_fmt': 'fltp',
              'sample_rate': '22050',
              'start_pts': 0,
              'start_time': '0.000000',
              'tags': ...,
              'time_base': '1/1000'}]}
>>>
cutcutcodec.core.analysis.ffprobe.get_slices_metadata(filename: Path | str | bytes, slice_type: str = 'frame') → tuple[list[list[str]], list[ndarray]] [source]

Get the packet information for all streams.

Parameters

filename : pathlike

The pathlike of the file containing streams.

slice_type : str

The type of slices to decode, ‘frame’ or ‘packet’. ‘frame’ is slower but more accurate and informative. ‘packet’ is faster but less accurate.

Returns

headers : list[list[str]]

For each stream, the name of the columns.

infos : list[np.ndarray]

For each stream, the 2d str array. Each row corresponds to one packet.

Examples

>>> from pprint import pprint
>>> from cutcutcodec.core.analysis.ffprobe import get_slices_metadata
>>> headers, data = (
...     get_slices_metadata("cutcutcodec/examples/audio_5.1_narration.oga", slice_type="packet")
... )
>>> pprint(headers)  
[['codec_type',
  ...
  'stream_index']]
>>> pprint(data)  
[array([['audio', ..., '0'],
       ['audio', ..., '0'],
       ... dtype='<U15')]
>>> headers, data = get_slices_metadata("cutcutcodec/examples/video.mp4", slice_type="packet")
>>> pprint(headers)
[['codec_type',
  'dts',
  'dts_time',
  'duration',
  'duration_time',
  'flags',
  'pos',
  'pts',
  'pts_time',
  'size',
  'stream_index']]
>>> pprint(data)
[array([['video', '-1024', '-0.080000', ..., '0.000000', '5156', '0'],
       ['video', '-512', '-0.040000', ..., '0.240000', '845', '0'],
       ['video', '0', '0.000000', ..., '0.120000', '372', '0'],
       ...,
       ['video', '202240', '15.800000', ..., '15.960000', '354', '0'],
       ['video', '202752', '15.840000', ..., '15.920000', '247', '0'],
       ['video', '203264', '15.880000', ..., '15.880000', '192', '0']],
      shape=(400, 11), dtype='<U9')]
>>>
cutcutcodec.core.analysis.ffprobe.get_streams_type(filename: Path | str | bytes, ignore_errors=False) → list[str] [source]

Retrieve in order the stream types present in the file.

Parameters

filename : pathlike

The pathlike of the file containing streams.

ignore_errors : boolean, default=False

If True, returns an empty list rather than throwing an exception if no valid stream is detected.

Returns

streams_type : list[str]

Each item can be “audio”, “subtitle” or “video”.

Raises

MissingStreamError

If ignore_errors is False and one of the indexes is missing or redundant.

Examples

>>> from cutcutcodec.core.analysis.ffprobe import get_streams_type
>>> get_streams_type("cutcutcodec/examples/intro.webm")
['video', 'video', 'audio', 'audio']
>>> get_streams_type("cutcutcodec/__main__.py", ignore_errors=True)
[]
>>>
cutcutcodec.core.analysis.ffprobe.parse_duration(duration: Real | str) → None | Fraction [source]

Try to convert duration information into a fraction expressed in seconds.

Parameters

duration : number or str

The duration to convert into a fraction.

Returns

sec_duration : Fraction

The decoded duration in seconds.

Examples

>>> from cutcutcodec.core.analysis.ffprobe import parse_duration
>>> parse_duration(1.5)  # from float
Fraction(3, 2)
>>> parse_duration(2)  # from integer
Fraction(2, 1)
>>> parse_duration(".5")  # from float rep
Fraction(1, 2)
>>> parse_duration("1.")  # from float rep
Fraction(1, 1)
>>> parse_duration("1.5")  # from complete float rep
Fraction(3, 2)
>>> parse_duration("1:01:01")  # from h:m:s
Fraction(3661, 1)
>>>