Merge pull request #1217 from BigRoy/enhancement/transcoding_oiio_tool_for_ffmpeg_one_call

Transcoding: Use single `oiiotool` call for sequences, instead of frames one by one
This commit is contained in:
Jakub Trllo 2025-12-12 12:57:02 +01:00 committed by GitHub
commit 7fa5b39ef6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 141 additions and 57 deletions

View file

@ -1,3 +1,4 @@
from __future__ import annotations
import os
import re
import logging
@ -12,6 +13,8 @@ from typing import Optional
import xml.etree.ElementTree
import clique
from .execute import run_subprocess
from .vendor_bin_utils import (
get_ffmpeg_tool_args,
@ -634,6 +637,37 @@ def should_convert_for_ffmpeg(src_filepath):
return False
def _get_attributes_to_erase(
    input_info: dict, logger: logging.Logger
) -> list[str]:
    """Collect names of metadata attributes that ffmpeg cannot handle.

    FFMPEG does not support some attributes in metadata. A string attribute
    is selected for erasing when its value is longer than
    'MAX_FFMPEG_STRING_LEN' or when it contains any of the characters in
    'NOT_ALLOWED_FFMPEG_CHARS'. Attributes with non-string values are
    never selected.

    Args:
        input_info (dict): Input information with attribute name to value
            mapping stored under the "attribs" key.
        logger (logging.Logger): Logger used to report every selected
            attribute together with the reason for selecting it.

    Returns:
        list[str]: Names of attributes to erase, in the order in which
            they appear in the "attribs" mapping.

    """
    # A missing or empty "attribs" mapping means there is nothing to erase.
    attribs = input_info.get("attribs") or {}

    # Attribute name to reason mapping. Reasons carry no trailing period,
    # the log message below adds the final one.
    erase_attrs: dict[str, str] = {}
    for attr_name, attr_value in attribs.items():
        if not isinstance(attr_value, str):
            continue

        # Value is longer than the length allowed for ffmpeg
        value_len = len(attr_value)
        if value_len > MAX_FFMPEG_STRING_LEN:
            erase_attrs[attr_name] = f"has too long value ({value_len} chars)"
            continue

        # Value contains a prohibited symbol; first match is reported
        bad_char = next(
            (char for char in NOT_ALLOWED_FFMPEG_CHARS if char in attr_value),
            None,
        )
        if bad_char is not None:
            erase_attrs[attr_name] = (
                f"contains unsupported character \"{bad_char}\""
            )

    for attr_name, reason in erase_attrs.items():
        logger.info(
            f"Removed attribute \"{attr_name}\" from metadata"
            f" because {reason}."
        )
    return list(erase_attrs)
def convert_input_paths_for_ffmpeg(
input_paths,
output_dir,
@ -659,7 +693,7 @@ def convert_input_paths_for_ffmpeg(
Raises:
ValueError: If input filepath has extension not supported by function.
Currently is supported only ".exr" extension.
Currently, only ".exr" extension is supported.
"""
if logger is None:
logger = logging.getLogger(__name__)
@ -684,7 +718,22 @@ def convert_input_paths_for_ffmpeg(
# Collect channels to export
input_arg, channels_arg = get_oiio_input_and_channel_args(input_info)
for input_path in input_paths:
# Find which attributes to strip
erase_attributes: list[str] = _get_attributes_to_erase(
input_info, logger=logger
)
# clique.PATTERNS["frames"] supports only `.1001.exr` not `_1001.exr` so
# we use a customized pattern.
pattern = "[_.](?P<index>(?P<padding>0*)\\d+)\\.\\D+\\d?$"
input_collections, input_remainder = clique.assemble(
input_paths,
patterns=[pattern],
assume_padded_when_ambiguous=True,
)
input_items = list(input_collections)
input_items.extend(input_remainder)
for input_item in input_items:
# Prepare subprocess arguments
oiio_cmd = get_oiio_tool_args(
"oiiotool",
@ -695,8 +744,23 @@ def convert_input_paths_for_ffmpeg(
if compression:
oiio_cmd.extend(["--compression", compression])
# Convert a sequence of files using a single oiiotool command
# using its sequence syntax
if isinstance(input_item, clique.Collection):
frames = input_item.format("{head}#{tail}").replace(" ", "")
oiio_cmd.extend([
"--framepadding", input_item.padding,
"--frames", frames,
"--parallel-frames"
])
input_item: str = input_item.format("{head}#{tail}")
elif not isinstance(input_item, str):
raise TypeError(
f"Input is not a string or Collection: {input_item}"
)
oiio_cmd.extend([
input_arg, input_path,
input_arg, input_item,
# Tell oiiotool which channels should be put to top stack
# (and output)
"--ch", channels_arg,
@ -704,38 +768,11 @@ def convert_input_paths_for_ffmpeg(
"--subimage", "0"
])
for attr_name, attr_value in input_info["attribs"].items():
if not isinstance(attr_value, str):
continue
# Remove attributes that have string value longer than allowed
# length for ffmpeg or when containing prohibited symbols
erase_reason = "Missing reason"
erase_attribute = False
if len(attr_value) > MAX_FFMPEG_STRING_LEN:
erase_reason = "has too long value ({} chars).".format(
len(attr_value)
)
erase_attribute = True
if not erase_attribute:
for char in NOT_ALLOWED_FFMPEG_CHARS:
if char in attr_value:
erase_attribute = True
erase_reason = (
"contains unsupported character \"{}\"."
).format(char)
break
if erase_attribute:
# Set attribute to empty string
logger.info((
"Removed attribute \"{}\" from metadata because {}."
).format(attr_name, erase_reason))
oiio_cmd.extend(["--eraseattrib", attr_name])
for attr_name in erase_attributes:
oiio_cmd.extend(["--eraseattrib", attr_name])
# Add last argument - path to output
base_filename = os.path.basename(input_path)
base_filename = os.path.basename(input_item)
output_path = os.path.join(output_dir, base_filename)
oiio_cmd.extend([
"-o", output_path
@ -1136,7 +1173,10 @@ def oiio_color_convert(
target_display=None,
target_view=None,
additional_command_args=None,
logger=None,
frames: Optional[str] = None,
frame_padding: Optional[int] = None,
parallel_frames: bool = False,
logger: Optional[logging.Logger] = None,
):
"""Transcode source file to other with colormanagement.
@ -1148,7 +1188,7 @@ def oiio_color_convert(
input_path (str): Path that should be converted. It is expected that
contains single file or image sequence of same type
(sequence in format 'file.FRAMESTART-FRAMEEND#.ext', see oiio docs,
eg `big.1-3#.tif`)
eg `big.1-3#.tif` or `big.1-3%d.ext` with `frames` argument)
output_path (str): Path to output filename.
(must follow format of 'input_path', eg. single file or
sequence in 'file.FRAMESTART-FRAMEEND#.ext', `output.1-3#.tif`)
@ -1169,6 +1209,13 @@ def oiio_color_convert(
both 'view' and 'display' must be filled (if 'target_colorspace')
additional_command_args (list): arguments for oiiotool (like binary
depth for .dpx)
frames (Optional[str]): Complex frame range to process. This requires
input path and output path to use frame token placeholder like
`#` or `%d`, e.g. file.#.exr
frame_padding (Optional[int]): Frame padding to use for the input and
output when using a sequence filepath.
parallel_frames (bool): If True, process frames in parallel inside
the `oiiotool` process. Only supported in OIIO 2.5.20.0+.
logger (logging.Logger): Logger used for logging.
Raises:
@ -1178,7 +1225,16 @@ def oiio_color_convert(
if logger is None:
logger = logging.getLogger(__name__)
input_info = get_oiio_info_for_input(input_path, logger=logger)
# Get oiioinfo only from first image, otherwise file can't be found
first_input_path = input_path
if frames:
frames: str
first_frame = int(re.split("[ x-]", frames, 1)[0])
first_frame = str(first_frame).zfill(frame_padding or 0)
for token in ["#", "%d"]:
first_input_path = first_input_path.replace(token, first_frame)
input_info = get_oiio_info_for_input(first_input_path, logger=logger)
# Collect channels to export
input_arg, channels_arg = get_oiio_input_and_channel_args(input_info)
@ -1191,6 +1247,22 @@ def oiio_color_convert(
"--colorconfig", config_path
)
if frames:
# If `frames` is specified, then process the input and output
# as if it's a sequence of frames (must contain `%04d` as frame
# token placeholder in filepaths)
oiio_cmd.extend([
"--frames", frames,
])
if frame_padding:
oiio_cmd.extend([
"--framepadding", str(frame_padding),
])
if parallel_frames:
oiio_cmd.append("--parallel-frames")
oiio_cmd.extend([
input_arg, input_path,
# Tell oiiotool which channels should be put to top stack

View file

@ -172,20 +172,33 @@ class ExtractOIIOTranscode(publish.Extractor):
additional_command_args = (output_def["oiiotool_args"]
["additional_command_args"])
sequence_files = self._translate_to_sequence(files_to_convert)
sequence_files = self._translate_to_sequence(
files_to_convert)
self.log.debug("Files to convert: {}".format(sequence_files))
missing_rgba_review_channels = False
for file_name in sequence_files:
if isinstance(file_name, clique.Collection):
# Convert to filepath that can be directly converted
# by oiio like `frame.1001-1025%04d.exr`
file_name: str = file_name.format(
"{head}{range}{padding}{tail}"
# Support sequences with holes by supplying
# dedicated `--frames` argument to `oiiotool`
# Create `frames` string like "1001-1002,1004,1010-1012
# Create `filename` string like "file.#.exr"
frames = file_name.format("{ranges}").replace(" ", "")
frame_padding = file_name.padding
file_name = file_name.format("{head}#{tail}")
parallel_frames = True
elif isinstance(file_name, str):
# Single file
frames = None
frame_padding = None
parallel_frames = False
else:
raise TypeError(
f"Unsupported file name type: {type(file_name)}."
" Expected str or clique.Collection."
)
self.log.debug("Transcoding file: `{}`".format(file_name))
input_path = os.path.join(original_staging_dir,
file_name)
input_path = os.path.join(original_staging_dir, file_name)
output_path = self._get_output_file_path(input_path,
new_staging_dir,
output_extension)
@ -201,6 +214,9 @@ class ExtractOIIOTranscode(publish.Extractor):
source_display=source_display,
source_view=source_view,
additional_command_args=additional_command_args,
frames=frames,
frame_padding=frame_padding,
parallel_frames=parallel_frames,
logger=self.log
)
except MissingRGBAChannelsError as exc:
@ -294,16 +310,18 @@ class ExtractOIIOTranscode(publish.Extractor):
new_repre["files"] = renamed_files
def _translate_to_sequence(self, files_to_convert):
"""Returns original list or a clique.Collection of a sequence.
"""Returns original individual filepaths or list of clique.Collection.
Uses clique to find frame sequence Collection.
If sequence not found, it returns original list.
Uses clique to find frame sequence, and return the collections instead.
If sequence not detected in input filenames, it returns original list.
Args:
files_to_convert (list): list of file names
files_to_convert (list[str]): list of file names
Returns:
list[str | clique.Collection]: List of filepaths or a list
of Collections (usually one, unless there are holes)
list[str | clique.Collection]: List of
filepaths ['fileA.exr', 'fileB.exr']
or clique.Collection for a sequence.
"""
pattern = [clique.PATTERNS["frames"]]
collections, _ = clique.assemble(
@ -314,14 +332,7 @@ class ExtractOIIOTranscode(publish.Extractor):
raise ValueError(
"Too many collections {}".format(collections))
collection = collections[0]
# TODO: Technically oiiotool supports holes in the sequence as well
# using the dedicated --frames argument to specify the frames.
# We may want to use that too so conversions of sequences with
# holes will perform faster as well.
# Separate the collection so that we have no holes/gaps per
# collection.
return collection.separate()
return collections
return files_to_convert

View file

@ -12,6 +12,7 @@ ayon_server_version = ">=1.8.4,<2.0.0"
ayon_launcher_version = ">=1.0.2"
ayon_required_addons = {}
ayon_compatible_addons = {
"ayon_third_party": ">=1.3.0",
"ayon_ocio": ">=1.2.1",
"applications": ">=1.1.2",
"harmony": ">0.4.0",