ayon-core/openpype/plugins/publish/integrate.py
import os
import logging
import sys
import copy
import clique
import six
from bson.objectid import ObjectId
from pymongo import DeleteMany, ReplaceOne, InsertOne, UpdateOne
import pyblish.api
from openpype.client import (
get_representations,
get_subset_by_name,
get_version_by_name,
)
from openpype.lib import source_hash
from openpype.lib.profiles_filtering import filter_profiles
from openpype.lib.file_transaction import FileTransaction
from openpype.pipeline import legacy_io
from openpype.pipeline.publish import KnownPublishError
log = logging.getLogger(__name__)
def assemble(files):
"""Convenience `clique.assemble` wrapper for files of a single collection.
Unlike `clique.assemble` this wrapper does not allow more than a single
Collection nor any remainder files. Errors will be raised when not only
a single collection is assembled.
Returns:
clique.Collection: A single sequence Collection
Raises:
ValueError: Error is raised when files do not result in a single
collected Collection.
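
    Example (illustrative file names):
        >>> collection = assemble(["frame.1001.exr", "frame.1002.exr"])
        >>> sorted(collection.indexes)
        [1001, 1002]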
"""
# todo: move this to lib?
# Get the sequence as a collection. The files must be of a single
# sequence and have no remainder outside of the collections.
patterns = [clique.PATTERNS["frames"]]
collections, remainder = clique.assemble(files,
minimum_items=1,
patterns=patterns)
if not collections:
raise ValueError("No collections found in files: "
"{}".format(files))
if remainder:
raise ValueError("Files found not detected as part"
" of a sequence: {}".format(remainder))
if len(collections) > 1:
raise ValueError("Files in sequence are not part of a"
" single sequence collection: "
"{}".format(collections))
return collections[0]
def get_instance_families(instance):
"""Get all families of the instance"""
# todo: move this to lib?
family = instance.data.get("family")
families = []
if family:
families.append(family)
for _family in (instance.data.get("families") or []):
if _family not in families:
families.append(_family)
return families
def get_frame_padded(frame, padding):
"""Return frame number as string with `padding` amount of padded zeros"""
return "{frame:0{padding}d}".format(padding=padding, frame=frame)
class IntegrateAsset(pyblish.api.InstancePlugin):
"""Register publish in the database and transfer files to destinations.
Steps:
1) Register the subset and version
2) Transfer the representation files to the destination
3) Register the representation
Requires:
instance.data['representations'] - must be a list and each member
must be a dictionary with following data:
'files': list of filenames for sequence, string for single file.
Only the filename is allowed, without the folder path.
'stagingDir': "path/to/folder/with/files"
'name': representation name (usually the same as extension)
'ext': file extension
optional data
"frameStart"
"frameEnd"
'fps'
"data": additional metadata for each representation.
"""
label = "Integrate Asset"
order = pyblish.api.IntegratorOrder
families = ["workfile",
"pointcache",
"camera",
"animation",
"model",
"mayaAscii",
"mayaScene",
"setdress",
"layout",
"ass",
"vdbcache",
"scene",
"vrayproxy",
"vrayscene_layer",
"render",
"prerender",
"imagesequence",
"review",
"rendersetup",
"rig",
"plate",
"look",
"audio",
"yetiRig",
"yeticache",
"nukenodes",
"gizmo",
"source",
"matchmove",
"image",
"assembly",
"fbx",
"textures",
"action",
"harmony.template",
"harmony.palette",
"editorial",
"background",
"camerarig",
"redshiftproxy",
"effect",
"xgen",
"hda",
"usd",
"staticMesh",
"skeletalMesh",
"mvLook",
"mvUsd",
"mvUsdComposition",
"mvUsdOverride",
"simpleUnrealTexture"
]
default_template_name = "publish"
# Representation context keys that should always be written to
# the database even if not used by the destination template
db_representation_context_keys = [
"project", "asset", "task", "subset", "version", "representation",
"family", "hierarchy", "username", "output"
]
skip_host_families = []
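    # Note: each `skip_host_families` item (filled from settings) is expected
    #   to look like {"host": ["maya"], "families": ["render"]} (illustrative
    #   values) - see `_temp_skip_instance_by_settings`.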
def process(self, instance):
if self._temp_skip_instance_by_settings(instance):
return
# Mark instance as processed for legacy integrator
instance.data["processedWithNewIntegrator"] = True
# Instance should be integrated on a farm
if instance.data.get("farm"):
self.log.info(
"Instance is marked to be processed on farm. Skipping")
return
filtered_repres = self.filter_representations(instance)
        # Skip the instance if there are no representations to integrate
        # or all representations are marked to not be integrated
if not filtered_repres:
self.log.warning((
"Skipping, there are no representations"
" to integrate for instance {}"
).format(instance.data["family"]))
return
file_transactions = FileTransaction(log=self.log)
try:
self.register(instance, file_transactions, filtered_repres)
except Exception:
# clean destination
# todo: preferably we'd also rollback *any* changes to the database
file_transactions.rollback()
self.log.critical("Error when registering", exc_info=True)
six.reraise(*sys.exc_info())
# Finalizing can't rollback safely so no use for moving it to
# the try, except.
file_transactions.finalize()
def _temp_skip_instance_by_settings(self, instance):
"""Decide if instance will be processed with new or legacy integrator.
This is temporary solution until we test all usecases with new (this)
integrator plugin.
"""
host_name = instance.context.data["hostName"]
instance_family = instance.data["family"]
instance_families = set(instance.data.get("families") or [])
skip = False
for item in self.skip_host_families:
if host_name not in item["host"]:
continue
families = set(item["families"])
if instance_family in families:
skip = True
break
for family in instance_families:
if family in families:
skip = True
break
if skip:
break
if skip:
self.log.debug("Instance is marked to be skipped by settings.")
return skip
def filter_representations(self, instance):
        # Prepare representations that should be integrated
repres = instance.data.get("representations")
        # Raise an error if the instance doesn't have any representations
if not repres:
raise KnownPublishError(
"Instance {} has no representations to integrate".format(
instance.data["family"]
)
)
# Validate type of stored representations
if not isinstance(repres, (list, tuple)):
raise TypeError(
"Instance 'files' must be a list, got: {0} {1}".format(
str(type(repres)), str(repres)
)
)
# Filter representations
filtered_repres = []
for repre in repres:
if "delete" in repre.get("tags", []):
continue
filtered_repres.append(repre)
return filtered_repres
def register(self, instance, file_transactions, filtered_repres):
project_name = legacy_io.active_project()
instance_stagingdir = instance.data.get("stagingDir")
if not instance_stagingdir:
self.log.info((
"{0} is missing reference to staging directory."
" Will try to get it from representation."
).format(instance))
else:
self.log.debug(
"Establishing staging directory "
"@ {0}".format(instance_stagingdir)
)
template_name = self.get_template_name(instance)
subset, subset_writes = self.prepare_subset(instance, project_name)
version, version_writes = self.prepare_version(
instance, subset, project_name
)
instance.data["versionEntity"] = version
# Get existing representations (if any)
existing_repres_by_name = {
repre_doc["name"].lower(): repre_doc
for repre_doc in get_representations(
project_name,
version_ids=[version["_id"]],
fields=["_id", "name"]
)
}
# Prepare all representations
prepared_representations = []
for repre in filtered_repres:
# todo: reduce/simplify what is returned from this function
prepared = self.prepare_representation(
repre,
template_name,
existing_repres_by_name,
version,
instance_stagingdir,
instance)
for src, dst in prepared["transfers"]:
# todo: add support for hardlink transfers
file_transactions.add(src, dst)
prepared_representations.append(prepared)
# Each instance can also have pre-defined transfers not explicitly
# part of a representation - like texture resources used by a
# .ma representation. Those destination paths are pre-defined, etc.
# todo: should we move or simplify this logic?
resource_destinations = set()
for src, dst in instance.data.get("transfers", []):
file_transactions.add(src, dst, mode=FileTransaction.MODE_COPY)
resource_destinations.add(os.path.abspath(dst))
for src, dst in instance.data.get("hardlinks", []):
file_transactions.add(src, dst, mode=FileTransaction.MODE_HARDLINK)
resource_destinations.add(os.path.abspath(dst))
# Bulk write to the database
# We write the subset and version to the database before the File
# Transaction to reduce the chances of another publish trying to
# publish to the same version number since that chance can greatly
# increase if the file transaction takes a long time.
legacy_io.bulk_write(subset_writes + version_writes)
self.log.info("Subset {subset[name]} and Version {version[name]} "
"written to database..".format(subset=subset,
version=version))
# Process all file transfers of all integrations now
self.log.debug("Integrating source files to destination ...")
file_transactions.process()
self.log.debug(
"Backed up existing files: {}".format(file_transactions.backups))
self.log.debug(
"Transferred files: {}".format(file_transactions.transferred))
self.log.debug("Retrieving Representation Site Sync information ...")
# Get the accessible sites for Site Sync
modules_by_name = instance.context.data["openPypeModules"]
sync_server_module = modules_by_name["sync_server"]
sites = sync_server_module.compute_resource_sync_sites(
project_name=instance.data["projectEntity"]["name"]
)
self.log.debug("Sync Server Sites: {}".format(sites))
# Compute the resource file infos once (files belonging to the
# version instance instead of an individual representation) so
# we can re-use those file infos per representation
anatomy = instance.context.data["anatomy"]
resource_file_infos = self.get_files_info(resource_destinations,
sites=sites,
anatomy=anatomy)
# Finalize the representations now the published files are integrated
# Get 'files' info for representations and its attached resources
representation_writes = []
new_repre_names_low = set()
for prepared in prepared_representations:
representation = prepared["representation"]
transfers = prepared["transfers"]
destinations = [dst for src, dst in transfers]
representation["files"] = self.get_files_info(
destinations, sites=sites, anatomy=anatomy
)
# Add the version resource file infos to each representation
representation["files"] += resource_file_infos
            # Set up representation for writing to the database. Since
            # we *might* be overwriting an existing entry if the version
            # already existed we'll use ReplaceOne with `upsert=True`
representation_writes.append(ReplaceOne(
filter={"_id": representation["_id"]},
replacement=representation,
upsert=True
))
new_repre_names_low.add(representation["name"].lower())
# Delete any existing representations that didn't get any new data
# if the instance is not set to append mode
if not instance.data.get("append", False):
delete_names = set()
for name, existing_repres in existing_repres_by_name.items():
if name not in new_repre_names_low:
                    # We add the exact representation name because `name` is
                    # lowercased for matching only; the database stores the
                    # original (non-lowercased) name.
delete_names.add(existing_repres["name"])
if delete_names:
representation_writes.append(DeleteMany(
filter={
"parent": version["_id"],
"name": {"$in": list(delete_names)}
}
))
# Write representations to the database
legacy_io.bulk_write(representation_writes)
# Backwards compatibility
# todo: can we avoid the need to store this?
instance.data["published_representations"] = {
p["representation"]["_id"]: p for p in prepared_representations
}
self.log.info("Registered {} representations"
"".format(len(prepared_representations)))
def prepare_subset(self, instance, project_name):
asset_doc = instance.data["assetEntity"]
subset_name = instance.data["subset"]
self.log.debug("Subset: {}".format(subset_name))
# Get existing subset if it exists
subset_doc = get_subset_by_name(
project_name, subset_name, asset_doc["_id"]
)
# Define subset data
data = {
"families": get_instance_families(instance)
}
subset_group = instance.data.get("subsetGroup")
if subset_group:
data["subsetGroup"] = subset_group
bulk_writes = []
if subset_doc is None:
# Create a new subset
self.log.info("Subset '%s' not found, creating ..." % subset_name)
subset_doc = {
"_id": ObjectId(),
"schema": "openpype:subset-3.0",
"type": "subset",
"name": subset_name,
"data": data,
"parent": asset_doc["_id"]
}
bulk_writes.append(InsertOne(subset_doc))
else:
# Update existing subset data with new data and set in database.
# We also change the found subset in-place so we don't need to
# re-query the subset afterwards
subset_doc["data"].update(data)
bulk_writes.append(UpdateOne(
{"type": "subset", "_id": subset_doc["_id"]},
{"$set": {
"data": subset_doc["data"]
}}
))
self.log.info("Prepared subset: {}".format(subset_name))
return subset_doc, bulk_writes
def prepare_version(self, instance, subset_doc, project_name):
version_number = instance.data["version"]
version_doc = {
"schema": "openpype:version-3.0",
"type": "version",
"parent": subset_doc["_id"],
"name": version_number,
"data": self.create_version_data(instance)
}
existing_version = get_version_by_name(
project_name,
version_number,
subset_doc["_id"],
fields=["_id"]
)
if existing_version:
self.log.debug("Updating existing version ...")
version_doc["_id"] = existing_version["_id"]
else:
self.log.debug("Creating new version ...")
version_doc["_id"] = ObjectId()
bulk_writes = [ReplaceOne(
filter={"_id": version_doc["_id"]},
replacement=version_doc,
upsert=True
)]
self.log.info("Prepared version: v{0:03d}".format(version_doc["name"]))
return version_doc, bulk_writes
def prepare_representation(self, repre,
template_name,
existing_repres_by_name,
version,
instance_stagingdir,
instance):
# pre-flight validations
if repre["ext"].startswith("."):
raise KnownPublishError((
"Extension must not start with a dot '.': {}"
).format(repre["ext"]))
if repre.get("transfers"):
raise KnownPublishError((
"Representation is not allowed to have transfers"
"data before integration. They are computed in "
"the integrator. Got: {}"
).format(repre["transfers"]))
# create template data for Anatomy
template_data = copy.deepcopy(instance.data["anatomyData"])
# required representation keys
files = repre["files"]
template_data["representation"] = repre["name"]
template_data["ext"] = repre["ext"]
        # optionals
        # retrieve additional anatomy data from the representation if it exists
for key, anatomy_key in {
# Representation Key: Anatomy data key
"resolutionWidth": "resolution_width",
"resolutionHeight": "resolution_height",
"fps": "fps",
"outputName": "output",
"originalBasename": "originalBasename"
}.items():
# Allow to take value from representation
# if not found also consider instance.data
value = repre.get(key)
if value is None:
value = instance.data.get(key)
if value is not None:
template_data[anatomy_key] = value
stagingdir = repre.get("stagingDir")
if not stagingdir:
# Fall back to instance staging dir if not explicitly
# set for representation in the instance
self.log.debug((
"Representation uses instance staging dir: {}"
).format(instance_stagingdir))
stagingdir = instance_stagingdir
if not stagingdir:
raise KnownPublishError(
"No staging directory set for representation: {}".format(repre)
)
self.log.debug("Anatomy template name: {}".format(template_name))
anatomy = instance.context.data["anatomy"]
publish_template_category = anatomy.templates[template_name]
template = os.path.normpath(publish_template_category["path"])
is_udim = bool(repre.get("udim"))
is_sequence_representation = isinstance(files, (list, tuple))
if is_sequence_representation:
# Collection of files (sequence)
if any(os.path.isabs(fname) for fname in files):
raise KnownPublishError("Given file names contain full paths")
src_collection = assemble(files)
destination_indexes = list(src_collection.indexes)
# Use last frame for minimum padding
# - that should cover both 'udim' and 'frame' minimum padding
destination_padding = len(str(destination_indexes[-1]))
if not is_udim:
# Change padding for frames if template has defined higher
# padding.
template_padding = int(
publish_template_category["frame_padding"]
)
if template_padding > destination_padding:
destination_padding = template_padding
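            # Example (illustrative): a last frame of 1050 gives a minimum
            #   padding of 4; a template `frame_padding` of 5 raises it to 5.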
# If the representation has `frameStart` set it renumbers the
# frame indices of the published collection. It will start from
# that `frameStart` index instead. Thus if that frame start
# differs from the collection we want to shift the destination
# frame indices from the source collection.
repre_frame_start = repre.get("frameStart")
if repre_frame_start is not None:
index_frame_start = int(repre["frameStart"])
# Shift destination sequence to the start frame
destination_indexes = [
index_frame_start + idx
for idx in range(len(destination_indexes))
]
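            # Example (illustrative): source frames 1001-1003 with
            #   `frameStart` 1 become destination frames 1-3.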
# To construct the destination template with anatomy we require
# a Frame or UDIM tile set for the template data. We use the first
# index of the destination for that because that could've shifted
# from the source indexes, etc.
first_index_padded = get_frame_padded(frame=destination_indexes[0],
padding=destination_padding)
if is_udim:
# UDIM representations handle ranges in a different manner
template_data["udim"] = first_index_padded
else:
template_data["frame"] = first_index_padded
# Construct destination collection from template
anatomy_filled = anatomy.format(template_data)
template_filled = anatomy_filled[template_name]["path"]
repre_context = template_filled.used_values
            # Make sure context contains frame
            # NOTE: Frame would only be unavailable if the template does not
            #   contain '{frame}' -> Do we want to support that?
if not is_udim:
repre_context["frame"] = first_index_padded
self.log.debug("Template filled: {}".format(str(template_filled)))
dst_collection = assemble([os.path.normpath(template_filled)])
# Update the destination indexes and padding
dst_collection.indexes.clear()
dst_collection.indexes.update(set(destination_indexes))
dst_collection.padding = destination_padding
if len(src_collection.indexes) != len(dst_collection.indexes):
raise KnownPublishError((
"This is a bug. Source sequence frames length"
" does not match integration frames length"
))
# Multiple file transfers
transfers = []
for src_file_name, dst in zip(src_collection, dst_collection):
src = os.path.join(stagingdir, src_file_name)
transfers.append((src, dst))
else:
# Single file
fname = files
if os.path.isabs(fname):
self.log.error(
"Filename in representation is filepath {}".format(fname)
)
raise KnownPublishError(
"This is a bug. Representation file name is full path"
)
# Manage anatomy template data
template_data.pop("frame", None)
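            # `repre["udim"]` is a list of UDIM tiles; only the first tile is
            #   used to fill the template here, the full list is stored in the
            #   representation context below.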
if is_udim:
template_data["udim"] = repre["udim"][0]
# Construct destination filepath from template
anatomy_filled = anatomy.format(template_data)
template_filled = anatomy_filled[template_name]["path"]
repre_context = template_filled.used_values
dst = os.path.normpath(template_filled)
# Single file transfer
src = os.path.join(stagingdir, fname)
transfers = [(src, dst)]
# todo: Are we sure the assumption each representation
# ends up in the same folder is valid?
if not instance.data.get("publishDir"):
instance.data["publishDir"] = (
anatomy_filled
[template_name]
["folder"]
)
for key in self.db_representation_context_keys:
# Also add these values to the context even if not used by the
# destination template
value = template_data.get(key)
if value is not None:
repre_context[key] = value
# Explicitly store the full list even though template data might
# have a different value because it uses just a single udim tile
if repre.get("udim"):
repre_context["udim"] = repre.get("udim") # store list
# Use previous representation's id if there is a name match
existing = existing_repres_by_name.get(repre["name"].lower())
if existing:
repre_id = existing["_id"]
else:
repre_id = ObjectId()
# Store first transferred destination as published path data
# - used primarily for reviews that are integrated to custom modules
# TODO we should probably store all integrated files
# related to the representation?
published_path = transfers[0][1]
repre["published_path"] = published_path
# todo: `repre` is not the actual `representation` entity
# we should simplify/clarify difference between data above
# and the actual representation entity for the database
data = repre.get("data", {})
data.update({"path": published_path, "template": template})
representation = {
"_id": repre_id,
"schema": "openpype:representation-2.0",
"type": "representation",
"parent": version["_id"],
"name": repre["name"],
"data": data,
# Imprint shortcut to context for performance reasons.
"context": repre_context
}
return {
"representation": representation,
"anatomy_data": template_data,
"transfers": transfers,
# todo: avoid the need for 'published_files' used by Integrate Hero
# backwards compatibility
"published_files": [transfer[1] for transfer in transfers]
}
def create_version_data(self, instance):
"""Create the data dictionary for the version
Args:
instance: the current instance being published
Returns:
dict: the required information for version["data"]
"""
context = instance.context
# create relative source path for DB
if "source" in instance.data:
source = instance.data["source"]
else:
source = context.data["currentFile"]
anatomy = instance.context.data["anatomy"]
source = self.get_rootless_path(anatomy, source)
self.log.debug("Source: {}".format(source))
version_data = {
"families": get_instance_families(instance),
"time": context.data["time"],
"author": context.data["user"],
"source": source,
"comment": context.data.get("comment"),
"machine": context.data.get("machine"),
"fps": instance.data.get("fps", context.data.get("fps"))
}
        # todo: preferably we wouldn't need this "if dict" etc. logic and
        # instead be able to rely on what the input value is if it's set.
intent_value = context.data.get("intent")
if intent_value and isinstance(intent_value, dict):
intent_value = intent_value.get("value")
if intent_value:
version_data["intent"] = intent_value
        # Include optional data if present in instance.data
optionals = [
"frameStart", "frameEnd", "step", "handles",
"handleEnd", "handleStart", "sourceHashes"
]
for key in optionals:
if key in instance.data:
version_data[key] = instance.data[key]
# Include instance.data[versionData] directly
version_data_instance = instance.data.get("versionData")
if version_data_instance:
version_data.update(version_data_instance)
return version_data
def get_template_name(self, instance):
"""Return anatomy template name to use for integration"""
# Define publish template name from profiles
filter_criteria = self.get_profile_filter_criteria(instance)
template_name_profiles = self._get_template_name_profiles(instance)
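        # Each profile (from project settings) is expected to contain the
        # filter keys plus a "template_name", e.g. (illustrative):
        #   {"families": ["render"], "hosts": ["maya"], "task_types": [],
        #    "tasks": [], "template_name": "publish"}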
profile = filter_profiles(
template_name_profiles,
filter_criteria,
logger=self.log
)
if profile:
return profile["template_name"]
return self.default_template_name
def _get_template_name_profiles(self, instance):
"""Receive profiles for publish template keys.
Reuse template name profiles from legacy integrator. Goal is to move
the profile settings out of plugin settings but until that happens we
want to be able set it at one place and don't break backwards
compatibility (more then once).
"""
return (
instance.context.data["project_settings"]
["global"]
["publish"]
["IntegrateAssetNew"]
["template_name_profiles"]
)
def get_profile_filter_criteria(self, instance):
"""Return filter criteria for `filter_profiles`"""
# Anatomy data is pre-filled by Collectors
anatomy_data = instance.data["anatomyData"]
# Task can be optional in anatomy data
task = anatomy_data.get("task", {})
# Return filter criteria
return {
"families": anatomy_data["family"],
"tasks": task.get("name"),
"task_types": task.get("type"),
"hosts": instance.context.data["hostName"],
}
def get_rootless_path(self, anatomy, path):
"""Returns, if possible, path without absolute portion from root
(eg. 'c:\' or '/opt/..')
This information is platform dependent and shouldn't be captured.
Example:
'c:/projects/MyProject1/Assets/publish...' >
'{root}/MyProject1/Assets...'
Args:
anatomy: anatomy part from instance
path: path (absolute)
Returns:
path: modified path if possible, or unmodified path
+ warning logged
"""
success, rootless_path = anatomy.find_root_template_from_path(path)
if success:
path = rootless_path
else:
self.log.warning((
"Could not find root path for remapping \"{}\"."
" This may cause issues on farm."
).format(path))
return path
def get_files_info(self, destinations, sites, anatomy):
"""Prepare 'files' info portion for representations.
Arguments:
destinations (list): List of transferred file destinations
sites (list): array of published locations
anatomy: anatomy part from instance
Returns:
output_resources: array of dictionaries to be added to 'files' key
in representation
"""
file_infos = []
for file_path in destinations:
file_info = self.prepare_file_info(file_path, anatomy, sites=sites)
file_infos.append(file_info)
return file_infos
def prepare_file_info(self, path, anatomy, sites):
""" Prepare information for one file (asset or resource)
Arguments:
path: destination url of published file
anatomy: anatomy part from instance
sites: array of published locations,
[ {'name':'studio', 'created_dt':date} by default
keys expected ['studio', 'site1', 'gdrive1']
Returns:
dict: file info dictionary
"""
return {
"_id": ObjectId(),
"path": self.get_rootless_path(anatomy, path),
"size": os.path.getsize(path),
"hash": source_hash(path),
"sites": sites
}