Merge pull request #560 from pypeclub/hotfix/anatomy_instance_data_time_enhancement

Anatomy instance data collection is substantially faster for many instances
This commit is contained in:
Milan Kolar 2020-10-01 18:44:28 +02:00 committed by GitHub
commit 9f539c007d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -23,123 +23,256 @@ Provides:
import copy
import json
import collections
from avalon import io
import pyblish.api
class CollectAnatomyInstanceData(pyblish.api.InstancePlugin):
"""Collect Instance specific Anatomy data."""
class CollectAnatomyInstanceData(pyblish.api.ContextPlugin):
"""Collect Instance specific Anatomy data.
Plugin is running for all instances on context even not active instances.
"""
order = pyblish.api.CollectorOrder + 0.49
label = "Collect Anatomy Instance data"
def process(self, context):
    """Collect anatomy data for every instance of ``context``.

    Three passes are run over the instances, in this order, because each
    pass reads what the previous one stored in ``instance.data``:

    1. ``fill_missing_asset_docs`` stores ``"assetEntity"``.
    2. ``fill_latest_versions`` stores ``"latestVersion"``.
    3. ``fill_anatomy_data`` stores ``"anatomyData"`` and ``"version"``.

    Args:
        context (pyblish.api.Context): Context holding the instances.
    """
    self.log.info("Collecting anatomy data for all instances.")

    self.fill_missing_asset_docs(context)
    self.fill_latest_versions(context)
    self.fill_anatomy_data(context)

    self.log.info("Anatomy Data collection finished.")
def fill_missing_asset_docs(self, context):
    """Make sure each instance has a matching ``"assetEntity"`` if possible.

    An instance is left untouched when it already carries an asset
    document whose name equals ``instance.data["asset"]``. When the
    context asset has that name, the context document is reused. All
    remaining asset names are fetched with a single ``io.find`` query
    instead of one query per instance.

    Instances whose asset document was not found keep whatever
    ``"assetEntity"`` value they had; their asset names are reported in a
    warning.

    Args:
        context (pyblish.api.Context): Context holding the instances.

    Returns:
        None
    """
    self.log.debug("Querying asset documents for instances.")

    context_asset_doc = context.data["assetEntity"]

    instances_with_missing_asset_doc = collections.defaultdict(list)
    for instance in context:
        instance_asset_doc = instance.data.get("assetEntity")
        _asset_name = instance.data["asset"]

        # There is possibility that assetEntity on instance is already set
        # which can happen in standalone publisher
        if (
            instance_asset_doc
            and instance_asset_doc["name"] == _asset_name
        ):
            continue

        # Check if asset name is the same as what is in context
        # - they may be different, e.g. in NukeStudio
        if context_asset_doc["name"] == _asset_name:
            instance.data["assetEntity"] = context_asset_doc
            # Resolved from context, must not be queried again.
            continue

        instances_with_missing_asset_doc[_asset_name].append(instance)

    if not instances_with_missing_asset_doc:
        self.log.debug("All instances already had right asset document.")
        return

    asset_names = list(instances_with_missing_asset_doc)
    self.log.debug("Querying asset documents with names: {}".format(
        ", ".join(["\"{}\"".format(name) for name in asset_names])
    ))
    asset_docs = io.find({
        "type": "asset",
        "name": {"$in": asset_names}
    })
    asset_docs_by_name = {
        asset_doc["name"]: asset_doc
        for asset_doc in asset_docs
    }

    not_found_asset_names = []
    for asset_name, instances in instances_with_missing_asset_doc.items():
        asset_doc = asset_docs_by_name.get(asset_name)
        if not asset_doc:
            not_found_asset_names.append(asset_name)
            continue

        for _instance in instances:
            _instance.data["assetEntity"] = asset_doc

    if not_found_asset_names:
        # Each name is quoted on its own, the template must not add
        # another pair of quotes around the joined result.
        joined_asset_names = ", ".join(
            ["\"{}\"".format(name) for name in not_found_asset_names]
        )
        self.log.warning(
            "Not found asset documents with names {}.".format(
                joined_asset_names
            )
        )
def fill_latest_versions(self, context):
    """Try to find latest version for each instance's subset.

    Key "latestVersion" is always set on each instance, either to the
    latest version found or to the value it already had (`None` when the
    key was missing).

    Instances without "assetEntity" are skipped for the lookup. Subsets
    and their last versions are fetched with one query each for all
    instances together.

    Args:
        context (pyblish.Context): Context holding the instances.

    Returns:
        None

    """
    self.log.debug("Querying latest versions for instances.")

    # Instances grouped by (asset id, subset name) for fast filling.
    instances_by_key = collections.defaultdict(list)
    subset_names = set()
    asset_ids = set()
    for instance in context:
        # Make sure `"latestVersion"` key is set
        latest_version = instance.data.get("latestVersion")
        instance.data["latestVersion"] = latest_version

        # Skip instances without "assetEntity"
        asset_doc = instance.data.get("assetEntity")
        if not asset_doc:
            continue

        # Store asset ids and subset names for queries
        asset_id = asset_doc["_id"]
        subset_name = instance.data["subset"]
        asset_ids.add(asset_id)
        subset_names.add(subset_name)
        instances_by_key[(asset_id, subset_name)].append(instance)

    # Nothing to look up, avoid two pointless database queries.
    if not asset_ids:
        return

    subset_docs = list(io.find({
        "type": "subset",
        "parent": {"$in": list(asset_ids)},
        "name": {"$in": list(subset_names)}
    }))

    subset_ids = [
        subset_doc["_id"]
        for subset_doc in subset_docs
    ]

    last_version_by_subset_id = self._query_last_versions(subset_ids)
    for subset_doc in subset_docs:
        last_version = last_version_by_subset_id.get(subset_doc["_id"])
        if last_version is None:
            continue

        # The query matches any asset id combined with any subset name,
        # so it may return an asset/subset pair no instance asked for.
        key = (subset_doc["parent"], subset_doc["name"])
        for _instance in instances_by_key.get(key, ()):
            _instance.data["latestVersion"] = last_version
def _query_last_versions(self, subset_ids):
    """Retrieve all latest versions for entered subset_ids.

    Args:
        subset_ids (list): Subset ids with type `ObjectId`. Any iterable
            is accepted.

    Returns:
        dict: Key is subset id and value is last version name. Subsets
            without any version are not included.
    """
    subset_ids = list(subset_ids)
    # Without ids nothing can match, skip the database round trip.
    if not subset_ids:
        return {}

    _pipeline = [
        # Find all versions of those subsets
        {"$match": {
            "type": "version",
            "parent": {"$in": subset_ids}
        }},
        # Sorting versions all together
        {"$sort": {"name": 1}},
        # Group them by "parent", but only take the last
        # (highest "name" because of the ascending sort above)
        {"$group": {
            "_id": "$parent",
            "name": {"$last": "$name"}
        }}
    ]

    return {
        doc["_id"]: doc["name"]
        for doc in io.aggregate(_pipeline)
    }
def fill_anatomy_data(self, context):
    """Store instance specific anatomy data on each instance.

    For each instance a deep copy of the context "anatomyData" is
    updated with the instance's asset, family, subset and version, plus
    hierarchy, task, resolution, pixel aspect and fps when available.
    The result is stored to ``instance.data["anatomyData"]`` together
    with ``"projectEntity"`` and the resolved ``"version"``.

    Args:
        context (pyblish.Context): Context holding the instances.

    Returns:
        None
    """
    self.log.debug("Storing anatomy data to instance data.")

    project_entity = context.data["projectEntity"]
    ctx_asset = context.data["assetEntity"]

    def _two_decimals(value):
        # Round to two decimal places through string formatting.
        return float("{:0.2f}".format(float(value)))

    for instance in context:
        data = instance.data

        version = data.get("version")
        if version is None:
            # No explicit version on instance: start at 1 or continue
            # right after the latest existing version.
            # TODO we should be able to change default version by studio
            # preferences (like start with version number `0`)
            latest = data["latestVersion"]
            version = 1
            if latest is not None:
                version = version + int(latest)

        updates = {
            "asset": data["asset"],
            "family": data["family"],
            "subset": data["subset"],
            "version": version
        }

        # Hierarchy is overridden only for assets other than context one
        instance_asset = data.get("assetEntity")
        if instance_asset and instance_asset["_id"] != ctx_asset["_id"]:
            parent_names = instance_asset["data"].get("parents") or list()
            updates["hierarchy"] = "/".join(parent_names)

        task = data.get("task")
        if task:
            updates["task"] = task

        # Optional values copied as they are
        for source_key, target_key in (
            ("resolutionWidth", "resolution_width"),
            ("resolutionHeight", "resolution_height"),
        ):
            value = data.get(source_key)
            if value:
                updates[target_key] = value

        # Optional values stored as floats with two decimal places
        for source_key, target_key in (
            ("pixelAspect", "pixel_aspect"),
            ("fps", "fps"),
        ):
            value = data.get(source_key)
            if value:
                updates[target_key] = _two_decimals(value)

        anatomy = copy.deepcopy(context.data["anatomyData"])
        anatomy.update(updates)

        data["projectEntity"] = project_entity
        data["anatomyData"] = anatomy
        data["version"] = version

        # Log what was collected, label is appended when available
        display_name = data["name"]
        label = data.get("label")
        if label:
            display_name += "({})".format(label)

        self.log.debug("Anatomy data for instance {}: {}".format(
            display_name,
            json.dumps(anatomy, indent=4)
        ))