enhance speed of collect audio by converting it to context plugin

This commit is contained in:
Jakub Trllo 2022-11-18 15:59:58 +01:00
parent a375af68a8
commit 64a1e55170

View file

@ -1,21 +1,27 @@
import collections
import pyblish.api
from openpype.client import (
get_last_version_by_subset_name,
get_assets,
get_subsets,
get_last_versions,
get_representations,
)
from openpype.pipeline import (
legacy_io,
get_representation_path,
)
from openpype.pipeline import get_representation_path_with_anatomy
class CollectAudio(pyblish.api.InstancePlugin):
class CollectAudio(pyblish.api.ContextPlugin):
"""Collect asset's last published audio.
The audio subset name searched for is defined in:
project settings > Collect Audio
Note:
The plugin was instance plugin but because of so much queries the
plugin was slowing down whole collection phase a lot thus was
converted to context plugin which requires only 4 queries top.
"""
label = "Collect Asset Audio"
order = pyblish.api.CollectorOrder + 0.1
families = ["review"]
@ -39,67 +45,134 @@ class CollectAudio(pyblish.api.InstancePlugin):
audio_subset_name = "audioMain"
def process(self, instance):
if instance.data.get("audio"):
self.log.info(
"Skipping Audio collecion. It is already collected"
)
def process(self, context):
# Fake filtering by family inside context plugin
filtered_instances = []
for instance in pyblish.api.instances_by_plugin(
context, self.__class__
):
# Skip instances that already have audio filled
if instance.data.get("audio"):
self.log.info(
"Skipping Audio collecion. It is already collected"
)
continue
filtered_instances.append(instance)
# Skip if none of instances remained
if not filtered_instances:
return
# Add audio to instance if exists.
instances_by_asset_name = collections.defaultdict(list)
for instance in filtered_instances:
asset_name = instance.data["asset"]
instances_by_asset_name[asset_name].append(instance)
asset_names = set(instances_by_asset_name.keys())
self.log.info((
"Searching for audio subset '{subset}'"
" in asset '{asset}'"
"Searching for audio subset '{subset}' in assets {assets}"
).format(
subset=self.audio_subset_name,
asset=instance.data["asset"]
assets=", ".join([
'"{}"'.format(asset_name)
for asset_name in asset_names
])
))
repre_doc = self._get_repre_doc(instance)
# Query all required documents
project_name = context.data["projectName"]
anatomy = context.data["anatomy"]
repre_docs_by_asset_names = self.query_representations(
project_name, asset_names)
# Add audio to instance if representation was found
if repre_doc:
instance.data["audio"] = [{
"offset": 0,
"filename": get_representation_path(repre_doc)
}]
self.log.info("Audio Data added to instance ...")
for asset_name, instances in instances_by_asset_name.items():
repre_docs = repre_docs_by_asset_names[asset_name]
if not repre_docs:
continue
def _get_repre_doc(self, instance):
cache = instance.context.data.get("__cache_asset_audio")
if cache is None:
cache = {}
instance.context.data["__cache_asset_audio"] = cache
asset_name = instance.data["asset"]
repre_doc = repre_docs[0]
repre_path = get_representation_path_with_anatomy(
repre_doc, anatomy
)
for instance in instances:
instance.data["audio"] = [{
"offset": 0,
"filename": repre_path
}]
self.log.info("Audio Data added to instance ...")
# first try to get it from cache
if asset_name in cache:
return cache[asset_name]
def query_representations(self, project_name, asset_names):
"""Query representations related to audio subsets for passed assets.
project_name = legacy_io.active_project()
Args:
project_name (str): Project in which we're looking for all
entities.
asset_names (Iterable[str]): Asset names where to look for audio
subsets and their representations.
# Find latest versions document
last_version_doc = get_last_version_by_subset_name(
Returns:
collections.defaultdict[str, List[Dict[Str, Any]]]: Representations
related to audio subsets by asset name.
"""
output = collections.defaultdict(list)
# Query asset documents
asset_docs = get_assets(
project_name,
self.audio_subset_name,
asset_name=asset_name,
asset_names=asset_names,
fields=["_id"]
)
repre_doc = None
if last_version_doc:
# Try to find it's representation (Expected there is only one)
repre_docs = list(get_representations(
project_name, version_ids=[last_version_doc["_id"]]
))
if not repre_docs:
self.log.warning(
"Version document does not contain any representations"
)
else:
repre_doc = repre_docs[0]
asset_id_by_name = {}
for asset_doc in asset_docs:
asset_id_by_name[asset_doc["name"]] = asset_doc["_id"]
asset_ids = set(asset_id_by_name.values())
# update cache
cache[asset_name] = repre_doc
# Query subsets with name define by 'audio_subset_name' attr
# - one or none subsets with the name should be available on an asset
subset_docs = get_subsets(
project_name,
subset_names=[self.audio_subset_name],
asset_ids=asset_ids,
fields=["_id", "parent"]
)
subset_id_by_asset_id = {}
for subset_doc in subset_docs:
asset_id = subset_doc["parent"]
subset_id_by_asset_id[asset_id] = subset_doc["_id"]
return repre_doc
subset_ids = set(subset_id_by_asset_id.values())
if not subset_ids:
return output
# Find all latest versions for the subsets
version_docs_by_subset_id = get_last_versions(
project_name, subset_ids=subset_ids, fields=["_id", "parent"]
)
version_id_by_subset_id = {
subset_id: version_doc["_id"]
for subset_id, version_doc in version_docs_by_subset_id.items()
}
version_ids = set(version_id_by_subset_id.values())
if not version_ids:
return output
# Find representations under latest versions of audio subsets
repre_docs = get_representations(
project_name, version_ids=version_ids
)
repre_docs_by_version_id = collections.defaultdict(list)
for repre_doc in repre_docs:
version_id = repre_doc["parent"]
repre_docs_by_version_id[version_id].append(repre_doc)
if not repre_docs_by_version_id:
return output
for asset_name in asset_names:
asset_id = asset_id_by_name.get(asset_name)
subset_id = subset_id_by_asset_id.get(asset_id)
version_id = version_id_by_subset_id.get(subset_id)
output[asset_name] = repre_docs_by_version_id[version_id]
return output