Added implementation of hash

Yanked source_hash function from extract_look into lib (and api)
This commit is contained in:
petr.kalis 2020-06-18 11:09:20 +02:00
parent a908e2867e
commit b138387656
4 changed files with 74 additions and 49 deletions

View file

@ -1,5 +1,12 @@
from .plugin import (
from pypeapp import (
Logger,
Anatomy,
project_overrides_dir_path,
config,
execute
)
from .plugin import (
Extractor,
ValidatePipelineOrder,
@ -16,8 +23,6 @@ from .action import (
RepairContextAction
)
from pypeapp import Logger
from .lib import (
version_up,
get_asset,
@ -26,13 +31,20 @@ from .lib import (
get_subsets,
get_version_from_path,
modified_environ,
add_tool_to_environment
add_tool_to_environment,
source_hash
)
# Special naming case for subprocess since its a built-in method.
from .lib import _subprocess as subprocess
__all__ = [
"Logger",
"Anatomy",
"project_overrides_dir_path",
"config",
"execute",
# plugin classes
"Extractor",
# ordering
@ -58,6 +70,7 @@ __all__ = [
"get_version_from_path",
"modified_environ",
"add_tool_to_environment",
"source_hash",
"subprocess"
]

View file

@ -15,7 +15,7 @@ from abc import ABCMeta, abstractmethod
from avalon import io, pipeline
import six
import avalon.api
from pypeapp import config
from .api import config
log = logging.getLogger(__name__)
@ -1349,3 +1349,24 @@ def ffprobe_streams(path_to_file):
popen_output = popen.communicate()[0]
log.debug("FFprobe output: {}".format(popen_output))
return json.loads(popen_output)["streams"]
def source_hash(filepath, *args):
    """Generate simple identifier for a source file.

    This is used to identify whether a source file has previously been
    processed into the pipeline, e.g. a texture.

    The hash is based on source filepath, modification time and file size.
    This is only used to identify whether a specific source file was already
    published before from the same location with the same modification date.
    We opt to do it this way as opposed to the Avalanche C4 hash as this is
    much faster and predictable enough for all our production use cases.

    Args:
        filepath (str): The source file path.
        *args (str): Additional arguments to allow for specific 'processing'
            values to be included in the identifier.

    Returns:
        str: Identifier of the form "name|mtime|size[|arg...]" with every
            ``.`` replaced by ``,``.
    """
    # We replace dots with comma because . cannot be a key in a pymongo dict.
    file_name = os.path.basename(filepath)
    time = str(os.path.getmtime(filepath))
    size = str(os.path.getsize(filepath))
    return "|".join([file_name, time, size] + list(args)).replace(".", ",")

View file

@ -9,8 +9,9 @@ import six
from pymongo import DeleteOne, InsertOne
import pyblish.api
from avalon import api, io
from avalon import io
from avalon.vendor import filelink
import pype.api
# this is needed until speedcopy for linux is fixed
if sys.platform == "win32":
@ -44,6 +45,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
"frameStart"
"frameEnd"
'fps'
"data": additional metadata for each representation.
"""
label = "Integrate Asset New"
@ -76,12 +78,13 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
"gizmo",
"source",
"matchmove",
"image"
"image",
"source",
"assembly",
"fbx",
"textures",
"action"
"action",
"harmony.template"
]
exclude_families = ["clip"]
db_representation_context_keys = [
@ -94,7 +97,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
# file_url : file_size of all published and uploaded files
integrated_file_sizes = {}
TMP_FILE_EXT = 'tmp' # suffix to denote temporary files, use without '.'
TMP_FILE_EXT = 'tmp' # suffix to denote temporary files, use without '.'
def process(self, instance):
self.integrated_file_sizes = {}
@ -107,12 +110,11 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
self.log.info("Integrated Asset in to the database ...")
self.log.info("instance.data: {}".format(instance.data))
self.handle_destination_files(self.integrated_file_sizes,
instance, 'finalize')
'finalize')
except Exception as e:
# clean destination
self.log.critical("Error when registering", exc_info=True)
self.handle_destination_files(self.integrated_file_sizes,
instance, 'remove')
self.handle_destination_files(self.integrated_file_sizes, 'remove')
raise
def register(self, instance):
@ -394,9 +396,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
index_frame_start += 1
dst = "{0}{1}{2}".format(
dst_head,
dst_padding,
dst_tail).replace("..", ".")
dst_head,
dst_padding,
dst_tail
).replace("..", ".")
self.log.debug("destination: `{}`".format(dst))
src = os.path.join(stagingdir, src_file_name)
@ -469,13 +472,15 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
if repre_id is None:
repre_id = io.ObjectId()
data = repre.get("data") or {}
data.update({'path': dst, 'template': template})
representation = {
"_id": repre_id,
"schema": "pype:representation-2.0",
"type": "representation",
"parent": version_id,
"name": repre['name'],
"data": {'path': dst, 'template': template},
"data": data,
"dependencies": instance.data.get("dependencies", "").split(),
# Imprint shortcut to context
@ -500,11 +505,14 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
# so no rollback needed
self.log.debug("Integrating source files to destination ...")
self.integrated_file_sizes.update(self.integrate(instance))
self.log.debug("Integrated files {}".format(self.integrated_file_sizes))
self.log.debug("Integrated files {}".
format(self.integrated_file_sizes))
# get 'files' info for representation and all attached resources
self.log.debug("Preparing files information ..")
representation["files"] = self.get_files_info(instance, self.integrated_file_sizes)
representation["files"] = self.get_files_info(
instance,
self.integrated_file_sizes)
self.log.debug("__ representation: {}".format(representation))
destination_list.append(dst)
@ -800,7 +808,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
return template_name
def get_rootless_path(self, anatomy, path):
""" Returns, if possible, path without absolute portion from host
(eg. 'c:\' or '/opt/..')
@ -846,15 +853,21 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
resources = list(instance.data.get("transfers", []))
resources.extend(list(instance.data.get("hardlinks", [])))
self.log.debug("get_resource_files_info.resources: {}".format(resources))
self.log.debug("get_resource_files_info.resources:{}".format(resources))
output_resources = []
anatomy = instance.context.data["anatomy"]
for src, dest in resources:
# TODO - hash or use self.integrated_file_size
path = self.get_rootless_path(anatomy, dest)
dest = self.get_dest_temp_url(dest)
output_resources.append(self.prepare_file_info(path, integrated_file_sizes[dest], 'temphash'))
hash = pype.api.source_hash(dest)
if self.TMP_FILE_EXT and ',{}'.format(self.TMP_FILE_EXT) in hash:
hash = hash.replace(',{}'.format(self.TMP_FILE_EXT), '')
file_info = self.prepare_file_info(path,
integrated_file_sizes[dest],
hash)
output_resources.append(file_info)
return output_resources
@ -872,7 +885,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
dest += '.{}'.format(self.TMP_FILE_EXT)
return dest
def prepare_file_info(self, path, size = None, hash = None, sites = None):
def prepare_file_info(self, path, size=None, hash=None, sites=None):
""" Prepare information for one file (asset or resource)
Arguments:
@ -902,7 +915,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
return rec
def handle_destination_files(self, integrated_file_sizes, instance, mode):
def handle_destination_files(self, integrated_file_sizes, mode):
""" Clean destination files
Called when error happened during integrating to DB or to disk
OR called to rename uploaded files from temporary name to final to
@ -911,7 +924,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
Arguments:
integrated_file_sizes: dictionary, file urls as keys, size as value
instance: processed instance - for publish directories
mode: 'remove' - clean files,
'finalize' - rename files,
remove TMP_FILE_EXT suffix denoting temp file
@ -936,4 +948,4 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
except OSError:
self.log.critical("Cannot {} file {}".format(mode, file_url)
, exc_info=True)
raise
raise

View file

@ -14,34 +14,13 @@ import avalon.maya
from avalon import io, api
import pype.api
import pype.maya.lib as lib
from pype.hosts.maya import lib
# Modes for transfer
COPY = 1
HARDLINK = 2
def source_hash(filepath, *args):
    """Build a lightweight identifier for a source file.

    Used to detect whether a source file (e.g. a texture) was already
    published into the pipeline from the same location with the same
    modification date. The identifier combines the file name, modification
    time and size — chosen over a content hash (Avalanche C4) because it is
    much faster and predictable enough for production use.

    Args:
        filepath (str): The source file path.
        *args (str): Extra 'processing' values to fold into the identifier.

    Returns:
        str: "name|mtime|size[|arg...]" with dots replaced by commas
            (``.`` cannot appear in a pymongo dict key).
    """
    stat = os.stat(filepath)
    parts = [
        os.path.basename(filepath),
        str(stat.st_mtime),
        str(stat.st_size),
    ]
    parts.extend(args)
    return "|".join(parts).replace(".", ",")
def find_paths_by_hash(texture_hash):
# Find the texture hash key in the dictionary and all paths that
# originate from it.
@ -363,7 +342,7 @@ class ExtractLook(pype.api.Extractor):
args = []
if do_maketx:
args.append("maketx")
texture_hash = source_hash(filepath, *args)
texture_hash = pype.api.source_hash(filepath, *args)
# If source has been published before with the same settings,
# then don't reprocess but hardlink from the original