Merged in feature/pype-62_hardlink_for_publishing (pull request #88)

Feature/pype 62 hardlink for publishing

Approved-by: Milan Kolar <milan@orbi.tools>
Jakub Trllo 2019-03-14 17:32:08 +00:00 committed by Milan Kolar
commit fd0fde4f37
7 changed files with 307 additions and 120 deletions

View file

@ -1,5 +1,5 @@
import pyblish.api
import os
import pyblish.api
from avalon import io, api
@ -16,53 +16,7 @@ class CollectAssumedDestination(pyblish.api.InstancePlugin):
if instance.data["family"] in ef]:
return
self.create_destination_template(instance)
template_data = instance.data["assumedTemplateData"]
# template = instance.data["template"]
anatomy = instance.context.data['anatomy']
# template = anatomy.publish.path
anatomy_filled = anatomy.format(template_data)
mock_template = anatomy_filled.publish.path
# For now assume resources end up in a "resources" folder in the
# published folder
mock_destination = os.path.join(os.path.dirname(mock_template),
"resources")
# Clean the path
mock_destination = os.path.abspath(os.path.normpath(mock_destination))
# Define resource destination and transfers
resources = instance.data.get("resources", list())
transfers = instance.data.get("transfers", list())
for resource in resources:
# Add destination to the resource
source_filename = os.path.basename(resource["source"])
destination = os.path.join(mock_destination, source_filename)
# Force forward slashes to fix issue with software unable
# to work correctly with backslashes in specific scenarios
# (e.g. escape characters in PLN-151 V-Ray UDIM)
destination = destination.replace("\\", "/")
resource['destination'] = destination
# Collect transfers for the individual files of the resource
# e.g. all individual files of a cache or UDIM textures.
files = resource['files']
for fsrc in files:
fname = os.path.basename(fsrc)
fdest = os.path.join(mock_destination, fname)
transfers.append([fsrc, fdest])
instance.data["resources"] = resources
instance.data["transfers"] = transfers
def create_destination_template(self, instance):
"""Create a filepath based on the current data available
"""Create a destination filepath based on the current data available
Example template:
{root}/{project}/{silo}/{asset}/publish/{subset}/v{version:0>3}/
@ -84,7 +38,7 @@ class CollectAssumedDestination(pyblish.api.InstancePlugin):
projection={"config": True, "data": True})
template = project["config"]["template"]["publish"]
# anatomy = instance.context.data['anatomy']
anatomy = instance.context.data['anatomy']
asset = io.find_one({"type": "asset",
"name": asset_name,
@ -126,5 +80,10 @@ class CollectAssumedDestination(pyblish.api.InstancePlugin):
"hierarchy": hierarchy,
"representation": "TEMP"}
instance.data["assumedTemplateData"] = template_data
instance.data["template"] = template
instance.data["assumedTemplateData"] = template_data
# We take the parent folder of representation 'filepath'
instance.data["assumedDestination"] = os.path.dirname(
(anatomy.format(template_data)).publish.path
)
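A minimal sketch of what this works out to, using plain str.format with hypothetical values instead of the real anatomy object:

import os

template = "{root}/{project}/{silo}/{asset}/publish/{subset}/v{version:0>3}"
data = {"root": "/projects", "project": "show", "silo": "assets",
        "asset": "hero", "subset": "lookMain", "version": 3}  # hypothetical
publish_dir = template.format(**data)
# -> "/projects/show/assets/hero/publish/lookMain/v003"
# The real code formats the full publish file path via anatomy and stores
# its dirname (a folder like the one above) as instance.data["assumedDestination"].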

View file

@ -5,6 +5,7 @@ import shutil
import errno
import pyblish.api
from avalon import api, io
from avalon.vendor import filelink
log = logging.getLogger(__name__)
@ -91,6 +92,13 @@ class IntegrateAsset(pyblish.api.InstancePlugin):
self.log.debug("Establishing staging directory @ %s" % stagingdir)
# Ensure at least one file is set up for transfer in staging dir.
files = instance.data.get("files", [])
assert files, "Instance has no files to transfer"
assert isinstance(files, (list, tuple)), (
"Instance 'files' must be a list, got: {0}".format(files)
)
project = io.find_one({"type": "project"})
asset = io.find_one({"type": "asset",
@ -170,6 +178,8 @@ class IntegrateAsset(pyblish.api.InstancePlugin):
# Each should be a single representation (as such, a single extension)
representations = []
destination_list = []
if 'transfers' not in instance.data:
instance.data['transfers'] = []
for files in instance.data["files"]:
@ -271,12 +281,22 @@ class IntegrateAsset(pyblish.api.InstancePlugin):
instance: the instance to integrate
"""
transfers = instance.data["transfers"]
transfers = instance.data.get("transfers", list())
for src, dest in transfers:
self.log.info("Copying file .. {} -> {}".format(src, dest))
self.copy_file(src, dest)
# Produce hardlinked copies
# Note: hardlink can only be produced between two files on the same
# server/disk and editing one of the two will edit both files at once.
# As such it is recommended to only make hardlinks between static files
# to ensure publishes remain safe and non-edited.
hardlinks = instance.data.get("hardlinks", list())
for src, dest in hardlinks:
self.log.info("Hardlinking file .. {} -> {}".format(src, dest))
self.hardlink_file(src, dest)
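As the note above says, a hardlink only works between two paths on the same server/disk; a rough pre-check one could run before linking (an illustrative sketch, not part of this plugin) is:

import os

def on_same_device(src, dest_dir):
    # Hardlinks require source and destination to live on the same
    # filesystem/volume; st_dev differs across mounts and drives.
    return os.stat(src).st_dev == os.stat(dest_dir).st_dev

On some Windows setups st_dev is not a reliable volume identifier, so treat this only as a heuristic.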
def copy_file(self, src, dst):
""" Copy given source to destination
@ -299,6 +319,20 @@ class IntegrateAsset(pyblish.api.InstancePlugin):
shutil.copy(src, dst)
def hardlink_file(self, src, dst):
dirname = os.path.dirname(dst)
try:
os.makedirs(dirname)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
self.log.critical("An unexpected error occurred.")
raise
filelink.create(src, dst, filelink.HARDLINK)
def get_subset(self, asset, instance):
subset = io.find_one({"type": "subset",
@ -362,7 +396,7 @@ class IntegrateAsset(pyblish.api.InstancePlugin):
families.append(instance_family)
families += current_families
self.log.debug("Registered roor: {}".format(api.registered_root()))
self.log.debug("Registered root: {}".format(api.registered_root()))
# create relative source path for DB
try:
source = instance.data['source']
@ -382,7 +416,9 @@ class IntegrateAsset(pyblish.api.InstancePlugin):
"fps": context.data.get("fps")}
# Include optional data if present in
optionals = ["startFrame", "endFrame", "step", "handles"]
optionals = [
"startFrame", "endFrame", "step", "handles", "sourceHashes"
]
for key in optionals:
if key in instance.data:
version_data[key] = instance.data[key]

View file

@ -15,3 +15,6 @@ class CreateLook(avalon.maya.Creator):
super(CreateLook, self).__init__(*args, **kwargs)
self.data["renderlayer"] = lib.get_current_renderlayer()
# Whether to automatically convert the textures to .tx upon publish.
self.data["maketx"] = True

View file

@ -218,6 +218,7 @@ class CollectLook(pyblish.api.InstancePlugin):
# make ftrack publishable
instance.data["families"] = ['ftrack']
instance.data['maketx'] = True
def collect(self, instance):

View file

@ -2,16 +2,97 @@ import os
import json
import tempfile
import contextlib
import subprocess
from collections import OrderedDict
from maya import cmds
import pyblish.api
import avalon.maya
from avalon import io
import pype.api
import pype.maya.lib as lib
# Modes for transfer
COPY = 1
HARDLINK = 2
def source_hash(filepath, *args):
"""Generate simple identifier for a source file.
This is used to identify whether a source file (e.g. a texture) has
previously been processed into the pipeline.
The hash is based on the source filepath, modification time and file size.
It is only used to identify whether a specific source file was already
published before from the same location with the same modification date.
We opt for this over an Avalanche C4 content hash because it is much
faster and predictable enough for all our production use cases.
Args:
filepath (str): The source file path.
*args: Optional extra strings to include in the hash, e.g. to mark
    specific 'processing' applied to the source (such as "maketx").
"""
# We replace dots with commas because a dot cannot be used in a key
# (field name) of a MongoDB document.
file_name = os.path.basename(filepath)
time = str(os.path.getmtime(filepath))
size = str(os.path.getsize(filepath))
return "|".join([
file_name,
time,
size
] + list(args)).replace(".", ",")
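A hypothetical usage sketch (the path and resulting values are illustrative only); passing an extra argument such as "maketx" makes the processed variant hash differently from the plain copy:

import os

texture = "/sourceimages/diffuse.png"  # hypothetical path
if os.path.exists(texture):
    plain_id = source_hash(texture)
    tx_id = source_hash(texture, "maketx")
    # e.g. "diffuse,png|1552575128,0|204800"
    #  vs  "diffuse,png|1552575128,0|204800|maketx"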
def find_paths_by_hash(texture_hash):
# Find the texture hash key in the dictionary and all paths that
# originate from it.
key = "data.sourceHashes.{0}".format(texture_hash)
return io.distinct(key, {"type": "version"})
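This lookup relies on the integrator storing the extractor's sourceHashes mapping in the version document's data (see the "sourceHashes" addition to the optional keys in the integrator above). A sketch of the assumed document shape, with hypothetical values:

version_doc = {
    "type": "version",
    "data": {
        "sourceHashes": {
            # source hash -> published destination path
            "diffuse,png|1552575128,0|204800|maketx":
                "/projects/show/assets/hero/publish/lookMain/v003"
                "/resources/diffuse.tx",
        },
    },
}
# io.distinct("data.sourceHashes.<hash>", {"type": "version"}) then returns
# all distinct destination paths stored under that hash key.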
def maketx(source, destination, *args):
"""Make .tx using maketx with some default settings.
The settings are based on default as used in Arnold's
txManager in the scene.
This function requires the `maketx` executable to be
on the `PATH`.
Args:
source (str): Path to source file.
destination (str): Writing destination path.
"""
cmd = [
"maketx",
"-v", # verbose
"-u", # update mode
# unpremultiply before conversion (recommended when alpha present)
"--unpremult",
# use oiio-optimized settings for tile-size, planarconfig, metadata
"--oiio"
]
cmd.extend(args)
cmd.extend([
"-o", destination,
source
])
# Windows-only flag that prevents a console window from popping up for
# the subprocess.
CREATE_NO_WINDOW = 0x08000000
try:
out = subprocess.check_output(
cmd,
stderr=subprocess.STDOUT,
creationflags=CREATE_NO_WINDOW
)
except subprocess.CalledProcessError as exc:
print exc
# `out` is not defined when check_output raises; report the captured
# output attached to the exception instead.
print exc.output
import traceback
traceback.print_exc()
raise
return out
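A hedged usage sketch mirroring the call made from _process_texture further below (paths and the hash string are hypothetical):

out = maketx(
    "/sourceimages/diffuse.png",           # hypothetical source
    "/staging/resources/diffuse.tx",       # hypothetical destination
    # extra args are inserted before the output/input arguments:
    "-sattrib", "sourceHash", "diffuse,png|1552575128,0|204800|maketx"
)
# which effectively runs:
#   maketx -v -u --unpremult --oiio
#          -sattrib sourceHash <hash>
#          -o /staging/resources/diffuse.tx /sourceimages/diffuse.png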
@contextlib.contextmanager
def no_workspace_dir():
@ -79,12 +160,53 @@ class ExtractLook(pype.api.Extractor):
relationships = lookdata["relationships"]
sets = relationships.keys()
# Extract the textures to transfer, possibly convert with maketx and
# remap the node paths to the destination path. Note that a source
# might be included more than once amongst the resources as they could
# be the input file to multiple nodes.
resources = instance.data["resources"]
do_maketx = instance.data.get("maketx", False)
# Collect all unique files used in the resources
files = set()
for resource in resources:
files.update(os.path.normpath(f) for f in resource["files"])
# Process the resource files
transfers = list()
hardlinks = list()
hashes = dict()
for filepath in files:
source, mode, hash = self._process_texture(
filepath, do_maketx, staging=dir_path
)
destination = self.resource_destination(
instance, source, do_maketx
)
if mode == COPY:
transfers.append((source, destination))
elif mode == HARDLINK:
hardlinks.append((source, destination))
# Store the hashes from hash to destination to include in the
# database
hashes[hash] = destination
# Remap the resources to the destination path (change node attributes)
destinations = dict()
remap = OrderedDict() # needs to be ordered, see color space values
for resource in resources:
source = os.path.normpath(resource["source"])
if source not in destinations:
# Cache destination as source resource might be included
# multiple times
destinations[source] = self.resource_destination(
instance, source, do_maketx
)
# Remap file node filename to destination
attr = resource['attribute']
remap[attr] = resource['destination']
remap[attr] = destinations[source]
# Preserve color space values (force value after filepath change)
# This will also trigger in the same order at end of context to
@ -107,15 +229,17 @@ class ExtractLook(pype.api.Extractor):
with lib.attribute_values(remap):
with avalon.maya.maintained_selection():
cmds.select(sets, noExpand=True)
cmds.file(maya_path,
force=True,
typ="mayaAscii",
exportSelected=True,
preserveReferences=False,
channels=True,
constraints=True,
expressions=True,
constructionHistory=True)
cmds.file(
maya_path,
force=True,
typ="mayaAscii",
exportSelected=True,
preserveReferences=False,
channels=True,
constraints=True,
expressions=True,
constructionHistory=True
)
# Write the JSON data
self.log.info("Extract json..")
@ -127,9 +251,90 @@ class ExtractLook(pype.api.Extractor):
if "files" not in instance.data:
instance.data["files"] = list()
if "hardlinks" not in instance.data:
instance.data["hardlinks"] = list()
if "transfers" not in instance.data:
instance.data["transfers"] = list()
instance.data["files"].append(maya_fname)
instance.data["files"].append(json_fname)
self.log.info("Extracted instance '%s' to: %s" % (instance.name,
maya_path))
# Set up the resources transfers/links for the integrator
instance.data["transfers"].extend(transfers)
instance.data["hardlinks"].extend(hardlinks)
# Source hash for the textures
instance.data["sourceHashes"] = hashes
self.log.info("Extracted instance '%s' to: %s" % (
instance.name, maya_path)
)
def resource_destination(self, instance, filepath, do_maketx):
# Compute destination location
basename, ext = os.path.splitext(os.path.basename(filepath))
# If maketx then the texture will always end with .tx
if do_maketx:
ext = ".tx"
return os.path.join(
instance.data["assumedDestination"],
"resources",
basename + ext
)
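A worked example with an assumed (illustrative) destination: with do_maketx enabled every source file maps into the publish's resources folder with a .tx extension:

# instance.data["assumedDestination"] =
#     "/projects/show/assets/hero/publish/lookMain/v003"      (hypothetical)
# resource_destination(instance, "/sourceimages/diffuse.png", do_maketx=True)
# -> "/projects/show/assets/hero/publish/lookMain/v003/resources/diffuse.tx"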
def _process_texture(self, filepath, do_maketx, staging):
"""Process a single texture file on disk for publishing.
This will:
1. Check whether the source was already published with the same
   settings; if so, hardlink the existing publish instead of copying.
2. If not published before and maketx is enabled, generate a new .tx
   file in the staging directory.
3. Return the file to transfer, the transfer mode and the source hash.
Args:
    filepath (str): The source file path to process.
    do_maketx (bool): Whether to produce a .tx file.
    staging (str): Staging directory to write converted files into.
Returns:
    tuple: (filepath, transfer mode (COPY or HARDLINK), texture hash)
"""
fname, ext = os.path.splitext(os.path.basename(filepath))
args = []
if do_maketx:
args.append("maketx")
texture_hash = source_hash(filepath, *args)
# If source has been published before with the same settings,
# then don't reprocess but hardlink from the original
existing = find_paths_by_hash(texture_hash)
if existing:
self.log.info("Found hash in database, preparing hardlink..")
source = next((p for p in existing if os.path.exists(p)), None)
# Only hardlink when one of the existing publishes is still on disk
if source:
return source, HARDLINK, texture_hash
else:
self.log.warning(
"Paths not found on disk, "
"skipping hardlink: %s" % (existing,)
)
if do_maketx and ext != ".tx":
# Produce .tx file in staging if source file is not .tx
converted = os.path.join(
staging,
"resources",
fname + ".tx"
)
# Ensure folder exists
if not os.path.exists(os.path.dirname(converted)):
os.makedirs(os.path.dirname(converted))
self.log.info("Generating .tx file for %s .." % filepath)
maketx(filepath, converted,
# Include `source-hash` as string metadata
"-sattrib", "sourceHash", texture_hash)
return converted, COPY, texture_hash
return filepath, COPY, texture_hash

View file

@ -1,8 +1,9 @@
import os
from collections import defaultdict
import pyblish.api
import pype.api
import os
class ValidateResources(pyblish.api.InstancePlugin):
"""Validates mapped resources.
@ -12,18 +13,45 @@ class ValidateResources(pyblish.api.InstancePlugin):
media.
This validates:
- The resources are existing files.
- The resources have correctly collected the data.
- The resources have unique filenames (without extension)
"""
order = pype.api.ValidateContentsOrder
label = "Resources"
label = "Resources Unique"
def process(self, instance):
for resource in instance.data.get('resources', []):
# Required data
assert "source" in resource, "No source found"
assert "files" in resource, "No files from source"
assert all(os.path.exists(f) for f in resource['files'])
resources = instance.data.get("resources", [])
if not resources:
self.log.debug("No resources to validate..")
return
basenames = defaultdict(set)
for resource in resources:
files = resource.get("files", [])
for filename in files:
# Use normalized paths in comparison and ignore case
# sensitivity
filename = os.path.normpath(filename).lower()
basename = os.path.splitext(os.path.basename(filename))[0]
basenames[basename].add(filename)
invalid_resources = list()
for basename, sources in basenames.items():
if len(sources) > 1:
invalid_resources.extend(sources)
self.log.error(
    "Non-unique resource name: {0} (sources: {1})".format(
        basename,
        list(sources)
    )
)
if invalid_resources:
raise RuntimeError("Invalid resources in instance.")
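An illustrative failure case (paths hypothetical): two different sources sharing a basename would both publish into the flat resources folder as the same file, so the validator fails the instance:

# /textures/v001/diffuse.png
# /textures/v002/diffuse.png
# Both resolve to "<assumedDestination>/resources/diffuse.png" (or .tx),
# so "diffuse" is reported as a non-unique resource name.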

View file

@ -1,45 +0,0 @@
import pyblish.api
import pype.api
import os
from collections import defaultdict
class ValidateTransfers(pyblish.api.InstancePlugin):
"""Validates mapped resources.
This validates:
- The resources all transfer to a unique destination.
"""
order = pype.api.ValidateContentsOrder
label = "Transfers"
def process(self, instance):
transfers = instance.data.get("transfers", [])
if not transfers:
return
# Collect all destination with its sources
collected = defaultdict(set)
for source, destination in transfers:
# Use normalized paths in comparison and ignore case sensitivity
source = os.path.normpath(source).lower()
destination = os.path.normpath(destination).lower()
collected[destination].add(source)
invalid_destinations = list()
for destination, sources in collected.items():
if len(sources) > 1:
invalid_destinations.append(destination)
self.log.error("Non-unique file transfer for resources: "
"{0} (sources: {1})".format(destination,
list(sources)))
if invalid_destinations:
raise RuntimeError("Invalid transfers in queue.")