From d88ed919e6a4a566b8ff8b289415c471de454b00 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Wed, 16 Mar 2022 22:09:23 +0100 Subject: [PATCH 01/72] First draft pass of refactoring the Integrator --- openpype/plugins/publish/integrate_new.py | 1076 ++++++++++----------- 1 file changed, 508 insertions(+), 568 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index e8dab089af..e4986e3b3f 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -7,9 +7,8 @@ import clique import errno import six import re -import shutil -from pymongo import DeleteOne, InsertOne +from pymongo import DeleteOne, InsertOne, UpdateOne import pyblish.api from avalon import io from avalon.api import format_template_with_optional_keys @@ -31,6 +30,17 @@ else: log = logging.getLogger(__name__) +def get_frame_padded(frame, padding): + """Return frame number as string with `padding` amount of padded zeros""" + return "{frame:0{padding}d}".format(padding=padding, frame=frame) + + +def get_first_frame_padded(collection): + """Return first frame as padded number from `clique.Collection`""" + start_frame = next(iter(collection.indexes)) + return get_frame_padded(start_frame, padding=collection.padding) + + class IntegrateAssetNew(pyblish.api.InstancePlugin): """Resolve any dependency issues @@ -108,7 +118,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): exclude_families = ["clip"] db_representation_context_keys = [ "project", "asset", "task", "subset", "version", "representation", - "family", "hierarchy", "task", "username" + "family", "hierarchy", "task", "username", "frame", "udim" ] default_template_name = "publish" @@ -116,38 +126,40 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): TMP_FILE_EXT = 'tmp' # file_url : file_size of all published and uploaded files - integrated_file_sizes = {} + destinations = list() # Attributes set by settings template_name_profiles = None subset_grouping_profiles = None def process(self, instance): - self.integrated_file_sizes = {} - if [ef for ef in self.exclude_families - if instance.data["family"] in ef]: + self.destinations = [] + + # Exclude instances that also contain families from exclude families + families = set( + # Consider family and families data + [instance.data["family"]] + instance.data.get("families", []) + ) + if families & set(self.exclude_families): return try: self.register(instance) self.log.info("Integrated Asset in to the database ...") - self.log.info("instance.data: {}".format(instance.data)) - self.handle_destination_files(self.integrated_file_sizes, + self.handle_destination_files(self.destinations, 'finalize') except Exception: # clean destination self.log.critical("Error when registering", exc_info=True) - self.handle_destination_files(self.integrated_file_sizes, 'remove') + self.handle_destination_files(self.destinations, 'remove') six.reraise(*sys.exc_info()) - def register(self, instance): - # Required environment variables - anatomy_data = instance.data["anatomyData"] - - io.install() + def prepare_anatomy(self, instance): + """Prepare anatomy data used to define representation destinations""" context = instance.context + anatomy_data = instance.data["anatomyData"] project_entity = instance.data["projectEntity"] context_asset_name = None @@ -206,8 +218,36 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Fill family in anatomy data anatomy_data["family"] = instance.data.get("family") - stagingdir = 
instance.data.get("stagingDir") - if not stagingdir: + intent_value = instance.context.data.get("intent") + if intent_value and isinstance(intent_value, dict): + intent_value = intent_value.get("value") + + if intent_value: + anatomy_data["intent"] = intent_value + + # Get profile + key_values = { + "families": self.main_family_from_instance(instance), + "tasks": task_name, + "hosts": instance.context.data["hostName"], + "task_types": task_type + } + profile = filter_profiles( + self.template_name_profiles, + key_values, + logger=self.log + ) + + template_name = "publish" + if profile: + template_name = profile["template_name"] + + return template_name, anatomy_data + + def register(self, instance): + + instance_stagingdir = instance.data.get("stagingDir") + if not instance_stagingdir: self.log.info(( "{0} is missing reference to staging directory." " Will try to get it from representation." @@ -215,7 +255,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): else: self.log.debug( - "Establishing staging directory @ {0}".format(stagingdir) + "Establishing staging directory " + "@ {0}".format(instance_stagingdir) ) # Ensure at least one file is set up for transfer in staging dir. @@ -227,28 +268,74 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ) ) - subset = self.get_subset(asset_entity, instance) - instance.data["subsetEntity"] = subset + subset = self.register_subset(instance) + + version = self.register_version(instance, subset) + instance.data["versionEntity"] = version + instance.data['version'] = version['name'] + + existing_repres = list(io.find({ + "parent": version["_id"], + "type": "archived_representation" + })) + + # Find the representations to transfer amongst the files + # Each should be a single representation (as such, a single extension) + template_name, anatomy_data = self.prepare_anatomy(instance) + published_representations = {} + representations = [] + for repre in instance.data["representations"]: + + if "delete" in repre.get("tags", []): + self.log.debug("Skipping representation marked for deletion: " + "{}".format(repre)) + continue + + prepared = self.prepare_representation(repre, + anatomy_data, + template_name, + existing_repres, + version, + instance_stagingdir, + instance) + + # todo: simplify this? + representation = prepared["representation"] + representations.append(representation) + published_representations[representation["_id"]] = prepared + + # Remove old representations if there are any (before insertion of new) + if existing_repres: + repre_ids_to_remove = [repre["_id"] for repre in existing_repres] + io.delete_many({"_id": {"$in": repre_ids_to_remove}}) + + # Write the new representations to the database + io.insert_many(representations) + + instance.data["published_representations"] = published_representations + + self.log.info("Registered {} representations" + "".format(len(representations))) + + def register_version(self, instance, subset): version_number = instance.data["version"] self.log.debug("Next version: v{}".format(version_number)) - version_data = self.create_version_data(context, instance) - + version_data = self.create_version_data(instance) version_data_instance = instance.data.get('versionData') if version_data_instance: version_data.update(version_data_instance) - # TODO rename method from `create_version` to - # `prepare_version` or similar... 
- version = self.create_version( - subset=subset, - version_number=version_number, - data=version_data - ) - - self.log.debug("Creating version ...") + version = { + "schema": "openpype:version-3.0", + "type": "version", + "parent": subset["_id"], + "name": version_number, + "data": version_data + } + repres = instance.data.get("representations", []) new_repre_names_low = [_repre["name"].lower() for _repre in repres] existing_version = io.find_one({ @@ -258,29 +345,28 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): }) if existing_version is None: + self.log.debug("Creating new version ...") version_id = io.insert_one(version).inserted_id else: + self.log.debug("Updating existing version ...") # Check if instance have set `append` mode which cause that # only replicated representations are set to archive append_repres = instance.data.get("append", False) + bulk_writes = [] # Update version data - # TODO query by _id and - io.update_many({ - 'type': 'version', - 'parent': subset["_id"], - 'name': version_number + version_id = existing_version['_id'] + bulk_writes.append(UpdateOne({ + '_id': version_id }, { '$set': version - }) - version_id = existing_version['_id'] + })) # Find representations of existing version and archive them - current_repres = list(io.find({ + current_repres = io.find({ "type": "representation", "parent": version_id - })) - bulk_writes = [] + }) for repre in current_repres: if append_repres: # archive only duplicated representations @@ -304,346 +390,248 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ) version = io.find_one({"_id": version_id}) - instance.data["versionEntity"] = version + return version - existing_repres = list(io.find({ - "parent": version_id, - "type": "archived_representation" - })) + def prepare_representation(self, repre, + anatomy_data, + template_name, + existing_repres, + version, + instance_stagingdir, + instance): - instance.data['version'] = version['name'] + # create template data for Anatomy + template_data = copy.deepcopy(anatomy_data) - intent_value = instance.context.data.get("intent") - if intent_value and isinstance(intent_value, dict): - intent_value = intent_value.get("value") + # pre-flight validations + if repre["ext"].startswith("."): + raise ValueError("Extension must not start with a dot '.': " + "{}".format(repre["ext"])) - if intent_value: - anatomy_data["intent"] = intent_value + if repre.get("transfers"): + raise ValueError("Representation is not allowed to have transfers" + "data before integration. 
" + "Got: {}".format(repre["transfers"])) - anatomy = instance.context.data['anatomy'] + # required representation keys + files = repre['files'] + template_data["representation"] = repre["name"] + template_data["ext"] = repre["ext"] - # Find the representations to transfer amongst the files - # Each should be a single representation (as such, a single extension) - representations = [] - destination_list = [] + # optionals + # retrieve additional anatomy data from representation if exists + for representation_key, anatomy_key in { + # Representation Key: Anatomy data key + "resolutionWidth": "resolution_width", + "resolutionHeight": "resolution_height", + "fps": "fps", + "outputName": "output", + }.items(): + value = repre.get(representation_key) + if value: + template_data[anatomy_key] = value - orig_transfers = [] - if 'transfers' not in instance.data: - instance.data['transfers'] = [] + if repre.get('stagingDir'): + stagingdir = repre['stagingDir'] else: - orig_transfers = list(instance.data['transfers']) + # Fall back to instance staging dir if not explicitly + # set for representation in the instance + self.log.debug("Representation uses instance staging dir: " + "{}".format(instance_stagingdir)) + stagingdir = instance_stagingdir - family = self.main_family_from_instance(instance) + self.log.debug("Anatomy template name: {}".format(template_name)) + anatomy = instance.context.data['anatomy'] + template = os.path.normpath( + anatomy.templates[template_name]["path"]) - key_values = { - "families": family, - "tasks": task_name, - "hosts": instance.context.data["hostName"], - "task_types": task_type - } - profile = filter_profiles( - self.template_name_profiles, - key_values, - logger=self.log - ) + is_sequence_representation = isinstance(files, (list, tuple)) + if is_sequence_representation: + # Collection of files (sequence) + # Get the sequence as a collection. The files must be of a single + # sequence and have no remainder outside of the collections. 
+ collections, remainder = clique.assemble(files, + minimum_items=1) + if not collections: + raise ValueError("No collections found in files: " + "{}".format(files)) + if remainder: + raise ValueError("Files found not detected as part" + " of a sequence: {}".format(remainder)) + if len(collections) > 1: + raise ValueError("Files in sequence are not part of a" + " single sequence collection: " + "{}".format(collections)) + src_collection = collections[0] - template_name = "publish" - if profile: - template_name = profile["template_name"] - - published_representations = {} - for idx, repre in enumerate(instance.data["representations"]): - # reset transfers for next representation - # instance.data['transfers'] is used as a global variable - # in current codebase - instance.data['transfers'] = list(orig_transfers) - - if "delete" in repre.get("tags", []): - continue - - published_files = [] - - # create template data for Anatomy - template_data = copy.deepcopy(anatomy_data) - if intent_value is not None: - template_data["intent"] = intent_value - - resolution_width = repre.get("resolutionWidth") - resolution_height = repre.get("resolutionHeight") - fps = instance.data.get("fps") - - if resolution_width: - template_data["resolution_width"] = resolution_width - if resolution_width: - template_data["resolution_height"] = resolution_height - if resolution_width: - template_data["fps"] = fps - - files = repre['files'] - if repre.get('stagingDir'): - stagingdir = repre['stagingDir'] - - if repre.get("outputName"): - template_data["output"] = repre['outputName'] - - template_data["representation"] = repre["name"] - - ext = repre["ext"] - if ext.startswith("."): - self.log.warning(( - "Implementaion warning: <\"{}\">" - " Representation's extension stored under \"ext\" key " - " started with dot (\"{}\")." 
- ).format(repre["name"], ext)) - ext = ext[1:] - repre["ext"] = ext - template_data["ext"] = ext - - self.log.info(template_name) - template = os.path.normpath( - anatomy.templates[template_name]["path"]) - - sequence_repre = isinstance(files, list) - repre_context = None - if sequence_repre: - self.log.debug( - "files: {}".format(files)) - src_collections, remainder = clique.assemble(files) - self.log.debug( - "src_tail_collections: {}".format(str(src_collections))) - src_collection = src_collections[0] - - # Assert that each member has identical suffix - src_head = src_collection.format("{head}") - src_tail = src_collection.format("{tail}") - - # fix dst_padding - valid_files = [x for x in files if src_collection.match(x)] - padd_len = len( - valid_files[0].replace(src_head, "").replace(src_tail, "") - ) - src_padding_exp = "%0{}d".format(padd_len) - - test_dest_files = list() - for i in [1, 2]: - template_data["representation"] = repre['ext'] - if not repre.get("udim"): - template_data["frame"] = src_padding_exp % i - else: - template_data["udim"] = src_padding_exp % i - - anatomy_filled = anatomy.format(template_data) - template_filled = anatomy_filled[template_name]["path"] - if repre_context is None: - repre_context = template_filled.used_values - test_dest_files.append( - os.path.normpath(template_filled) - ) - if not repre.get("udim"): - template_data["frame"] = repre_context["frame"] - else: - template_data["udim"] = repre_context["udim"] - - self.log.debug( - "test_dest_files: {}".format(str(test_dest_files))) - - dst_collections, remainder = clique.assemble(test_dest_files) - dst_collection = dst_collections[0] - dst_head = dst_collection.format("{head}") - dst_tail = dst_collection.format("{tail}") - - index_frame_start = None + # If the representation has `frameStart` set it renumbers the + # frame indices of the published collection. It will start from + # that `frameStart` index instead. Thus if that frame start + # differs from the collection we want to shift the destination + # frame indices from the source collection. + destination_indexes = list(src_collection.indexes) + destination_padding = len(get_first_frame_padded(src_collection)) + if repre.get("frameStart") is not None: + index_frame_start = int(repre.get("frameStart")) # TODO use frame padding from right template group - if repre.get("frameStart") is not None: - frame_start_padding = int( - anatomy.templates["render"].get( - "frame_padding", - anatomy.templates["render"].get("padding") - ) + render_template = anatomy.templates["render"] + frame_start_padding = int( + render_template.get( + "frame_padding", + render_template.get("padding") ) - - index_frame_start = int(repre.get("frameStart")) - - # exception for slate workflow - if index_frame_start and "slate" in instance.data["families"]: - index_frame_start -= 1 - - dst_padding_exp = src_padding_exp - dst_start_frame = None - collection_start = list(src_collection.indexes)[0] - for i in src_collection.indexes: - # TODO 1.) do not count padding in each index iteration - # 2.) 
do not count dst_padding from src_padding before - # index_frame_start check - frame_number = i - collection_start - src_padding = src_padding_exp % i - - src_file_name = "{0}{1}{2}".format( - src_head, src_padding, src_tail) - - dst_padding = src_padding_exp % frame_number - - if index_frame_start is not None: - dst_padding_exp = "%0{}d".format(frame_start_padding) - dst_padding = dst_padding_exp % (index_frame_start + frame_number) # noqa: E501 - elif repre.get("udim"): - dst_padding = int(i) - - dst = "{0}{1}{2}".format( - dst_head, - dst_padding, - dst_tail - ) - - self.log.debug("destination: `{}`".format(dst)) - src = os.path.join(stagingdir, src_file_name) - - self.log.debug("source: {}".format(src)) - instance.data["transfers"].append([src, dst]) - - published_files.append(dst) - - # for adding first frame into db - if not dst_start_frame: - dst_start_frame = dst_padding - - # Store used frame value to template data - if repre.get("frame"): - template_data["frame"] = dst_start_frame - - dst = "{0}{1}{2}".format( - dst_head, - dst_start_frame, - dst_tail - ) - repre['published_path'] = dst - - else: - # Single file - # _______ - # | |\ - # | | - # | | - # | | - # |_______| - # - template_data.pop("frame", None) - fname = files - assert not os.path.isabs(fname), ( - "Given file name is a full path" ) - template_data["representation"] = repre['ext'] - # Store used frame value to template data - if repre.get("udim"): - template_data["udim"] = repre["udim"][0] - src = os.path.join(stagingdir, fname) - anatomy_filled = anatomy.format(template_data) - template_filled = anatomy_filled[template_name]["path"] - repre_context = template_filled.used_values - dst = os.path.normpath(template_filled) - - instance.data["transfers"].append([src, dst]) - - published_files.append(dst) - repre['published_path'] = dst - self.log.debug("__ dst: {}".format(dst)) + # Shift destination sequence to the start frame + src_start_frame = next(iter(src_collection.indexes)) + shift = index_frame_start - src_start_frame + if shift: + destination_indexes = [ + frame + shift for frame in destination_indexes + ] + destination_padding = frame_start_padding + # To construct the destination template with anatomy we require + # a Frame or UDIM tile set for the template data. We use the first + # index of the destination for that because that could've shifted + # from the source indexes, etc. 
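+            # For example (illustrative numbers only): a source collection
+            # covering frames 1001-1010 published with repre["frameStart"]
+            # set to 1 ends up with destination indexes 1-10, so the
+            # template below is filled with the first destination frame (1)
+            # rather than the source's first frame (1001).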
+ first_index_padded = get_frame_padded(frame=destination_indexes[0], + padding=destination_padding) if repre.get("udim"): - repre_context["udim"] = repre.get("udim") # store list + # UDIM representations handle ranges in a different manner + template_data["udim"] = first_index_padded + else: + template_data["frame"] = first_index_padded - repre["publishedFiles"] = published_files + # Construct destination collection from template + anatomy_filled = anatomy.format(template_data) + template_filled = anatomy_filled[template_name]["path"] + repre_context = template_filled.used_values + self.log.debug("Template filled: {}".format(str(template_filled))) + dst_collections, _remainder = clique.assemble( + [os.path.normpath(template_filled)], minimum_items=1 + ) + assert not _remainder, "This is a bug" + assert len(dst_collections) == 1, "This is a bug" + dst_collection = dst_collections[0] - for key in self.db_representation_context_keys: - value = template_data.get(key) - if not value: - continue - repre_context[key] = template_data[key] + # Update the destination indexes and padding + dst_collection.indexes = destination_indexes + dst_collection.padding = destination_padding + assert len(src_collection) == len(dst_collection), "This is a bug" - # Use previous representation's id if there are any - repre_id = None - repre_name_low = repre["name"].lower() - for _repre in existing_repres: - # NOTE should we check lowered names? - if repre_name_low == _repre["name"]: - repre_id = _repre["orig_id"] - break + transfers = [] + for src_file_name, dst in zip(src_collection, dst_collection): + src = os.path.join(stagingdir, src_file_name) + self.log.debug("source: {}".format(src)) + self.log.debug("destination: `{}`".format(dst)) + transfers.append(src, dst) - # Create new id if existing representations does not match - if repre_id is None: - repre_id = io.ObjectId() + # Store first frame as published path + # todo: remove `published_path` since it can be retrieved from + # `transfers` by taking the first destination transfers[0][1] + repre['published_path'] = next(iter(dst_collection)) + repre["transfers"].extend(transfers) - data = repre.get("data") or {} - data.update({'path': dst, 'template': template}) - representation = { - "_id": repre_id, - "schema": "openpype:representation-2.0", - "type": "representation", - "parent": version_id, - "name": repre['name'], - "data": data, - "dependencies": instance.data.get("dependencies", "").split(), + else: + # Single file + template_data.pop("frame", None) + fname = files + assert not os.path.isabs(fname), ( + "Given file name is a full path" + ) + # Store used frame value to template data + if repre.get("udim"): + template_data["udim"] = repre["udim"][0] + src = os.path.join(stagingdir, fname) + anatomy_filled = anatomy.format(template_data) + template_filled = anatomy_filled[template_name]["path"] + repre_context = template_filled.used_values + dst = os.path.normpath(template_filled) - # Imprint shortcut to context - # for performance reasons. 
- "context": repre_context - } + # Single file transfer + self.log.debug("source: {}".format(src)) + self.log.debug("destination: `{}`".format(dst)) + repre["transfers"] = [src, dst] - if repre.get("outputName"): - representation["context"]["output"] = repre['outputName'] + repre['published_path'] = dst - if sequence_repre and repre.get("frameStart") is not None: - representation['context']['frame'] = ( - dst_padding_exp % int(repre.get("frameStart")) - ) + if repre.get("udim"): + repre_context["udim"] = repre.get("udim") # store list - # any file that should be physically copied is expected in - # 'transfers' or 'hardlinks' - if instance.data.get('transfers', False) or \ - instance.data.get('hardlinks', False): - # could throw exception, will be caught in 'process' - # all integration to DB is being done together lower, - # so no rollback needed - self.log.debug("Integrating source files to destination ...") - self.integrated_file_sizes.update(self.integrate(instance)) - self.log.debug("Integrated files {}". - format(self.integrated_file_sizes)) + for key in self.db_representation_context_keys: + value = template_data.get(key) + if not value: + continue + repre_context[key] = template_data[key] - # get 'files' info for representation and all attached resources - self.log.debug("Preparing files information ...") - representation["files"] = self.get_files_info( - instance, - self.integrated_file_sizes) + # Use previous representation's id if there are any + repre_id = None + repre_name_lower = repre["name"].lower() + for _existing_repre in existing_repres: + # NOTE should we check lowered names? + if repre_name_lower == _existing_repre["name"].lower(): + repre_id = _existing_repre["orig_id"] + break - self.log.debug("__ representation: {}".format(representation)) - destination_list.append(dst) - self.log.debug("__ destination_list: {}".format(destination_list)) - instance.data['destination_list'] = destination_list - representations.append(representation) - published_representations[repre_id] = { - "representation": representation, - "anatomy_data": template_data, - "published_files": published_files - } - self.log.debug("__ representations: {}".format(representations)) + # Create new id if existing representations does not match + if repre_id is None: + repre_id = io.ObjectId() - # Remove old representations if there are any (before insertion of new) - if existing_repres: - repre_ids_to_remove = [] - for repre in existing_repres: - repre_ids_to_remove.append(repre["_id"]) - io.delete_many({"_id": {"$in": repre_ids_to_remove}}) + # todo: `repre` is not the actual `representation` entity + # we should simplify/clarify difference between data above + # and the actual representation entity for the database + data = repre.get("data") or {} + data.update({'path': dst, 'template': template}) + representation = { + "_id": repre_id, + "schema": "openpype:representation-2.0", + "type": "representation", + "parent": version["_id"], + "name": repre['name'], + "data": data, + "dependencies": instance.data.get("dependencies", "").split(), - for rep in instance.data["representations"]: - self.log.debug("__ rep: {}".format(rep)) + # Imprint shortcut to context for performance reasons. 
+ "context": repre_context + } - io.insert_many(representations) - instance.data["published_representations"] = ( - published_representations + if repre.get("outputName"): + representation["context"]["output"] = repre['outputName'] + + if is_sequence_representation and repre.get("frameStart") is not None: + representation['context']['frame'] = template_data["frame"] + + # any file that should be physically copied is expected in + # 'transfers' or 'hardlinks' + integrated_files = [] + if instance.data.get('transfers', False) or \ + instance.data.get('hardlinks', False): + # could throw exception, will be caught in 'process' + # all integration to DB is being done together lower, + # so no rollback needed + # todo: separate the actual integrating of the files onto its own + # taking just a list of transfers as inputs (potentially + # with copy mode flag, like hardlink/copy, etc.) + self.log.debug("Integrating source files to destination ...") + integrated_files = self.integrate(instance) + self.log.debug("Integrated files {}".format(integrated_files)) + + # get 'files' info for representation and all attached resources + self.log.debug("Preparing files information ...") + representation["files"] = self.get_files_info( + instance, + integrated_files ) - # self.log.debug("Representation: {}".format(representations)) - self.log.info("Registered {} items".format(len(representations))) + + return { + "representation": representation, + "anatomy_data": template_data, + # todo: avoid the need for 'published_files'? + # backwards compatibility + "published_files": [transfer[1] for transfer in repre["transfers"]] + } def integrate(self, instance): """ Move the files. @@ -653,92 +641,93 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): Args: instance: the instance to integrate Returns: - integrated_file_sizes: dictionary of destination file url and - its size in bytes + list: destination full paths of integrated files """ - # store destination url and size for reporting and rollback - integrated_file_sizes = {} + # store destinations for potential rollback and measuring sizes + destinations = [] transfers = list(instance.data.get("transfers", list())) for src, dest in transfers: - if os.path.normpath(src) != os.path.normpath(dest): + src = os.path.normpath(src) + dest = os.path.normpath(dest) + if src != dest: dest = self.get_dest_temp_url(dest) self.copy_file(src, dest) - # TODO needs to be updated during site implementation - integrated_file_sizes[dest] = os.path.getsize(dest) + destinations.append(dest) # Produce hardlinked copies - # Note: hardlink can only be produced between two files on the same - # server/disk and editing one of the two will edit both files at once. - # As such it is recommended to only make hardlinks between static files - # to ensure publishes remain safe and non-edited. hardlinks = instance.data.get("hardlinks", list()) for src, dest in hardlinks: dest = self.get_dest_temp_url(dest) - self.log.debug("Hardlinking file ... 
{} -> {}".format(src, dest)) if not os.path.exists(dest): self.hardlink_file(src, dest) - # TODO needs to be updated during site implementation - integrated_file_sizes[dest] = os.path.getsize(dest) + destinations.append(dest) - return integrated_file_sizes + return destinations + + def _create_folder_for_file(self, path): + dirname = os.path.dirname(path) + try: + os.makedirs(dirname) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + self.log.critical("An unexpected error occurred.") + six.reraise(*sys.exc_info()) def copy_file(self, src, dst): - """ Copy given source to destination + """Copy source filepath to destination filepath Arguments: src (str): the source file which needs to be copied - dst (str): the destination of the sourc file + dst (str): the destination filepath + + Returns: + None + + """ + self._create_folder_for_file(dst) + self.log.debug("Copying file ... {} -> {}".format(src, dst)) + copyfile(src, dst) + + def hardlink_file(self, src, dst): + """Hardlink source filepath to destination filepath. + + Note: + Hardlink can only be produced between two files on the same + server/disk and editing one of the two will edit both files at + once. As such it is recommended to only make hardlinks between + static files to ensure publishes remain safe and non-edited. + + Arguments: + src (str): the source file which needs to be hardlinked + dst (str): the destination filepath + Returns: None """ - src = os.path.normpath(src) - dst = os.path.normpath(dst) - self.log.debug("Copying file ... {} -> {}".format(src, dst)) - dirname = os.path.dirname(dst) - try: - os.makedirs(dirname) - except OSError as e: - if e.errno == errno.EEXIST: - pass - else: - self.log.critical("An unexpected error occurred.") - six.reraise(*sys.exc_info()) - - # copy file with speedcopy and check if size of files are simetrical - while True: - if not shutil._samefile(src, dst): - copyfile(src, dst) - else: - self.log.critical( - "files are the same {} to {}".format(src, dst) - ) - os.remove(dst) - try: - shutil.copyfile(src, dst) - self.log.debug("Copying files with shutil...") - except OSError as e: - self.log.critical("Cannot copy {} to {}".format(src, dst)) - self.log.critical(e) - six.reraise(*sys.exc_info()) - if str(getsize(src)) in str(getsize(dst)): - break - - def hardlink_file(self, src, dst): - dirname = os.path.dirname(dst) - - try: - os.makedirs(dirname) - except OSError as e: - if e.errno == errno.EEXIST: - pass - else: - self.log.critical("An unexpected error occurred.") - six.reraise(*sys.exc_info()) - + self._create_folder_for_file(dst) + self.log.debug("Hardlinking file ... {} -> {}".format(src, dst)) create_hard_link(src, dst) - def get_subset(self, asset, instance): + def _get_instance_families(self, instance): + """Get all families of the instance""" + # todo: move this to lib? + family = instance.data.get("family") + families = [] + if family: + families.append(family) + + for _family in (instance.data.get("families") or []): + if _family not in families: + families.append(_family) + + return families + + def register_subset(self, instance): + # todo: rely less on self.prepare_anatomy to create this value + asset = instance.data.get("assetEntity") # <- from prepare_anatomy :( subset_name = instance.data["subset"] subset = io.find_one({ "type": "subset", @@ -748,18 +737,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if subset is None: self.log.info("Subset '%s' not found, creating ..." % subset_name) - self.log.debug("families. 
%s" % instance.data.get('families')) - self.log.debug( - "families. %s" % type(instance.data.get('families'))) - - family = instance.data.get("family") - families = [] - if family: - families.append(family) - - for _family in (instance.data.get("families") or []): - if _family not in families: - families.append(_family) + families = self._get_instance_families(instance) _id = io.insert_one({ "schema": "openpype:subset-3.0", @@ -773,8 +751,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): subset = io.find_one({"_id": _id}) - # QUESTION Why is changing of group and updating it's - # families in 'get_subset'? + # Update subset group self._set_subset_group(instance, subset["_id"]) # Update families on subset. @@ -838,7 +815,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.subset_grouping_profiles, filtering_criteria ) - # Skip if there is not matchin profile + # Skip if there is not matching profile if not matching_profile: return None @@ -867,41 +844,17 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return filled_template - def create_version(self, subset, version_number, data=None): - """ Copy given source to destination - - Args: - subset (dict): the registered subset of the asset - version_number (int): the version number - - Returns: - dict: collection of data to create a version - """ - - return {"schema": "openpype:version-3.0", - "type": "version", - "parent": subset["_id"], - "name": version_number, - "data": data} - - def create_version_data(self, context, instance): + def create_version_data(self, instance): """Create the data collection for the version Args: - context: the current context instance: the current instance being published Returns: dict: the required information with instance.data as key """ - families = [] - current_families = instance.data.get("families", list()) - instance_family = instance.data.get("family", None) - - if instance_family is not None: - families.append(instance_family) - families += current_families + context = instance.context # create relative source path for DB if "source" in instance.data: @@ -910,10 +863,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): source = context.data["currentFile"] anatomy = instance.context.data["anatomy"] source = self.get_rootless_path(anatomy, source) - self.log.debug("Source: {}".format(source)) + version_data = { - "families": families, + "families": self._get_instance_families(instance), "time": context.data["time"], "author": context.data["user"], "source": source, @@ -924,7 +877,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ) } - intent_value = instance.context.data.get("intent") + intent_value = context.data.get("intent") if intent_value and isinstance(intent_value, dict): intent_value = intent_value.get("value") @@ -944,10 +897,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def main_family_from_instance(self, instance): """Returns main family of entered instance.""" - family = instance.data.get("family") - if not family: - family = instance.data["families"][0] - return family + return self._get_instance_families(instance)[0] def get_rootless_path(self, anatomy, path): """ Returns, if possible, path without absolute portion from host @@ -976,7 +926,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ).format(path)) return path - def get_files_info(self, instance, integrated_file_sizes): + def get_files_info(self, instance): """ Prepare 'files' portion for attached resources and main asset. 
Combining records from 'transfers' and 'hardlinks' parts from instance. @@ -991,27 +941,18 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): output_resources: array of dictionaries to be added to 'files' key in representation """ + # todo: refactor to use transfers/hardlinks of representations + # currently broken logic resources = list(instance.data.get("transfers", [])) resources.extend(list(instance.data.get("hardlinks", []))) + self.log.debug("get_files_info.resources:{}".format(resources)) - self.log.debug("get_resource_files_info.resources:{}". - format(resources)) + sites = self.compute_resource_sync_sites(instance) output_resources = [] anatomy = instance.context.data["anatomy"] for _src, dest in resources: - path = self.get_rootless_path(anatomy, dest) - dest = self.get_dest_temp_url(dest) - file_hash = openpype.api.source_hash(dest) - if self.TMP_FILE_EXT and \ - ',{}'.format(self.TMP_FILE_EXT) in file_hash: - file_hash = file_hash.replace(',{}'.format(self.TMP_FILE_EXT), - '') - - file_info = self.prepare_file_info(path, - integrated_file_sizes[dest], - file_hash, - instance=instance) + file_info = self.prepare_file_info(dest, anatomy, sites=sites) output_resources.append(file_info) return output_resources @@ -1031,8 +972,11 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): dest += '.{}'.format(self.TMP_FILE_EXT) return dest - def prepare_file_info(self, path, size=None, file_hash=None, - sites=None, instance=None): + def get_dest_final_url(self, temp_file_url): + """Temporary destination file url to final destination file url""" + return re.sub(r'\.{}$'.format(self.TMP_FILE_EXT), '', temp_file_url) + + def prepare_file_info(self, path, anatomy, sites): """ Prepare information for one file (asset or resource) Arguments: @@ -1042,74 +986,78 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): sites(optional): array of published locations, [ {'name':'studio', 'created_dt':date} by default keys expected ['studio', 'site1', 'gdrive1'] - instance(dict, optional): to get collected settings Returns: rec: dictionary with filled info """ + file_hash = openpype.api.source_hash(path) + + # todo: Avoid this logic + # Strip the temporary file extension from the file hash + if self.TMP_FILE_EXT and ',{}'.format(self.TMP_FILE_EXT) in file_hash: + file_hash = file_hash.replace(',{}'.format(self.TMP_FILE_EXT), '') + + return { + "_id": io.ObjectId(), + "path": self.get_rootless_path(anatomy, path), + "size": os.path.getsize(path), + "hash": file_hash, + "sites": sites + } + + def compute_resource_sync_sites(self, instance): + """Get available resource sync sites""" + # Sync server logic + # TODO: Clean up sync settings local_site = 'studio' # default remote_site = None - always_accesible = [] + always_accessible = [] sync_project_presets = None - rec = { - "_id": io.ObjectId(), - "path": path - } - if size: - rec["size"] = size + system_sync_server_presets = ( + instance.context.data["system_settings"] + ["modules"] + ["sync_server"]) + log.debug("system_sett:: {}".format(system_sync_server_presets)) - if file_hash: - rec["hash"] = file_hash - - if sites: - rec["sites"] = sites - else: - system_sync_server_presets = ( - instance.context.data["system_settings"] - ["modules"] + if system_sync_server_presets["enabled"]: + sync_project_presets = ( + instance.context.data["project_settings"] + ["global"] ["sync_server"]) - log.debug("system_sett:: {}".format(system_sync_server_presets)) - if system_sync_server_presets["enabled"]: - sync_project_presets = ( - 
instance.context.data["project_settings"] - ["global"] - ["sync_server"]) + if sync_project_presets and sync_project_presets["enabled"]: + local_site, remote_site = self._get_sites(sync_project_presets) + always_accessible = sync_project_presets["config"]. \ + get("always_accessible_on", []) - if sync_project_presets and sync_project_presets["enabled"]: - local_site, remote_site = self._get_sites(sync_project_presets) + already_attached_sites = {} + meta = {"name": local_site, "created_dt": datetime.now()} + sites = [meta] + already_attached_sites[meta["name"]] = meta["created_dt"] - always_accesible = sync_project_presets["config"]. \ - get("always_accessible_on", []) + if sync_project_presets and sync_project_presets["enabled"]: + if remote_site and \ + remote_site not in already_attached_sites.keys(): + # add remote + meta = {"name": remote_site.strip()} + sites.append(meta) + already_attached_sites[meta["name"]] = None - already_attached_sites = {} - meta = {"name": local_site, "created_dt": datetime.now()} - rec["sites"] = [meta] - already_attached_sites[meta["name"]] = meta["created_dt"] - - if sync_project_presets and sync_project_presets["enabled"]: - if remote_site and \ - remote_site not in already_attached_sites.keys(): - # add remote - meta = {"name": remote_site.strip()} - rec["sites"].append(meta) + # add skeleton for site where it should be always synced to + for always_on_site in always_accessible: + if always_on_site not in already_attached_sites.keys(): + meta = {"name": always_on_site.strip()} + sites.append(meta) already_attached_sites[meta["name"]] = None - # add skeleton for site where it should be always synced to - for always_on_site in always_accesible: - if always_on_site not in already_attached_sites.keys(): - meta = {"name": always_on_site.strip()} - rec["sites"].append(meta) - already_attached_sites[meta["name"]] = None + # add alternative sites + alt = self._add_alternative_sites(system_sync_server_presets, + already_attached_sites) + sites.extend(alt) - # add alternative sites - rec = self._add_alternative_sites(system_sync_server_presets, - already_attached_sites, - rec) + log.debug("final sites:: {}".format(sites)) - log.debug("final sites:: {}".format(rec["sites"])) - - return rec + return sites def _get_sites(self, sync_project_presets): """Returns tuple (local_site, remote_site)""" @@ -1129,14 +1077,14 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def _add_alternative_sites(self, system_sync_server_presets, - already_attached_sites, - rec): + already_attached_sites): """Loop through all configured sites and add alternatives. 
See SyncServerModule.handle_alternate_site """ conf_sites = system_sync_server_presets.get("sites", {}) + alternative_sites = [] for site_name, site_info in conf_sites.items(): alt_sites = set(site_info.get("alternative_sites", [])) already_attached_keys = list(already_attached_sites.keys()) @@ -1149,12 +1097,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # alt site inherits state of 'created_dt' if real_created: meta["created_dt"] = real_created - rec["sites"].append(meta) + alternative_sites.append(meta) already_attached_sites[meta["name"]] = real_created - return rec + return alternative_sites - def handle_destination_files(self, integrated_file_sizes, mode): + def handle_destination_files(self, destinations, mode): """ Clean destination files Called when error happened during integrating to DB or to disk OR called to rename uploaded files from temporary name to final to @@ -1162,46 +1110,38 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): Used to clean unwanted files Arguments: - integrated_file_sizes: dictionary, file urls as keys, size as value + destinations (list): file paths mode: 'remove' - clean files, 'finalize' - rename files, remove TMP_FILE_EXT suffix denoting temp file """ - if integrated_file_sizes: - for file_url, _file_size in integrated_file_sizes.items(): - if not os.path.exists(file_url): + if not destinations: + return + + for file_url in destinations: + if not os.path.exists(file_url): + self.log.debug( + "File {} was not found.".format(file_url) + ) + continue + + try: + if mode == 'remove': + self.log.debug("Removing file {}".format(file_url)) + os.remove(file_url) + if mode == 'finalize': + + new_name = self.get_dest_final_url(file_url) + if os.path.exists(new_name): + self.log.debug("Removing existing " + "file: {}".format(new_name)) + os.remove(new_name) + self.log.debug( - "File {} was not found.".format(file_url) + "Renaming file {} to {}".format(file_url, new_name) ) - continue - - try: - if mode == 'remove': - self.log.debug("Removing file {}".format(file_url)) - os.remove(file_url) - if mode == 'finalize': - new_name = re.sub( - r'\.{}$'.format(self.TMP_FILE_EXT), - '', - file_url - ) - - if os.path.exists(new_name): - self.log.debug( - "Overwriting file {} to {}".format( - file_url, new_name - ) - ) - shutil.copy(file_url, new_name) - os.remove(file_url) - else: - self.log.debug( - "Renaming file {} to {}".format( - file_url, new_name - ) - ) - os.rename(file_url, new_name) - except OSError: - self.log.error("Cannot {} file {}".format(mode, file_url), - exc_info=True) - six.reraise(*sys.exc_info()) + os.rename(file_url, new_name) + except OSError: + self.log.error("Cannot {} file {}".format(mode, file_url), + exc_info=True) + six.reraise(*sys.exc_info()) From ae1a9ff4cf996445bd74dcd7641639ed8342592e Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 17 Mar 2022 11:49:12 +0100 Subject: [PATCH 02/72] More refactoring + draft (untested) implementation for separating File Transaction logic --- openpype/plugins/publish/integrate_new.py | 421 +++++++++++----------- 1 file changed, 215 insertions(+), 206 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index e4986e3b3f..500456eaed 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -1,12 +1,10 @@ import os -from os.path import getsize import logging import sys import copy import clique import errno import six -import re from pymongo import DeleteOne, InsertOne, UpdateOne 
import pyblish.api @@ -14,7 +12,6 @@ from avalon import io from avalon.api import format_template_with_optional_keys import openpype.api from datetime import datetime -# from pype.modules import ModulesManager from openpype.lib.profiles_filtering import filter_profiles from openpype.lib import ( prepare_template_data, @@ -41,6 +38,160 @@ def get_first_frame_padded(collection): return get_frame_padded(start_frame, padding=collection.padding) +class FileTransaction(object): + """ + + The file transaction is a three step process. + + 1) Rename any existing files to a "temporary backup" during `process()` + 2) Copy the files to final destination during `process()` + 3) Remove any backed up files (*no rollback possible!) during `finalize()` + + Step 3 is done during `finalize()`. If not called the .bak files will + remain on disk. + + These steps try to ensure that we don't overwrite half of any existing + files e.g. if they are currently in use. + + Note: + A regular filesystem is *not* a transactional file system and even + though this implementation tries to produce a 'safe copy' with a + potential rollback do keep in mind that it's inherently unsafe due + to how filesystem works and a myriad of things could happen during + the transaction that break the logic. A file storage could go down, + permissions could be changed, other machines could be moving or writing + files. A lot can happen. + + Warning: + Any folders created during the transfer will not be removed. + + """ + + MODE_COPY = 0 + MODE_HARDLINK = 1 + + def __init__(self, log=None): + + if log is None: + log = logging.getLogger("FileTransaction") + + self.log = log + + # The transfer queue + # todo: make this an actual FIFO queue? + self._transfers = {} + + # Destination file paths that a file was transferred to + self._transferred = [] + + # Backup file location mapping to original locations + self._backup_to_original = {} + + def add(self, src, dst, mode=MODE_COPY): + """Add a new file to transfer queue""" + opts = {"mode": mode} + + src = os.path.normpath(src) + dst = os.path.normpath(dst) + + if dst in self._transfers: + queued_src = self._transfers[dst][0] + if src == queued_src: + self.log.debug("File transfer was already " + "in queue: {} -> {}".format(src, dst)) + return + else: + self.log.warning("File transfer in queue overwritten") + + self._transfers[dst] = (src, opts) + + def process(self): + + # Backup any existing files + for dst in self._transfers.keys(): + if os.path.exists(dst): + # Backup original file + # todo: add timestamp or uuid to ensure unique + backup = dst + ".bak" + self._backup_to_original[backup] = dst + self.log.debug("Backup existing file: " + "{} -> {}".format(dst, backup)) + os.rename(dst, backup) + + # Copy the files to transfer + for dst, (src, opts) in self._transfers.items(): + self._create_folder_for_file(dst) + + if opts["mode"] == self.MODE_COPY: + self.log.debug("Copying file ... {} -> {}".format(src, dst)) + copyfile(src, dst) + elif opts["mode"] == self.MODE_HARDLINK: + self.log.debug("Hardlinking file ... 
{} -> {}".format(src, dst)) + create_hard_link(src, dst) + + self._transferred.append(dst) + + def finalize(self): + # Delete any backed up files + for backup in self._backup_to_original.keys(): + try: + os.remove(backup) + except OSError: + self.log.error("Failed to remove backup file: " + "{}".format(backup), + exc_info=True) + + def rollback(self): + + errors = 0 + + # Rollback any transferred files + for path in self._transferred: + try: + os.remove(path) + except OSError: + errors += 1 + self.log.error("Failed to rollback created file: " + "{}".format(path), + exc_info=True) + + # Rollback the backups + for backup, original in self._backup_to_original.items(): + try: + os.rename(backup, original) + except OSError: + errors +=1 + self.log.error("Failed to restore original file: " + "{} -> {}".format(backup, original), + exc_info=True) + + if errors: + self.log.error("{} errors occurred during " + "rollback.".format(errors), exc_info=True) + six.reraise(*sys.exc_info()) + + @property + def transferred(self): + """Return the processed transfers destination paths""" + return list(self._transferred) + + @property + def backups(self): + """Return the backup file paths""" + return list(self._backup_to_original.keys()) + + def _create_folder_for_file(self, path): + dirname = os.path.dirname(path) + try: + os.makedirs(dirname) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + self.log.critical("An unexpected error occurred.") + six.reraise(*sys.exc_info()) + + class IntegrateAssetNew(pyblish.api.InstancePlugin): """Resolve any dependency issues @@ -122,18 +273,11 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ] default_template_name = "publish" - # suffix to denote temporary files, use without '.' - TMP_FILE_EXT = 'tmp' - - # file_url : file_size of all published and uploaded files - destinations = list() - # Attributes set by settings template_name_profiles = None subset_grouping_profiles = None def process(self, instance): - self.destinations = [] # Exclude instances that also contain families from exclude families families = set( @@ -143,17 +287,20 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if families & set(self.exclude_families): return + file_transactions = FileTransaction(log=self.log) try: - self.register(instance) - self.log.info("Integrated Asset in to the database ...") - self.handle_destination_files(self.destinations, - 'finalize') + self.register(instance, file_transactions) except Exception: # clean destination + # todo: rollback any registered entities? (or how safe are we?) + file_transactions.rollback() self.log.critical("Error when registering", exc_info=True) - self.handle_destination_files(self.destinations, 'remove') six.reraise(*sys.exc_info()) + # Finalizing can't be rollbacked safely so no use for moving it to + # the try, except. 
+ file_transactions.finalize() + def prepare_anatomy(self, instance): """Prepare anatomy data used to define representation destinations""" @@ -244,7 +391,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return template_name, anatomy_data - def register(self, instance): + def register(self, instance, file_transactions): instance_stagingdir = instance.data.get("stagingDir") if not instance_stagingdir: @@ -272,9 +419,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): version = self.register_version(instance, subset) instance.data["versionEntity"] = version - instance.data['version'] = version['name'] - existing_repres = list(io.find({ + archived_repres = list(io.find({ "parent": version["_id"], "type": "archived_representation" })) @@ -294,19 +440,47 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): prepared = self.prepare_representation(repre, anatomy_data, template_name, - existing_repres, + archived_repres, version, instance_stagingdir, instance) + representation = prepared["representation"] + + # todo: register the file transfers correctly + for src, dst in representation["transfers"]: + file_transactions.add(src, dst, + mode=file_transactions.MODE_COPY) + for src, dst in representation["hardlinks"]: + file_transactions.add(src, dst, + mode=file_transactions.MODE_HARDLINK) # todo: simplify this? - representation = prepared["representation"] representations.append(representation) published_representations[representation["_id"]] = prepared + # could throw exception, will be caught in 'process' + # all integration to DB is being done together lower, + # so no rollback needed + self.log.debug("Integrating source files to destination ...") + file_transactions.process() + self.log.debug("Backup files " + "{}".format(file_transactions.backups)) + self.log.debug("Integrated files " + "{}".format(file_transactions.transferred)) + + # todo: fix get file info for transferred files per representation + # currently it'd set all files for all representations + # get 'files' info for representation and all attached resources + integrated_files = file_transactions.transferred + self.log.debug("Preparing files information ...") + representation["files"] = self.get_files_info( + instance, + integrated_files + ) + # Remove old representations if there are any (before insertion of new) - if existing_repres: - repre_ids_to_remove = [repre["_id"] for repre in existing_repres] + if archived_repres: + repre_ids_to_remove = [repre["_id"] for repre in archived_repres] io.delete_many({"_id": {"$in": repre_ids_to_remove}}) # Write the new representations to the database @@ -395,7 +569,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def prepare_representation(self, repre, anatomy_data, template_name, - existing_repres, + archived_repres, version, instance_stagingdir, instance): @@ -439,11 +613,13 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.log.debug("Representation uses instance staging dir: " "{}".format(instance_stagingdir)) stagingdir = instance_stagingdir + if not stagingdir: + raise ValueError("No staging directory set for representation: " + "{}".format(repre)) self.log.debug("Anatomy template name: {}".format(template_name)) anatomy = instance.context.data['anatomy'] - template = os.path.normpath( - anatomy.templates[template_name]["path"]) + template = os.path.normpath(anatomy.templates[template_name]["path"]) is_sequence_representation = isinstance(files, (list, tuple)) if is_sequence_representation: @@ -566,24 +742,21 @@ class 
IntegrateAssetNew(pyblish.api.InstancePlugin): continue repre_context[key] = template_data[key] - # Use previous representation's id if there are any - repre_id = None - repre_name_lower = repre["name"].lower() - for _existing_repre in existing_repres: - # NOTE should we check lowered names? - if repre_name_lower == _existing_repre["name"].lower(): - repre_id = _existing_repre["orig_id"] - break + # Define representation id + repre_id = io.ObjectId() - # Create new id if existing representations does not match - if repre_id is None: - repre_id = io.ObjectId() + # Use previous representation's id if there is a name match + repre_name_lower = repre["name"].lower() + for _archived_repres in archived_repres: + if repre_name_lower == _archived_repres["name"].lower(): + repre_id = _archived_repres["orig_id"] + break # todo: `repre` is not the actual `representation` entity # we should simplify/clarify difference between data above # and the actual representation entity for the database data = repre.get("data") or {} - data.update({'path': dst, 'template': template}) + data.update({'path': repre["published_path"], 'template': template}) representation = { "_id": repre_id, "schema": "openpype:representation-2.0", @@ -597,34 +770,14 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "context": repre_context } + # todo: simplify/streamline which additional data makes its way into + # the representation context if repre.get("outputName"): representation["context"]["output"] = repre['outputName'] if is_sequence_representation and repre.get("frameStart") is not None: representation['context']['frame'] = template_data["frame"] - # any file that should be physically copied is expected in - # 'transfers' or 'hardlinks' - integrated_files = [] - if instance.data.get('transfers', False) or \ - instance.data.get('hardlinks', False): - # could throw exception, will be caught in 'process' - # all integration to DB is being done together lower, - # so no rollback needed - # todo: separate the actual integrating of the files onto its own - # taking just a list of transfers as inputs (potentially - # with copy mode flag, like hardlink/copy, etc.) - self.log.debug("Integrating source files to destination ...") - integrated_files = self.integrate(instance) - self.log.debug("Integrated files {}".format(integrated_files)) - - # get 'files' info for representation and all attached resources - self.log.debug("Preparing files information ...") - representation["files"] = self.get_files_info( - instance, - integrated_files - ) - return { "representation": representation, "anatomy_data": template_data, @@ -633,84 +786,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "published_files": [transfer[1] for transfer in repre["transfers"]] } - def integrate(self, instance): - """ Move the files. 
- - Through `instance.data["transfers"]` - - Args: - instance: the instance to integrate - Returns: - list: destination full paths of integrated files - """ - # store destinations for potential rollback and measuring sizes - destinations = [] - transfers = list(instance.data.get("transfers", list())) - for src, dest in transfers: - src = os.path.normpath(src) - dest = os.path.normpath(dest) - if src != dest: - dest = self.get_dest_temp_url(dest) - self.copy_file(src, dest) - destinations.append(dest) - - # Produce hardlinked copies - hardlinks = instance.data.get("hardlinks", list()) - for src, dest in hardlinks: - dest = self.get_dest_temp_url(dest) - if not os.path.exists(dest): - self.hardlink_file(src, dest) - - destinations.append(dest) - - return destinations - - def _create_folder_for_file(self, path): - dirname = os.path.dirname(path) - try: - os.makedirs(dirname) - except OSError as e: - if e.errno == errno.EEXIST: - pass - else: - self.log.critical("An unexpected error occurred.") - six.reraise(*sys.exc_info()) - - def copy_file(self, src, dst): - """Copy source filepath to destination filepath - - Arguments: - src (str): the source file which needs to be copied - dst (str): the destination filepath - - Returns: - None - - """ - self._create_folder_for_file(dst) - self.log.debug("Copying file ... {} -> {}".format(src, dst)) - copyfile(src, dst) - - def hardlink_file(self, src, dst): - """Hardlink source filepath to destination filepath. - - Note: - Hardlink can only be produced between two files on the same - server/disk and editing one of the two will edit both files at - once. As such it is recommended to only make hardlinks between - static files to ensure publishes remain safe and non-edited. - - Arguments: - src (str): the source file which needs to be hardlinked - dst (str): the destination filepath - - Returns: - None - """ - self._create_folder_for_file(dst) - self.log.debug("Hardlinking file ... {} -> {}".format(src, dst)) - create_hard_link(src, dst) - def _get_instance_families(self, instance): """Get all families of the instance""" # todo: move this to lib? @@ -727,7 +802,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def register_subset(self, instance): # todo: rely less on self.prepare_anatomy to create this value - asset = instance.data.get("assetEntity") # <- from prepare_anatomy :( + asset = instance.data.get("assetEntity") # stored by prepare_anatomy subset_name = instance.data["subset"] subset = io.find_one({ "type": "subset", @@ -957,25 +1032,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return output_resources - def get_dest_temp_url(self, dest): - """ Enhance destination path with TMP_FILE_EXT to denote temporary - file. 
- Temporary files will be renamed after successful registration - into DB and full copy to destination - - Arguments: - dest: destination url of published file (absolute) - Returns: - dest: destination path + '.TMP_FILE_EXT' - """ - if self.TMP_FILE_EXT and '.{}'.format(self.TMP_FILE_EXT) not in dest: - dest += '.{}'.format(self.TMP_FILE_EXT) - return dest - - def get_dest_final_url(self, temp_file_url): - """Temporary destination file url to final destination file url""" - return re.sub(r'\.{}$'.format(self.TMP_FILE_EXT), '', temp_file_url) - def prepare_file_info(self, path, anatomy, sites): """ Prepare information for one file (asset or resource) @@ -991,11 +1047,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): """ file_hash = openpype.api.source_hash(path) - # todo: Avoid this logic - # Strip the temporary file extension from the file hash - if self.TMP_FILE_EXT and ',{}'.format(self.TMP_FILE_EXT) in file_hash: - file_hash = file_hash.replace(',{}'.format(self.TMP_FILE_EXT), '') - return { "_id": io.ObjectId(), "path": self.get_rootless_path(anatomy, path), @@ -1004,6 +1055,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "sites": sites } + # region sync sites def compute_resource_sync_sites(self, instance): """Get available resource sync sites""" # Sync server logic @@ -1101,47 +1153,4 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): already_attached_sites[meta["name"]] = real_created return alternative_sites - - def handle_destination_files(self, destinations, mode): - """ Clean destination files - Called when error happened during integrating to DB or to disk - OR called to rename uploaded files from temporary name to final to - highlight publishing in progress/broken - Used to clean unwanted files - - Arguments: - destinations (list): file paths - mode: 'remove' - clean files, - 'finalize' - rename files, - remove TMP_FILE_EXT suffix denoting temp file - """ - if not destinations: - return - - for file_url in destinations: - if not os.path.exists(file_url): - self.log.debug( - "File {} was not found.".format(file_url) - ) - continue - - try: - if mode == 'remove': - self.log.debug("Removing file {}".format(file_url)) - os.remove(file_url) - if mode == 'finalize': - - new_name = self.get_dest_final_url(file_url) - if os.path.exists(new_name): - self.log.debug("Removing existing " - "file: {}".format(new_name)) - os.remove(new_name) - - self.log.debug( - "Renaming file {} to {}".format(file_url, new_name) - ) - os.rename(file_url, new_name) - except OSError: - self.log.error("Cannot {} file {}".format(mode, file_url), - exc_info=True) - six.reraise(*sys.exc_info()) + # endregion From 9f6cc5df3a11031fb18155c97e0a73bb6f3f6108 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 17 Mar 2022 11:51:06 +0100 Subject: [PATCH 03/72] Fix hound --- openpype/plugins/publish/integrate_new.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 500456eaed..e74b528ae7 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -126,7 +126,8 @@ class FileTransaction(object): self.log.debug("Copying file ... {} -> {}".format(src, dst)) copyfile(src, dst) elif opts["mode"] == self.MODE_HARDLINK: - self.log.debug("Hardlinking file ... {} -> {}".format(src, dst)) + self.log.debug("Hardlinking file ... 
{} -> {}".format(src, + dst)) create_hard_link(src, dst) self._transferred.append(dst) @@ -160,7 +161,7 @@ class FileTransaction(object): try: os.rename(backup, original) except OSError: - errors +=1 + errors += 1 self.log.error("Failed to restore original file: " "{} -> {}".format(backup, original), exc_info=True) From 56bcd8cec35f201ead80a18c09f6c070b76209c1 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 17 Mar 2022 16:30:49 +0100 Subject: [PATCH 04/72] Continue refactor, restore functionality - now can correctly publish as before (rudimentary tested only) --- openpype/plugins/publish/integrate_new.py | 136 +++++++++++----------- 1 file changed, 70 insertions(+), 66 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index e74b528ae7..c550c1011c 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -101,7 +101,10 @@ class FileTransaction(object): "in queue: {} -> {}".format(src, dst)) return else: - self.log.warning("File transfer in queue overwritten") + self.log.warning("File transfer in queue replaced..") + self.log.debug("Removed from queue: " + "{} -> {}".format(queued_src, dst)) + self.log.debug("Added to queue: {} -> {}".format(src, dst)) self._transfers[dst] = (src, opts) @@ -298,7 +301,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.log.critical("Error when registering", exc_info=True) six.reraise(*sys.exc_info()) - # Finalizing can't be rollbacked safely so no use for moving it to + # Finalizing can't rollback safely so no use for moving it to # the try, except. file_transactions.finalize() @@ -426,11 +429,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "type": "archived_representation" })) - # Find the representations to transfer amongst the files - # Each should be a single representation (as such, a single extension) + # Prepare all representations template_name, anatomy_data = self.prepare_anatomy(instance) - published_representations = {} - representations = [] + prepared_representations = [] for repre in instance.data["representations"]: if "delete" in repre.get("tags", []): @@ -438,6 +439,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "{}".format(repre)) continue + # todo: reduce/simplify what is returned from this function prepared = self.prepare_representation(repre, anatomy_data, template_name, @@ -445,23 +447,23 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): version, instance_stagingdir, instance) - representation = prepared["representation"] - # todo: register the file transfers correctly - for src, dst in representation["transfers"]: - file_transactions.add(src, dst, - mode=file_transactions.MODE_COPY) - for src, dst in representation["hardlinks"]: - file_transactions.add(src, dst, - mode=file_transactions.MODE_HARDLINK) + for src, dst in prepared["transfers"]: + # todo: add support for hardlink transfers + file_transactions.add(src, dst) - # todo: simplify this? - representations.append(representation) - published_representations[representation["_id"]] = prepared + prepared_representations.append(prepared) - # could throw exception, will be caught in 'process' - # all integration to DB is being done together lower, - # so no rollback needed + # Each instance can also have pre-defined transfers not explicitly + # part of a representation - like texture resources used by a + # .ma representation. Those destination paths are pre-defined, etc. + # todo: should we move or simplify this logic? 
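        # Illustrative sketch only (hypothetical paths): these instance-level
        # entries are assumed to be plain (src, dst) pairs of absolute file
        # paths, e.g.
        #   instance.data["transfers"] = [
        #       ("/staging/resources/diffuse.png",
        #        "/projects/asset/publish/v001/resources/diffuse.png")]
        # and instance.data["hardlinks"] follows the same (src, dst) shape.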
+ for src, dst in instance.data.get("transfers", []): + file_transactions.add(src, dst, mode=FileTransaction.MODE_COPY) + for src, dst in instance.data.get("hardlinks", []): + file_transactions.add(src, dst, mode=FileTransaction.MODE_HARDLINK) + + # Process all file transfers of all integrations now self.log.debug("Integrating source files to destination ...") file_transactions.process() self.log.debug("Backup files " @@ -469,17 +471,21 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.log.debug("Integrated files " "{}".format(file_transactions.transferred)) - # todo: fix get file info for transferred files per representation - # currently it'd set all files for all representations - # get 'files' info for representation and all attached resources - integrated_files = file_transactions.transferred - self.log.debug("Preparing files information ...") - representation["files"] = self.get_files_info( - instance, - integrated_files - ) + # Finalize the representations now the published files are integrated + # Get 'files' info for representations and its attached resources + self.log.debug("Retrieving Representation files information ...") + sites = self.compute_resource_sync_sites(instance) + anatomy = instance.context.data["anatomy"] + representations = [] + for prepared in prepared_representations: + transfers = prepared["transfers"] + representation = prepared["representation"] + representation["files"] = self.get_files_info( + transfers, sites, anatomy + ) + representations.append(representation) - # Remove old representations if there are any (before insertion of new) + # Remove all archived representations if archived_repres: repre_ids_to_remove = [repre["_id"] for repre in archived_repres] io.delete_many({"_id": {"$in": repre_ids_to_remove}}) @@ -487,7 +493,11 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Write the new representations to the database io.insert_many(representations) - instance.data["published_representations"] = published_representations + # Backwards compatibility + # todo: can we avoid the need to store this? + instance.data["published_representations"] = { + p["representation"]["_id"]: p for p in prepared_representations + } self.log.info("Registered {} representations" "".format(len(representations))) @@ -495,7 +505,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def register_version(self, instance, subset): version_number = instance.data["version"] - self.log.debug("Next version: v{}".format(version_number)) + self.log.debug("Version: v{0:03d}".format(version_number)) version_data = self.create_version_data(instance) version_data_instance = instance.data.get('versionData') @@ -565,6 +575,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ) version = io.find_one({"_id": version_id}) + + self.log.info("Registered version: v{0:03d}".format(version["name"])) + return version def prepare_representation(self, repre, @@ -585,7 +598,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if repre.get("transfers"): raise ValueError("Representation is not allowed to have transfers" - "data before integration. " + "data before integration. 
They are computed in " + "the integrator" "Got: {}".format(repre["transfers"])) # required representation keys @@ -698,18 +712,11 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): dst_collection.padding = destination_padding assert len(src_collection) == len(dst_collection), "This is a bug" + # Multiple file transfers transfers = [] for src_file_name, dst in zip(src_collection, dst_collection): src = os.path.join(stagingdir, src_file_name) - self.log.debug("source: {}".format(src)) - self.log.debug("destination: `{}`".format(dst)) - transfers.append(src, dst) - - # Store first frame as published path - # todo: remove `published_path` since it can be retrieved from - # `transfers` by taking the first destination transfers[0][1] - repre['published_path'] = next(iter(dst_collection)) - repre["transfers"].extend(transfers) + transfers.append((src, dst)) else: # Single file @@ -728,11 +735,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): dst = os.path.normpath(template_filled) # Single file transfer - self.log.debug("source: {}".format(src)) - self.log.debug("destination: `{}`".format(dst)) - repre["transfers"] = [src, dst] - - repre['published_path'] = dst + transfers = [(src, dst)] if repre.get("udim"): repre_context["udim"] = repre.get("udim") # store list @@ -753,11 +756,16 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): repre_id = _archived_repres["orig_id"] break + # Backwards compatibility: + # Store first transferred destination as published path data + # todo: can we remove this? + published_path = transfers[0][1] + # todo: `repre` is not the actual `representation` entity # we should simplify/clarify difference between data above # and the actual representation entity for the database data = repre.get("data") or {} - data.update({'path': repre["published_path"], 'template': template}) + data.update({'path': published_path, 'template': template}) representation = { "_id": repre_id, "schema": "openpype:representation-2.0", @@ -782,9 +790,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return { "representation": representation, "anatomy_data": template_data, - # todo: avoid the need for 'published_files'? + "transfers": transfers, + # todo: avoid the need for 'published_files' used by Integrate Hero # backwards compatibility - "published_files": [transfer[1] for transfer in repre["transfers"]] + "published_files": [transfer[1] for transfer in transfers] } def _get_instance_families(self, instance): @@ -805,6 +814,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # todo: rely less on self.prepare_anatomy to create this value asset = instance.data.get("assetEntity") # stored by prepare_anatomy subset_name = instance.data["subset"] + self.log.debug("Subset: {}".format(subset_name)) + subset = io.find_one({ "type": "subset", "parent": asset["_id"], @@ -838,6 +849,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): {"$set": {"data.families": families}} ) + self.log.info("Registered subset: {}".format(subset_name)) + return subset def _set_subset_group(self, instance, subset_id): @@ -871,9 +884,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if not self.subset_grouping_profiles: return None + # TODO: Resolve below questions # QUESTION - # - is there a chance that task name is not filled in anatomy - # data? + # - is there a chance that task name is not filled in anatomy data? # - should we use context task in that case? 
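        # Illustrative note: when the collectors did fill it in, the task
        # entry in anatomy data is assumed to look roughly like
        #   anatomy_data["task"] = {"name": "modeling",
        #                           "type": "Modeling",
        #                           "short": "mdl"}
        # (example values are hypothetical); a missing task means no key.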
anatomy_data = instance.data["anatomyData"] task_name = None @@ -1002,7 +1015,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ).format(path)) return path - def get_files_info(self, instance): + def get_files_info(self, transfers, sites, anatomy): """ Prepare 'files' portion for attached resources and main asset. Combining records from 'transfers' and 'hardlinks' parts from instance. @@ -1017,21 +1030,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): output_resources: array of dictionaries to be added to 'files' key in representation """ - # todo: refactor to use transfers/hardlinks of representations - # currently broken logic - resources = list(instance.data.get("transfers", [])) - resources.extend(list(instance.data.get("hardlinks", []))) - self.log.debug("get_files_info.resources:{}".format(resources)) - - sites = self.compute_resource_sync_sites(instance) - - output_resources = [] - anatomy = instance.context.data["anatomy"] - for _src, dest in resources: + file_infos = [] + for _src, dest in transfers: file_info = self.prepare_file_info(dest, anatomy, sites=sites) - output_resources.append(file_info) + file_infos.append(file_info) - return output_resources + return file_infos def prepare_file_info(self, path, anatomy, sites): """ Prepare information for one file (asset or resource) From 8996280224aa30ad800e955ff165bdbe48bb8296 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Wed, 23 Mar 2022 23:38:05 +0100 Subject: [PATCH 05/72] Reduce duplicated logic by implementing `resolve_profile` method --- openpype/plugins/publish/integrate_new.py | 107 ++++++++++------------ 1 file changed, 48 insertions(+), 59 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 2142920a09..e43afbf7f6 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -359,17 +359,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "short": task_code } - elif "task" in anatomy_data: - # Just set 'task_name' variable to context task - task_name = anatomy_data["task"]["name"] - task_type = anatomy_data["task"]["type"] - - else: - task_name = None - task_type = None - # Fill family in anatomy data - anatomy_data["family"] = instance.data.get("family") + anatomy_data["family"] = self.main_family_from_instance(instance) intent_value = instance.context.data.get("intent") if intent_value and isinstance(intent_value, dict): @@ -378,25 +369,44 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if intent_value: anatomy_data["intent"] = intent_value - # Get profile - key_values = { - "families": self.main_family_from_instance(instance), - "tasks": task_name, - "hosts": instance.context.data["hostName"], - "task_types": task_type - } - profile = filter_profiles( - self.template_name_profiles, - key_values, - logger=self.log - ) - + profile, _ = self.resolve_profile(self.template_name_profiles, + instance) template_name = "publish" if profile: template_name = profile["template_name"] return template_name, anatomy_data + def resolve_profile(self, profiles, instance): + """Resolve profile by family, task name, host name and task type""" + + # Anatomy data is pre-filled by Collectors and `self.prepare_anatomy` + anatomy_data = instance.data["anatomyData"] + + # TODO: Resolve below questions + # QUESTION + # - is there a chance that task name is not filled in anatomy data? + # - should we use context task in that case? 
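        # Illustrative sketch only: each profile from settings is assumed
        # to look roughly like
        #   {"families": ["render"], "hosts": ["maya"],
        #    "task_types": [], "tasks": [], "template_name": "render"}
        # where filter_profiles() returns the best match for the criteria
        # built below, or None when nothing matches.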
+ # Task can be optional in anatomy data + task = anatomy_data.get("task", {}) + + filter_criteria = { + "families": anatomy_data["family"], + "tasks": task.get("name"), + "hosts": anatomy_data["host"], + "task_types": task.get("type") + } + # Get profile + profile = filter_profiles( + profiles, + filter_criteria, + logger=self.log + ) + + # TODO: See if we can simplify to avoid needing to return filter + # criteria used in `self._get_subset_group` + return profile, filter_criteria + def register(self, instance, file_transactions): instance_stagingdir = instance.data.get("stagingDir") @@ -886,50 +896,29 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if not self.subset_grouping_profiles: return None - # TODO: Resolve below questions - # QUESTION - # - is there a chance that task name is not filled in anatomy data? - # - should we use context task in that case? - anatomy_data = instance.data["anatomyData"] - task_name = None - task_type = None - if "task" in anatomy_data: - task_name = anatomy_data["task"]["name"] - task_type = anatomy_data["task"]["type"] - filtering_criteria = { - "families": instance.data["family"], - "hosts": instance.context.data["hostName"], - "tasks": task_name, - "task_types": task_type - } - matching_profile = filter_profiles( - self.subset_grouping_profiles, - filtering_criteria - ) - # Skip if there is not matching profile - if not matching_profile: + # Skip if there is no matching profile + profile, criteria = self.resolve_profile(self.subset_grouping_profiles, + instance) + if not profile: return None - filled_template = None - template = matching_profile["template"] - fill_pairs = ( - ("family", filtering_criteria["families"]), - ("task", filtering_criteria["tasks"]), - ("host", filtering_criteria["hosts"]), - ("subset", instance.data["subset"]), - ("renderlayer", instance.data.get("renderlayer")) - ) - fill_pairs = prepare_template_data(fill_pairs) + template = profile["template"] + fill_pairs = prepare_template_data({ + "family": criteria["families"], + "task": criteria["tasks"], + "host": criteria["hosts"], + "subset": instance.data["subset"], + "renderlayer": instance.data.get("renderlayer") + }) + + filled_template = None try: filled_template = StringTemplate.format_strict_template( template, fill_pairs ) except (KeyError, TemplateUnsolved): - keys = [] - if fill_pairs: - keys = fill_pairs.keys() - + keys = fill_pairs.keys() msg = "Subset grouping failed. " \ "Only {} are expected in Settings".format(','.join(keys)) self.log.warning(msg) From 177e244bd80bf0b1d472948cba45f40dfecd672e Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Wed, 23 Mar 2022 23:45:24 +0100 Subject: [PATCH 06/72] Remove prepare anatomy data logic that is already collected/generated in CollectAnatomyContextData and CollectAnatomyInstanceData. 
This currently was duplicated logic and should not be handled in the Integrator --- openpype/plugins/publish/integrate_new.py | 51 +---------------------- 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index e43afbf7f6..a1a116bd43 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -310,58 +310,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def prepare_anatomy(self, instance): """Prepare anatomy data used to define representation destinations""" - context = instance.context - anatomy_data = instance.data["anatomyData"] - project_entity = instance.data["projectEntity"] - - context_asset_name = None - context_asset_doc = context.data.get("assetEntity") - if context_asset_doc: - context_asset_name = context_asset_doc["name"] - - asset_name = instance.data["asset"] - asset_entity = instance.data.get("assetEntity") - if not asset_entity or asset_entity["name"] != context_asset_name: - asset_entity = io.find_one({ - "type": "asset", - "name": asset_name, - "parent": project_entity["_id"] - }) - assert asset_entity, ( - "No asset found by the name \"{0}\" in project \"{1}\"" - ).format(asset_name, project_entity["name"]) - - instance.data["assetEntity"] = asset_entity - - # update anatomy data with asset specific keys - # - name should already been set - hierarchy = "" - parents = asset_entity["data"]["parents"] - if parents: - hierarchy = "/".join(parents) - anatomy_data["hierarchy"] = hierarchy - - # Make sure task name in anatomy data is same as on instance.data - asset_tasks = ( - asset_entity.get("data", {}).get("tasks") - ) or {} - task_name = instance.data.get("task") - if task_name: - task_info = asset_tasks.get(task_name) or {} - task_type = task_info.get("type") - - project_task_types = project_entity["config"]["tasks"] - task_code = project_task_types.get(task_type, {}).get("short_name") - anatomy_data["task"] = { - "name": task_name, - "type": task_type, - "short": task_code - } - - # Fill family in anatomy data - anatomy_data["family"] = self.main_family_from_instance(instance) + # TODO: This logic should move to CollectAnatomyContextData intent_value = instance.context.data.get("intent") if intent_value and isinstance(intent_value, dict): intent_value = intent_value.get("value") From 3fd2d020149e5b33c0be0ab7000376a0f30ed96f Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Wed, 23 Mar 2022 23:55:40 +0100 Subject: [PATCH 07/72] Move logic to clarify what should be removed/moved and bring logic closer to where it's used --- openpype/plugins/publish/integrate_new.py | 36 ++++++++++------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index a1a116bd43..e57fbaf294 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -293,6 +293,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if families & set(self.exclude_families): return + # TODO: Avoid the need to do any adjustments to anatomy data + # Best case scenario that's all handled by collectors + self.prepare_anatomy(instance) + file_transactions = FileTransaction(log=self.log) try: self.register(instance, file_transactions) @@ -309,24 +313,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def prepare_anatomy(self, instance): """Prepare anatomy data used to define representation destinations""" - - 
anatomy_data = instance.data["anatomyData"] - # TODO: This logic should move to CollectAnatomyContextData intent_value = instance.context.data.get("intent") if intent_value and isinstance(intent_value, dict): intent_value = intent_value.get("value") - - if intent_value: - anatomy_data["intent"] = intent_value - - profile, _ = self.resolve_profile(self.template_name_profiles, - instance) - template_name = "publish" - if profile: - template_name = profile["template_name"] - - return template_name, anatomy_data + if intent_value: + instance.data["anatomyData"]["intent"] = intent_value def resolve_profile(self, profiles, instance): """Resolve profile by family, task name, host name and task type""" @@ -382,6 +374,13 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ) ) + # Define publish template name from profiles + profile, _ = self.resolve_profile(self.template_name_profiles, + instance) + template_name = "publish" + if profile: + template_name = profile["template_name"] + subset = self.register_subset(instance) version = self.register_version(instance, subset) @@ -393,7 +392,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): })) # Prepare all representations - template_name, anatomy_data = self.prepare_anatomy(instance) prepared_representations = [] for repre in instance.data["representations"]: @@ -404,7 +402,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # todo: reduce/simplify what is returned from this function prepared = self.prepare_representation(repre, - anatomy_data, template_name, archived_repres, version, @@ -544,16 +541,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return version def prepare_representation(self, repre, - anatomy_data, template_name, archived_repres, version, instance_stagingdir, instance): - # create template data for Anatomy - template_data = copy.deepcopy(anatomy_data) - # pre-flight validations if repre["ext"].startswith("."): raise ValueError("Extension must not start with a dot '.': " @@ -565,6 +558,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "the integrator" "Got: {}".format(repre["transfers"])) + # create template data for Anatomy + template_data = copy.deepcopy(instance.data["anatomyData"]) + # required representation keys files = repre['files'] template_data["representation"] = repre["name"] From 8edfb3f7d3f926539f7f060725b3b7e0b1d697e5 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 00:10:59 +0100 Subject: [PATCH 08/72] Simplify profile filtering --- openpype/plugins/publish/integrate_new.py | 42 +++++++++-------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index e57fbaf294..bdc045d1db 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -320,35 +320,21 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if intent_value: instance.data["anatomyData"]["intent"] = intent_value - def resolve_profile(self, profiles, instance): - """Resolve profile by family, task name, host name and task type""" - - # Anatomy data is pre-filled by Collectors and `self.prepare_anatomy` + def get_profile_filter_criteria(self, instance): + """Return filter criteria for `filter_profiles`""" + # Anatomy data is pre-filled by Collectors anatomy_data = instance.data["anatomyData"] - # TODO: Resolve below questions - # QUESTION - # - is there a chance that task name is not filled in anatomy data? - # - should we use context task in that case? 
# Task can be optional in anatomy data task = anatomy_data.get("task", {}) - filter_criteria = { + # Return filter criteria + return { "families": anatomy_data["family"], "tasks": task.get("name"), "hosts": anatomy_data["host"], "task_types": task.get("type") } - # Get profile - profile = filter_profiles( - profiles, - filter_criteria, - logger=self.log - ) - - # TODO: See if we can simplify to avoid needing to return filter - # criteria used in `self._get_subset_group` - return profile, filter_criteria def register(self, instance, file_transactions): @@ -375,8 +361,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ) # Define publish template name from profiles - profile, _ = self.resolve_profile(self.template_name_profiles, - instance) + filter_criteria = self.get_profile_filter_criteria(instance) + profile = filter_profiles(self.template_name_profiles, + filter_criteria, + logger=self.log) template_name = "publish" if profile: template_name = profile["template_name"] @@ -844,17 +832,19 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return None # Skip if there is no matching profile - profile, criteria = self.resolve_profile(self.subset_grouping_profiles, - instance) + filter_criteria = self.get_profile_filter_criteria(instance) + profile = filter_profiles(self.subset_grouping_profiles, + filter_criteria, + logger=self.log) if not profile: return None template = profile["template"] fill_pairs = prepare_template_data({ - "family": criteria["families"], - "task": criteria["tasks"], - "host": criteria["hosts"], + "family": filter_criteria["families"], + "task": filter_criteria["tasks"], + "host": filter_criteria["hosts"], "subset": instance.data["subset"], "renderlayer": instance.data.get("renderlayer") }) From 79286ead4b91504afa30df711e8f751451f53552 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 00:16:32 +0100 Subject: [PATCH 09/72] Re-use get families logic --- openpype/plugins/publish/integrate_new.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index bdc045d1db..e66a71c483 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -286,10 +286,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def process(self, instance): # Exclude instances that also contain families from exclude families - families = set( - # Consider family and families data - [instance.data["family"]] + instance.data.get("families", []) - ) + families = set(self._get_instance_families(instance)) if families & set(self.exclude_families): return From d6c682723de6eb025b21768ced54b2756373fba6 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 00:19:16 +0100 Subject: [PATCH 10/72] Remove todo since assetEntity already comes from Collectors + re-use families variable --- openpype/plugins/publish/integrate_new.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index e66a71c483..856f8af163 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -755,8 +755,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return families def register_subset(self, instance): - # todo: rely less on self.prepare_anatomy to create this value - asset = instance.data.get("assetEntity") # stored by prepare_anatomy + asset = instance.data.get("assetEntity") 
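        # Sketch of the assumed asset document shape (illustrative values):
        #   {"_id": ObjectId("..."), "type": "asset",
        #    "name": "characterA", "data": {...}}
        # Only its "_id" is relied upon below, as the parent of the subset.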
subset_name = instance.data["subset"] self.log.debug("Subset: {}".format(subset_name)) @@ -766,9 +765,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "name": subset_name }) + families = self._get_instance_families(instance) if subset is None: self.log.info("Subset '%s' not found, creating ..." % subset_name) - families = self._get_instance_families(instance) _id = io.insert_one({ "schema": "openpype:subset-3.0", @@ -786,8 +785,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self._set_subset_group(instance, subset["_id"]) # Update families on subset. - families = [instance.data["family"]] - families.extend(instance.data.get("families", [])) io.update_many( {"type": "subset", "_id": ObjectId(subset["_id"])}, {"$set": {"data.families": families}} From 47259f8ef7b177892c76a8dbfde6d147cf664d39 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 00:21:44 +0100 Subject: [PATCH 11/72] Add todo to move get subset group logic --- openpype/plugins/publish/integrate_new.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 856f8af163..91d2f3a943 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -821,6 +821,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): Attribute 'subset_grouping_profiles' is defined by OpenPype settings. """ + # TODO: This logic is better suited for a Collector to just store + # instance.data["subsetGroup"] # Skip if 'subset_grouping_profiles' is empty if not self.subset_grouping_profiles: return None From b128e0addffc77991e5ff25f2d219d8ed8613136 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 14:21:32 +0100 Subject: [PATCH 12/72] Override stored repre context `udim` for backwards compatibility --- openpype/plugins/publish/integrate_new.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 91d2f3a943..e3abb8f04f 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -275,7 +275,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): exclude_families = ["clip"] db_representation_context_keys = [ "project", "asset", "task", "subset", "version", "representation", - "family", "hierarchy", "task", "username", "frame", "udim" + "family", "hierarchy", "task", "username", "frame" ] default_template_name = "publish" @@ -681,15 +681,17 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Single file transfer transfers = [(src, dst)] - if repre.get("udim"): - repre_context["udim"] = repre.get("udim") # store list - for key in self.db_representation_context_keys: value = template_data.get(key) if not value: continue repre_context[key] = template_data[key] + # Explicitly store the full list even though template data might + # have a different value + if repre.get("udim"): + repre_context["udim"] = repre.get("udim") # store list + # Define representation id repre_id = ObjectId() From 9997acbbeae32f1473c39df6cf78a8bfa7257aff Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 14:22:49 +0100 Subject: [PATCH 13/72] Encapsulate version data completely into its own function --- openpype/plugins/publish/integrate_new.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 
e3abb8f04f..6e92f81b8b 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -452,17 +452,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): version_number = instance.data["version"] self.log.debug("Version: v{0:03d}".format(version_number)) - version_data = self.create_version_data(instance) - version_data_instance = instance.data.get('versionData') - if version_data_instance: - version_data.update(version_data_instance) - version = { "schema": "openpype:version-3.0", "type": "version", "parent": subset["_id"], "name": version_number, - "data": version_data + "data": self.create_version_data(instance) } repres = instance.data.get("representations", []) @@ -909,6 +904,11 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if key in instance.data: version_data[key] = instance.data[key] + # Include instance.data[versionData] directly + version_data_instance = instance.data.get('versionData') + if version_data_instance: + version_data.update(version_data_instance) + return version_data def main_family_from_instance(self, instance): From 5b1f6eb30c459011fa685dcf325f39c4af72838e Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 14:23:27 +0100 Subject: [PATCH 14/72] Move logic closer to where it's used --- openpype/plugins/publish/integrate_new.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 6e92f81b8b..a787f8d50d 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -460,9 +460,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "data": self.create_version_data(instance) } - repres = instance.data.get("representations", []) - new_repre_names_low = [_repre["name"].lower() for _repre in repres] - existing_version = io.find_one({ 'type': 'version', 'parent': subset["_id"], @@ -488,6 +485,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): })) # Find representations of existing version and archive them + repres = instance.data.get("representations", []) + new_repre_names_low = [_repre["name"].lower() for _repre in repres] current_repres = io.find({ "type": "representation", "parent": version_id From 3369c15bdf837d6d8e83a8c054794e95fccd061b Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 14:25:15 +0100 Subject: [PATCH 15/72] Preparation to delay Version document write to database closer to representation write --- openpype/plugins/publish/integrate_new.py | 24 ++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index a787f8d50d..8dd2d57959 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -466,15 +466,16 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): 'name': version_number }) + bulk_writes = [] if existing_version is None: self.log.debug("Creating new version ...") - version_id = io.insert_one(version).inserted_id + version["_id"] = ObjectId() + bulk_writes.append(InsertOne(version)) else: self.log.debug("Updating existing version ...") # Check if instance have set `append` mode which cause that # only replicated representations are set to archive append_repres = instance.data.get("append", False) - bulk_writes = [] # Update version data version_id = existing_version['_id'] @@ -484,6 +485,12 @@ class 
IntegrateAssetNew(pyblish.api.InstancePlugin): '$set': version })) + # Instead of directly writing and querying we reproduce what + # the resulting version would look like so we can hold off making + # changes to the database to avoid the need for 'rollback' + version = copy.deepcopy(version) + version["_id"] = existing_version["_id"] + # Find representations of existing version and archive them repres = instance.data.get("representations", []) new_repre_names_low = [_repre["name"].lower() for _repre in repres] @@ -507,13 +514,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): repre["type"] = "archived_representation" bulk_writes.append(InsertOne(repre)) - # bulk updates - if bulk_writes: - io._database[io.Session["AVALON_PROJECT"]].bulk_write( - bulk_writes - ) - - version = io.find_one({"_id": version_id}) + # bulk updates + # todo: Try to avoid writing already until after we've prepared + # representations to allow easier rollback? + io._database[io.Session["AVALON_PROJECT"]].bulk_write( + bulk_writes + ) self.log.info("Registered version: v{0:03d}".format(version["name"])) From 42175ff6f829ce30ef61538243d7bd4b804c8e28 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 14:41:56 +0100 Subject: [PATCH 16/72] Fix `get_profile_filter_criteria` anatomy data key for app name --- openpype/plugins/publish/integrate_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 8dd2d57959..e3dcfcc93c 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -329,7 +329,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return { "families": anatomy_data["family"], "tasks": task.get("name"), - "hosts": anatomy_data["host"], + "hosts": anatomy_data["app"], "task_types": task.get("type") } From 7713af5a1dac4b0080dc6006f08811dcd9fc9d04 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 17:23:02 +0100 Subject: [PATCH 17/72] Fix sequence functionality --- openpype/plugins/publish/integrate_new.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index e3dcfcc93c..b5986a62ee 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -645,16 +645,20 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): repre_context = template_filled.used_values self.log.debug("Template filled: {}".format(str(template_filled))) dst_collections, _remainder = clique.assemble( - [os.path.normpath(template_filled)], minimum_items=1 + [os.path.normpath(template_filled)], + minimum_items=1, + patterns=[clique.PATTERNS["frames"]] ) assert not _remainder, "This is a bug" assert len(dst_collections) == 1, "This is a bug" dst_collection = dst_collections[0] # Update the destination indexes and padding - dst_collection.indexes = destination_indexes + dst_collection.indexes.clear() + dst_collection.indexes.update(set(destination_indexes)) dst_collection.padding = destination_padding - assert len(src_collection) == len(dst_collection), "This is a bug" + assert len(src_collection.indexes) == \ + len(dst_collection.indexes), "This is a bug" # Multiple file transfers transfers = [] From 229626bffdbc7e59c2206798b5bb3066a5602228 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 17:36:01 +0100 Subject: [PATCH 18/72] Reformat code --- 
openpype/plugins/publish/integrate_new.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index b5986a62ee..9e3e9de77c 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -657,8 +657,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): dst_collection.indexes.clear() dst_collection.indexes.update(set(destination_indexes)) dst_collection.padding = destination_padding - assert len(src_collection.indexes) == \ - len(dst_collection.indexes), "This is a bug" + assert ( + len(src_collection.indexes) == len(dst_collection.indexes) + ), "This is a bug" # Multiple file transfers transfers = [] From e1eb0887e0bdaaf012e95f289bdaddcf9089d65c Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 24 Mar 2022 20:26:10 +0100 Subject: [PATCH 19/72] Reduce database calls for register subset + prepare for bulk writes logic --- openpype/plugins/publish/integrate_new.py | 72 ++++++++++------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 9e3e9de77c..44768df368 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -766,63 +766,53 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): subset_name = instance.data["subset"] self.log.debug("Subset: {}".format(subset_name)) + # Get existing subset if it exists subset = io.find_one({ "type": "subset", "parent": asset["_id"], "name": subset_name }) - families = self._get_instance_families(instance) - if subset is None: - self.log.info("Subset '%s' not found, creating ..." % subset_name) + # Define subset data + data = { + "families": self._get_instance_families(instance) + } - _id = io.insert_one({ + subset_group = instance.data.get("subsetGroup") + if not subset_group: + # todo: move _get_subset_group fallback to its own collector + subset_group = self._get_subset_group(instance) + if subset_group: + data["subsetGroup"] = subset_group + + if subset is None: + # Create a new subset + self.log.info("Subset '%s' not found, creating ..." % subset_name) + subset = { + "_id": ObjectId(), "schema": "openpype:subset-3.0", "type": "subset", "name": subset_name, - "data": { - "families": families - }, + "data": data, "parent": asset["_id"] - }).inserted_id + } + io.insert_one(subset) - subset = io.find_one({"_id": _id}) - - # Update subset group - self._set_subset_group(instance, subset["_id"]) - - # Update families on subset. - io.update_many( - {"type": "subset", "_id": ObjectId(subset["_id"])}, - {"$set": {"data.families": families}} - ) + else: + # Update existing subset data with new data and set in database. + # We also change the found subset in-place so we don't need to + # re-query the subset afterwards + subset["data"].update(data) + io.update_many( + {"type": "subset", "_id": subset["_id"]}, + {"$set": { + "data": subset["data"] + }} + ) self.log.info("Registered subset: {}".format(subset_name)) - return subset - def _set_subset_group(self, instance, subset_id): - """ - Mark subset as belonging to group in DB. 
- - Uses Settings > Global > Publish plugins > IntegrateAssetNew - - Args: - instance (dict): processed instance - subset_id (str): DB's subset _id - - """ - # Fist look into instance data - subset_group = instance.data.get("subsetGroup") - if not subset_group: - subset_group = self._get_subset_group(instance) - - if subset_group: - io.update_many({ - 'type': 'subset', - '_id': ObjectId(subset_id) - }, {'$set': {'data.subsetGroup': subset_group}}) - def _get_subset_group(self, instance): """Look into subset group profiles set by settings. From b906365f593025bf7bbba67ea6d8a907b717c98e Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Fri, 25 Mar 2022 21:42:39 +0100 Subject: [PATCH 20/72] Separate site sync logic further from Integrator plug-in (Draft) --- openpype/plugins/publish/integrate_new.py | 154 ++++++++++++---------- 1 file changed, 88 insertions(+), 66 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 44768df368..138a4fcc06 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -419,7 +419,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Finalize the representations now the published files are integrated # Get 'files' info for representations and its attached resources self.log.debug("Retrieving Representation files information ...") - sites = self.compute_resource_sync_sites(instance) + sites = SiteSync.compute_resource_sync_sites( + system_settings=instance.context.data["system_settings"], + project_settings=instance.context.data["project_settings"] + ) + log.debug("final sites:: {}".format(sites)) + anatomy = instance.context.data["anatomy"] representations = [] for prepared in prepared_representations: @@ -987,63 +992,65 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "sites": sites } - # region sync sites - def compute_resource_sync_sites(self, instance): + +class SiteSync(object): + """Logic for Site Sync Module functionality""" + + @classmethod + def compute_resource_sync_sites(cls, + system_settings, + project_settings): """Get available resource sync sites""" - # Sync server logic - # TODO: Clean up sync settings - local_site = 'studio' # default - remote_site = None - always_accessible = [] - sync_project_presets = None - system_sync_server_presets = ( - instance.context.data["system_settings"] - ["modules"] - ["sync_server"]) + def create_metadata(name, created=True): + """Create sync site metadata for site with `name`""" + metadata = {"name": name} + if created: + metadata["created_dt"] = datetime.now() + return metadata + + default_sites = [create_metadata("studio")] + + # If sync site module is disabled return default fallback site + system_sync_server_presets = system_settings["modules"]["sync_server"] log.debug("system_sett:: {}".format(system_sync_server_presets)) + if not system_sync_server_presets["enabled"]: + return default_sites - if system_sync_server_presets["enabled"]: - sync_project_presets = ( - instance.context.data["project_settings"] - ["global"] - ["sync_server"]) + # If sync site module is disabled in current + # project return default fallback site + sync_project_presets = project_settings["global"]["sync_server"] + if not sync_project_presets["enabled"]: + return default_sites - if sync_project_presets and sync_project_presets["enabled"]: - local_site, remote_site = self._get_sites(sync_project_presets) - always_accessible = sync_project_presets["config"]. 
\
-                get("always_accessible_on", [])
+        local_site, remote_site = cls._get_sites(sync_project_presets)
 
-        already_attached_sites = {}
-        meta = {"name": local_site, "created_dt": datetime.now()}
-        sites = [meta]
-        already_attached_sites[meta["name"]] = meta["created_dt"]
+        # Attached sites metadata by site name
+        # That is the local site, remote site, the always accessible sites
+        # and their alternate sites (alias of sites with different protocol)
+        attached_sites = dict()
+        attached_sites[local_site] = create_metadata(local_site)
 
-        if sync_project_presets and sync_project_presets["enabled"]:
-            if remote_site and \
-                    remote_site not in already_attached_sites.keys():
-                # add remote
-                meta = {"name": remote_site.strip()}
-                sites.append(meta)
-                already_attached_sites[meta["name"]] = None
+        if remote_site and remote_site != local_site:
+            attached_sites[remote_site] = create_metadata(remote_site,
+                                                          created=False)
 
-            # add skeleton for site where it should be always synced to
-            for always_on_site in always_accessible:
-                if always_on_site not in already_attached_sites.keys():
-                    meta = {"name": always_on_site.strip()}
-                    sites.append(meta)
-                    already_attached_sites[meta["name"]] = None
+        # add skeleton for sites where it should be always synced to
+        always_accessible_sites = (
+            sync_project_presets["config"].get("always_accessible_on", [])
+        )
+        for site in always_accessible_sites:
+            site = site.strip()
+            if site not in attached_sites:
+                attached_sites[site] = create_metadata(site, created=False)
 
-            # add alternative sites
-            alt = self._add_alternative_sites(system_sync_server_presets,
-                                              already_attached_sites)
-            sites.extend(alt)
+        # add alternative sites
+        cls._add_alternative_sites(system_sync_server_presets, attached_sites)
 
-        log.debug("final sites:: {}".format(sites))
+        return list(attached_sites.values())
 
-        return sites
-
-    def _get_sites(self, sync_project_presets):
+    @staticmethod
+    def _get_sites(sync_project_presets):
         """Returns tuple (local_site, remote_site)"""
         local_site_id = openpype.api.get_local_site_id()
         local_site = sync_project_presets["config"]. \
             get("active_site", "studio").strip()
 
         if local_site == 'local':
             local_site = local_site_id
 
         remote_site = sync_project_presets["config"].get("remote_site")
+        if remote_site:
+            remote_site = remote_site.strip()
         if remote_site == 'local':
             remote_site = local_site_id
 
         return local_site, remote_site
 
-    def _add_alternative_sites(self,
-                               system_sync_server_presets,
-                               already_attached_sites):
+    @staticmethod
+    def _add_alternative_sites(system_sync_server_presets,
+                               attached_sites):
         """Loop through all configured sites and add alternatives.
 
+        For each configured site, if one of its alternative sites is
+        already attached then the configured site is registered as well,
+        inheriting the same "created" state to match the existing data.
+ See SyncServerModule.handle_alternate_site """ conf_sites = system_sync_server_presets.get("sites", {}) - alternative_sites = [] for site_name, site_info in conf_sites.items(): - alt_sites = set(site_info.get("alternative_sites", [])) - already_attached_keys = list(already_attached_sites.keys()) - for added_site in already_attached_keys: - if added_site in alt_sites: - if site_name in already_attached_keys: - continue - meta = {"name": site_name} - real_created = already_attached_sites[added_site] - # alt site inherits state of 'created_dt' - if real_created: - meta["created_dt"] = real_created - alternative_sites.append(meta) - already_attached_sites[meta["name"]] = real_created - return alternative_sites - # endregion + # Skip if already defined + if site_name in attached_sites: + continue + + # Get alternate sites (stripped names) for this site name + alt_sites = site_info.get("alternative_sites", []) + alt_sites = [site.strip() for site in alt_sites] + alt_sites = set(alt_sites) + + # If no alternative sites we don't need to add + if not alt_sites: + continue + + # Take a copy of data of the first alternate site that is already + # defined as an attached site to match the same state. + match_meta = next((attached_sites[site] for site in alt_sites + if site in attached_sites), None) + if not match_meta: + continue + + alt_site_meta = copy.deepcopy(match_meta) + alt_site_meta["name"] = site_name + + # Note: We change mutable `attached_site` dict in-place + attached_sites[site_name] = alt_site_meta From e0aaa5f6cc2fd2a2e6fa708364136d9d6235163d Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 14:20:13 +0100 Subject: [PATCH 21/72] Move FileTransaction into lib --- openpype/lib/file_transaction.py | 171 ++++++++++++++++++++++ openpype/plugins/publish/integrate_new.py | 167 +-------------------- 2 files changed, 172 insertions(+), 166 deletions(-) create mode 100644 openpype/lib/file_transaction.py diff --git a/openpype/lib/file_transaction.py b/openpype/lib/file_transaction.py new file mode 100644 index 0000000000..57592e297f --- /dev/null +++ b/openpype/lib/file_transaction.py @@ -0,0 +1,171 @@ +import os +import logging +import sys +import errno +import six + +from openpype.lib import create_hard_link + +# this is needed until speedcopy for linux is fixed +if sys.platform == "win32": + from speedcopy import copyfile +else: + from shutil import copyfile + + +class FileTransaction(object): + """ + + The file transaction is a three step process. + + 1) Rename any existing files to a "temporary backup" during `process()` + 2) Copy the files to final destination during `process()` + 3) Remove any backed up files (*no rollback possible!) during `finalize()` + + Step 3 is done during `finalize()`. If not called the .bak files will + remain on disk. + + These steps try to ensure that we don't overwrite half of any existing + files e.g. if they are currently in use. + + Note: + A regular filesystem is *not* a transactional file system and even + though this implementation tries to produce a 'safe copy' with a + potential rollback do keep in mind that it's inherently unsafe due + to how filesystem works and a myriad of things could happen during + the transaction that break the logic. A file storage could go down, + permissions could be changed, other machines could be moving or writing + files. A lot can happen. + + Warning: + Any folders created during the transfer will not be removed. 
+ + """ + + MODE_COPY = 0 + MODE_HARDLINK = 1 + + def __init__(self, log=None): + + if log is None: + log = logging.getLogger("FileTransaction") + + self.log = log + + # The transfer queue + # todo: make this an actual FIFO queue? + self._transfers = {} + + # Destination file paths that a file was transferred to + self._transferred = [] + + # Backup file location mapping to original locations + self._backup_to_original = {} + + def add(self, src, dst, mode=MODE_COPY): + """Add a new file to transfer queue""" + opts = {"mode": mode} + + src = os.path.normpath(src) + dst = os.path.normpath(dst) + + if dst in self._transfers: + queued_src = self._transfers[dst][0] + if src == queued_src: + self.log.debug("File transfer was already " + "in queue: {} -> {}".format(src, dst)) + return + else: + self.log.warning("File transfer in queue replaced..") + self.log.debug("Removed from queue: " + "{} -> {}".format(queued_src, dst)) + self.log.debug("Added to queue: {} -> {}".format(src, dst)) + + self._transfers[dst] = (src, opts) + + def process(self): + + # Backup any existing files + for dst in self._transfers.keys(): + if os.path.exists(dst): + # Backup original file + # todo: add timestamp or uuid to ensure unique + backup = dst + ".bak" + self._backup_to_original[backup] = dst + self.log.debug("Backup existing file: " + "{} -> {}".format(dst, backup)) + os.rename(dst, backup) + + # Copy the files to transfer + for dst, (src, opts) in self._transfers.items(): + self._create_folder_for_file(dst) + + if opts["mode"] == self.MODE_COPY: + self.log.debug("Copying file ... {} -> {}".format(src, dst)) + copyfile(src, dst) + elif opts["mode"] == self.MODE_HARDLINK: + self.log.debug("Hardlinking file ... {} -> {}".format(src, + dst)) + create_hard_link(src, dst) + + self._transferred.append(dst) + + def finalize(self): + # Delete any backed up files + for backup in self._backup_to_original.keys(): + try: + os.remove(backup) + except OSError: + self.log.error("Failed to remove backup file: " + "{}".format(backup), + exc_info=True) + + def rollback(self): + + errors = 0 + + # Rollback any transferred files + for path in self._transferred: + try: + os.remove(path) + except OSError: + errors += 1 + self.log.error("Failed to rollback created file: " + "{}".format(path), + exc_info=True) + + # Rollback the backups + for backup, original in self._backup_to_original.items(): + try: + os.rename(backup, original) + except OSError: + errors += 1 + self.log.error("Failed to restore original file: " + "{} -> {}".format(backup, original), + exc_info=True) + + if errors: + self.log.error("{} errors occurred during " + "rollback.".format(errors), exc_info=True) + six.reraise(*sys.exc_info()) + + @property + def transferred(self): + """Return the processed transfers destination paths""" + return list(self._transferred) + + @property + def backups(self): + """Return the backup file paths""" + return list(self._backup_to_original.keys()) + + def _create_folder_for_file(self, path): + dirname = os.path.dirname(path) + try: + os.makedirs(dirname) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + self.log.critical("An unexpected error occurred.") + six.reraise(*sys.exc_info()) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 138a4fcc06..92976e6151 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -3,7 +3,6 @@ import logging import sys import copy import clique -import errno import six from 
bson.objectid import ObjectId @@ -13,19 +12,13 @@ from avalon import io import openpype.api from datetime import datetime from openpype.lib.profiles_filtering import filter_profiles +from openpype.lib.file_transaction import FileTransaction from openpype.lib import ( prepare_template_data, - create_hard_link, StringTemplate, TemplateUnsolved ) -# this is needed until speedcopy for linux is fixed -if sys.platform == "win32": - from speedcopy import copyfile -else: - from shutil import copyfile - log = logging.getLogger(__name__) @@ -40,164 +33,6 @@ def get_first_frame_padded(collection): return get_frame_padded(start_frame, padding=collection.padding) -class FileTransaction(object): - """ - - The file transaction is a three step process. - - 1) Rename any existing files to a "temporary backup" during `process()` - 2) Copy the files to final destination during `process()` - 3) Remove any backed up files (*no rollback possible!) during `finalize()` - - Step 3 is done during `finalize()`. If not called the .bak files will - remain on disk. - - These steps try to ensure that we don't overwrite half of any existing - files e.g. if they are currently in use. - - Note: - A regular filesystem is *not* a transactional file system and even - though this implementation tries to produce a 'safe copy' with a - potential rollback do keep in mind that it's inherently unsafe due - to how filesystem works and a myriad of things could happen during - the transaction that break the logic. A file storage could go down, - permissions could be changed, other machines could be moving or writing - files. A lot can happen. - - Warning: - Any folders created during the transfer will not be removed. - - """ - - MODE_COPY = 0 - MODE_HARDLINK = 1 - - def __init__(self, log=None): - - if log is None: - log = logging.getLogger("FileTransaction") - - self.log = log - - # The transfer queue - # todo: make this an actual FIFO queue? - self._transfers = {} - - # Destination file paths that a file was transferred to - self._transferred = [] - - # Backup file location mapping to original locations - self._backup_to_original = {} - - def add(self, src, dst, mode=MODE_COPY): - """Add a new file to transfer queue""" - opts = {"mode": mode} - - src = os.path.normpath(src) - dst = os.path.normpath(dst) - - if dst in self._transfers: - queued_src = self._transfers[dst][0] - if src == queued_src: - self.log.debug("File transfer was already " - "in queue: {} -> {}".format(src, dst)) - return - else: - self.log.warning("File transfer in queue replaced..") - self.log.debug("Removed from queue: " - "{} -> {}".format(queued_src, dst)) - self.log.debug("Added to queue: {} -> {}".format(src, dst)) - - self._transfers[dst] = (src, opts) - - def process(self): - - # Backup any existing files - for dst in self._transfers.keys(): - if os.path.exists(dst): - # Backup original file - # todo: add timestamp or uuid to ensure unique - backup = dst + ".bak" - self._backup_to_original[backup] = dst - self.log.debug("Backup existing file: " - "{} -> {}".format(dst, backup)) - os.rename(dst, backup) - - # Copy the files to transfer - for dst, (src, opts) in self._transfers.items(): - self._create_folder_for_file(dst) - - if opts["mode"] == self.MODE_COPY: - self.log.debug("Copying file ... {} -> {}".format(src, dst)) - copyfile(src, dst) - elif opts["mode"] == self.MODE_HARDLINK: - self.log.debug("Hardlinking file ... 
{} -> {}".format(src, - dst)) - create_hard_link(src, dst) - - self._transferred.append(dst) - - def finalize(self): - # Delete any backed up files - for backup in self._backup_to_original.keys(): - try: - os.remove(backup) - except OSError: - self.log.error("Failed to remove backup file: " - "{}".format(backup), - exc_info=True) - - def rollback(self): - - errors = 0 - - # Rollback any transferred files - for path in self._transferred: - try: - os.remove(path) - except OSError: - errors += 1 - self.log.error("Failed to rollback created file: " - "{}".format(path), - exc_info=True) - - # Rollback the backups - for backup, original in self._backup_to_original.items(): - try: - os.rename(backup, original) - except OSError: - errors += 1 - self.log.error("Failed to restore original file: " - "{} -> {}".format(backup, original), - exc_info=True) - - if errors: - self.log.error("{} errors occurred during " - "rollback.".format(errors), exc_info=True) - six.reraise(*sys.exc_info()) - - @property - def transferred(self): - """Return the processed transfers destination paths""" - return list(self._transferred) - - @property - def backups(self): - """Return the backup file paths""" - return list(self._backup_to_original.keys()) - - def _create_folder_for_file(self, path): - dirname = os.path.dirname(path) - try: - os.makedirs(dirname) - except OSError as e: - if e.errno == errno.EEXIST: - pass - else: - self.log.critical("An unexpected error occurred.") - six.reraise(*sys.exc_info()) - - class IntegrateAssetNew(pyblish.api.InstancePlugin): """Resolve any dependency issues From d3cb32ebe1df79408ff03fddef4d74a55fa1f4b6 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 14:32:34 +0100 Subject: [PATCH 22/72] Collect subset group in a Collector instead of during Integrator --- .../plugins/publish/collect_subset_group.py | 100 ++++++++++++++++++ openpype/plugins/publish/integrate_new.py | 50 --------- 2 files changed, 100 insertions(+), 50 deletions(-) create mode 100644 openpype/plugins/publish/collect_subset_group.py diff --git a/openpype/plugins/publish/collect_subset_group.py b/openpype/plugins/publish/collect_subset_group.py new file mode 100644 index 0000000000..60c1c04e70 --- /dev/null +++ b/openpype/plugins/publish/collect_subset_group.py @@ -0,0 +1,100 @@ +"""Produces instance.data["subsetGroup"] data used during integration. + +Requires: + dict -> context["anatomyData"] *(pyblish.api.CollectorOrder + 0.49) + +Provides: + instance -> subsetGroup (str) + +""" +import pyblish.api + +from openpype.lib.profiles_filtering import filter_profiles +from openpype.lib import ( + prepare_template_data, + StringTemplate, + TemplateUnsolved +) + + +class CollectSubsetGroup(pyblish.api.ContextPlugin): + """Collect Subset Group for publish.""" + + # Run after CollectAnatomyInstanceData + order = pyblish.api.CollectorOrder + 0.495 + label = "Collect Subset Group" + + def process(self, instance): + """Look into subset group profiles set by settings. + + Attribute 'subset_grouping_profiles' is defined by OpenPype settings. 
+ """ + + # TODO: Move this setting to this Collector instead of Integrator + project_settings = instance.context.data["project_settings"] + subset_grouping_profiles = ( + project_settings["global"] + ["publish"] + ["IntegrateAssetNew"] + ["subset_grouping_profiles"] + ) + + # Skip if 'subset_grouping_profiles' is empty + if not subset_grouping_profiles: + return + + # Skip if there is no matching profile + filter_criteria = self.get_profile_filter_criteria(instance) + profile = filter_profiles(subset_grouping_profiles, + filter_criteria, + logger=self.log) + if not profile: + return + + if instance.data.get("subsetGroup"): + # If subsetGroup is already set then allow that value to remain + self.log.debug("Skipping collect subset group due to existing " + "value: {}".format(instance.data["subsetGroup"])) + return + + template = profile["template"] + + fill_pairs = prepare_template_data({ + "family": filter_criteria["families"], + "task": filter_criteria["tasks"], + "host": filter_criteria["hosts"], + "subset": instance.data["subset"], + "renderlayer": instance.data.get("renderlayer") + }) + + filled_template = None + try: + filled_template = StringTemplate.format_strict_template( + template, fill_pairs + ) + except (KeyError, TemplateUnsolved): + keys = fill_pairs.keys() + msg = "Subset grouping failed. " \ + "Only {} are expected in Settings".format(','.join(keys)) + self.log.warning(msg) + + if filled_template: + instance.data["subsetGroup"] = filled_template + + def get_profile_filter_criteria(self, instance): + """Return filter criteria for `filter_profiles`""" + # TODO: This logic is used in much more plug-ins in one way or another + # Maybe better suited for lib? + # Anatomy data is pre-filled by Collectors + anatomy_data = instance.data["anatomyData"] + + # Task can be optional in anatomy data + task = anatomy_data.get("task", {}) + + # Return filter criteria + return { + "families": anatomy_data["family"], + "tasks": task.get("name"), + "hosts": anatomy_data["app"], + "task_types": task.get("type") + } diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 92976e6151..284e110916 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -13,11 +13,6 @@ import openpype.api from datetime import datetime from openpype.lib.profiles_filtering import filter_profiles from openpype.lib.file_transaction import FileTransaction -from openpype.lib import ( - prepare_template_data, - StringTemplate, - TemplateUnsolved -) log = logging.getLogger(__name__) @@ -619,9 +614,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): } subset_group = instance.data.get("subsetGroup") - if not subset_group: - # todo: move _get_subset_group fallback to its own collector - subset_group = self._get_subset_group(instance) if subset_group: data["subsetGroup"] = subset_group @@ -653,48 +645,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.log.info("Registered subset: {}".format(subset_name)) return subset - def _get_subset_group(self, instance): - """Look into subset group profiles set by settings. - - Attribute 'subset_grouping_profiles' is defined by OpenPype settings. 
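To make the collector above concrete: a hedged sketch of how one subset_grouping_profiles entry could resolve, assuming prepare_template_data provides capitalized variants of each key (which is what lets a {Task} placeholder resolve). The profile values here are hypothetical:

    profile = {
        "families": ["render"],
        "hosts": ["maya"],
        "task_types": [],
        "tasks": [],
        "template": "{family}{Task}",
    }
    fill_pairs = prepare_template_data({"family": "render",
                                        "task": "lighting"})
    StringTemplate.format_strict_template(profile["template"], fill_pairs)
    # -> "renderLighting"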
- """ - # TODO: This logic is better suited for a Collector to just store - # instance.data["subsetGroup"] - # Skip if 'subset_grouping_profiles' is empty - if not self.subset_grouping_profiles: - return None - - # Skip if there is no matching profile - filter_criteria = self.get_profile_filter_criteria(instance) - profile = filter_profiles(self.subset_grouping_profiles, - filter_criteria, - logger=self.log) - if not profile: - return None - - template = profile["template"] - - fill_pairs = prepare_template_data({ - "family": filter_criteria["families"], - "task": filter_criteria["tasks"], - "host": filter_criteria["hosts"], - "subset": instance.data["subset"], - "renderlayer": instance.data.get("renderlayer") - }) - - filled_template = None - try: - filled_template = StringTemplate.format_strict_template( - template, fill_pairs - ) - except (KeyError, TemplateUnsolved): - keys = fill_pairs.keys() - msg = "Subset grouping failed. " \ - "Only {} are expected in Settings".format(','.join(keys)) - self.log.warning(msg) - - return filled_template - def create_version_data(self, instance): """Create the data collection for the version From d7c5ad1f7c9913a39b43087cebbbee7971844f8c Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 14:33:37 +0100 Subject: [PATCH 23/72] Remove duplicate "source" in families --- openpype/plugins/publish/integrate_new.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 284e110916..08088479d0 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -86,7 +86,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "source", "matchmove", "image", - "source", "assembly", "fbx", "textures", From 8fffc60b5016d63d6fad2b8c3b399537a3736171 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 14:37:23 +0100 Subject: [PATCH 24/72] Move remainder of prepare anatomy data to the Collector --- .../plugins/publish/collect_anatomy_context_data.py | 6 ++++++ openpype/plugins/publish/integrate_new.py | 13 ------------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/openpype/plugins/publish/collect_anatomy_context_data.py b/openpype/plugins/publish/collect_anatomy_context_data.py index bd8d9e50c4..346caf6b83 100644 --- a/openpype/plugins/publish/collect_anatomy_context_data.py +++ b/openpype/plugins/publish/collect_anatomy_context_data.py @@ -91,5 +91,11 @@ class CollectAnatomyContextData(pyblish.api.ContextPlugin): } }) + intent = context.data.get("intent") + if intent and isinstance(intent, dict): + intent_value = intent.get("value") + if intent_value: + context_data["intent"] = intent_value + self.log.info("Global anatomy Data collected") self.log.debug(json.dumps(context_data, indent=4)) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 08088479d0..f598c540e5 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -119,10 +119,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if families & set(self.exclude_families): return - # TODO: Avoid the need to do any adjustments to anatomy data - # Best case scenario that's all handled by collectors - self.prepare_anatomy(instance) - file_transactions = FileTransaction(log=self.log) try: self.register(instance, file_transactions) @@ -137,15 +133,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # the try, except. 
file_transactions.finalize() - def prepare_anatomy(self, instance): - """Prepare anatomy data used to define representation destinations""" - # TODO: This logic should move to CollectAnatomyContextData - intent_value = instance.context.data.get("intent") - if intent_value and isinstance(intent_value, dict): - intent_value = intent_value.get("value") - if intent_value: - instance.data["anatomyData"]["intent"] = intent_value - def get_profile_filter_criteria(self, instance): """Return filter criteria for `filter_profiles`""" # Anatomy data is pre-filled by Collectors From 177e83ec8bf55e28ca551affefc4ac775570fe98 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 14:43:00 +0100 Subject: [PATCH 25/72] Restore "published_path" backwards compatibility for IntegrateFtrackInstance on Farm --- openpype/plugins/publish/integrate_new.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index f598c540e5..05cbb357e3 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -532,6 +532,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Store first transferred destination as published path data # todo: can we remove this? published_path = transfers[0][1] + repre["published_path"] = published_path # Backwards compatibility # todo: `repre` is not the actual `representation` entity # we should simplify/clarify difference between data above From 7189954a3c29ca00139a9a50b58606a3c335de04 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 14:44:19 +0100 Subject: [PATCH 26/72] Use `os.path.abspath` instead of `os.path.normpath` when adding a transfer --- openpype/lib/file_transaction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openpype/lib/file_transaction.py b/openpype/lib/file_transaction.py index 57592e297f..1626bec6b6 100644 --- a/openpype/lib/file_transaction.py +++ b/openpype/lib/file_transaction.py @@ -66,8 +66,8 @@ class FileTransaction(object): """Add a new file to transfer queue""" opts = {"mode": mode} - src = os.path.normpath(src) - dst = os.path.normpath(dst) + src = os.path.abspath(src) + dst = os.path.abspath(dst) if dst in self._transfers: queued_src = self._transfers[dst][0] From 8f8b578f0ce660b1c8182ad2486aca21ed1828e2 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 19:58:55 +0100 Subject: [PATCH 27/72] Move Subset Grouping Profiles settings to Collect Subset Group - This is moved from the Integrate Asset New settings --- .../plugins/publish/collect_subset_group.py | 16 +-- openpype/plugins/publish/integrate_new.py | 1 - .../defaults/project_settings/global.json | 20 ++-- .../schemas/schema_global_publish.json | 101 ++++++++++-------- 4 files changed, 71 insertions(+), 67 deletions(-) diff --git a/openpype/plugins/publish/collect_subset_group.py b/openpype/plugins/publish/collect_subset_group.py index 60c1c04e70..075699e304 100644 --- a/openpype/plugins/publish/collect_subset_group.py +++ b/openpype/plugins/publish/collect_subset_group.py @@ -24,28 +24,22 @@ class CollectSubsetGroup(pyblish.api.ContextPlugin): order = pyblish.api.CollectorOrder + 0.495 label = "Collect Subset Group" + # Defined in OpenPype settings + subset_grouping_profiles = None + def process(self, instance): """Look into subset group profiles set by settings. Attribute 'subset_grouping_profiles' is defined by OpenPype settings. 
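A short illustration of the os.path.abspath change above: normpath only collapses separators and ".." segments, while abspath also anchors relative paths to the current working directory, so two different spellings of the same file end up as the same key in the transfer queue. Assuming a POSIX process whose working directory is /work:

    import os

    os.path.normpath("renders/../scene.ma")  # -> "scene.ma" (still relative)
    os.path.abspath("renders/../scene.ma")   # -> "/work/scene.ma"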
""" - # TODO: Move this setting to this Collector instead of Integrator - project_settings = instance.context.data["project_settings"] - subset_grouping_profiles = ( - project_settings["global"] - ["publish"] - ["IntegrateAssetNew"] - ["subset_grouping_profiles"] - ) - # Skip if 'subset_grouping_profiles' is empty - if not subset_grouping_profiles: + if not self.subset_grouping_profiles: return # Skip if there is no matching profile filter_criteria = self.get_profile_filter_criteria(instance) - profile = filter_profiles(subset_grouping_profiles, + profile = filter_profiles(self.subset_grouping_profiles, filter_criteria, logger=self.log) if not profile: diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 05cbb357e3..4706d4d093 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -110,7 +110,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Attributes set by settings template_name_profiles = None - subset_grouping_profiles = None def process(self, instance): diff --git a/openpype/settings/defaults/project_settings/global.json b/openpype/settings/defaults/project_settings/global.json index 30a71b044a..528df111f0 100644 --- a/openpype/settings/defaults/project_settings/global.json +++ b/openpype/settings/defaults/project_settings/global.json @@ -20,6 +20,17 @@ ], "skip_hosts_headless_publish": [] }, + "CollectSubsetGroup": { + "subset_grouping_profiles": [ + { + "families": [], + "hosts": [], + "task_types": [], + "tasks": [], + "template": "" + } + ] + }, "ValidateEditorialAssetName": { "enabled": true, "optional": false @@ -193,15 +204,6 @@ "tasks": [], "template_name": "render" } - ], - "subset_grouping_profiles": [ - { - "families": [], - "hosts": [], - "task_types": [], - "tasks": [], - "template": "" - } ] }, "CleanUp": { diff --git a/openpype/settings/entities/schemas/projects_schema/schemas/schema_global_publish.json b/openpype/settings/entities/schemas/projects_schema/schemas/schema_global_publish.json index 12043d4205..ab968037f6 100644 --- a/openpype/settings/entities/schemas/projects_schema/schemas/schema_global_publish.json +++ b/openpype/settings/entities/schemas/projects_schema/schemas/schema_global_publish.json @@ -39,6 +39,61 @@ } ] }, + { + "type": "dict", + "collapsible": true, + "key": "CollectSubsetGroup", + "label": "Collect Subset Group", + "is_group": true, + "children": [ + { + "type": "list", + "key": "subset_grouping_profiles", + "label": "Subset grouping profiles", + "use_label_wrap": true, + "object_type": { + "type": "dict", + "children": [ + { + "type": "label", + "label": "Set all published instances as a part of specific group named according to 'Template'.
Implemented all variants of placeholders [{task},{family},{host},{subset},{renderlayer}]" + }, + { + "key": "families", + "label": "Families", + "type": "list", + "object_type": "text" + }, + { + "type": "hosts-enum", + "key": "hosts", + "label": "Hosts", + "multiselection": true + }, + { + "key": "task_types", + "label": "Task types", + "type": "task-types-enum" + }, + { + "key": "tasks", + "label": "Task names", + "type": "list", + "object_type": "text" + }, + { + "type": "separator" + }, + { + "type": "text", + "key": "template", + "label": "Template" + } + ] + } + } + ] + }, { "type": "dict", "collapsible": true, @@ -603,52 +658,6 @@ } ] } - }, - { - "type": "list", - "key": "subset_grouping_profiles", - "label": "Subset grouping profiles", - "use_label_wrap": true, - "object_type": { - "type": "dict", - "children": [ - { - "type": "label", - "label": "Set all published instances as a part of specific group named according to 'Template'.
Implemented all variants of placeholders [{task},{family},{host},{subset},{renderlayer}]" - }, - { - "key": "families", - "label": "Families", - "type": "list", - "object_type": "text" - }, - { - "type": "hosts-enum", - "key": "hosts", - "label": "Hosts", - "multiselection": true - }, - { - "key": "task_types", - "label": "Task types", - "type": "task-types-enum" - }, - { - "key": "tasks", - "label": "Task names", - "type": "list", - "object_type": "text" - }, - { - "type": "separator" - }, - { - "type": "text", - "key": "template", - "label": "Template" - } - ] - } } ] }, From 6ff7167d54e8a70441300ba4d21acb5a01eb5071 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 20:09:08 +0100 Subject: [PATCH 28/72] Separate get_template_name into its own method + use `self.default_template_name` --- openpype/plugins/publish/integrate_new.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 4706d4d093..c1fa7ccaf2 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -172,14 +172,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ) ) - # Define publish template name from profiles - filter_criteria = self.get_profile_filter_criteria(instance) - profile = filter_profiles(self.template_name_profiles, - filter_criteria, - logger=self.log) - template_name = "publish" - if profile: - template_name = profile["template_name"] + template_name = self._get_template_name(instance) subset = self.register_subset(instance) @@ -582,6 +575,19 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return families + def _get_template_name(self, instance): + """Return anatomy template name to use for integration""" + + # Define publish template name from profiles + filter_criteria = self.get_profile_filter_criteria(instance) + profile = filter_profiles(self.template_name_profiles, + filter_criteria, + logger=self.log) + template_name = self.default_template_name + if profile: + template_name = profile["template_name"] + return template_name + def register_subset(self, instance): asset = instance.data.get("assetEntity") subset_name = instance.data["subset"] From 821293d3b855acf2cadd914328a975fd619acd56 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 20:09:31 +0100 Subject: [PATCH 29/72] Match comment from Integrator for consistency --- openpype/plugins/publish/collect_subset_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/collect_subset_group.py b/openpype/plugins/publish/collect_subset_group.py index 075699e304..5756563ed3 100644 --- a/openpype/plugins/publish/collect_subset_group.py +++ b/openpype/plugins/publish/collect_subset_group.py @@ -24,7 +24,7 @@ class CollectSubsetGroup(pyblish.api.ContextPlugin): order = pyblish.api.CollectorOrder + 0.495 label = "Collect Subset Group" - # Defined in OpenPype settings + # Attributes set by settings subset_grouping_profiles = None def process(self, instance): From c3e0162c436a081ccf809cd429f5b828202569d0 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 20:11:31 +0100 Subject: [PATCH 30/72] Debug log when exclude family was found for the instance --- openpype/plugins/publish/integrate_new.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index c1fa7ccaf2..8a71c0d5aa 100644 --- 
a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -115,7 +115,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Exclude instances that also contain families from exclude families families = set(self._get_instance_families(instance)) - if families & set(self.exclude_families): + exclude = families & set(self.exclude_families) + if exclude: + self.log.debug("Instance not integrated due to exclude " + "families found: {}".format(", ".join(exclude))) return file_transactions = FileTransaction(log=self.log) From fbdb385e5b855c0762583256311501b78a2ca730 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 20:20:00 +0100 Subject: [PATCH 31/72] Perform database registering of Subset and Version in a single Bulk Write --- openpype/plugins/publish/integrate_new.py | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 8a71c0d5aa..6f1d745b9a 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -177,11 +177,17 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): template_name = self._get_template_name(instance) - subset = self.register_subset(instance) - - version = self.register_version(instance, subset) + subset, subset_writes = self.register_subset(instance) + version, version_writes = self.register_version(instance, subset) instance.data["versionEntity"] = version + # Bulk write to the database + # todo: Try to avoid writing already until after we've prepared + # representations to allow easier rollback? + io._database[io.Session["AVALON_PROJECT"]].bulk_write( + subset_writes + version_writes + ) + archived_repres = list(io.find({ "parent": version["_id"], "type": "archived_representation" @@ -330,16 +336,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): repre["type"] = "archived_representation" bulk_writes.append(InsertOne(repre)) - # bulk updates - # todo: Try to avoid writing already until after we've prepared - # representations to allow easier rollback? - io._database[io.Session["AVALON_PROJECT"]].bulk_write( - bulk_writes - ) - self.log.info("Registered version: v{0:03d}".format(version["name"])) - return version + return version, bulk_writes def prepare_representation(self, repre, template_name, @@ -612,6 +611,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if subset_group: data["subsetGroup"] = subset_group + bulk_writes = [] if subset is None: # Create a new subset self.log.info("Subset '%s' not found, creating ..." % subset_name) @@ -623,22 +623,22 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "data": data, "parent": asset["_id"] } - io.insert_one(subset) + bulk_writes.append(InsertOne(subset)) else: # Update existing subset data with new data and set in database. 
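The single bulk write introduced in this patch batches heterogeneous operations into one database round trip. A hedged sketch of the pymongo pattern, using the same io wrapper as the diff and hypothetical documents:

    from bson.objectid import ObjectId
    from pymongo import InsertOne, UpdateOne

    subset_writes = [InsertOne({
        "_id": ObjectId(),
        "type": "subset",
        "name": "renderMain",
    })]
    existing_version_id = ObjectId()  # hypothetical id of a prior version
    version_writes = [UpdateOne(
        {"_id": existing_version_id},
        {"$set": {"data.comment": "updated publish"}},
    )]
    io._database[io.Session["AVALON_PROJECT"]].bulk_write(
        subset_writes + version_writes
    )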
# We also change the found subset in-place so we don't need to # re-query the subset afterwards subset["data"].update(data) - io.update_many( + bulk_writes.append(UpdateOne( {"type": "subset", "_id": subset["_id"]}, {"$set": { "data": subset["data"] }} - ) + )) self.log.info("Registered subset: {}".format(subset_name)) - return subset + return subset, bulk_writes def create_version_data(self, instance): """Create the data collection for the version From 1844281c68d0e357eccdc8c277db278ef0651f31 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 26 Mar 2022 20:41:22 +0100 Subject: [PATCH 32/72] Match assertion for collection of files (allow no absolute paths) similar to single files --- openpype/plugins/publish/integrate_new.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 6f1d745b9a..ead00452da 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -398,6 +398,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): is_sequence_representation = isinstance(files, (list, tuple)) if is_sequence_representation: # Collection of files (sequence) + assert not any(os.path.isabs(fname) for fname in files), ( + "Given file names contain full paths" + ) + # Get the sequence as a collection. The files must be of a single # sequence and have no remainder outside of the collections. collections, remainder = clique.assemble(files, From 8e0161bec7353bff8bc581d4d676b3ba7c090ba8 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Mon, 28 Mar 2022 15:04:24 +0200 Subject: [PATCH 33/72] Also Bulk Write representation changes + more cleanup - Don't create intermediate archived representations - Move writing of Subset + Version to database to just before file transactions - Perform ReplaceOne for version instead of update with "$set" for the full version --- openpype/plugins/publish/integrate_new.py | 166 ++++++++++------------ 1 file changed, 79 insertions(+), 87 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index ead00452da..7a3ca2bdf7 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -6,7 +6,7 @@ import clique import six from bson.objectid import ObjectId -from pymongo import DeleteOne, InsertOne, UpdateOne +from pymongo import DeleteMany, ReplaceOne, InsertOne, UpdateOne import pyblish.api from avalon import io import openpype.api @@ -28,6 +28,11 @@ def get_first_frame_padded(collection): return get_frame_padded(start_frame, padding=collection.padding) +def bulk_write(writes): + """Convenience function to bulk write into active project database""" + return io._database[io.Session["AVALON_PROJECT"]].bulk_write(writes) + + class IntegrateAssetNew(pyblish.api.InstancePlugin): """Resolve any dependency issues @@ -177,21 +182,17 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): template_name = self._get_template_name(instance) - subset, subset_writes = self.register_subset(instance) - version, version_writes = self.register_version(instance, subset) + subset, subset_writes = self.prepare_subset(instance) + version, version_writes = self.prepare_version(instance, subset) instance.data["versionEntity"] = version - # Bulk write to the database - # todo: Try to avoid writing already until after we've prepared - # representations to allow easier rollback? 
- io._database[io.Session["AVALON_PROJECT"]].bulk_write( - subset_writes + version_writes - ) - - archived_repres = list(io.find({ - "parent": version["_id"], - "type": "archived_representation" - })) + # Get existing representations (if any) + existing_repres_by_name = { + repres["name"].lower(): repres for repres in io.find({ + "parent": version["_id"], + "type": "representation" + }) + } # Prepare all representations prepared_representations = [] @@ -205,7 +206,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # todo: reduce/simplify what is returned from this function prepared = self.prepare_representation(repre, template_name, - archived_repres, + existing_repres_by_name, version, instance_stagingdir, instance) @@ -225,40 +226,70 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): for src, dst in instance.data.get("hardlinks", []): file_transactions.add(src, dst, mode=FileTransaction.MODE_HARDLINK) + # Bulk write to the database + # todo: Can we move this even to after the file transfers? + bulk_write(subset_writes + version_writes) + self.log.info("Subset {subset[name]} and Version {version[name]} " + "written to database..".format(subset=subset, + version=version)) + # Process all file transfers of all integrations now self.log.debug("Integrating source files to destination ...") file_transactions.process() - self.log.debug("Backup files " + self.log.debug("Backed up existing files: " "{}".format(file_transactions.backups)) - self.log.debug("Integrated files " + self.log.debug("Transferred files: " "{}".format(file_transactions.transferred)) # Finalize the representations now the published files are integrated # Get 'files' info for representations and its attached resources - self.log.debug("Retrieving Representation files information ...") + self.log.debug("Retrieving Representation Site Sync information ...") sites = SiteSync.compute_resource_sync_sites( system_settings=instance.context.data["system_settings"], project_settings=instance.context.data["project_settings"] ) - log.debug("final sites:: {}".format(sites)) + self.log.debug("final sites:: {}".format(sites)) anatomy = instance.context.data["anatomy"] - representations = [] + representation_writes = [] + new_repre_names_low = set() for prepared in prepared_representations: transfers = prepared["transfers"] representation = prepared["representation"] representation["files"] = self.get_files_info( transfers, sites, anatomy ) - representations.append(representation) - # Remove all archived representations - if archived_repres: - repre_ids_to_remove = [repre["_id"] for repre in archived_repres] - io.delete_many({"_id": {"$in": repre_ids_to_remove}}) + # Set up representation for writing to the database. 
Since
+            # we *might* be overwriting an existing entry if the version
+            # already exists, we use ReplaceOne with `upsert=True`
+            representation_writes.append(ReplaceOne(
+                filter={"_id": representation["_id"]},
+                replacement=representation,
+                upsert=True
+            ))
+
+            new_repre_names_low.add(representation["name"].lower())
+
+        # Delete any existing representations that didn't get any new data
+        # if the instance is not set to append mode
+        if not instance.data.get("append", False):
+            delete_names = set()
+            for name, existing_repres in existing_repres_by_name.items():
+                if name not in new_repre_names_low:
+                    # We add the exact representation name because `name` is
+                    # lowercase for name matching only and not in the database
+                    delete_names.add(existing_repres["name"])
+            if delete_names:
+                representation_writes.append(DeleteMany(
+                    filter={
+                        "parent": version["_id"],
+                        "name": {"$in": list(delete_names)}
+                    }
+                ))
+
+        # Write representations to the database
+        bulk_write(representation_writes)

         # Backwards compatibility
         # todo: can we avoid the need to store this?
@@ -267,12 +298,11 @@
         }

         self.log.info("Registered {} representations"
-                      "".format(len(representations)))
+                      "".format(len(prepared_representations)))

-    def register_version(self, instance, subset):
+    def prepare_version(self, instance, subset):

         version_number = instance.data["version"]
-        self.log.debug("Version: v{0:03d}".format(version_number))

         version = {
             "schema": "openpype:version-3.0",
@@ -288,61 +318,26 @@
             'name': version_number
         })

-        bulk_writes = []
-        if existing_version is None:
+        if existing_version:
+            self.log.debug("Updating existing version ...")
+            version["_id"] = existing_version["_id"]
+        else:
             self.log.debug("Creating new version ...")
             version["_id"] = ObjectId()
-            bulk_writes.append(InsertOne(version))
-        else:
-            self.log.debug("Updating existing version ...")
-            # Check if instance have set `append` mode which cause that
-            # only replicated representations are set to archive
-            append_repres = instance.data.get("append", False)
-            # Update version data
-            version_id = existing_version['_id']
-            bulk_writes.append(UpdateOne({
-                '_id': version_id
-            }, {
-                '$set': version
-            }))
+        bulk_writes = [ReplaceOne(
+            filter={"_id": version["_id"]},
+            replacement=version,
+            upsert=True
+        )]

-            # Instead of directly writing and querying we reproduce what
-            # the resulting version would look like so we can hold off making
-            # changes to the database to avoid the need for 'rollback'
-            version = copy.deepcopy(version)
-            version["_id"] = existing_version["_id"]
-
-        # Find representations of existing version and archive them
-        repres = instance.data.get("representations", [])
-        new_repre_names_low = [_repre["name"].lower() for _repre in repres]
-        current_repres = io.find({
-            "type": "representation",
-            "parent": version_id
-        })
-        for repre in current_repres:
-            if append_repres:
-                # archive only duplicated representations
-                if repre["name"].lower() not in new_repre_names_low:
-                    continue
-            # Representation must change type,
-            # `_id` must be stored to other key and replaced with new
-            # - that is because new representations should have same ID
-            repre_id = repre["_id"]
-            bulk_writes.append(DeleteOne({"_id": repre_id}))
-
-            repre["orig_id"] = repre_id
-            repre["_id"] = ObjectId()
-            repre["type"] = "archived_representation"
-            bulk_writes.append(InsertOne(repre))
-
-        
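For context on the upsert used above: ReplaceOne with upsert=True overwrites the whole existing document, or inserts it when none matches, which is what allows the code to settle on a definitive _id up front for both the new-version and existing-version cases. A small sketch with hypothetical values:

    from bson.objectid import ObjectId
    from pymongo import ReplaceOne

    version_id = ObjectId()  # pre-generated id, reused on overwrite
    version = {"_id": version_id, "type": "version", "name": 3, "data": {}}
    # One operation covers both "create new" and "overwrite existing"
    op = ReplaceOne(filter={"_id": version_id}, replacement=version,
                    upsert=True)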
self.log.info("Registered version: v{0:03d}".format(version["name"])) + self.log.info("Prepared version: v{0:03d}".format(version["name"])) return version, bulk_writes def prepare_representation(self, repre, template_name, - archived_repres, + existing_repres_by_name, version, instance_stagingdir, instance): @@ -516,15 +511,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if repre.get("udim"): repre_context["udim"] = repre.get("udim") # store list - # Define representation id - repre_id = ObjectId() - # Use previous representation's id if there is a name match - repre_name_lower = repre["name"].lower() - for _archived_repres in archived_repres: - if repre_name_lower == _archived_repres["name"].lower(): - repre_id = _archived_repres["orig_id"] - break + existing = existing_repres_by_name.get(repre["name"].lower()) + if existing: + repre_id = existing["_id"] + else: + repre_id = ObjectId() # Backwards compatibility: # Store first transferred destination as published path data @@ -594,7 +586,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): template_name = profile["template_name"] return template_name - def register_subset(self, instance): + def prepare_subset(self, instance): asset = instance.data.get("assetEntity") subset_name = instance.data["subset"] self.log.debug("Subset: {}".format(subset_name)) @@ -631,7 +623,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): else: # Update existing subset data with new data and set in database. - # We also change the found subset in-place so we don't need to + # We also change the found subset in-place so we don't need to # re-query the subset afterwards subset["data"].update(data) bulk_writes.append(UpdateOne( @@ -641,7 +633,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): }} )) - self.log.info("Registered subset: {}".format(subset_name)) + self.log.info("Prepared subset: {}".format(subset_name)) return subset, bulk_writes def create_version_data(self, instance): From ba2c6e6f084e5829f32250735f13f045cabca800 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Mon, 28 Mar 2022 15:43:57 +0200 Subject: [PATCH 34/72] Fix class type --- openpype/plugins/publish/collect_subset_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/collect_subset_group.py b/openpype/plugins/publish/collect_subset_group.py index 5756563ed3..56cd7de94e 100644 --- a/openpype/plugins/publish/collect_subset_group.py +++ b/openpype/plugins/publish/collect_subset_group.py @@ -17,7 +17,7 @@ from openpype.lib import ( ) -class CollectSubsetGroup(pyblish.api.ContextPlugin): +class CollectSubsetGroup(pyblish.api.InstancePlugin): """Collect Subset Group for publish.""" # Run after CollectAnatomyInstanceData From e6665e579ee069b30a02b1034e53d48c85553761 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Mon, 28 Mar 2022 20:32:46 +0200 Subject: [PATCH 35/72] Restructure code and more cleanup --- openpype/plugins/publish/integrate_new.py | 250 +++++++++++----------- 1 file changed, 123 insertions(+), 127 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 7a3ca2bdf7..6401806394 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -17,6 +17,21 @@ from openpype.lib.file_transaction import FileTransaction log = logging.getLogger(__name__) +def get_instance_families(instance): + """Get all families of the instance""" + # todo: move this to lib? 
+ family = instance.data.get("family") + families = [] + if family: + families.append(family) + + for _family in (instance.data.get("families") or []): + if _family not in families: + families.append(_family) + + return families + + def get_frame_padded(frame, padding): """Return frame number as string with `padding` amount of padded zeros""" return "{frame:0{padding}d}".format(padding=padding, frame=frame) @@ -119,7 +134,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def process(self, instance): # Exclude instances that also contain families from exclude families - families = set(self._get_instance_families(instance)) + families = set(get_instance_families(instance)) exclude = families & set(self.exclude_families) if exclude: self.log.debug("Instance not integrated due to exclude " @@ -140,22 +155,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # the try, except. file_transactions.finalize() - def get_profile_filter_criteria(self, instance): - """Return filter criteria for `filter_profiles`""" - # Anatomy data is pre-filled by Collectors - anatomy_data = instance.data["anatomyData"] - - # Task can be optional in anatomy data - task = anatomy_data.get("task", {}) - - # Return filter criteria - return { - "families": anatomy_data["family"], - "tasks": task.get("name"), - "hosts": anatomy_data["app"], - "task_types": task.get("type") - } - def register(self, instance, file_transactions): instance_stagingdir = instance.data.get("stagingDir") @@ -171,16 +170,16 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "@ {0}".format(instance_stagingdir) ) - # Ensure at least one file is set up for transfer in staging dir. + # Ensure at least one representation is set up for registering. repres = instance.data.get("representations") - assert repres, "Instance has no files to transfer" + assert repres, "Instance has representations data" assert isinstance(repres, (list, tuple)), ( - "Instance 'files' must be a list, got: {0} {1}".format( + "Instance 'repres' must be a list, got: {0} {1}".format( str(type(repres)), str(repres) ) ) - template_name = self._get_template_name(instance) + template_name = self.get_template_name(instance) subset, subset_writes = self.prepare_subset(instance) version, version_writes = self.prepare_version(instance, subset) @@ -300,6 +299,56 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.log.info("Registered {} representations" "".format(len(prepared_representations))) + def prepare_subset(self, instance): + asset = instance.data.get("assetEntity") + subset_name = instance.data["subset"] + self.log.debug("Subset: {}".format(subset_name)) + + # Get existing subset if it exists + subset = io.find_one({ + "type": "subset", + "parent": asset["_id"], + "name": subset_name + }) + + # Define subset data + data = { + "families": get_instance_families(instance) + } + + subset_group = instance.data.get("subsetGroup") + if subset_group: + data["subsetGroup"] = subset_group + + bulk_writes = [] + if subset is None: + # Create a new subset + self.log.info("Subset '%s' not found, creating ..." % subset_name) + subset = { + "_id": ObjectId(), + "schema": "openpype:subset-3.0", + "type": "subset", + "name": subset_name, + "data": data, + "parent": asset["_id"] + } + bulk_writes.append(InsertOne(subset)) + + else: + # Update existing subset data with new data and set in database. 
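For clarity, what the get_instance_families helper above yields for hypothetical instance data: the single "family" value leads and duplicates from "families" are dropped.

    # Given a pyblish instance with:
    #   instance.data = {"family": "render", "families": ["render", "review"]}
    get_instance_families(instance)  # -> ["render", "review"]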
+ # We also change the found subset in-place so we don't need to + # re-query the subset afterwards + subset["data"].update(data) + bulk_writes.append(UpdateOne( + {"type": "subset", "_id": subset["_id"]}, + {"$set": { + "data": subset["data"] + }} + )) + + self.log.info("Prepared subset: {}".format(subset_name)) + return subset, bulk_writes + def prepare_version(self, instance, subset): version_number = instance.data["version"] @@ -559,91 +608,14 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "published_files": [transfer[1] for transfer in transfers] } - def _get_instance_families(self, instance): - """Get all families of the instance""" - # todo: move this to lib? - family = instance.data.get("family") - families = [] - if family: - families.append(family) - - for _family in (instance.data.get("families") or []): - if _family not in families: - families.append(_family) - - return families - - def _get_template_name(self, instance): - """Return anatomy template name to use for integration""" - - # Define publish template name from profiles - filter_criteria = self.get_profile_filter_criteria(instance) - profile = filter_profiles(self.template_name_profiles, - filter_criteria, - logger=self.log) - template_name = self.default_template_name - if profile: - template_name = profile["template_name"] - return template_name - - def prepare_subset(self, instance): - asset = instance.data.get("assetEntity") - subset_name = instance.data["subset"] - self.log.debug("Subset: {}".format(subset_name)) - - # Get existing subset if it exists - subset = io.find_one({ - "type": "subset", - "parent": asset["_id"], - "name": subset_name - }) - - # Define subset data - data = { - "families": self._get_instance_families(instance) - } - - subset_group = instance.data.get("subsetGroup") - if subset_group: - data["subsetGroup"] = subset_group - - bulk_writes = [] - if subset is None: - # Create a new subset - self.log.info("Subset '%s' not found, creating ..." % subset_name) - subset = { - "_id": ObjectId(), - "schema": "openpype:subset-3.0", - "type": "subset", - "name": subset_name, - "data": data, - "parent": asset["_id"] - } - bulk_writes.append(InsertOne(subset)) - - else: - # Update existing subset data with new data and set in database. 
- # We also change the found subset in-place so we don't need to - # re-query the subset afterwards - subset["data"].update(data) - bulk_writes.append(UpdateOne( - {"type": "subset", "_id": subset["_id"]}, - {"$set": { - "data": subset["data"] - }} - )) - - self.log.info("Prepared subset: {}".format(subset_name)) - return subset, bulk_writes - def create_version_data(self, instance): - """Create the data collection for the version + """Create the data dictionary for the version Args: instance: the current instance being published Returns: - dict: the required information with instance.data as key + dict: the required information for version["data"] """ context = instance.context @@ -658,7 +630,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.log.debug("Source: {}".format(source)) version_data = { - "families": self._get_instance_families(instance), + "families": get_instance_families(instance), "time": context.data["time"], "author": context.data["user"], "source": source, @@ -692,28 +664,52 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return version_data - def main_family_from_instance(self, instance): - """Returns main family of entered instance.""" - return self._get_instance_families(instance)[0] + def get_template_name(self, instance): + """Return anatomy template name to use for integration""" + + # Define publish template name from profiles + filter_criteria = self.get_profile_filter_criteria(instance) + profile = filter_profiles(self.template_name_profiles, + filter_criteria, + logger=self.log) + template_name = self.default_template_name + if profile: + template_name = profile["template_name"] + return template_name + + def get_profile_filter_criteria(self, instance): + """Return filter criteria for `filter_profiles`""" + # Anatomy data is pre-filled by Collectors + anatomy_data = instance.data["anatomyData"] + + # Task can be optional in anatomy data + task = anatomy_data.get("task", {}) + + # Return filter criteria + return { + "families": anatomy_data["family"], + "tasks": task.get("name"), + "hosts": anatomy_data["app"], + "task_types": task.get("type") + } def get_rootless_path(self, anatomy, path): - """ Returns, if possible, path without absolute portion from host - (eg. 'c:\' or '/opt/..') - This information is host dependent and shouldn't be captured. - Example: - 'c:/projects/MyProject1/Assets/publish...' > - '{root}/MyProject1/Assets...' + """Returns, if possible, path without absolute portion from root + (eg. 'c:\' or '/opt/..') + + This information is platform dependent and shouldn't be captured. + Example: + 'c:/projects/MyProject1/Assets/publish...' > + '{root}/MyProject1/Assets...' Args: - anatomy: anatomy part from instance - path: path (absolute) + anatomy: anatomy part from instance + path: path (absolute) Returns: - path: modified path if possible, or unmodified path - + warning logged + path: modified path if possible, or unmodified path + + warning logged """ - success, rootless_path = ( - anatomy.find_root_template_from_path(path) - ) + success, rootless_path = anatomy.find_root_template_from_path(path) if success: path = rootless_path else: @@ -731,9 +727,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): Context info. 
Arguments: - instance: the current instance being published - integrated_file_sizes: dictionary of destination path (absolute) - and its file size + transfers (list): List of transferred files (source, destination) + sites (list): array of published locations + anatomy: anatomy part from instance Returns: output_resources: array of dictionaries to be added to 'files' key in representation @@ -749,14 +745,14 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): """ Prepare information for one file (asset or resource) Arguments: - path: destination url of published file (rootless) - size(optional): size of file in bytes - file_hash(optional): hash of file for synchronization validation - sites(optional): array of published locations, - [ {'name':'studio', 'created_dt':date} by default - keys expected ['studio', 'site1', 'gdrive1'] + path: destination url of published file + anatomy: anatomy part from instance + sites: array of published locations, + [ {'name':'studio', 'created_dt':date} by default + keys expected ['studio', 'site1', 'gdrive1'] + Returns: - rec: dictionary with filled info + dict: file info dictionary """ file_hash = openpype.api.source_hash(path) From 2777c36eb52e7390b15accc93c9b9a9a771ba21d Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Mon, 28 Mar 2022 20:34:16 +0200 Subject: [PATCH 36/72] Rely on `instance.data["fps"] over `context.data["fps"]` if available --- openpype/plugins/publish/integrate_new.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 6401806394..00922b0ed3 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -636,9 +636,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "source": source, "comment": context.data.get("comment"), "machine": context.data.get("machine"), - "fps": context.data.get( - "fps", instance.data.get("fps") - ) + "fps": instance.data.get("fps", context.data.get("fps")) } intent_value = context.data.get("intent") From add4958d4c9078b6ecad131f6e40beb66ecdd348 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 29 Mar 2022 09:43:27 +0200 Subject: [PATCH 37/72] Fix message --- openpype/plugins/publish/integrate_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 00922b0ed3..f6aa720dbb 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -172,7 +172,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Ensure at least one representation is set up for registering. 
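Tying prepare_file_info and get_rootless_path together: a hedged sketch of a single entry of a representation's "files" list, with made-up values. The "{root[...]}" form is what anatomy.find_root_template_from_path returns on success:

    from datetime import datetime
    from bson.objectid import ObjectId

    file_info = {
        "_id": ObjectId(),  # unique id of this file record
        "path": "{root[work]}/MyProject/publish/render.0001.exr",
        "size": 1048576,    # bytes on disk
        "hash": "...",      # whatever openpype.api.source_hash(path) returns
        "sites": [{"name": "studio", "created_dt": datetime.now()}],
    }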
repres = instance.data.get("representations") - assert repres, "Instance has representations data" + assert repres, "Instance has no representations data" assert isinstance(repres, (list, tuple)), ( "Instance 'repres' must be a list, got: {0} {1}".format( str(type(repres)), str(repres) From 77b5c24370b61615b2380fdc464137d3eba13ab9 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 29 Mar 2022 11:44:30 +0200 Subject: [PATCH 38/72] Fix message --- openpype/plugins/publish/integrate_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index f6aa720dbb..020b1d2b9c 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -174,7 +174,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): repres = instance.data.get("representations") assert repres, "Instance has no representations data" assert isinstance(repres, (list, tuple)), ( - "Instance 'repres' must be a list, got: {0} {1}".format( + "Instance 'representations' must be a list, got: {0} {1}".format( str(type(repres)), str(repres) ) ) From 127f19873f876d58a2c954c4a56c73ddd4d4d4af Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 29 Mar 2022 11:58:52 +0200 Subject: [PATCH 39/72] Streamlining some code, optimize some database queries with projection --- openpype/plugins/publish/integrate_new.py | 36 ++++++++++++----------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 020b1d2b9c..d869a1b6be 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -187,10 +187,14 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Get existing representations (if any) existing_repres_by_name = { - repres["name"].lower(): repres for repres in io.find({ - "parent": version["_id"], - "type": "representation" - }) + repres["name"].lower(): repres for repres in io.find( + { + "parent": version["_id"], + "type": "representation" + }, + # Only care about id and name of existing representations + projection={"_id": True, "name": True} + ) } # Prepare all representations @@ -239,16 +243,17 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "{}".format(file_transactions.backups)) self.log.debug("Transferred files: " "{}".format(file_transactions.transferred)) - - # Finalize the representations now the published files are integrated - # Get 'files' info for representations and its attached resources self.log.debug("Retrieving Representation Site Sync information ...") + + # Get the accessible sites for Site Sync sites = SiteSync.compute_resource_sync_sites( system_settings=instance.context.data["system_settings"], project_settings=instance.context.data["project_settings"] ) - self.log.debug("final sites:: {}".format(sites)) + self.log.debug("Site Sync Sites: {}".format(sites)) + # Finalize the representations now the published files are integrated + # Get 'files' info for representations and its attached resources anatomy = instance.context.data["anatomy"] representation_writes = [] new_repre_names_low = set() @@ -365,7 +370,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): 'type': 'version', 'parent': subset["_id"], 'name': version_number - }) + }, projection={"_id": True}) if existing_version: self.log.debug("Updating existing version ...") @@ -576,7 +581,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # todo: `repre` is 
not the actual `representation` entity # we should simplify/clarify difference between data above # and the actual representation entity for the database - data = repre.get("data") or {} + data = repre.get("data", {}) data.update({'path': published_path, 'template': template}) representation = { "_id": repre_id, @@ -664,16 +669,15 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): def get_template_name(self, instance): """Return anatomy template name to use for integration""" - # Define publish template name from profiles filter_criteria = self.get_profile_filter_criteria(instance) profile = filter_profiles(self.template_name_profiles, filter_criteria, logger=self.log) - template_name = self.default_template_name if profile: - template_name = profile["template_name"] - return template_name + return profile["template_name"] + else: + return self.default_template_name def get_profile_filter_criteria(self, instance): """Return filter criteria for `filter_profiles`""" @@ -752,13 +756,11 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): Returns: dict: file info dictionary """ - file_hash = openpype.api.source_hash(path) - return { "_id": ObjectId(), "path": self.get_rootless_path(anatomy, path), "size": os.path.getsize(path), - "hash": file_hash, + "hash": openpype.api.source_hash(path), "sites": sites } From 0c2c60d37b05411193acf8c60f6a2562463ba558 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 29 Mar 2022 12:23:24 +0200 Subject: [PATCH 40/72] Unify usage of `clique.assemble` --- openpype/plugins/publish/integrate_new.py | 60 ++++++++++++++--------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index d869a1b6be..1ceb99e9fe 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -17,6 +17,41 @@ from openpype.lib.file_transaction import FileTransaction log = logging.getLogger(__name__) +def assemble(files): + """Convenience `clique.assemble` wrapper for files of a single collection. + + Unlike `clique.assemble` this wrapper does not allow more than a single + Collection nor any remainder files. Errors will be raised when not only + a single collection is assembled. + + Returns: + clique.Collection: A single sequence Collection + + Raises: + ValueError: Error is raised when files do not result in a single + collected Collection. + + """ + # todo: move this to lib? + # Get the sequence as a collection. The files must be of a single + # sequence and have no remainder outside of the collections. + patterns = [clique.PATTERNS["frames"]] + collections, remainder = clique.assemble(files, + minimum_items=1, + patterns=patterns) + if not collections: + raise ValueError("No collections found in files: " + "{}".format(files)) + if remainder: + raise ValueError("Files found not detected as part" + " of a sequence: {}".format(remainder)) + if len(collections) > 1: + raise ValueError("Files in sequence are not part of a" + " single sequence collection: " + "{}".format(collections)) + return collections[0] + + def get_instance_families(instance): """Get all families of the instance""" # todo: move this to lib? @@ -451,21 +486,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "Given file names contain full paths" ) - # Get the sequence as a collection. The files must be of a single - # sequence and have no remainder outside of the collections. 
- collections, remainder = clique.assemble(files, - minimum_items=1) - if not collections: - raise ValueError("No collections found in files: " - "{}".format(files)) - if remainder: - raise ValueError("Files found not detected as part" - " of a sequence: {}".format(remainder)) - if len(collections) > 1: - raise ValueError("Files in sequence are not part of a" - " single sequence collection: " - "{}".format(collections)) - src_collection = collections[0] + src_collection = assemble(files) # If the representation has `frameStart` set it renumbers the # frame indices of the published collection. It will start from @@ -512,14 +533,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): template_filled = anatomy_filled[template_name]["path"] repre_context = template_filled.used_values self.log.debug("Template filled: {}".format(str(template_filled))) - dst_collections, _remainder = clique.assemble( - [os.path.normpath(template_filled)], - minimum_items=1, - patterns=[clique.PATTERNS["frames"]] - ) - assert not _remainder, "This is a bug" - assert len(dst_collections) == 1, "This is a bug" - dst_collection = dst_collections[0] + dst_collection = assemble([os.path.normpath(template_filled)]) # Update the destination indexes and padding dst_collection.indexes.clear() From 44d6199a9e4ea7342fb2ef6bd583e0e373da2545 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 29 Mar 2022 12:28:47 +0200 Subject: [PATCH 41/72] Organize single file code more like sequence file code --- openpype/plugins/publish/integrate_new.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 1ceb99e9fe..1592789390 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -551,21 +551,24 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): else: # Single file - template_data.pop("frame", None) fname = files assert not os.path.isabs(fname), ( "Given file name is a full path" ) - # Store used frame value to template data + + # Manage anatomy template data + template_data.pop("frame", None) if repre.get("udim"): template_data["udim"] = repre["udim"][0] - src = os.path.join(stagingdir, fname) + + # Construct destination filepath from template anatomy_filled = anatomy.format(template_data) template_filled = anatomy_filled[template_name]["path"] repre_context = template_filled.used_values dst = os.path.normpath(template_filled) # Single file transfer + src = os.path.join(stagingdir, fname) transfers = [(src, dst)] for key in self.db_representation_context_keys: From a2a77b8a2099b902e01816ec66a2f308e43004d1 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 29 Mar 2022 12:51:08 +0200 Subject: [PATCH 42/72] Cleanup `get_files_info` docstring --- openpype/plugins/publish/integrate_new.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 1592789390..0ee2a6286f 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -739,11 +739,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): return path def get_files_info(self, transfers, sites, anatomy): - """ Prepare 'files' portion for attached resources and main asset. - Combining records from 'transfers' and 'hardlinks' parts from - instance. - All attached resources should be added, currently without - Context info. 
+ """Prepare 'files' info portion for representations. Arguments: transfers (list): List of transferred files (source, destination) From 6fe6841c996594871a535daf2c21914e5cc32575 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 29 Mar 2022 13:18:04 +0200 Subject: [PATCH 43/72] Capture edge case where all "representations" are tagged for delete --- openpype/plugins/publish/integrate_new.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 0ee2a6286f..80e1909687 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -255,6 +255,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): prepared_representations.append(prepared) + if not prepared_representations: + # Even though we check `instance.data["representations"]` earlier + # this could still happen if all representations were tagged with + # "delete" and thus are skipped for integration + raise RuntimeError("No representations prepared to publish.") + # Each instance can also have pre-defined transfers not explicitly # part of a representation - like texture resources used by a # .ma representation. Those destination paths are pre-defined, etc. From a7a908d1348381ab0c4df9c29861d7c02be635cb Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 29 Mar 2022 13:20:51 +0200 Subject: [PATCH 44/72] Improve docstring --- openpype/plugins/publish/integrate_new.py | 39 +++++++++++------------ 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 80e1909687..8e666f3400 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -84,29 +84,26 @@ def bulk_write(writes): class IntegrateAssetNew(pyblish.api.InstancePlugin): - """Resolve any dependency issues + """Register publish in the database and transfer files to destinations. - This plug-in resolves any paths which, if not updated might break - the published file. + Steps: + 1) Register the subset and version + 2) Transfer the representation files to the destination + 3) Register the representation - The order of families is important, when working with lookdev you want to - first publish the texture, update the texture paths in the nodes and then - publish the shading network. Same goes for file dependent assets. - - Requirements for instance to be correctly integrated - - instance.data['representations'] - must be a list and each member - must be a dictionary with following data: - 'files': list of filenames for sequence, string for single file. - Only the filename is allowed, without the folder path. - 'stagingDir': "path/to/folder/with/files" - 'name': representation name (usually the same as extension) - 'ext': file extension - optional data - "frameStart" - "frameEnd" - 'fps' - "data": additional metadata for each representation. + Requires: + instance.data['representations'] - must be a list and each member + must be a dictionary with following data: + 'files': list of filenames for sequence, string for single file. + Only the filename is allowed, without the folder path. + 'stagingDir': "path/to/folder/with/files" + 'name': representation name (usually the same as extension) + 'ext': file extension + optional data + "frameStart" + "frameEnd" + 'fps' + "data": additional metadata for each representation. 
""" label = "Integrate Asset New" From 3ec9684239b7afc326cad7e184a7c6ed4e7a6058 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 11:28:13 +0200 Subject: [PATCH 45/72] Only add `frame` to context if used by the destination template --- openpype/plugins/publish/integrate_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 3543786949..99a915af73 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -158,7 +158,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): exclude_families = ["clip"] db_representation_context_keys = [ "project", "asset", "task", "subset", "version", "representation", - "family", "hierarchy", "task", "username", "frame" + "family", "hierarchy", "task", "username" ] default_template_name = "publish" From 6733df77f1f693b89078f216457621d129eb4f71 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 11:30:23 +0200 Subject: [PATCH 46/72] Remove double entry of "task" --- openpype/plugins/publish/integrate_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 99a915af73..da4dafb133 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -158,7 +158,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): exclude_families = ["clip"] db_representation_context_keys = [ "project", "asset", "task", "subset", "version", "representation", - "family", "hierarchy", "task", "username" + "family", "hierarchy", "username" ] default_template_name = "publish" From c95c9f92b92f37eca20b1dbc82c3ef0620f8f753 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 11:34:52 +0200 Subject: [PATCH 47/72] Add comment --- openpype/plugins/publish/integrate_new.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index da4dafb133..a2943e2972 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -577,6 +577,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): transfers = [(src, dst)] for key in self.db_representation_context_keys: + # Also add these values to the context even if not used by the + # destination template value = template_data.get(key) if not value: continue From 65691bf5207cf57b679dd4b36b3abb6ae57e0be5 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 11:36:32 +0200 Subject: [PATCH 48/72] Explain why we write subset+version first --- openpype/plugins/publish/integrate_new.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index a2943e2972..bab46803cb 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -270,7 +270,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): file_transactions.add(src, dst, mode=FileTransaction.MODE_HARDLINK) # Bulk write to the database - # todo: Can we move this even to after the file transfers? + # We write the subset and version to the database before the File + # Transaction to reduce the chances of another publish trying to + # publish to the same version number since that chance can greatly + # increase if the file transaction takes a long time. 
bulk_write(subset_writes + version_writes) self.log.info("Subset {subset[name]} and Version {version[name]} " "written to database..".format(subset=subset, From 0d83f3c76c880d088de718a416370e69529ad4a5 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 11:38:43 +0200 Subject: [PATCH 49/72] Add to do for potential erroneous case --- openpype/plugins/publish/integrate_new.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index bab46803cb..84adccb633 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -602,6 +602,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Backwards compatibility: # Store first transferred destination as published path data # todo: can we remove this? + # todo: We shouldn't change data that makes its way back into + # instance.data[] until we know the publish actually succeeded + # otherwise `published_path` might not actually be valid? published_path = transfers[0][1] repre["published_path"] = published_path # Backwards compatibility From 89376a97e4ef85069a3afdfd5e3115b33bd27284 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 20:47:00 +0200 Subject: [PATCH 50/72] Also include file infos of resource files like textures into each representation - This should fix Site Sync for lookdev textures, etc. --- openpype/plugins/publish/integrate_new.py | 29 ++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 84adccb633..25ab7817c9 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -264,10 +264,13 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # part of a representation - like texture resources used by a # .ma representation. Those destination paths are pre-defined, etc. # todo: should we move or simplify this logic? 
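A sketch of the pre-defined transfer pairs this loop consumes, e.g. a texture resource published alongside a `.ma` representation (paths are hypothetical):

    transfers = [
        # (source in staging, pre-resolved publish destination)
        ("/staging/look/textures/diffuse.png",
         "/projects/MyProject1/assets/hero/publish/look/v001/resources/diffuse.png"),
    ]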
+ resource_destinations = set() for src, dst in instance.data.get("transfers", []): file_transactions.add(src, dst, mode=FileTransaction.MODE_COPY) + resource_destinations.add(os.path.abspath(dst)) for src, dst in instance.data.get("hardlinks", []): file_transactions.add(src, dst, mode=FileTransaction.MODE_HARDLINK) + resource_destinations.add(os.path.abspath(dst)) # Bulk write to the database # We write the subset and version to the database before the File @@ -295,18 +298,29 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ) self.log.debug("Site Sync Sites: {}".format(sites)) + # Compute the resource file infos once (files belonging to the + # version instance instead of an individual representation) so + # we can re-use those file infos per representation + anatomy = instance.context.data["anatomy"] + resource_file_infos = self.prepare_file_info(resource_destinations, + sites=sites, + anatomy=anatomy) + # Finalize the representations now the published files are integrated # Get 'files' info for representations and its attached resources - anatomy = instance.context.data["anatomy"] representation_writes = [] new_repre_names_low = set() for prepared in prepared_representations: - transfers = prepared["transfers"] representation = prepared["representation"] + transfers = prepared["transfers"] + destinations = [dst for src, dst in transfers] representation["files"] = self.get_files_info( - transfers, sites, anatomy + destinations, sites=sites, anatomy=anatomy ) + # Add the version resource file infos to each representation + representation["files"] += resource_file_infos + # Set up representation for writing to the database. Since # we *might* be overwriting an existing entry if the version # already existed we'll use ReplaceOnce with `upsert=True` @@ -751,11 +765,11 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ).format(path)) return path - def get_files_info(self, transfers, sites, anatomy): + def get_files_info(self, destinations, sites, anatomy): """Prepare 'files' info portion for representations. 
Arguments: - transfers (list): List of transferred files (source, destination) + destinations (list): List of transferred file destinations sites (list): array of published locations anatomy: anatomy part from instance Returns: @@ -763,10 +777,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): in representation """ file_infos = [] - for _src, dest in transfers: - file_info = self.prepare_file_info(dest, anatomy, sites=sites) + for file_path in destinations: + file_info = self.prepare_file_info(file_path, anatomy, sites=sites) file_infos.append(file_info) - return file_infos def prepare_file_info(self, path, anatomy, sites): From e6209555b01a0330186bc9176c8331a130325186 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 20:50:13 +0200 Subject: [PATCH 51/72] Match behavior more with what integrator did before refactor --- openpype/plugins/publish/collect_anatomy_context_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openpype/plugins/publish/collect_anatomy_context_data.py b/openpype/plugins/publish/collect_anatomy_context_data.py index 346caf6b83..c3fabba2ce 100644 --- a/openpype/plugins/publish/collect_anatomy_context_data.py +++ b/openpype/plugins/publish/collect_anatomy_context_data.py @@ -93,9 +93,9 @@ class CollectAnatomyContextData(pyblish.api.ContextPlugin): intent = context.data.get("intent") if intent and isinstance(intent, dict): - intent_value = intent.get("value") - if intent_value: - context_data["intent"] = intent_value + intent = intent.get("value") + if intent: + context_data["intent"] = intent self.log.info("Global anatomy Data collected") self.log.debug(json.dumps(context_data, indent=4)) From 52fd21d85494dacd0071a3b08d79dbdd04789b30 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 20:51:56 +0200 Subject: [PATCH 52/72] Add todo/question regarding `intent` --- openpype/plugins/publish/collect_anatomy_context_data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openpype/plugins/publish/collect_anatomy_context_data.py b/openpype/plugins/publish/collect_anatomy_context_data.py index c3fabba2ce..3f7e65ecd3 100644 --- a/openpype/plugins/publish/collect_anatomy_context_data.py +++ b/openpype/plugins/publish/collect_anatomy_context_data.py @@ -91,6 +91,8 @@ class CollectAnatomyContextData(pyblish.api.ContextPlugin): } }) + # todo: some code actually expects the dict itself and others doesn't + # question: what should it be? intent = context.data.get("intent") if intent and isinstance(intent, dict): intent = intent.get("value") From 4c78976d3d834a5cb1fd0bce44f465cbf3ac6375 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 20:55:40 +0200 Subject: [PATCH 53/72] Add todo --- openpype/plugins/publish/integrate_new.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 25ab7817c9..e0c0632548 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -688,6 +688,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "fps": instance.data.get("fps", context.data.get("fps")) } + # todo: preferably we wouldn't need this "if dict" etc. logic and + # instead be able to rely what the input value is if it's set. 
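One way the todo above could be resolved is a small helper that hides the normalization; a hypothetical sketch, not part of the codebase:

    def get_intent_value(data):
        """Return the intent string whether stored raw or as {"value": ...}."""
        intent = data.get("intent")
        if isinstance(intent, dict):
            intent = intent.get("value")
        return intent or None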
intent_value = context.data.get("intent") if intent_value and isinstance(intent_value, dict): intent_value = intent_value.get("value") From 3e095bc7554a24ef13282ccfd87e0327eb3b8745 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 20:57:38 +0200 Subject: [PATCH 54/72] Use template name for frame padding anatomy template --- openpype/plugins/publish/integrate_new.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index e0c0632548..0f3b11a025 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -520,8 +520,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if repre.get("frameStart") is not None: index_frame_start = int(repre.get("frameStart")) - # TODO use frame padding from right template group - render_template = anatomy.templates["render"] + render_template = anatomy.templates[template_name] frame_start_padding = int( render_template.get( "frame_padding", From b12b1c80f2facbe343333ba3d70dcbe463383538 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 21:00:10 +0200 Subject: [PATCH 55/72] Never shift udim sequences --- openpype/plugins/publish/integrate_new.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 0f3b11a025..fd0d57c646 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -501,6 +501,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): anatomy = instance.context.data['anatomy'] template = os.path.normpath(anatomy.templates[template_name]["path"]) + is_udim = bool(repre.get("udim")) is_sequence_representation = isinstance(files, (list, tuple)) if is_sequence_representation: # Collection of files (sequence) @@ -517,7 +518,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # frame indices from the source collection. destination_indexes = list(src_collection.indexes) destination_padding = len(get_first_frame_padded(src_collection)) - if repre.get("frameStart") is not None: + if repre.get("frameStart") is not None and not is_udim: index_frame_start = int(repre.get("frameStart")) render_template = anatomy.templates[template_name] @@ -543,7 +544,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # from the source indexes, etc. 
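For example, a 1001-1010 source sequence republished with `frameStart` 1 and 4-digit padding shifts like this:

    src_indexes = list(range(1001, 1011))       # source collection frames
    shift = 1 - src_indexes[0]                  # frameStart - first src frame
    dst_indexes = [frame + shift for frame in src_indexes]   # 1, 2, ... 10
    first_padded = "{0:04d}".format(dst_indexes[0])          # "0001"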
first_index_padded = get_frame_padded(frame=destination_indexes[0], padding=destination_padding) - if repre.get("udim"): + if is_udim: # UDIM representations handle ranges in a different manner template_data["udim"] = first_index_padded else: @@ -579,7 +580,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # Manage anatomy template data template_data.pop("frame", None) - if repre.get("udim"): + if is_udim: template_data["udim"] = repre["udim"][0] # Construct destination filepath from template From f7d35c4fed0885c6656da03eb852706c6bf20117 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 21:01:09 +0200 Subject: [PATCH 56/72] add todo/question --- openpype/plugins/publish/integrate_new.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index fd0d57c646..52c7686473 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -522,6 +522,8 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): index_frame_start = int(repre.get("frameStart")) render_template = anatomy.templates[template_name] + # todo: should we ALWAYS manage the frame padding even when not + # having `frameStart` set? frame_start_padding = int( render_template.get( "frame_padding", From 70bfdd09b40936efc45efa6bbd1ea029447058f2 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sat, 2 Apr 2022 21:07:02 +0200 Subject: [PATCH 57/72] Remove old "dependencies" data --- openpype/plugins/publish/integrate_new.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 52c7686473..37c68ffa6d 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -636,7 +636,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "parent": version["_id"], "name": repre['name'], "data": data, - "dependencies": instance.data.get("dependencies", "").split(), # Imprint shortcut to context for performance reasons. 
"context": repre_context From 45745cc514236d64cc7f2feddbff9e6217b720fa Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sun, 3 Apr 2022 20:37:28 +0200 Subject: [PATCH 58/72] Improve clarity of comment --- openpype/plugins/publish/integrate_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 37c68ffa6d..cb469251e6 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -604,7 +604,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): repre_context[key] = template_data[key] # Explicitly store the full list even though template data might - # have a different value + # have a different value because it uses just a single udim tile if repre.get("udim"): repre_context["udim"] = repre.get("udim") # store list From fe72197a9feb413c8f6c5f9e02339ed891fdda07 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sun, 3 Apr 2022 20:40:25 +0200 Subject: [PATCH 59/72] Add comment --- openpype/plugins/publish/integrate_new.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index cb469251e6..f1cceb9ca7 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -156,11 +156,14 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "usdOverride" ] exclude_families = ["clip"] + default_template_name = "publish" + + # Representation context keys that should always be written to + # the database even if not used by the destination template db_representation_context_keys = [ "project", "asset", "task", "subset", "version", "representation", "family", "hierarchy", "username" ] - default_template_name = "publish" # Attributes set by settings template_name_profiles = None From c3c8281e0134222677b32f91ec644322dd996a74 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Sun, 3 Apr 2022 20:41:34 +0200 Subject: [PATCH 60/72] tweak comment --- openpype/plugins/publish/integrate_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index f1cceb9ca7..238ae82bba 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -183,7 +183,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.register(instance, file_transactions) except Exception: # clean destination - # todo: rollback any registered entities? (or how safe are we?) 
+ # todo: preferably we'd also rollback *any* changes to the database file_transactions.rollback() self.log.critical("Error when registering", exc_info=True) six.reraise(*sys.exc_info()) From 2e2deb349d082096d056f878a5cf629c2f95e12c Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 14 Apr 2022 13:06:14 +0200 Subject: [PATCH 61/72] Match changes that were made to original IntegrateAsset Changes of: - https://github.com/pypeclub/OpenPype/commit/312d0309ab92de834629c58587f1a758d1d1e90c - https://github.com/pypeclub/OpenPype/commit/507f3615ab8f42f5664afcac01d339e0517afdf5 - https://github.com/pypeclub/OpenPype/commit/29dca65202d45a79e66c619b95d3408e227a9c05 --- openpype/plugins/publish/integrate_new.py | 61 ++++++++++++++++++----- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 9e8dfefc9e..768c413bf9 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -4,6 +4,7 @@ import sys import copy import clique import six +from collections import deque, defaultdict from bson.objectid import ObjectId from pymongo import DeleteMany, ReplaceOne, InsertOne, UpdateOne @@ -871,18 +872,18 @@ class SiteSync(object): attached_sites[remote_site] = create_metadata(remote_site, created=False) + # add alternative sites + cls._add_alternative_sites(system_sync_server_presets, attached_sites) + # add skeleton for sites where it should be always synced to always_accessible_sites = ( sync_project_presets["config"].get("always_accessible_on", []) ) - for site in always_accessible_sites: + for site in set(always_accessible_sites): site = site.strip() if site not in attached_sites: attached_sites[site] = create_metadata(site, created=False) - # add alternative sites - cls._add_alternative_sites(system_sync_server_presets, attached_sites) - return list(attached_sites.values()) @staticmethod @@ -904,8 +905,9 @@ class SiteSync(object): return local_site, remote_site - @staticmethod - def _add_alternative_sites(system_sync_server_presets, + @classmethod + def _add_alternative_sites(cls, + system_sync_server_presets, attached_sites): """Loop through all configured sites and add alternatives. @@ -916,18 +918,14 @@ class SiteSync(object): See SyncServerModule.handle_alternate_site """ conf_sites = system_sync_server_presets.get("sites", {}) + alt_site_pairs = cls._get_alt_site_pairs(conf_sites) - for site_name, site_info in conf_sites.items(): + for site_name, alt_sites in alt_site_pairs.items(): # Skip if already defined if site_name in attached_sites: continue - # Get alternate sites (stripped names) for this site name - alt_sites = site_info.get("alternative_sites", []) - alt_sites = [site.strip() for site in alt_sites] - alt_sites = set(alt_sites) - # If no alternative sites we don't need to add if not alt_sites: continue @@ -944,3 +942,42 @@ class SiteSync(object): # Note: We change mutable `attached_site` dict in-place attached_sites[site_name] = alt_site_meta + + @staticmethod + def _get_alt_site_pairs(conf_sites): + """Returns dict of site and its alternative sites. 
+ If `site` has alternative site, it means that alt_site has + 'site' as + alternative site + Args: + conf_sites (dict) + Returns: + (dict): {'site': [alternative sites]...} + """ + alt_site_pairs = defaultdict(list) + for site_name, site_info in conf_sites.items(): + alt_sites = set(site_info.get("alternative_sites", [])) + alt_site_pairs[site_name].extend(alt_sites) + + for alt_site in alt_sites: + alt_site_pairs[alt_site].append(site_name) + + for site_name, alt_sites in alt_site_pairs.items(): + sites_queue = deque(alt_sites) + while sites_queue: + alt_site = sites_queue.popleft() + + # safety against wrong config + # {"SFTP": {"alternative_site": "SFTP"} + if alt_site == site_name or alt_site not in alt_site_pairs: + continue + + for alt_alt_site in alt_site_pairs[alt_site]: + if ( + alt_alt_site != site_name + and alt_alt_site not in alt_sites + ): + alt_sites.append(alt_alt_site) + sites_queue.append(alt_alt_site) + + return alt_site_pairs From 0fdd4f1aecd3b5fa09496d4aa48ee605a003e61d Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 14 Apr 2022 13:07:33 +0200 Subject: [PATCH 62/72] Fix indentation --- openpype/plugins/publish/integrate_new.py | 66 +++++++++++------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 768c413bf9..4eccce4e81 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -943,41 +943,41 @@ class SiteSync(object): # Note: We change mutable `attached_site` dict in-place attached_sites[site_name] = alt_site_meta - @staticmethod - def _get_alt_site_pairs(conf_sites): - """Returns dict of site and its alternative sites. - If `site` has alternative site, it means that alt_site has - 'site' as - alternative site - Args: - conf_sites (dict) - Returns: - (dict): {'site': [alternative sites]...} - """ - alt_site_pairs = defaultdict(list) - for site_name, site_info in conf_sites.items(): - alt_sites = set(site_info.get("alternative_sites", [])) - alt_site_pairs[site_name].extend(alt_sites) + @staticmethod + def _get_alt_site_pairs(conf_sites): + """Returns dict of site and its alternative sites. 
+ If `site` has alternative site, it means that alt_site has + 'site' as + alternative site + Args: + conf_sites (dict) + Returns: + (dict): {'site': [alternative sites]...} + """ + alt_site_pairs = defaultdict(list) + for site_name, site_info in conf_sites.items(): + alt_sites = set(site_info.get("alternative_sites", [])) + alt_site_pairs[site_name].extend(alt_sites) - for alt_site in alt_sites: - alt_site_pairs[alt_site].append(site_name) + for alt_site in alt_sites: + alt_site_pairs[alt_site].append(site_name) - for site_name, alt_sites in alt_site_pairs.items(): - sites_queue = deque(alt_sites) - while sites_queue: - alt_site = sites_queue.popleft() + for site_name, alt_sites in alt_site_pairs.items(): + sites_queue = deque(alt_sites) + while sites_queue: + alt_site = sites_queue.popleft() - # safety against wrong config - # {"SFTP": {"alternative_site": "SFTP"} - if alt_site == site_name or alt_site not in alt_site_pairs: - continue + # safety against wrong config + # {"SFTP": {"alternative_site": "SFTP"} + if alt_site == site_name or alt_site not in alt_site_pairs: + continue - for alt_alt_site in alt_site_pairs[alt_site]: - if ( - alt_alt_site != site_name - and alt_alt_site not in alt_sites - ): - alt_sites.append(alt_alt_site) - sites_queue.append(alt_alt_site) + for alt_alt_site in alt_site_pairs[alt_site]: + if ( + alt_alt_site != site_name + and alt_alt_site not in alt_sites + ): + alt_sites.append(alt_alt_site) + sites_queue.append(alt_alt_site) - return alt_site_pairs + return alt_site_pairs From 1a03bbe48a37cc62918152985488a0bd99d43473 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 14 Apr 2022 13:11:57 +0200 Subject: [PATCH 63/72] Store alt sites in a `set` --- openpype/plugins/publish/integrate_new.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 4eccce4e81..2795b59482 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -954,13 +954,13 @@ class SiteSync(object): Returns: (dict): {'site': [alternative sites]...} """ - alt_site_pairs = defaultdict(list) + alt_site_pairs = defaultdict(set) for site_name, site_info in conf_sites.items(): alt_sites = set(site_info.get("alternative_sites", [])) - alt_site_pairs[site_name].extend(alt_sites) + alt_site_pairs[site_name].update(alt_sites) for alt_site in alt_sites: - alt_site_pairs[alt_site].append(site_name) + alt_site_pairs[alt_site].add(site_name) for site_name, alt_sites in alt_site_pairs.items(): sites_queue = deque(alt_sites) @@ -977,7 +977,7 @@ class SiteSync(object): alt_alt_site != site_name and alt_alt_site not in alt_sites ): - alt_sites.append(alt_alt_site) + alt_sites.add(alt_alt_site) sites_queue.append(alt_alt_site) return alt_site_pairs From 8a970b123c1697d7db28f31debf4f7113c3c3177 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 21 Apr 2022 11:24:49 +0200 Subject: [PATCH 64/72] Use logic directly from Sync Server module --- openpype/plugins/publish/integrate_new.py | 165 +--------------------- 1 file changed, 6 insertions(+), 159 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index 2795b59482..cc6856e407 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -4,14 +4,13 @@ import sys import copy import clique import six -from collections import deque, defaultdict from bson.objectid import ObjectId from pymongo import 
DeleteMany, ReplaceOne, InsertOne, UpdateOne import pyblish.api from avalon import io import openpype.api -from datetime import datetime +from openpype.modules import ModulesManager from openpype.lib.profiles_filtering import filter_profiles from openpype.lib.file_transaction import FileTransaction @@ -299,11 +298,12 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): self.log.debug("Retrieving Representation Site Sync information ...") # Get the accessible sites for Site Sync - sites = SiteSync.compute_resource_sync_sites( - system_settings=instance.context.data["system_settings"], - project_settings=instance.context.data["project_settings"] + manager = ModulesManager() + sync_server_module = manager.modules_by_name["sync_server"] + sites = sync_server_module.compute_resource_sync_sites( + project_name=instance.data["projectEntity"]["name"] ) - self.log.debug("Site Sync Sites: {}".format(sites)) + self.log.debug("Sync Server Sites: {}".format(sites)) # Compute the resource file infos once (files belonging to the # version instance instead of an individual representation) so @@ -828,156 +828,3 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "hash": openpype.api.source_hash(path), "sites": sites } - - -class SiteSync(object): - """Logic for Site Sync Module functionality""" - - @classmethod - def compute_resource_sync_sites(cls, - system_settings, - project_settings): - """Get available resource sync sites""" - - def create_metadata(name, created=True): - """Create sync site metadata for site with `name`""" - metadata = {"name": name} - if created: - metadata["created_dt"] = datetime.now() - return metadata - - default_sites = [create_metadata("studio")] - - # If sync site module is disabled return default fallback site - system_sync_server_presets = system_settings["modules"]["sync_server"] - log.debug("system_sett:: {}".format(system_sync_server_presets)) - if not system_sync_server_presets["enabled"]: - return default_sites - - # If sync site module is disabled in current - # project return default fallback site - sync_project_presets = project_settings["global"]["sync_server"] - if not sync_project_presets["enabled"]: - return default_sites - - local_site, remote_site = cls._get_sites(sync_project_presets) - - # Attached sites metadata by site name - # That is the local site, remote site, the always accesible sites - # and their alternate sites (alias of sites with different protocol) - attached_sites = dict() - attached_sites[local_site] = create_metadata(local_site) - - if remote_site and remote_site != local_site: - attached_sites[remote_site] = create_metadata(remote_site, - created=False) - - # add alternative sites - cls._add_alternative_sites(system_sync_server_presets, attached_sites) - - # add skeleton for sites where it should be always synced to - always_accessible_sites = ( - sync_project_presets["config"].get("always_accessible_on", []) - ) - for site in set(always_accessible_sites): - site = site.strip() - if site not in attached_sites: - attached_sites[site] = create_metadata(site, created=False) - - return list(attached_sites.values()) - - @staticmethod - def _get_sites(sync_project_presets): - """Returns tuple (local_site, remote_site)""" - local_site_id = openpype.api.get_local_site_id() - local_site = sync_project_presets["config"]. 
\ - get("active_site", "studio").strip() - - if local_site == 'local': - local_site = local_site_id - - remote_site = sync_project_presets["config"].get("remote_site") - if remote_site: - remote_site.strip() - - if remote_site == 'local': - remote_site = local_site_id - - return local_site, remote_site - - @classmethod - def _add_alternative_sites(cls, - system_sync_server_presets, - attached_sites): - """Loop through all configured sites and add alternatives. - - For all sites if an alternative site is detected that has an - accessible site then we can also register to that alternative site - with the same "created" state. So we match the existing data. - - See SyncServerModule.handle_alternate_site - """ - conf_sites = system_sync_server_presets.get("sites", {}) - alt_site_pairs = cls._get_alt_site_pairs(conf_sites) - - for site_name, alt_sites in alt_site_pairs.items(): - - # Skip if already defined - if site_name in attached_sites: - continue - - # If no alternative sites we don't need to add - if not alt_sites: - continue - - # Take a copy of data of the first alternate site that is already - # defined as an attached site to match the same state. - match_meta = next((attached_sites[site] for site in alt_sites - if site in attached_sites), None) - if not match_meta: - continue - - alt_site_meta = copy.deepcopy(match_meta) - alt_site_meta["name"] = site_name - - # Note: We change mutable `attached_site` dict in-place - attached_sites[site_name] = alt_site_meta - - @staticmethod - def _get_alt_site_pairs(conf_sites): - """Returns dict of site and its alternative sites. - If `site` has alternative site, it means that alt_site has - 'site' as - alternative site - Args: - conf_sites (dict) - Returns: - (dict): {'site': [alternative sites]...} - """ - alt_site_pairs = defaultdict(set) - for site_name, site_info in conf_sites.items(): - alt_sites = set(site_info.get("alternative_sites", [])) - alt_site_pairs[site_name].update(alt_sites) - - for alt_site in alt_sites: - alt_site_pairs[alt_site].add(site_name) - - for site_name, alt_sites in alt_site_pairs.items(): - sites_queue = deque(alt_sites) - while sites_queue: - alt_site = sites_queue.popleft() - - # safety against wrong config - # {"SFTP": {"alternative_site": "SFTP"} - if alt_site == site_name or alt_site not in alt_site_pairs: - continue - - for alt_alt_site in alt_site_pairs[alt_site]: - if ( - alt_alt_site != site_name - and alt_alt_site not in alt_sites - ): - alt_sites.add(alt_alt_site) - sites_queue.append(alt_alt_site) - - return alt_site_pairs From ae1acb950bbb69b203c36f19d40e3952eca46bfd Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Thu, 21 Apr 2022 14:08:53 +0200 Subject: [PATCH 65/72] Fix: refactor to use correct function --- openpype/plugins/publish/integrate_new.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index cc6856e407..419e2b4e4b 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -309,9 +309,9 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): # version instance instead of an individual representation) so # we can re-use those file infos per representation anatomy = instance.context.data["anatomy"] - resource_file_infos = self.prepare_file_info(resource_destinations, - sites=sites, - anatomy=anatomy) + resource_file_infos = self.get_files_info(resource_destinations, + sites=sites, + anatomy=anatomy) # Finalize the representations now the 
published files are integrated # Get 'files' info for representations and its attached resources From 3e058c6e8ac79ebb9933d0ad02957b0467f3a578 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 5 Jul 2022 09:20:00 +0200 Subject: [PATCH 66/72] Move IntegrateAsset --- openpype/plugins/publish/integrate.py | 832 ++++++++++++++++++++++++++ 1 file changed, 832 insertions(+) create mode 100644 openpype/plugins/publish/integrate.py diff --git a/openpype/plugins/publish/integrate.py b/openpype/plugins/publish/integrate.py new file mode 100644 index 0000000000..6ad0849ff7 --- /dev/null +++ b/openpype/plugins/publish/integrate.py @@ -0,0 +1,832 @@ +import os +import logging +import sys +import copy +import clique +import six + +from bson.objectid import ObjectId +from pymongo import DeleteMany, ReplaceOne, InsertOne, UpdateOne +import pyblish.api + +import openpype.api +from openpype.modules import ModulesManager +from openpype.lib.profiles_filtering import filter_profiles +from openpype.lib.file_transaction import FileTransaction +from openpype.pipeline import legacy_io + +log = logging.getLogger(__name__) + + +def assemble(files): + """Convenience `clique.assemble` wrapper for files of a single collection. + + Unlike `clique.assemble` this wrapper does not allow more than a single + Collection nor any remainder files. Errors will be raised when not only + a single collection is assembled. + + Returns: + clique.Collection: A single sequence Collection + + Raises: + ValueError: Error is raised when files do not result in a single + collected Collection. + + """ + # todo: move this to lib? + # Get the sequence as a collection. The files must be of a single + # sequence and have no remainder outside of the collections. + patterns = [clique.PATTERNS["frames"]] + collections, remainder = clique.assemble(files, + minimum_items=1, + patterns=patterns) + if not collections: + raise ValueError("No collections found in files: " + "{}".format(files)) + if remainder: + raise ValueError("Files found not detected as part" + " of a sequence: {}".format(remainder)) + if len(collections) > 1: + raise ValueError("Files in sequence are not part of a" + " single sequence collection: " + "{}".format(collections)) + return collections[0] + + +def get_instance_families(instance): + """Get all families of the instance""" + # todo: move this to lib? + family = instance.data.get("family") + families = [] + if family: + families.append(family) + + for _family in (instance.data.get("families") or []): + if _family not in families: + families.append(_family) + + return families + + +def get_frame_padded(frame, padding): + """Return frame number as string with `padding` amount of padded zeros""" + return "{frame:0{padding}d}".format(padding=padding, frame=frame) + + +def get_first_frame_padded(collection): + """Return first frame as padded number from `clique.Collection`""" + start_frame = next(iter(collection.indexes)) + return get_frame_padded(start_frame, padding=collection.padding) + + +def bulk_write(writes): + """Convenience function to bulk write into active project database""" + project = legacy_io.Session["AVALON_PROJECT"] + return legacy_io._database[project].bulk_write(writes) + + +class IntegrateAsset(pyblish.api.InstancePlugin): + """Register publish in the database and transfer files to destinations. 
+ + Steps: + 1) Register the subset and version + 2) Transfer the representation files to the destination + 3) Register the representation + + Requires: + instance.data['representations'] - must be a list and each member + must be a dictionary with following data: + 'files': list of filenames for sequence, string for single file. + Only the filename is allowed, without the folder path. + 'stagingDir': "path/to/folder/with/files" + 'name': representation name (usually the same as extension) + 'ext': file extension + optional data + "frameStart" + "frameEnd" + 'fps' + "data": additional metadata for each representation. + """ + + label = "Integrate Asset New" + order = pyblish.api.IntegratorOrder + families = ["workfile", + "pointcache", + "camera", + "animation", + "model", + "mayaAscii", + "mayaScene", + "setdress", + "layout", + "ass", + "vdbcache", + "scene", + "vrayproxy", + "vrayscene_layer", + "render", + "prerender", + "imagesequence", + "review", + "rendersetup", + "rig", + "plate", + "look", + "audio", + "yetiRig", + "yeticache", + "nukenodes", + "gizmo", + "source", + "matchmove", + "image", + "assembly", + "fbx", + "textures", + "action", + "harmony.template", + "harmony.palette", + "editorial", + "background", + "camerarig", + "redshiftproxy", + "effect", + "xgen", + "hda", + "usd", + "staticMesh", + "skeletalMesh", + "usdComposition", + "usdOverride", + "simpleUnrealTexture" + ] + exclude_families = ["clip", "render.farm"] + default_template_name = "publish" + + # Representation context keys that should always be written to + # the database even if not used by the destination template + db_representation_context_keys = [ + "project", "asset", "task", "subset", "version", "representation", + "family", "hierarchy", "username" + ] + + # Attributes set by settings + template_name_profiles = None + + def process(self, instance): + + # Exclude instances that also contain families from exclude families + families = set(get_instance_families(instance)) + exclude = families & set(self.exclude_families) + if exclude: + self.log.debug("Instance not integrated due to exclude " + "families found: {}".format(", ".join(exclude))) + return + + file_transactions = FileTransaction(log=self.log) + try: + self.register(instance, file_transactions) + except Exception: + # clean destination + # todo: preferably we'd also rollback *any* changes to the database + file_transactions.rollback() + self.log.critical("Error when registering", exc_info=True) + six.reraise(*sys.exc_info()) + + # Finalizing can't rollback safely so no use for moving it to + # the try, except. + file_transactions.finalize() + + def register(self, instance, file_transactions): + + instance_stagingdir = instance.data.get("stagingDir") + if not instance_stagingdir: + self.log.info(( + "{0} is missing reference to staging directory." + " Will try to get it from representation." + ).format(instance)) + + else: + self.log.debug( + "Establishing staging directory " + "@ {0}".format(instance_stagingdir) + ) + + # Ensure at least one representation is set up for registering. 
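As described in the class docstring, each representation is a small dictionary; two minimal examples with hypothetical values:

    single_file_representation = {
        "name": "ma",
        "ext": "ma",
        "files": "modelMain_v001.ma",
        "stagingDir": "/tmp/publish_staging",
    }
    sequence_representation = {
        "name": "exr",
        "ext": "exr",
        "files": ["render.1001.exr", "render.1002.exr"],
        "frameStart": 1001,
        "stagingDir": "/tmp/publish_staging",
    }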
+ repres = instance.data.get("representations") + assert repres, "Instance has no representations data" + assert isinstance(repres, (list, tuple)), ( + "Instance 'representations' must be a list, got: {0} {1}".format( + str(type(repres)), str(repres) + ) + ) + + template_name = self.get_template_name(instance) + + subset, subset_writes = self.prepare_subset(instance) + version, version_writes = self.prepare_version(instance, subset) + instance.data["versionEntity"] = version + + # Get existing representations (if any) + existing_repres_by_name = { + repres["name"].lower(): repres for repres in legacy_io.find( + { + "parent": version["_id"], + "type": "representation" + }, + # Only care about id and name of existing representations + projection={"_id": True, "name": True} + ) + } + + # Prepare all representations + prepared_representations = [] + for repre in instance.data["representations"]: + + if "delete" in repre.get("tags", []): + self.log.debug("Skipping representation marked for deletion: " + "{}".format(repre)) + continue + + # todo: reduce/simplify what is returned from this function + prepared = self.prepare_representation(repre, + template_name, + existing_repres_by_name, + version, + instance_stagingdir, + instance) + + for src, dst in prepared["transfers"]: + # todo: add support for hardlink transfers + file_transactions.add(src, dst) + + prepared_representations.append(prepared) + + if not prepared_representations: + # Even though we check `instance.data["representations"]` earlier + # this could still happen if all representations were tagged with + # "delete" and thus are skipped for integration + raise RuntimeError("No representations prepared to publish.") + + # Each instance can also have pre-defined transfers not explicitly + # part of a representation - like texture resources used by a + # .ma representation. Those destination paths are pre-defined, etc. + # todo: should we move or simplify this logic? + resource_destinations = set() + for src, dst in instance.data.get("transfers", []): + file_transactions.add(src, dst, mode=FileTransaction.MODE_COPY) + resource_destinations.add(os.path.abspath(dst)) + for src, dst in instance.data.get("hardlinks", []): + file_transactions.add(src, dst, mode=FileTransaction.MODE_HARDLINK) + resource_destinations.add(os.path.abspath(dst)) + + # Bulk write to the database + # We write the subset and version to the database before the File + # Transaction to reduce the chances of another publish trying to + # publish to the same version number since that chance can greatly + # increase if the file transaction takes a long time. 
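The file transaction referenced here follows an add/process/finalize lifecycle with rollback on failure, mirroring how this plugin calls it (paths are hypothetical):

    import logging

    from openpype.lib.file_transaction import FileTransaction

    transaction = FileTransaction(log=logging.getLogger("publish"))
    transaction.add("/staging/render.1001.exr",
                    "/publish/v001/render.1001.exr")
    try:
        transaction.process()    # transfer files, back up overwritten ones
    except Exception:
        transaction.rollback()   # restore backups, undo partial transfers
        raise
    transaction.finalize()       # drop temporary backups after success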
+ bulk_write(subset_writes + version_writes) + self.log.info("Subset {subset[name]} and Version {version[name]} " + "written to database..".format(subset=subset, + version=version)) + + # Process all file transfers of all integrations now + self.log.debug("Integrating source files to destination ...") + file_transactions.process() + self.log.debug("Backed up existing files: " + "{}".format(file_transactions.backups)) + self.log.debug("Transferred files: " + "{}".format(file_transactions.transferred)) + self.log.debug("Retrieving Representation Site Sync information ...") + + # Get the accessible sites for Site Sync + manager = ModulesManager() + sync_server_module = manager.modules_by_name["sync_server"] + sites = sync_server_module.compute_resource_sync_sites( + project_name=instance.data["projectEntity"]["name"] + ) + self.log.debug("Sync Server Sites: {}".format(sites)) + + # Compute the resource file infos once (files belonging to the + # version instance instead of an individual representation) so + # we can re-use those file infos per representation + anatomy = instance.context.data["anatomy"] + resource_file_infos = self.get_files_info(resource_destinations, + sites=sites, + anatomy=anatomy) + + # Finalize the representations now the published files are integrated + # Get 'files' info for representations and its attached resources + representation_writes = [] + new_repre_names_low = set() + for prepared in prepared_representations: + representation = prepared["representation"] + transfers = prepared["transfers"] + destinations = [dst for src, dst in transfers] + representation["files"] = self.get_files_info( + destinations, sites=sites, anatomy=anatomy + ) + + # Add the version resource file infos to each representation + representation["files"] += resource_file_infos + + # Set up representation for writing to the database. Since + # we *might* be overwriting an existing entry if the version + # already existed we'll use ReplaceOnce with `upsert=True` + representation_writes.append(ReplaceOne( + filter={"_id": representation["_id"]}, + replacement=representation, + upsert=True + )) + + new_repre_names_low.add(representation["name"].lower()) + + # Delete any existing representations that didn't get any new data + # if the instance is not set to append mode + if not instance.data.get("append", False): + delete_names = set() + for name, existing_repres in existing_repres_by_name.items(): + if name not in new_repre_names_low: + # We add the exact representation name because `name` is + # lowercase for name matching only and not in the database + delete_names.add(existing_repres["name"]) + if delete_names: + representation_writes.append(DeleteMany( + filter={ + "parent": version["_id"], + "name": {"$in": list(delete_names)} + } + )) + + # Write representations to the database + bulk_write(representation_writes) + + # Backwards compatibility + # todo: can we avoid the need to store this? 
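For reference, the two bulk operation types used for the representation writes above behave as follows (ids and names are hypothetical):

    from bson.objectid import ObjectId
    from pymongo import DeleteMany, ReplaceOne

    repre_id = ObjectId()
    version_id = ObjectId()
    writes = [
        # Overwrite the document if the id exists, insert it otherwise
        ReplaceOne({"_id": repre_id},
                   {"_id": repre_id, "type": "representation",
                    "parent": version_id},
                   upsert=True),
        # Remove representations of the version that received no new data
        DeleteMany({"parent": version_id, "name": {"$in": ["stale_exr"]}}),
    ]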
+        instance.data["published_representations"] = {
+            p["representation"]["_id"]: p for p in prepared_representations
+        }
+
+        self.log.info("Registered {} representations"
+                      "".format(len(prepared_representations)))
+
+    def prepare_subset(self, instance):
+        asset = instance.data.get("assetEntity")
+        subset_name = instance.data["subset"]
+        self.log.debug("Subset: {}".format(subset_name))
+
+        # Get existing subset if it exists
+        subset = legacy_io.find_one({
+            "type": "subset",
+            "parent": asset["_id"],
+            "name": subset_name
+        })
+
+        # Define subset data
+        data = {
+            "families": get_instance_families(instance)
+        }
+
+        subset_group = instance.data.get("subsetGroup")
+        if subset_group:
+            data["subsetGroup"] = subset_group
+
+        bulk_writes = []
+        if subset is None:
+            # Create a new subset
+            self.log.info("Subset '%s' not found, creating ..." % subset_name)
+            subset = {
+                "_id": ObjectId(),
+                "schema": "openpype:subset-3.0",
+                "type": "subset",
+                "name": subset_name,
+                "data": data,
+                "parent": asset["_id"]
+            }
+            bulk_writes.append(InsertOne(subset))
+
+        else:
+            # Update existing subset data with new data and set in database.
+            # We also change the found subset in-place so we don't need to
+            # re-query the subset afterwards
+            subset["data"].update(data)
+            bulk_writes.append(UpdateOne(
+                {"type": "subset", "_id": subset["_id"]},
+                {"$set": {
+                    "data": subset["data"]
+                }}
+            ))
+
+        self.log.info("Prepared subset: {}".format(subset_name))
+        return subset, bulk_writes
+
+    def prepare_version(self, instance, subset):
+
+        version_number = instance.data["version"]
+
+        version = {
+            "schema": "openpype:version-3.0",
+            "type": "version",
+            "parent": subset["_id"],
+            "name": version_number,
+            "data": self.create_version_data(instance)
+        }
+
+        existing_version = legacy_io.find_one({
+            'type': 'version',
+            'parent': subset["_id"],
+            'name': version_number
+        }, projection={"_id": True})
+
+        if existing_version:
+            self.log.debug("Updating existing version ...")
+            version["_id"] = existing_version["_id"]
+        else:
+            self.log.debug("Creating new version ...")
+            version["_id"] = ObjectId()
+
+        bulk_writes = [ReplaceOne(
+            filter={"_id": version["_id"]},
+            replacement=version,
+            upsert=True
+        )]
+
+        self.log.info("Prepared version: v{0:03d}".format(version["name"]))
+
+        return version, bulk_writes
+
+    def prepare_representation(self, repre,
+                               template_name,
+                               existing_repres_by_name,
+                               version,
+                               instance_stagingdir,
+                               instance):
+
+        # pre-flight validations
+        if repre["ext"].startswith("."):
+            raise ValueError("Extension must not start with a dot '.': "
+                             "{}".format(repre["ext"]))
+
+        if repre.get("transfers"):
+            raise ValueError("Representation is not allowed to have transfers "
+                             "data before integration. They are computed in "
+                             "the integrator. "
+                             "Got: {}".format(repre["transfers"]))
+
+        # create template data for Anatomy
+        template_data = copy.deepcopy(instance.data["anatomyData"])
+
+        # required representation keys
+        files = repre['files']
+        template_data["representation"] = repre["name"]
+        template_data["ext"] = repre["ext"]
+
+        # optionals
+        # retrieve additional anatomy data from representation if exists
+        for key, anatomy_key in {
+            # Representation Key: Anatomy data key
+            "resolutionWidth": "resolution_width",
+            "resolutionHeight": "resolution_height",
+            "fps": "fps",
+            "outputName": "output",
+            "originalBasename": "originalBasename"
+        }.items():
+            # Allow to take value from representation
+            # if not found also consider instance.data
+            if key in repre:
+                value = repre[key]
+            elif key in instance.data:
+                value = instance.data[key]
+            else:
+                continue
+            template_data[anatomy_key] = value
+
+        if repre.get('stagingDir'):
+            stagingdir = repre['stagingDir']
+        else:
+            # Fall back to instance staging dir if not explicitly
+            # set for representation in the instance
+            self.log.debug("Representation uses instance staging dir: "
+                           "{}".format(instance_stagingdir))
+            stagingdir = instance_stagingdir
+        if not stagingdir:
+            raise ValueError("No staging directory set for representation: "
+                             "{}".format(repre))
+
+        self.log.debug("Anatomy template name: {}".format(template_name))
+        anatomy = instance.context.data['anatomy']
+        template = os.path.normpath(anatomy.templates[template_name]["path"])
+
+        is_udim = bool(repre.get("udim"))
+        is_sequence_representation = isinstance(files, (list, tuple))
+        if is_sequence_representation:
+            # Collection of files (sequence)
+            assert not any(os.path.isabs(fname) for fname in files), (
+                "Given file names contain full paths"
+            )
+
+            src_collection = assemble(files)
+
+            # If the representation has `frameStart` set it renumbers the
+            # frame indices of the published collection. It will start from
+            # that `frameStart` index instead. Thus if that frame start
+            # differs from the collection we want to shift the destination
+            # frame indices from the source collection.
+            destination_indexes = list(src_collection.indexes)
+            destination_padding = len(get_first_frame_padded(src_collection))
+            if repre.get("frameStart") is not None and not is_udim:
+                index_frame_start = int(repre.get("frameStart"))
+
+                render_template = anatomy.templates[template_name]
+                # todo: should we ALWAYS manage the frame padding even when not
+                # having `frameStart` set?
+                frame_start_padding = int(
+                    render_template.get(
+                        "frame_padding",
+                        render_template.get("padding")
+                    )
+                )
+
+                # Shift destination sequence to the start frame
+                src_start_frame = next(iter(src_collection.indexes))
+                shift = index_frame_start - src_start_frame
+                if shift:
+                    destination_indexes = [
+                        frame + shift for frame in destination_indexes
+                    ]
+                destination_padding = frame_start_padding
+
+            # To construct the destination template with anatomy we require
+            # a Frame or UDIM tile set for the template data. We use the first
+            # index of the destination for that because that could've shifted
+            # from the source indexes, etc.
+ first_index_padded = get_frame_padded(frame=destination_indexes[0], + padding=destination_padding) + if is_udim: + # UDIM representations handle ranges in a different manner + template_data["udim"] = first_index_padded + else: + template_data["frame"] = first_index_padded + + # Construct destination collection from template + anatomy_filled = anatomy.format(template_data) + template_filled = anatomy_filled[template_name]["path"] + repre_context = template_filled.used_values + self.log.debug("Template filled: {}".format(str(template_filled))) + dst_collection = assemble([os.path.normpath(template_filled)]) + + # Update the destination indexes and padding + dst_collection.indexes.clear() + dst_collection.indexes.update(set(destination_indexes)) + dst_collection.padding = destination_padding + assert ( + len(src_collection.indexes) == len(dst_collection.indexes) + ), "This is a bug" + + # Multiple file transfers + transfers = [] + for src_file_name, dst in zip(src_collection, dst_collection): + src = os.path.join(stagingdir, src_file_name) + transfers.append((src, dst)) + + else: + # Single file + fname = files + assert not os.path.isabs(fname), ( + "Given file name is a full path" + ) + + # Manage anatomy template data + template_data.pop("frame", None) + if is_udim: + template_data["udim"] = repre["udim"][0] + + # Construct destination filepath from template + anatomy_filled = anatomy.format(template_data) + template_filled = anatomy_filled[template_name]["path"] + repre_context = template_filled.used_values + dst = os.path.normpath(template_filled) + + # Single file transfer + src = os.path.join(stagingdir, fname) + transfers = [(src, dst)] + + # todo: Are we sure the assumption each representation + # ends up in the same folder is valid? + if not instance.data.get("publishDir"): + instance.data["publishDir"] = ( + anatomy_filled + [template_name] + ["folder"] + ) + + for key in self.db_representation_context_keys: + # Also add these values to the context even if not used by the + # destination template + value = template_data.get(key) + if not value: + continue + repre_context[key] = template_data[key] + + # Explicitly store the full list even though template data might + # have a different value because it uses just a single udim tile + if repre.get("udim"): + repre_context["udim"] = repre.get("udim") # store list + + # Use previous representation's id if there is a name match + existing = existing_repres_by_name.get(repre["name"].lower()) + if existing: + repre_id = existing["_id"] + else: + repre_id = ObjectId() + + # Backwards compatibility: + # Store first transferred destination as published path data + # todo: can we remove this? + # todo: We shouldn't change data that makes its way back into + # instance.data[] until we know the publish actually succeeded + # otherwise `published_path` might not actually be valid? + published_path = transfers[0][1] + repre["published_path"] = published_path # Backwards compatibility + + # todo: `repre` is not the actual `representation` entity + # we should simplify/clarify difference between data above + # and the actual representation entity for the database + data = repre.get("data", {}) + data.update({'path': published_path, 'template': template}) + representation = { + "_id": repre_id, + "schema": "openpype:representation-2.0", + "type": "representation", + "parent": version["_id"], + "name": repre['name'], + "data": data, + + # Imprint shortcut to context for performance reasons. 
+ "context": repre_context + } + + # todo: simplify/streamline which additional data makes its way into + # the representation context + if repre.get("outputName"): + representation["context"]["output"] = repre['outputName'] + + if is_sequence_representation and repre.get("frameStart") is not None: + representation['context']['frame'] = template_data["frame"] + + return { + "representation": representation, + "anatomy_data": template_data, + "transfers": transfers, + # todo: avoid the need for 'published_files' used by Integrate Hero + # backwards compatibility + "published_files": [transfer[1] for transfer in transfers] + } + + def create_version_data(self, instance): + """Create the data dictionary for the version + + Args: + instance: the current instance being published + + Returns: + dict: the required information for version["data"] + """ + + context = instance.context + + # create relative source path for DB + if "source" in instance.data: + source = instance.data["source"] + else: + source = context.data["currentFile"] + anatomy = instance.context.data["anatomy"] + source = self.get_rootless_path(anatomy, source) + self.log.debug("Source: {}".format(source)) + + version_data = { + "families": get_instance_families(instance), + "time": context.data["time"], + "author": context.data["user"], + "source": source, + "comment": context.data.get("comment"), + "machine": context.data.get("machine"), + "fps": instance.data.get("fps", context.data.get("fps")) + } + + # todo: preferably we wouldn't need this "if dict" etc. logic and + # instead be able to rely what the input value is if it's set. + intent_value = context.data.get("intent") + if intent_value and isinstance(intent_value, dict): + intent_value = intent_value.get("value") + + if intent_value: + version_data["intent"] = intent_value + + # Include optional data if present in + optionals = [ + "frameStart", "frameEnd", "step", "handles", + "handleEnd", "handleStart", "sourceHashes" + ] + for key in optionals: + if key in instance.data: + version_data[key] = instance.data[key] + + # Include instance.data[versionData] directly + version_data_instance = instance.data.get('versionData') + if version_data_instance: + version_data.update(version_data_instance) + + return version_data + + def get_template_name(self, instance): + """Return anatomy template name to use for integration""" + # Define publish template name from profiles + filter_criteria = self.get_profile_filter_criteria(instance) + profile = filter_profiles(self.template_name_profiles, + filter_criteria, + logger=self.log) + if profile: + return profile["template_name"] + else: + return self.default_template_name + + def get_profile_filter_criteria(self, instance): + """Return filter criteria for `filter_profiles`""" + # Anatomy data is pre-filled by Collectors + anatomy_data = instance.data["anatomyData"] + + # Task can be optional in anatomy data + task = anatomy_data.get("task", {}) + + # Return filter criteria + return { + "families": anatomy_data["family"], + "tasks": task.get("name"), + "hosts": anatomy_data["app"], + "task_types": task.get("type") + } + + def get_rootless_path(self, anatomy, path): + """Returns, if possible, path without absolute portion from root + (eg. 'c:\' or '/opt/..') + + This information is platform dependent and shouldn't be captured. + Example: + 'c:/projects/MyProject1/Assets/publish...' > + '{root}/MyProject1/Assets...' 
+ + Args: + anatomy: anatomy part from instance + path: path (absolute) + Returns: + path: modified path if possible, or unmodified path + + warning logged + """ + success, rootless_path = anatomy.find_root_template_from_path(path) + if success: + path = rootless_path + else: + self.log.warning(( + "Could not find root path for remapping \"{}\"." + " This may cause issues on farm." + ).format(path)) + return path + + def get_files_info(self, destinations, sites, anatomy): + """Prepare 'files' info portion for representations. + + Arguments: + destinations (list): List of transferred file destinations + sites (list): array of published locations + anatomy: anatomy part from instance + Returns: + output_resources: array of dictionaries to be added to 'files' key + in representation + """ + file_infos = [] + for file_path in destinations: + file_info = self.prepare_file_info(file_path, anatomy, sites=sites) + file_infos.append(file_info) + return file_infos + + def prepare_file_info(self, path, anatomy, sites): + """ Prepare information for one file (asset or resource) + + Arguments: + path: destination url of published file + anatomy: anatomy part from instance + sites: array of published locations, + [ {'name':'studio', 'created_dt':date} by default + keys expected ['studio', 'site1', 'gdrive1'] + + Returns: + dict: file info dictionary + """ + return { + "_id": ObjectId(), + "path": self.get_rootless_path(anatomy, path), + "size": os.path.getsize(path), + "hash": openpype.api.source_hash(path), + "sites": sites + } From fd2d07e94c0fb34730547c396e09ddc314b56983 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 5 Jul 2022 09:22:29 +0200 Subject: [PATCH 67/72] Revert integrator to latest develop --- openpype/plugins/publish/integrate_new.py | 1710 +++++++++++++-------- 1 file changed, 1088 insertions(+), 622 deletions(-) diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index a07e8a1e0f..4c14c17dae 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -1,111 +1,63 @@ import os +from os.path import getsize import logging import sys import copy import clique +import errno import six +import re +import shutil +from collections import deque, defaultdict +from datetime import datetime from bson.objectid import ObjectId -from pymongo import DeleteMany, ReplaceOne, InsertOne, UpdateOne +from pymongo import DeleteOne, InsertOne import pyblish.api import openpype.api -from openpype.modules import ModulesManager from openpype.lib.profiles_filtering import filter_profiles -from openpype.lib.file_transaction import FileTransaction +from openpype.lib import ( + prepare_template_data, + create_hard_link, + StringTemplate, + TemplateUnsolved +) from openpype.pipeline import legacy_io +# this is needed until speedcopy for linux is fixed +if sys.platform == "win32": + from speedcopy import copyfile +else: + from shutil import copyfile + log = logging.getLogger(__name__) -def assemble(files): - """Convenience `clique.assemble` wrapper for files of a single collection. - - Unlike `clique.assemble` this wrapper does not allow more than a single - Collection nor any remainder files. Errors will be raised when not only - a single collection is assembled. - - Returns: - clique.Collection: A single sequence Collection - - Raises: - ValueError: Error is raised when files do not result in a single - collected Collection. - - """ - # todo: move this to lib? - # Get the sequence as a collection. 
The files must be of a single - # sequence and have no remainder outside of the collections. - patterns = [clique.PATTERNS["frames"]] - collections, remainder = clique.assemble(files, - minimum_items=1, - patterns=patterns) - if not collections: - raise ValueError("No collections found in files: " - "{}".format(files)) - if remainder: - raise ValueError("Files found not detected as part" - " of a sequence: {}".format(remainder)) - if len(collections) > 1: - raise ValueError("Files in sequence are not part of a" - " single sequence collection: " - "{}".format(collections)) - return collections[0] - - -def get_instance_families(instance): - """Get all families of the instance""" - # todo: move this to lib? - family = instance.data.get("family") - families = [] - if family: - families.append(family) - - for _family in (instance.data.get("families") or []): - if _family not in families: - families.append(_family) - - return families - - -def get_frame_padded(frame, padding): - """Return frame number as string with `padding` amount of padded zeros""" - return "{frame:0{padding}d}".format(padding=padding, frame=frame) - - -def get_first_frame_padded(collection): - """Return first frame as padded number from `clique.Collection`""" - start_frame = next(iter(collection.indexes)) - return get_frame_padded(start_frame, padding=collection.padding) - - -def bulk_write(writes): - """Convenience function to bulk write into active project database""" - project = legacy_io.Session["AVALON_PROJECT"] - return legacy_io._database[project].bulk_write(writes) - - class IntegrateAssetNew(pyblish.api.InstancePlugin): - """Register publish in the database and transfer files to destinations. + """Resolve any dependency issues - Steps: - 1) Register the subset and version - 2) Transfer the representation files to the destination - 3) Register the representation + This plug-in resolves any paths which, if not updated might break + the published file. - Requires: - instance.data['representations'] - must be a list and each member - must be a dictionary with following data: - 'files': list of filenames for sequence, string for single file. - Only the filename is allowed, without the folder path. - 'stagingDir': "path/to/folder/with/files" - 'name': representation name (usually the same as extension) - 'ext': file extension - optional data - "frameStart" - "frameEnd" - 'fps' - "data": additional metadata for each representation. + The order of families is important, when working with lookdev you want to + first publish the texture, update the texture paths in the nodes and then + publish the shading network. Same goes for file dependent assets. + + Requirements for instance to be correctly integrated + + instance.data['representations'] - must be a list and each member + must be a dictionary with following data: + 'files': list of filenames for sequence, string for single file. + Only the filename is allowed, without the folder path. + 'stagingDir': "path/to/folder/with/files" + 'name': representation name (usually the same as extension) + 'ext': file extension + optional data + "frameStart" + "frameEnd" + 'fps' + "data": additional metadata for each representation. 
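
    A hypothetical minimal value satisfying these requirements:

        instance.data["representations"] = [
            {
                "name": "exr",
                "ext": "exr",
                "files": ["render.1001.exr", "render.1002.exr"],
                "stagingDir": "/tmp/staging/render",
                "frameStart": 1001,
                "frameEnd": 1002,
                "fps": 25
            }
        ]
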
""" label = "Integrate Asset New" @@ -140,6 +92,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "source", "matchmove", "image", + "source", "assembly", "fbx", "textures", @@ -156,51 +109,157 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "usd", "staticMesh", "skeletalMesh", - "usdComposition", - "usdOverride", + "mvLook", + "mvUsd", + "mvUsdComposition", + "mvUsdOverride", "simpleUnrealTexture" ] - exclude_families = ["clip", "render.farm"] - default_template_name = "publish" - - # Representation context keys that should always be written to - # the database even if not used by the destination template + exclude_families = ["render.farm"] db_representation_context_keys = [ "project", "asset", "task", "subset", "version", "representation", - "family", "hierarchy", "username" + "family", "hierarchy", "task", "username" ] + default_template_name = "publish" + + # suffix to denote temporary files, use without '.' + TMP_FILE_EXT = 'tmp' + + # file_url : file_size of all published and uploaded files + integrated_file_sizes = {} # Attributes set by settings template_name_profiles = None + subset_grouping_profiles = None def process(self, instance): + for ef in self.exclude_families: + if ( + instance.data["family"] == ef or + ef in instance.data["families"]): + self.log.debug("Excluded family '{}' in '{}' or {}".format( + ef, instance.data["family"], instance.data["families"])) + return - # Exclude instances that also contain families from exclude families - families = set(get_instance_families(instance)) - exclude = families & set(self.exclude_families) - if exclude: - self.log.debug("Instance not integrated due to exclude " - "families found: {}".format(", ".join(exclude))) + # instance should be published on a farm + if instance.data.get("farm"): return - file_transactions = FileTransaction(log=self.log) + # Prepare repsentations that should be integrated + repres = instance.data.get("representations") + # Raise error if instance don't have any representations + if not repres: + raise ValueError( + "Instance {} has no files to transfer".format( + instance.data["family"] + ) + ) + + # Validate type of stored representations + if not isinstance(repres, (list, tuple)): + raise TypeError( + "Instance 'files' must be a list, got: {0} {1}".format( + str(type(repres)), str(repres) + ) + ) + + # Filter representations + filtered_repres = [] + for repre in repres: + if "delete" in repre.get("tags", []): + continue + filtered_repres.append(repre) + + # Skip instance if there are not representations to integrate + # all representations should not be integrated + if not filtered_repres: + self.log.warning(( + "Skipping, there are no representations" + " to integrate for instance {}" + ).format(instance.data["family"])) + return + + self.integrated_file_sizes = {} try: - self.register(instance, file_transactions) + self.register(instance, filtered_repres) + self.log.info("Integrated Asset in to the database ...") + self.log.info("instance.data: {}".format(instance.data)) + self.handle_destination_files(self.integrated_file_sizes, + 'finalize') except Exception: # clean destination - # todo: preferably we'd also rollback *any* changes to the database - file_transactions.rollback() self.log.critical("Error when registering", exc_info=True) + self.handle_destination_files(self.integrated_file_sizes, 'remove') six.reraise(*sys.exc_info()) - # Finalizing can't rollback safely so no use for moving it to - # the try, except. 
- file_transactions.finalize() + def register(self, instance, repres): + # Required environment variables + anatomy_data = instance.data["anatomyData"] - def register(self, instance, file_transactions): + legacy_io.install() - instance_stagingdir = instance.data.get("stagingDir") - if not instance_stagingdir: + context = instance.context + + project_entity = instance.data["projectEntity"] + + context_asset_name = None + context_asset_doc = context.data.get("assetEntity") + if context_asset_doc: + context_asset_name = context_asset_doc["name"] + + asset_name = instance.data["asset"] + asset_entity = instance.data.get("assetEntity") + if not asset_entity or asset_entity["name"] != context_asset_name: + asset_entity = legacy_io.find_one({ + "type": "asset", + "name": asset_name, + "parent": project_entity["_id"] + }) + assert asset_entity, ( + "No asset found by the name \"{0}\" in project \"{1}\"" + ).format(asset_name, project_entity["name"]) + + instance.data["assetEntity"] = asset_entity + + # update anatomy data with asset specific keys + # - name should already been set + hierarchy = "" + parents = asset_entity["data"]["parents"] + if parents: + hierarchy = "/".join(parents) + anatomy_data["hierarchy"] = hierarchy + + # Make sure task name in anatomy data is same as on instance.data + asset_tasks = ( + asset_entity.get("data", {}).get("tasks") + ) or {} + task_name = instance.data.get("task") + if task_name: + task_info = asset_tasks.get(task_name) or {} + task_type = task_info.get("type") + + project_task_types = project_entity["config"]["tasks"] + task_code = project_task_types.get(task_type, {}).get("short_name") + anatomy_data["task"] = { + "name": task_name, + "type": task_type, + "short": task_code + } + + elif "task" in anatomy_data: + # Just set 'task_name' variable to context task + task_name = anatomy_data["task"]["name"] + task_type = anatomy_data["task"]["type"] + + else: + task_name = None + task_type = None + + # Fill family in anatomy data + anatomy_data["family"] = instance.data.get("family") + + stagingdir = instance.data.get("stagingDir") + if not stagingdir: self.log.info(( "{0} is missing reference to staging directory." " Will try to get it from representation." @@ -208,515 +267,718 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): else: self.log.debug( - "Establishing staging directory " - "@ {0}".format(instance_stagingdir) + "Establishing staging directory @ {0}".format(stagingdir) ) - # Ensure at least one representation is set up for registering. - repres = instance.data.get("representations") - assert repres, "Instance has no representations data" - assert isinstance(repres, (list, tuple)), ( - "Instance 'representations' must be a list, got: {0} {1}".format( - str(type(repres)), str(repres) - ) + subset = self.get_subset(asset_entity, instance) + instance.data["subsetEntity"] = subset + + version_number = instance.data["version"] + self.log.debug("Next version: v{}".format(version_number)) + + version_data = self.create_version_data(context, instance) + + version_data_instance = instance.data.get('versionData') + if version_data_instance: + version_data.update(version_data_instance) + + # TODO rename method from `create_version` to + # `prepare_version` or similar... 
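
The rename suggested in the TODO above reflects what the refactor in this series does: `prepare_version` builds the document and returns an idempotent upsert instead of the find-then-insert-or-update flow restored below. A sketch of that pattern (entity contents abbreviated):

    from bson.objectid import ObjectId
    from pymongo import ReplaceOne

    version = {"_id": ObjectId(), "type": "version", "name": version_number}
    # A single write covers both the new-version and existing-version cases
    project_name = legacy_io.Session["AVALON_PROJECT"]
    legacy_io._database[project_name].bulk_write([
        ReplaceOne({"_id": version["_id"]}, version, upsert=True)
    ])
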
+ version = self.create_version( + subset=subset, + version_number=version_number, + data=version_data ) - template_name = self.get_template_name(instance) + self.log.debug("Creating version ...") - subset, subset_writes = self.prepare_subset(instance) - version, version_writes = self.prepare_version(instance, subset) + new_repre_names_low = [ + _repre["name"].lower() + for _repre in repres + ] + + existing_version = legacy_io.find_one({ + 'type': 'version', + 'parent': subset["_id"], + 'name': version_number + }) + + if existing_version is None: + version_id = legacy_io.insert_one(version).inserted_id + else: + # Check if instance have set `append` mode which cause that + # only replicated representations are set to archive + append_repres = instance.data.get("append", False) + + # Update version data + # TODO query by _id and + legacy_io.update_many({ + 'type': 'version', + 'parent': subset["_id"], + 'name': version_number + }, { + '$set': version + }) + version_id = existing_version['_id'] + + # Find representations of existing version and archive them + current_repres = list(legacy_io.find({ + "type": "representation", + "parent": version_id + })) + bulk_writes = [] + for repre in current_repres: + if append_repres: + # archive only duplicated representations + if repre["name"].lower() not in new_repre_names_low: + continue + # Representation must change type, + # `_id` must be stored to other key and replaced with new + # - that is because new representations should have same ID + repre_id = repre["_id"] + bulk_writes.append(DeleteOne({"_id": repre_id})) + + repre["orig_id"] = repre_id + repre["_id"] = ObjectId() + repre["type"] = "archived_representation" + bulk_writes.append(InsertOne(repre)) + + # bulk updates + if bulk_writes: + project_name = legacy_io.Session["AVALON_PROJECT"] + legacy_io.database[project_name].bulk_write( + bulk_writes + ) + + version = legacy_io.find_one({"_id": version_id}) instance.data["versionEntity"] = version - # Get existing representations (if any) - existing_repres_by_name = { - repres["name"].lower(): repres for repres in legacy_io.find( - { - "parent": version["_id"], - "type": "representation" - }, - # Only care about id and name of existing representations - projection={"_id": True, "name": True} - ) + existing_repres = list(legacy_io.find({ + "parent": version_id, + "type": "archived_representation" + })) + + instance.data['version'] = version['name'] + + intent_value = instance.context.data.get("intent") + if intent_value and isinstance(intent_value, dict): + intent_value = intent_value.get("value") + + if intent_value: + anatomy_data["intent"] = intent_value + + anatomy = instance.context.data['anatomy'] + + # Find the representations to transfer amongst the files + # Each should be a single representation (as such, a single extension) + representations = [] + destination_list = [] + + orig_transfers = [] + if 'transfers' not in instance.data: + instance.data['transfers'] = [] + else: + orig_transfers = list(instance.data['transfers']) + + family = self.main_family_from_instance(instance) + + key_values = { + "families": family, + "tasks": task_name, + "hosts": instance.context.data["hostName"], + "task_types": task_type } - - # Prepare all representations - prepared_representations = [] - for repre in instance.data["representations"]: - - if "delete" in repre.get("tags", []): - self.log.debug("Skipping representation marked for deletion: " - "{}".format(repre)) - continue - - # todo: reduce/simplify what is returned from this function - 
prepared = self.prepare_representation(repre, - template_name, - existing_repres_by_name, - version, - instance_stagingdir, - instance) - - for src, dst in prepared["transfers"]: - # todo: add support for hardlink transfers - file_transactions.add(src, dst) - - prepared_representations.append(prepared) - - if not prepared_representations: - # Even though we check `instance.data["representations"]` earlier - # this could still happen if all representations were tagged with - # "delete" and thus are skipped for integration - raise RuntimeError("No representations prepared to publish.") - - # Each instance can also have pre-defined transfers not explicitly - # part of a representation - like texture resources used by a - # .ma representation. Those destination paths are pre-defined, etc. - # todo: should we move or simplify this logic? - resource_destinations = set() - for src, dst in instance.data.get("transfers", []): - file_transactions.add(src, dst, mode=FileTransaction.MODE_COPY) - resource_destinations.add(os.path.abspath(dst)) - for src, dst in instance.data.get("hardlinks", []): - file_transactions.add(src, dst, mode=FileTransaction.MODE_HARDLINK) - resource_destinations.add(os.path.abspath(dst)) - - # Bulk write to the database - # We write the subset and version to the database before the File - # Transaction to reduce the chances of another publish trying to - # publish to the same version number since that chance can greatly - # increase if the file transaction takes a long time. - bulk_write(subset_writes + version_writes) - self.log.info("Subset {subset[name]} and Version {version[name]} " - "written to database..".format(subset=subset, - version=version)) - - # Process all file transfers of all integrations now - self.log.debug("Integrating source files to destination ...") - file_transactions.process() - self.log.debug("Backed up existing files: " - "{}".format(file_transactions.backups)) - self.log.debug("Transferred files: " - "{}".format(file_transactions.transferred)) - self.log.debug("Retrieving Representation Site Sync information ...") - - # Get the accessible sites for Site Sync - manager = ModulesManager() - sync_server_module = manager.modules_by_name["sync_server"] - sites = sync_server_module.compute_resource_sync_sites( - project_name=instance.data["projectEntity"]["name"] + profile = filter_profiles( + self.template_name_profiles, + key_values, + logger=self.log ) - self.log.debug("Sync Server Sites: {}".format(sites)) - # Compute the resource file infos once (files belonging to the - # version instance instead of an individual representation) so - # we can re-use those file infos per representation - anatomy = instance.context.data["anatomy"] - resource_file_infos = self.get_files_info(resource_destinations, - sites=sites, - anatomy=anatomy) + template_name = "publish" + if profile: + template_name = profile["template_name"] - # Finalize the representations now the published files are integrated - # Get 'files' info for representations and its attached resources - representation_writes = [] - new_repre_names_low = set() - for prepared in prepared_representations: - representation = prepared["representation"] - transfers = prepared["transfers"] - destinations = [dst for src, dst in transfers] + published_representations = {} + for idx, repre in enumerate(repres): + published_files = [] + + # create template data for Anatomy + template_data = copy.deepcopy(anatomy_data) + if intent_value is not None: + template_data["intent"] = intent_value + + resolution_width = 
repre.get("resolutionWidth") + resolution_height = repre.get("resolutionHeight") + fps = instance.data.get("fps") + + if resolution_width: + template_data["resolution_width"] = resolution_width + if resolution_width: + template_data["resolution_height"] = resolution_height + if resolution_width: + template_data["fps"] = fps + + if "originalBasename" in instance.data: + template_data.update({ + "originalBasename": instance.data.get("originalBasename") + }) + + files = repre['files'] + if repre.get('stagingDir'): + stagingdir = repre['stagingDir'] + + if repre.get("outputName"): + template_data["output"] = repre['outputName'] + + template_data["representation"] = repre["name"] + + ext = repre["ext"] + if ext.startswith("."): + self.log.warning(( + "Implementaion warning: <\"{}\">" + " Representation's extension stored under \"ext\" key " + " started with dot (\"{}\")." + ).format(repre["name"], ext)) + ext = ext[1:] + repre["ext"] = ext + template_data["ext"] = ext + + self.log.info(template_name) + template = os.path.normpath( + anatomy.templates[template_name]["path"]) + + sequence_repre = isinstance(files, list) + repre_context = None + if sequence_repre: + self.log.debug( + "files: {}".format(files)) + src_collections, remainder = clique.assemble(files) + self.log.debug( + "src_tail_collections: {}".format(str(src_collections))) + src_collection = src_collections[0] + + # Assert that each member has identical suffix + src_head = src_collection.format("{head}") + src_tail = src_collection.format("{tail}") + + # fix dst_padding + valid_files = [x for x in files if src_collection.match(x)] + padd_len = len( + valid_files[0].replace(src_head, "").replace(src_tail, "") + ) + src_padding_exp = "%0{}d".format(padd_len) + + test_dest_files = list() + for i in [1, 2]: + template_data["representation"] = repre['ext'] + if not repre.get("udim"): + template_data["frame"] = src_padding_exp % i + else: + template_data["udim"] = src_padding_exp % i + + anatomy_filled = anatomy.format(template_data) + template_filled = anatomy_filled[template_name]["path"] + if repre_context is None: + repre_context = template_filled.used_values + test_dest_files.append( + os.path.normpath(template_filled) + ) + if not repre.get("udim"): + template_data["frame"] = repre_context["frame"] + else: + template_data["udim"] = repre_context["udim"] + + self.log.debug( + "test_dest_files: {}".format(str(test_dest_files))) + + dst_collections, remainder = clique.assemble(test_dest_files) + dst_collection = dst_collections[0] + dst_head = dst_collection.format("{head}") + dst_tail = dst_collection.format("{tail}") + + index_frame_start = None + + # TODO use frame padding from right template group + if repre.get("frameStart") is not None: + frame_start_padding = int( + anatomy.templates["render"].get( + "frame_padding", + anatomy.templates["render"].get("padding") + ) + ) + + index_frame_start = int(repre.get("frameStart")) + + # exception for slate workflow + if index_frame_start and "slate" in instance.data["families"]: + index_frame_start -= 1 + + dst_padding_exp = src_padding_exp + dst_start_frame = None + collection_start = list(src_collection.indexes)[0] + for i in src_collection.indexes: + # TODO 1.) do not count padding in each index iteration + # 2.) 
do not count dst_padding from src_padding before + # index_frame_start check + frame_number = i - collection_start + src_padding = src_padding_exp % i + + src_file_name = "{0}{1}{2}".format( + src_head, src_padding, src_tail) + + dst_padding = src_padding_exp % frame_number + + if index_frame_start is not None: + dst_padding_exp = "%0{}d".format(frame_start_padding) + dst_padding = dst_padding_exp % (index_frame_start + frame_number) # noqa: E501 + elif repre.get("udim"): + dst_padding = int(i) + + dst = "{0}{1}{2}".format( + dst_head, + dst_padding, + dst_tail + ) + + self.log.debug("destination: `{}`".format(dst)) + src = os.path.join(stagingdir, src_file_name) + + self.log.debug("source: {}".format(src)) + instance.data["transfers"].append([src, dst]) + + published_files.append(dst) + + # for adding first frame into db + if not dst_start_frame: + dst_start_frame = dst_padding + + # Store used frame value to template data + if repre.get("frame"): + template_data["frame"] = dst_start_frame + + dst = "{0}{1}{2}".format( + dst_head, + dst_start_frame, + dst_tail + ) + repre['published_path'] = dst + + else: + # Single file + # _______ + # | |\ + # | | + # | | + # | | + # |_______| + # + template_data.pop("frame", None) + fname = files + assert not os.path.isabs(fname), ( + "Given file name is a full path" + ) + + template_data["representation"] = repre['ext'] + # Store used frame value to template data + if repre.get("udim"): + template_data["udim"] = repre["udim"][0] + src = os.path.join(stagingdir, fname) + anatomy_filled = anatomy.format(template_data) + template_filled = anatomy_filled[template_name]["path"] + repre_context = template_filled.used_values + dst = os.path.normpath(template_filled) + + instance.data["transfers"].append([src, dst]) + + published_files.append(dst) + repre['published_path'] = dst + self.log.debug("__ dst: {}".format(dst)) + + if not instance.data.get("publishDir"): + instance.data["publishDir"] = ( + anatomy_filled + [template_name] + ["folder"] + ) + if repre.get("udim"): + repre_context["udim"] = repre.get("udim") # store list + + repre["publishedFiles"] = published_files + + for key in self.db_representation_context_keys: + value = template_data.get(key) + if not value: + continue + repre_context[key] = template_data[key] + + # Use previous representation's id if there are any + repre_id = None + repre_name_low = repre["name"].lower() + for _repre in existing_repres: + # NOTE should we check lowered names? + if repre_name_low == _repre["name"]: + repre_id = _repre["orig_id"] + break + + # Create new id if existing representations does not match + if repre_id is None: + repre_id = ObjectId() + + data = repre.get("data") or {} + data.update({'path': dst, 'template': template}) + representation = { + "_id": repre_id, + "schema": "openpype:representation-2.0", + "type": "representation", + "parent": version_id, + "name": repre['name'], + "data": data, + "dependencies": instance.data.get("dependencies", "").split(), + + # Imprint shortcut to context + # for performance reasons. 
+ "context": repre_context + } + + if repre.get("outputName"): + representation["context"]["output"] = repre['outputName'] + + if sequence_repre and repre.get("frameStart") is not None: + representation['context']['frame'] = ( + dst_padding_exp % int(repre.get("frameStart")) + ) + + # any file that should be physically copied is expected in + # 'transfers' or 'hardlinks' + if instance.data.get('transfers', False) or \ + instance.data.get('hardlinks', False): + # could throw exception, will be caught in 'process' + # all integration to DB is being done together lower, + # so no rollback needed + self.log.debug("Integrating source files to destination ...") + self.integrated_file_sizes.update(self.integrate(instance)) + self.log.debug("Integrated files {}". + format(self.integrated_file_sizes)) + + # get 'files' info for representation and all attached resources + self.log.debug("Preparing files information ...") representation["files"] = self.get_files_info( - destinations, sites=sites, anatomy=anatomy - ) + instance, + self.integrated_file_sizes) - # Add the version resource file infos to each representation - representation["files"] += resource_file_infos + self.log.debug("__ representation: {}".format(representation)) + destination_list.append(dst) + self.log.debug("__ destination_list: {}".format(destination_list)) + instance.data['destination_list'] = destination_list + representations.append(representation) + published_representations[repre_id] = { + "representation": representation, + "anatomy_data": template_data, + "published_files": published_files + } + self.log.debug("__ representations: {}".format(representations)) + # reset transfers for next representation + # instance.data['transfers'] is used as a global variable + # in current codebase + instance.data['transfers'] = list(orig_transfers) - # Set up representation for writing to the database. Since - # we *might* be overwriting an existing entry if the version - # already existed we'll use ReplaceOnce with `upsert=True` - representation_writes.append(ReplaceOne( - filter={"_id": representation["_id"]}, - replacement=representation, - upsert=True - )) + # Remove old representations if there are any (before insertion of new) + if existing_repres: + repre_ids_to_remove = [] + for repre in existing_repres: + repre_ids_to_remove.append(repre["_id"]) + legacy_io.delete_many({"_id": {"$in": repre_ids_to_remove}}) - new_repre_names_low.add(representation["name"].lower()) + for rep in instance.data["representations"]: + self.log.debug("__ rep: {}".format(rep)) - # Delete any existing representations that didn't get any new data - # if the instance is not set to append mode - if not instance.data.get("append", False): - delete_names = set() - for name, existing_repres in existing_repres_by_name.items(): - if name not in new_repre_names_low: - # We add the exact representation name because `name` is - # lowercase for name matching only and not in the database - delete_names.add(existing_repres["name"]) - if delete_names: - representation_writes.append(DeleteMany( - filter={ - "parent": version["_id"], - "name": {"$in": list(delete_names)} - } - )) + legacy_io.insert_many(representations) + instance.data["published_representations"] = ( + published_representations + ) + # self.log.debug("Representation: {}".format(representations)) + self.log.info("Registered {} items".format(len(representations))) - # Write representations to the database - bulk_write(representation_writes) + def integrate(self, instance): + """ Move the files. 
-        # Backwards compatibility
-        # todo: can we remove this?
-        # todo: We shouldn't change data that makes its way back into
-        instance.data["published_representations"] = {
-            p["representation"]["_id"]: p for p in prepared_representations
-        }
+        Through `instance.data["transfers"]`

-        self.log.info("Registered {} representations"
-                      "".format(len(prepared_representations)))
+        Args:
+            instance: the instance to integrate
+        Returns:
+            integrated_file_sizes: dictionary of destination file url and
+            its size in bytes
+        """
+        # store destination url and size for reporting and rollback
+        integrated_file_sizes = {}
+        transfers = list(instance.data.get("transfers", list()))
+        for src, dest in transfers:
+            if os.path.normpath(src) != os.path.normpath(dest):
+                dest = self.get_dest_temp_url(dest)
+                self.copy_file(src, dest)
+                # TODO needs to be updated during site implementation
+                integrated_file_sizes[dest] = os.path.getsize(dest)

+        # Produce hardlinked copies
+        # Note: hardlink can only be produced between two files on the same
+        # server/disk and editing one of the two will edit both files at once.
+        # As such it is recommended to only make hardlinks between static files
+        # to ensure publishes remain safe and non-edited.
+        hardlinks = instance.data.get("hardlinks", list())
+        for src, dest in hardlinks:
+            dest = self.get_dest_temp_url(dest)
+            self.log.debug("Hardlinking file ... {} -> {}".format(src, dest))
+            if not os.path.exists(dest):
+                self.hardlink_file(src, dest)
+
+            # TODO needs to be updated during site implementation
+            integrated_file_sizes[dest] = os.path.getsize(dest)
+
+        return integrated_file_sizes
+
+    def copy_file(self, src, dst):
+        """ Copy given source to destination
+
+        Arguments:
+            src (str): the source file which needs to be copied
+            dst (str): the destination of the source file
+        Returns:
+            None
+        """
+        src = os.path.normpath(src)
+        dst = os.path.normpath(dst)
+        self.log.debug("Copying file ... 
{} -> {}".format(src, dst)) + dirname = os.path.dirname(dst) + try: + os.makedirs(dirname) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + self.log.critical("An unexpected error occurred.") + six.reraise(*sys.exc_info()) + + # copy file with speedcopy and check if size of files are simetrical + while True: + if not shutil._samefile(src, dst): + copyfile(src, dst) + else: + self.log.critical( + "files are the same {} to {}".format(src, dst) + ) + os.remove(dst) + try: + shutil.copyfile(src, dst) + self.log.debug("Copying files with shutil...") + except OSError as e: + self.log.critical("Cannot copy {} to {}".format(src, dst)) + self.log.critical(e) + six.reraise(*sys.exc_info()) + if str(getsize(src)) in str(getsize(dst)): + break + + def hardlink_file(self, src, dst): + dirname = os.path.dirname(dst) + + try: + os.makedirs(dirname) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + self.log.critical("An unexpected error occurred.") + six.reraise(*sys.exc_info()) + + create_hard_link(src, dst) + + def get_subset(self, asset, instance): subset_name = instance.data["subset"] - self.log.debug("Subset: {}".format(subset_name)) - - # Get existing subset if it exists subset = legacy_io.find_one({ "type": "subset", "parent": asset["_id"], "name": subset_name }) - # Define subset data - data = { - "families": get_instance_families(instance) - } - - subset_group = instance.data.get("subsetGroup") - if subset_group: - data["subsetGroup"] = subset_group - - bulk_writes = [] if subset is None: - # Create a new subset self.log.info("Subset '%s' not found, creating ..." % subset_name) - subset = { - "_id": ObjectId(), + self.log.debug("families. %s" % instance.data.get('families')) + self.log.debug( + "families. %s" % type(instance.data.get('families'))) + + family = instance.data.get("family") + families = [] + if family: + families.append(family) + + for _family in (instance.data.get("families") or []): + if _family not in families: + families.append(_family) + + _id = legacy_io.insert_one({ "schema": "openpype:subset-3.0", "type": "subset", "name": subset_name, - "data": data, + "data": { + "families": families + }, "parent": asset["_id"] - } - bulk_writes.append(InsertOne(subset)) + }).inserted_id - else: - # Update existing subset data with new data and set in database. - # We also change the found subset in-place so we don't need to - # re-query the subset afterwards - subset["data"].update(data) - bulk_writes.append(UpdateOne( - {"type": "subset", "_id": subset["_id"]}, - {"$set": { - "data": subset["data"] - }} - )) + subset = legacy_io.find_one({"_id": _id}) - self.log.info("Prepared subset: {}".format(subset_name)) - return subset, bulk_writes + # QUESTION Why is changing of group and updating it's + # families in 'get_subset'? + self._set_subset_group(instance, subset["_id"]) - def prepare_version(self, instance, subset): + # Update families on subset. + families = [instance.data["family"]] + families.extend(instance.data.get("families", [])) + legacy_io.update_many( + {"type": "subset", "_id": ObjectId(subset["_id"])}, + {"$set": {"data.families": families}} + ) - version_number = instance.data["version"] + return subset - version = { - "schema": "openpype:version-3.0", - "type": "version", - "parent": subset["_id"], - "name": version_number, - "data": self.create_version_data(instance) + def _set_subset_group(self, instance, subset_id): + """ + Mark subset as belonging to group in DB. 
+ + Uses Settings > Global > Publish plugins > IntegrateAssetNew + + Args: + instance (dict): processed instance + subset_id (str): DB's subset _id + + """ + # Fist look into instance data + subset_group = instance.data.get("subsetGroup") + if not subset_group: + subset_group = self._get_subset_group(instance) + + if subset_group: + legacy_io.update_many({ + 'type': 'subset', + '_id': ObjectId(subset_id) + }, {'$set': {'data.subsetGroup': subset_group}}) + + def _get_subset_group(self, instance): + """Look into subset group profiles set by settings. + + Attribute 'subset_grouping_profiles' is defined by OpenPype settings. + """ + # Skip if 'subset_grouping_profiles' is empty + if not self.subset_grouping_profiles: + return None + + # QUESTION + # - is there a chance that task name is not filled in anatomy + # data? + # - should we use context task in that case? + anatomy_data = instance.data["anatomyData"] + task_name = None + task_type = None + if "task" in anatomy_data: + task_name = anatomy_data["task"]["name"] + task_type = anatomy_data["task"]["type"] + filtering_criteria = { + "families": instance.data["family"], + "hosts": instance.context.data["hostName"], + "tasks": task_name, + "task_types": task_type } + matching_profile = filter_profiles( + self.subset_grouping_profiles, + filtering_criteria + ) + # Skip if there is not matchin profile + if not matching_profile: + return None - existing_version = legacy_io.find_one({ - 'type': 'version', - 'parent': subset["_id"], - 'name': version_number - }, projection={"_id": True}) + filled_template = None + template = matching_profile["template"] + fill_pairs = ( + ("family", filtering_criteria["families"]), + ("task", filtering_criteria["tasks"]), + ("host", filtering_criteria["hosts"]), + ("subset", instance.data["subset"]), + ("renderlayer", instance.data.get("renderlayer")) + ) + fill_pairs = prepare_template_data(fill_pairs) - if existing_version: - self.log.debug("Updating existing version ...") - version["_id"] = existing_version["_id"] - else: - self.log.debug("Creating new version ...") - version["_id"] = ObjectId() - - bulk_writes = [ReplaceOne( - filter={"_id": version["_id"]}, - replacement=version, - upsert=True - )] - - self.log.info("Prepared version: v{0:03d}".format(version["name"])) - - return version, bulk_writes - - def prepare_representation(self, repre, - template_name, - existing_repres_by_name, - version, - instance_stagingdir, - instance): - - # pre-flight validations - if repre["ext"].startswith("."): - raise ValueError("Extension must not start with a dot '.': " - "{}".format(repre["ext"])) - - if repre.get("transfers"): - raise ValueError("Representation is not allowed to have transfers" - "data before integration. 
They are computed in " - "the integrator" - "Got: {}".format(repre["transfers"])) - - # create template data for Anatomy - template_data = copy.deepcopy(instance.data["anatomyData"]) - - # required representation keys - files = repre['files'] - template_data["representation"] = repre["name"] - template_data["ext"] = repre["ext"] - - # optionals - # retrieve additional anatomy data from representation if exists - for key, anatomy_key in { - # Representation Key: Anatomy data key - "resolutionWidth": "resolution_width", - "resolutionHeight": "resolution_height", - "fps": "fps", - "outputName": "output", - "originalBasename": "originalBasename" - }.items(): - # Allow to take value from representation - # if not found also consider instance.data - if key in repre: - value = repre[key] - elif key in instance.data: - value = instance.data[key] - else: - continue - template_data[anatomy_key] = value - - if repre.get('stagingDir'): - stagingdir = repre['stagingDir'] - else: - # Fall back to instance staging dir if not explicitly - # set for representation in the instance - self.log.debug("Representation uses instance staging dir: " - "{}".format(instance_stagingdir)) - stagingdir = instance_stagingdir - if not stagingdir: - raise ValueError("No staging directory set for representation: " - "{}".format(repre)) - - self.log.debug("Anatomy template name: {}".format(template_name)) - anatomy = instance.context.data['anatomy'] - template = os.path.normpath(anatomy.templates[template_name]["path"]) - - is_udim = bool(repre.get("udim")) - is_sequence_representation = isinstance(files, (list, tuple)) - if is_sequence_representation: - # Collection of files (sequence) - assert not any(os.path.isabs(fname) for fname in files), ( - "Given file names contain full paths" + try: + filled_template = StringTemplate.format_strict_template( + template, fill_pairs ) + except (KeyError, TemplateUnsolved): + keys = [] + if fill_pairs: + keys = fill_pairs.keys() - src_collection = assemble(files) + msg = "Subset grouping failed. " \ + "Only {} are expected in Settings".format(','.join(keys)) + self.log.warning(msg) - # If the representation has `frameStart` set it renumbers the - # frame indices of the published collection. It will start from - # that `frameStart` index instead. Thus if that frame start - # differs from the collection we want to shift the destination - # frame indices from the source collection. - destination_indexes = list(src_collection.indexes) - destination_padding = len(get_first_frame_padded(src_collection)) - if repre.get("frameStart") is not None and not is_udim: - index_frame_start = int(repre.get("frameStart")) + return filled_template - render_template = anatomy.templates[template_name] - # todo: should we ALWAYS manage the frame padding even when not - # having `frameStart` set? - frame_start_padding = int( - render_template.get( - "frame_padding", - render_template.get("padding") - ) - ) - - # Shift destination sequence to the start frame - src_start_frame = next(iter(src_collection.indexes)) - shift = index_frame_start - src_start_frame - if shift: - destination_indexes = [ - frame + shift for frame in destination_indexes - ] - destination_padding = frame_start_padding - - # To construct the destination template with anatomy we require - # a Frame or UDIM tile set for the template data. We use the first - # index of the destination for that because that could've shifted - # from the source indexes, etc. 
- first_index_padded = get_frame_padded(frame=destination_indexes[0], - padding=destination_padding) - if is_udim: - # UDIM representations handle ranges in a different manner - template_data["udim"] = first_index_padded - else: - template_data["frame"] = first_index_padded - - # Construct destination collection from template - anatomy_filled = anatomy.format(template_data) - template_filled = anatomy_filled[template_name]["path"] - repre_context = template_filled.used_values - self.log.debug("Template filled: {}".format(str(template_filled))) - dst_collection = assemble([os.path.normpath(template_filled)]) - - # Update the destination indexes and padding - dst_collection.indexes.clear() - dst_collection.indexes.update(set(destination_indexes)) - dst_collection.padding = destination_padding - assert ( - len(src_collection.indexes) == len(dst_collection.indexes) - ), "This is a bug" - - # Multiple file transfers - transfers = [] - for src_file_name, dst in zip(src_collection, dst_collection): - src = os.path.join(stagingdir, src_file_name) - transfers.append((src, dst)) - - else: - # Single file - fname = files - assert not os.path.isabs(fname), ( - "Given file name is a full path" - ) - - # Manage anatomy template data - template_data.pop("frame", None) - if is_udim: - template_data["udim"] = repre["udim"][0] - - # Construct destination filepath from template - anatomy_filled = anatomy.format(template_data) - template_filled = anatomy_filled[template_name]["path"] - repre_context = template_filled.used_values - dst = os.path.normpath(template_filled) - - # Single file transfer - src = os.path.join(stagingdir, fname) - transfers = [(src, dst)] - - # todo: Are we sure the assumption each representation - # ends up in the same folder is valid? - if not instance.data.get("publishDir"): - instance.data["publishDir"] = ( - anatomy_filled - [template_name] - ["folder"] - ) - - for key in self.db_representation_context_keys: - # Also add these values to the context even if not used by the - # destination template - value = template_data.get(key) - if not value: - continue - repre_context[key] = template_data[key] - - # Explicitly store the full list even though template data might - # have a different value because it uses just a single udim tile - if repre.get("udim"): - repre_context["udim"] = repre.get("udim") # store list - - # Use previous representation's id if there is a name match - existing = existing_repres_by_name.get(repre["name"].lower()) - if existing: - repre_id = existing["_id"] - else: - repre_id = ObjectId() - - # Backwards compatibility: - # Store first transferred destination as published path data - # todo: can we remove this? - # todo: We shouldn't change data that makes its way back into - # instance.data[] until we know the publish actually succeeded - # otherwise `published_path` might not actually be valid? - published_path = transfers[0][1] - repre["published_path"] = published_path # Backwards compatibility - - # todo: `repre` is not the actual `representation` entity - # we should simplify/clarify difference between data above - # and the actual representation entity for the database - data = repre.get("data", {}) - data.update({'path': published_path, 'template': template}) - representation = { - "_id": repre_id, - "schema": "openpype:representation-2.0", - "type": "representation", - "parent": version["_id"], - "name": repre['name'], - "data": data, - - # Imprint shortcut to context for performance reasons. 
- "context": repre_context - } - - # todo: simplify/streamline which additional data makes its way into - # the representation context - if repre.get("outputName"): - representation["context"]["output"] = repre['outputName'] - - if is_sequence_representation and repre.get("frameStart") is not None: - representation['context']['frame'] = template_data["frame"] - - return { - "representation": representation, - "anatomy_data": template_data, - "transfers": transfers, - # todo: avoid the need for 'published_files' used by Integrate Hero - # backwards compatibility - "published_files": [transfer[1] for transfer in transfers] - } - - def create_version_data(self, instance): - """Create the data dictionary for the version + def create_version(self, subset, version_number, data=None): + """ Copy given source to destination Args: + subset (dict): the registered subset of the asset + version_number (int): the version number + + Returns: + dict: collection of data to create a version + """ + + return {"schema": "openpype:version-3.0", + "type": "version", + "parent": subset["_id"], + "name": version_number, + "data": data} + + def create_version_data(self, context, instance): + """Create the data collection for the version + + Args: + context: the current context instance: the current instance being published Returns: - dict: the required information for version["data"] + dict: the required information with instance.data as key """ - context = instance.context + families = [] + current_families = instance.data.get("families", list()) + instance_family = instance.data.get("family", None) + + if instance_family is not None: + families.append(instance_family) + families += current_families # create relative source path for DB - if "source" in instance.data: - source = instance.data["source"] - else: + source = instance.data.get("source") + if not source: source = context.data["currentFile"] anatomy = instance.context.data["anatomy"] source = self.get_rootless_path(anatomy, source) - self.log.debug("Source: {}".format(source)) + self.log.debug("Source: {}".format(source)) version_data = { - "families": get_instance_families(instance), + "families": families, "time": context.data["time"], "author": context.data["user"], "source": source, "comment": context.data.get("comment"), "machine": context.data.get("machine"), - "fps": instance.data.get("fps", context.data.get("fps")) + "fps": context.data.get( + "fps", instance.data.get("fps") + ) } - # todo: preferably we wouldn't need this "if dict" etc. logic and - # instead be able to rely what the input value is if it's set. 
- intent_value = context.data.get("intent") + intent_value = instance.context.data.get("intent") if intent_value and isinstance(intent_value, dict): intent_value = intent_value.get("value") @@ -732,58 +994,33 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): if key in instance.data: version_data[key] = instance.data[key] - # Include instance.data[versionData] directly - version_data_instance = instance.data.get('versionData') - if version_data_instance: - version_data.update(version_data_instance) - return version_data - def get_template_name(self, instance): - """Return anatomy template name to use for integration""" - # Define publish template name from profiles - filter_criteria = self.get_profile_filter_criteria(instance) - profile = filter_profiles(self.template_name_profiles, - filter_criteria, - logger=self.log) - if profile: - return profile["template_name"] - else: - return self.default_template_name - - def get_profile_filter_criteria(self, instance): - """Return filter criteria for `filter_profiles`""" - # Anatomy data is pre-filled by Collectors - anatomy_data = instance.data["anatomyData"] - - # Task can be optional in anatomy data - task = anatomy_data.get("task", {}) - - # Return filter criteria - return { - "families": anatomy_data["family"], - "tasks": task.get("name"), - "hosts": anatomy_data["app"], - "task_types": task.get("type") - } + def main_family_from_instance(self, instance): + """Returns main family of entered instance.""" + family = instance.data.get("family") + if not family: + family = instance.data["families"][0] + return family def get_rootless_path(self, anatomy, path): - """Returns, if possible, path without absolute portion from root - (eg. 'c:\' or '/opt/..') - - This information is platform dependent and shouldn't be captured. - Example: - 'c:/projects/MyProject1/Assets/publish...' > - '{root}/MyProject1/Assets...' + """ Returns, if possible, path without absolute portion from host + (eg. 'c:\' or '/opt/..') + This information is host dependent and shouldn't be captured. + Example: + 'c:/projects/MyProject1/Assets/publish...' > + '{root}/MyProject1/Assets...' Args: - anatomy: anatomy part from instance - path: path (absolute) + anatomy: anatomy part from instance + path: path (absolute) Returns: - path: modified path if possible, or unmodified path - + warning logged + path: modified path if possible, or unmodified path + + warning logged """ - success, rootless_path = anatomy.find_root_template_from_path(path) + success, rootless_path = ( + anatomy.find_root_template_from_path(path) + ) if success: path = rootless_path else: @@ -793,40 +1030,269 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): ).format(path)) return path - def get_files_info(self, destinations, sites, anatomy): - """Prepare 'files' info portion for representations. + def get_files_info(self, instance, integrated_file_sizes): + """ Prepare 'files' portion for attached resources and main asset. + Combining records from 'transfers' and 'hardlinks' parts from + instance. + All attached resources should be added, currently without + Context info. 
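
        For a single transferred file the result is shaped like this
        (values illustrative):

            [{
                "_id": ObjectId("..."),
                "path": "{root}/MyProject1/Assets/.../v001/render.1001.exr",
                "size": 1024,
                "hash": "<source hash>",
                "sites": [{"name": "studio", "created_dt": datetime.now()}]
            }]
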
        Arguments:
-            destinations (list): List of transferred file destinations
-            sites (list): array of published locations
-            anatomy: anatomy part from instance
+            instance: the current instance being published
+            integrated_file_sizes: dictionary of destination path (absolute)
+            and its file size
        Returns:
            output_resources: array of dictionaries to be added to 'files' key
            in representation
        """
-        file_infos = []
-        for file_path in destinations:
-            file_info = self.prepare_file_info(file_path, anatomy, sites=sites)
-            file_infos.append(file_info)
-        return file_infos
-
-    def prepare_file_info(self, path, anatomy, sites):
+        resources = list(instance.data.get("transfers", []))
+        resources.extend(list(instance.data.get("hardlinks", [])))
+
+        self.log.debug("get_resource_files_info.resources:{}".
+                       format(resources))
+
+        output_resources = []
+        anatomy = instance.context.data["anatomy"]
+        for _src, dest in resources:
+            path = self.get_rootless_path(anatomy, dest)
+            dest = self.get_dest_temp_url(dest)
+            file_hash = openpype.api.source_hash(dest)
+            if self.TMP_FILE_EXT and \
+                    ',{}'.format(self.TMP_FILE_EXT) in file_hash:
+                file_hash = file_hash.replace(',{}'.format(self.TMP_FILE_EXT),
+                                              '')
+
+            file_info = self.prepare_file_info(path,
+                                               integrated_file_sizes[dest],
+                                               file_hash,
+                                               instance=instance)
+            output_resources.append(file_info)
+
+        return output_resources
+
+    def get_dest_temp_url(self, dest):
+        """ Enhance destination path with TMP_FILE_EXT to denote temporary
+            file.
+            Temporary files will be renamed after successful registration
+            into DB and full copy to destination
+
+        Arguments:
+            dest: destination url of published file (absolute)
+        Returns:
+            dest: destination path + '.TMP_FILE_EXT'
+        """
+        if self.TMP_FILE_EXT and '.{}'.format(self.TMP_FILE_EXT) not in dest:
+            dest += '.{}'.format(self.TMP_FILE_EXT)
+        return dest
+
+    def prepare_file_info(self, path, size=None, file_hash=None,
+                          sites=None, instance=None):
        """ Prepare information for one file (asset or resource)

        Arguments:
-            path: destination url of published file
-            anatomy: anatomy part from instance
-            sites: array of published locations,
-                [ {'name':'studio', 'created_dt':date} by default
-                keys expected ['studio', 'site1', 'gdrive1']
-
+            path: destination url of published file (rootless)
+            size(optional): size of file in bytes
+            file_hash(optional): hash of file for synchronization validation
+            sites(optional): array of published locations,
+                [ {'name':'studio', 'created_dt':date} by default
+                keys expected ['studio', 'site1', 'gdrive1']
+            instance(dict, optional): to get collected settings
        Returns:
-            dict: file info dictionary
+            rec: dictionary with filled info
        """
-        return {
+        local_site = 'studio' # default
+        remote_site = None
+        always_accesible = []
+        sync_project_presets = None
+
+        rec = {
            "_id": ObjectId(),
-            "path": self.get_rootless_path(anatomy, path),
-            "size": os.path.getsize(path),
-            "hash": openpype.api.source_hash(path),
-            "sites": sites
+            "path": path
        }
+        if size:
+            rec["size"] = size
+
+        if file_hash:
+            rec["hash"] = file_hash
+
+        if sites:
+            rec["sites"] = sites
+        else:
+            system_sync_server_presets = (
+                instance.context.data["system_settings"]
+                ["modules"]
+                ["sync_server"])
+            log.debug("system_sett:: {}".format(system_sync_server_presets))
+
+            if system_sync_server_presets["enabled"]:
+                sync_project_presets = (
+                    instance.context.data["project_settings"]
+                    ["global"]
+                    ["sync_server"])
+
+        if sync_project_presets and sync_project_presets["enabled"]:
+            local_site, remote_site = self._get_sites(sync_project_presets)
+
+            always_accesible = sync_project_presets["config"]. \
+                get("always_accessible_on", [])
+
+        already_attached_sites = {}
+        meta = {"name": local_site, "created_dt": datetime.now()}
+        rec["sites"] = [meta]
+        already_attached_sites[meta["name"]] = meta["created_dt"]
+
+        if sync_project_presets and sync_project_presets["enabled"]:
+            if remote_site and \
+                    remote_site not in already_attached_sites.keys():
+                # add remote
+                meta = {"name": remote_site.strip()}
+                rec["sites"].append(meta)
+                already_attached_sites[meta["name"]] = None
+
+            # add alternative sites
+            rec, already_attached_sites = self._add_alternative_sites(
+                system_sync_server_presets, already_attached_sites, rec)
+
+            # add skeleton for site where it should be always synced to
+            for always_on_site in set(always_accesible):
+                if always_on_site not in already_attached_sites.keys():
+                    meta = {"name": always_on_site.strip()}
+                    rec["sites"].append(meta)
+                    already_attached_sites[meta["name"]] = None
+
+        log.debug("final sites:: {}".format(rec["sites"]))
+
+        return rec
+
+    def _get_sites(self, sync_project_presets):
+        """Returns tuple (local_site, remote_site)"""
+        local_site_id = openpype.api.get_local_site_id()
+        local_site = sync_project_presets["config"]. \
+            get("active_site", "studio").strip()
+
+        if local_site == 'local':
+            local_site = local_site_id
+
+        remote_site = sync_project_presets["config"].get("remote_site")
+
+        if remote_site == 'local':
+            remote_site = local_site_id
+
+        return local_site, remote_site
+
+    def _add_alternative_sites(self,
+                               system_sync_server_presets,
+                               already_attached_sites,
+                               rec):
+        """Loop through all configured sites and add alternatives.
+
+        See SyncServerModule.handle_alternate_site
+        """
+        conf_sites = system_sync_server_presets.get("sites", {})
+
+        alt_site_pairs = self._get_alt_site_pairs(conf_sites)
+
+        already_attached_keys = list(already_attached_sites.keys())
+        for added_site in already_attached_keys:
+            real_created = already_attached_sites[added_site]
+            for alt_site in alt_site_pairs.get(added_site, []):
+                if alt_site in already_attached_sites.keys():
+                    continue
+                meta = {"name": alt_site}
+                # alt site inherits state of 'created_dt'
+                if real_created:
+                    meta["created_dt"] = real_created
+                rec["sites"].append(meta)
+                already_attached_sites[meta["name"]] = real_created
+
+        return rec, already_attached_sites
+
+    def _get_alt_site_pairs(self, conf_sites):
+        """Returns dict of site and its alternative sites.
+
+        If `site` has alternative site, it means that alt_site has 'site' as
+        alternative site
+        Args:
+            conf_sites (dict)
+        Returns:
+            (dict): {'site': [alternative sites]...}
+        """
+        alt_site_pairs = defaultdict(list)
+        for site_name, site_info in conf_sites.items():
+            alt_sites = set(site_info.get("alternative_sites", []))
+            alt_site_pairs[site_name].extend(alt_sites)
+
+            for alt_site in alt_sites:
+                alt_site_pairs[alt_site].append(site_name)
+
+        for site_name, alt_sites in alt_site_pairs.items():
+            sites_queue = deque(alt_sites)
+            while sites_queue:
+                alt_site = sites_queue.popleft()
+
+                # safety against wrong config
+                # {"SFTP": {"alternative_site": "SFTP"}
+                if alt_site == site_name or alt_site not in alt_site_pairs:
+                    continue
+
+                for alt_alt_site in alt_site_pairs[alt_site]:
+                    if (
+                        alt_alt_site != site_name
+                        and alt_alt_site not in alt_sites
+                    ):
+                        alt_sites.append(alt_alt_site)
+                        sites_queue.append(alt_alt_site)
+
+        return alt_site_pairs
+
+    def handle_destination_files(self, integrated_file_sizes, mode):
+        """ Clean destination files
+            Called when error happened during integrating to DB or to disk
+            OR called to rename uploaded files from temporary name to final to
+            highlight publishing in progress/broken
+            Used to clean unwanted files
+
+        Arguments:
+            integrated_file_sizes: dictionary, file urls as keys, size as value
+            mode: 'remove' - clean files,
+                  'finalize' - rename files,
+                               remove TMP_FILE_EXT suffix denoting temp file
+        """
+        if integrated_file_sizes:
+            for file_url, _file_size in integrated_file_sizes.items():
+                if not os.path.exists(file_url):
+                    self.log.debug(
+                        "File {} was not found.".format(file_url)
+                    )
+                    continue
+
+                try:
+                    if mode == 'remove':
+                        self.log.debug("Removing file {}".format(file_url))
+                        os.remove(file_url)
+                    if mode == 'finalize':
+                        new_name = re.sub(
+                            r'\.{}$'.format(self.TMP_FILE_EXT),
+                            '',
+                            file_url
+                        )
+
+                        if os.path.exists(new_name):
+                            self.log.debug(
+                                "Overwriting file {} to {}".format(
+                                    file_url, new_name
+                                )
+                            )
+                            shutil.copy(file_url, new_name)
+                            os.remove(file_url)
+                        else:
+                            self.log.debug(
+                                "Renaming file {} to {}".format(
+                                    file_url, new_name
+                                )
+                            )
+                            os.rename(file_url, new_name)
+                except OSError:
+                    self.log.error("Cannot {} file {}".format(mode, file_url),
+                                   exc_info=True)
+                    six.reraise(*sys.exc_info())

From 271a829f6d441bcf26e6ddaf33510f984dc0c703 Mon Sep 17 00:00:00 2001
From: Roy Nieterau
Date: Tue, 5 Jul 2022 09:24:38 +0200
Subject: [PATCH 68/72] Remove duplicate source family

---
 openpype/plugins/publish/integrate_new.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py
index 4c14c17dae..fd3cf8882d 100644
--- a/openpype/plugins/publish/integrate_new.py
+++ b/openpype/plugins/publish/integrate_new.py
@@ -92,7 +92,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
         "source",
         "matchmove",
         "image",
-        "source",
         "assembly",
         "fbx",
         "textures",

From 148ac26bf961aa8e44ffcd453efbdbb0f4a8df75 Mon Sep 17 00:00:00 2001
From: Roy Nieterau
Date: Tue, 5 Jul 2022 09:28:00 +0200
Subject: [PATCH 69/72] Update USD families with latest develop

---
 openpype/plugins/publish/integrate.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/openpype/plugins/publish/integrate.py b/openpype/plugins/publish/integrate.py
index 6ad0849ff7..6253a3ec11 100644
--- a/openpype/plugins/publish/integrate.py
+++ b/openpype/plugins/publish/integrate.py
@@ -156,8 +156,10 @@ class IntegrateAsset(pyblish.api.InstancePlugin):
         "usd",
         "staticMesh",
"skeletalMesh", - "usdComposition", - "usdOverride", + "mvLook", + "mvUsd", + "mvUsdComposition", + "mvUsdOverride", "simpleUnrealTexture" ] exclude_families = ["clip", "render.farm"] From 035c4d2f93fd0a29ba8f6f1789a327878861284a Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 5 Jul 2022 09:30:07 +0200 Subject: [PATCH 70/72] Set up old vs. new integrator per host --- openpype/plugins/publish/integrate.py | 1 + openpype/plugins/publish/integrate_new.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/openpype/plugins/publish/integrate.py b/openpype/plugins/publish/integrate.py index 6253a3ec11..d098147603 100644 --- a/openpype/plugins/publish/integrate.py +++ b/openpype/plugins/publish/integrate.py @@ -110,6 +110,7 @@ class IntegrateAsset(pyblish.api.InstancePlugin): label = "Integrate Asset New" order = pyblish.api.IntegratorOrder + hosts = ["maya"] families = ["workfile", "pointcache", "camera", diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index fd3cf8882d..c9848abc14 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -62,6 +62,10 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): label = "Integrate Asset New" order = pyblish.api.IntegratorOrder + hosts = ["aftereffects", "blender", "celaction", "flame", "harmony", + "hiero", "houdini", "nuke", "photoshop", "resolve", + "standalonepublisher", "traypublisher", "tvpaint", "unreal", + "webpublisher"] families = ["workfile", "pointcache", "camera", From a3757636e7705b34699adff7e1e23f7ff57284d6 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 5 Jul 2022 09:31:53 +0200 Subject: [PATCH 71/72] Remove 'intent' context data override @iLLiCiTiT says: Intent should be a dictionary with "value" and "label", to be able tell if you want use value or label of the intent in templates. --- openpype/plugins/publish/collect_anatomy_context_data.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/openpype/plugins/publish/collect_anatomy_context_data.py b/openpype/plugins/publish/collect_anatomy_context_data.py index 8db9d0d3d7..0794adfb67 100644 --- a/openpype/plugins/publish/collect_anatomy_context_data.py +++ b/openpype/plugins/publish/collect_anatomy_context_data.py @@ -92,13 +92,5 @@ class CollectAnatomyContextData(pyblish.api.ContextPlugin): } }) - # todo: some code actually expects the dict itself and others doesn't - # question: what should it be? - intent = context.data.get("intent") - if intent and isinstance(intent, dict): - intent = intent.get("value") - if intent: - context_data["intent"] = intent - self.log.info("Global anatomy Data collected") self.log.debug(json.dumps(context_data, indent=4)) From b4697b6e1a0cc778765d617b68d1e516ca7dcea9 Mon Sep 17 00:00:00 2001 From: Roy Nieterau Date: Tue, 5 Jul 2022 10:36:37 +0200 Subject: [PATCH 72/72] Refactor integrator labels --- openpype/plugins/publish/integrate.py | 2 +- openpype/plugins/publish/integrate_new.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openpype/plugins/publish/integrate.py b/openpype/plugins/publish/integrate.py index d098147603..5e86eb014a 100644 --- a/openpype/plugins/publish/integrate.py +++ b/openpype/plugins/publish/integrate.py @@ -108,7 +108,7 @@ class IntegrateAsset(pyblish.api.InstancePlugin): "data": additional metadata for each representation. 
""" - label = "Integrate Asset New" + label = "Integrate Asset" order = pyblish.api.IntegratorOrder hosts = ["maya"] families = ["workfile", diff --git a/openpype/plugins/publish/integrate_new.py b/openpype/plugins/publish/integrate_new.py index c9848abc14..baa14b285c 100644 --- a/openpype/plugins/publish/integrate_new.py +++ b/openpype/plugins/publish/integrate_new.py @@ -60,7 +60,7 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin): "data": additional metadata for each representation. """ - label = "Integrate Asset New" + label = "Integrate Asset (legacy)" order = pyblish.api.IntegratorOrder hosts = ["aftereffects", "blender", "celaction", "flame", "harmony", "hiero", "houdini", "nuke", "photoshop", "resolve",