import os
from os.path import getsize
import logging
import sys
import copy
import clique
import errno
import six
import re
import shutil

from collections import deque, defaultdict
from datetime import datetime

from bson.objectid import ObjectId
from pymongo import DeleteOne, InsertOne
import pyblish.api

from openpype.client import (
    get_asset_by_name,
    get_subset_by_id,
    get_subset_by_name,
    get_version_by_id,
    get_version_by_name,
    get_representations,
    get_archived_representations,
)
from openpype.lib import (
    prepare_template_data,
    create_hard_link,
    StringTemplate,
    TemplateUnsolved,
    source_hash,
    filter_profiles,
    get_local_site_id,
)
from openpype.pipeline import legacy_io
from openpype.pipeline.publish import get_publish_template_name

# this is needed until speedcopy for linux is fixed
if sys.platform == "win32":
    from speedcopy import copyfile
else:
    from shutil import copyfile

log = logging.getLogger(__name__)


class IntegrateAssetNew(pyblish.api.InstancePlugin):
    """Resolve any dependency issues.

    This plug-in resolves any paths which, if not updated, might break
    the published file.

    The order of families is important: when working with lookdev you want
    to first publish the texture, update the texture paths in the nodes and
    then publish the shading network. The same goes for file dependent
    assets.

    Requirements for an instance to be correctly integrated:

    instance.data['representations'] - must be a list and each member
    must be a dictionary with the following data:
        'files': list of filenames for a sequence, string for a single file.
                 Only the filename is allowed, without the folder path.
        'stagingDir': "path/to/folder/with/files"
        'name': representation name (usually the same as extension)
        'ext': file extension
    optional data
        "frameStart"
        "frameEnd"
        'fps'
        "data": additional metadata for each representation.
    """

    label = "Integrate Asset (legacy)"
    # Make sure it happens after new integrator
    order = pyblish.api.IntegratorOrder + 0.00001

    families = [
        "workfile", "pointcache", "pointcloud", "proxyAbc", "camera",
        "animation", "model", "maxScene", "mayaAscii", "mayaScene",
        "setdress", "layout", "ass", "vdbcache", "scene", "vrayproxy",
        "vrayscene_layer", "render", "prerender", "imagesequence", "review",
        "rendersetup", "rig", "plate", "look", "audio", "yetiRig",
        "yeticache", "nukenodes", "gizmo", "source", "matchmove", "image",
        "assembly", "fbx", "gltf", "textures", "action", "harmony.template",
        "harmony.palette", "editorial", "background", "camerarig",
        "redshiftproxy", "effect", "xgen", "hda", "usd", "staticMesh",
        "skeletalMesh", "mvLook", "mvUsdComposition", "mvUsdOverride",
        "simpleUnrealTexture"
    ]

    exclude_families = ["render.farm"]
    db_representation_context_keys = [
        "project", "asset", "task", "subset", "version", "representation",
        "family", "hierarchy", "username", "user"
    ]
    default_template_name = "publish"

    # suffix to denote temporary files, use without '.'
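    # Illustrative note: with TMP_FILE_EXT = 'tmp', files are first copied
    # as e.g. "<publish_dir>/beauty_v001.exr.tmp" and
    # 'handle_destination_files' later either renames them to
    # "<publish_dir>/beauty_v001.exr" ('finalize') or removes them
    # ('remove'). The file name above is only an example.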
    TMP_FILE_EXT = 'tmp'

    # file_url : file_size of all published and uploaded files
    integrated_file_sizes = {}

    # Attributes set by settings
    subset_grouping_profiles = None

    def process(self, instance):
        if instance.data.get("processedWithNewIntegrator"):
            self.log.debug(
                "Instance was already processed with new integrator"
            )
            return

        for ef in self.exclude_families:
            if (
                instance.data["family"] == ef
                or ef in instance.data["families"]
            ):
                self.log.debug("Excluded family '{}' in '{}' or {}".format(
                    ef, instance.data["family"], instance.data["families"]))
                return

        # instance should be published on a farm
        if instance.data.get("farm"):
            return

        # Prepare representations that should be integrated
        repres = instance.data.get("representations")
        # Raise an error if the instance doesn't have any representations
        if not repres:
            raise ValueError(
                "Instance {} has no files to transfer".format(
                    instance.data["family"]
                )
            )

        # Validate type of stored representations
        if not isinstance(repres, (list, tuple)):
            raise TypeError(
                "Instance 'files' must be a list, got: {0} {1}".format(
                    str(type(repres)), str(repres)
                )
            )

        # Filter representations
        filtered_repres = []
        for repre in repres:
            if "delete" in repre.get("tags", []):
                continue
            filtered_repres.append(repre)

        # Skip the instance if there are no representations to integrate
        # - all representations were tagged for deletion
        if not filtered_repres:
            self.log.warning((
                "Skipping, there are no representations"
                " to integrate for instance {}"
            ).format(instance.data["family"]))
            return

        self.integrated_file_sizes = {}
        try:
            self.register(instance, filtered_repres)
            self.log.info("Integrated Asset in to the database ...")
            self.log.info("instance.data: {}".format(instance.data))
            self.handle_destination_files(
                self.integrated_file_sizes, 'finalize'
            )
        except Exception:
            # clean destination
            self.log.critical("Error when registering", exc_info=True)
            self.handle_destination_files(self.integrated_file_sizes, 'remove')
            six.reraise(*sys.exc_info())

    def register(self, instance, repres):
        # Required environment variables
        anatomy_data = instance.data["anatomyData"]

        legacy_io.install()

        context = instance.context

        project_entity = instance.data["projectEntity"]
        project_name = project_entity["name"]

        context_asset_name = None
        context_asset_doc = context.data.get("assetEntity")
        if context_asset_doc:
            context_asset_name = context_asset_doc["name"]

        asset_name = instance.data["asset"]
        asset_entity = instance.data.get("assetEntity")
        if not asset_entity or asset_entity["name"] != context_asset_name:
            asset_entity = get_asset_by_name(project_name, asset_name)
            assert asset_entity, (
                "No asset found by the name \"{0}\" in project \"{1}\""
            ).format(asset_name, project_entity["name"])

            instance.data["assetEntity"] = asset_entity

            # update anatomy data with asset specific keys
            # - name should already be set
            hierarchy = ""
            parents = asset_entity["data"]["parents"]
            if parents:
                hierarchy = "/".join(parents)
            anatomy_data["hierarchy"] = hierarchy

        # Make sure task name in anatomy data is same as on instance.data
        asset_tasks = (
            asset_entity.get("data", {}).get("tasks")
        ) or {}
        task_name = instance.data.get("task")
        # default to empty task info so lookups below do not fail when
        # the instance has no task set
        task_info = {}
        if task_name:
            task_info = asset_tasks.get(task_name) or {}
            task_type = task_info.get("type")

            project_task_types = project_entity["config"]["tasks"]
            task_code = project_task_types.get(task_type, {}).get("short_name")
            anatomy_data["task"] = {
                "name": task_name,
                "type": task_type,
                "short": task_code
            }

        elif "task" in anatomy_data:
            # Just set 'task_name' variable to context task
            task_name = anatomy_data["task"]["name"]
            task_type = anatomy_data["task"]["type"]
anatomy_data["task"]["type"] else: task_name = None task_type = None # Fill family in anatomy data anatomy_data["family"] = instance.data.get("family") stagingdir = instance.data.get("stagingDir") if not stagingdir: self.log.debug(( "{0} is missing reference to staging directory." " Will try to get it from representation." ).format(instance)) else: self.log.debug( "Establishing staging directory @ {0}".format(stagingdir) ) subset = self.get_subset(project_name, asset_entity, instance) instance.data["subsetEntity"] = subset version_number = instance.data["version"] self.log.debug("Next version: v{}".format(version_number)) version_data = self.create_version_data(context, instance) version_data_instance = instance.data.get('versionData') if version_data_instance: version_data.update(version_data_instance) # TODO rename method from `create_version` to # `prepare_version` or similar... version = self.create_version( subset=subset, version_number=version_number, data=version_data ) self.log.debug("Creating version ...") new_repre_names_low = [ _repre["name"].lower() for _repre in repres ] existing_version = get_version_by_name( project_name, version_number, subset["_id"] ) if existing_version is None: version_id = legacy_io.insert_one(version).inserted_id else: # Check if instance have set `append` mode which cause that # only replicated representations are set to archive append_repres = instance.data.get("append", False) # Update version data # TODO query by _id and legacy_io.update_many({ 'type': 'version', 'parent': subset["_id"], 'name': version_number }, { '$set': version }) version_id = existing_version['_id'] # Find representations of existing version and archive them current_repres = list(get_representations( project_name, version_ids=[version_id] )) bulk_writes = [] for repre in current_repres: if append_repres: # archive only duplicated representations if repre["name"].lower() not in new_repre_names_low: continue # Representation must change type, # `_id` must be stored to other key and replaced with new # - that is because new representations should have same ID repre_id = repre["_id"] bulk_writes.append(DeleteOne({"_id": repre_id})) repre["orig_id"] = repre_id repre["_id"] = ObjectId() repre["type"] = "archived_representation" bulk_writes.append(InsertOne(repre)) # bulk updates if bulk_writes: legacy_io.database[project_name].bulk_write( bulk_writes ) version = get_version_by_id(project_name, version_id) instance.data["versionEntity"] = version existing_repres = list(get_archived_representations( project_name, version_ids=[version_id] )) instance.data['version'] = version['name'] intent_value = instance.context.data.get("intent") if intent_value and isinstance(intent_value, dict): intent_value = intent_value.get("value") if intent_value: anatomy_data["intent"] = intent_value anatomy = instance.context.data['anatomy'] # Find the representations to transfer amongst the files # Each should be a single representation (as such, a single extension) representations = [] destination_list = [] orig_transfers = [] if 'transfers' not in instance.data: instance.data['transfers'] = [] else: orig_transfers = list(instance.data['transfers']) family = self.main_family_from_instance(instance) template_name = get_publish_template_name( project_name, instance.context.data["hostName"], family, task_name=task_info.get("name"), task_type=task_info.get("type"), project_settings=instance.context.data["project_settings"], logger=self.log ) published_representations = {} for idx, repre in enumerate(repres): 
            published_files = []

            # create template data for Anatomy
            template_data = copy.deepcopy(anatomy_data)
            if intent_value is not None:
                template_data["intent"] = intent_value

            resolution_width = repre.get("resolutionWidth")
            resolution_height = repre.get("resolutionHeight")
            fps = instance.data.get("fps")

            if resolution_width:
                template_data["resolution_width"] = resolution_width
            if resolution_height:
                template_data["resolution_height"] = resolution_height
            if fps:
                template_data["fps"] = fps

            if "originalBasename" in instance.data:
                template_data.update({
                    "originalBasename": instance.data.get("originalBasename")
                })

            files = repre['files']
            if repre.get('stagingDir'):
                stagingdir = repre['stagingDir']

            if repre.get("outputName"):
                template_data["output"] = repre['outputName']

            template_data["representation"] = repre["name"]

            ext = repre["ext"]
            if ext.startswith("."):
                self.log.warning((
                    "Implementation warning: <\"{}\">"
                    " Representation's extension stored under \"ext\" key"
                    " starts with a dot (\"{}\")."
                ).format(repre["name"], ext))
                ext = ext[1:]
            repre["ext"] = ext
            template_data["ext"] = ext

            self.log.info(template_name)

            template = os.path.normpath(
                anatomy.templates[template_name]["path"])

            sequence_repre = isinstance(files, list)
            repre_context = None
            if sequence_repre:
                self.log.debug(
                    "files: {}".format(files))
                src_collections, remainder = clique.assemble(files)
                self.log.debug(
                    "src_tail_collections: {}".format(str(src_collections)))
                src_collection = src_collections[0]

                # Assert that each member has identical suffix
                src_head = src_collection.format("{head}")
                src_tail = src_collection.format("{tail}")

                # fix dst_padding
                valid_files = [x for x in files if src_collection.match(x)]
                padd_len = len(
                    valid_files[0].replace(src_head, "").replace(src_tail, "")
                )
                src_padding_exp = "%0{}d".format(padd_len)

                test_dest_files = list()
                for i in [1, 2]:
                    template_data["representation"] = repre['ext']
                    if not repre.get("udim"):
                        template_data["frame"] = src_padding_exp % i
                    else:
                        template_data["udim"] = src_padding_exp % i

                    template_obj = anatomy.templates_obj[template_name]["path"]
                    template_filled = template_obj.format_strict(
                        template_data)
                    if repre_context is None:
                        repre_context = template_filled.used_values

                    test_dest_files.append(
                        os.path.normpath(template_filled)
                    )

                if not repre.get("udim"):
                    template_data["frame"] = repre_context["frame"]
                else:
                    template_data["udim"] = repre_context["udim"]

                self.log.debug(
                    "test_dest_files: {}".format(str(test_dest_files)))

                dst_collections, remainder = clique.assemble(test_dest_files)
                dst_collection = dst_collections[0]
                dst_head = dst_collection.format("{head}")
                dst_tail = dst_collection.format("{tail}")

                index_frame_start = None
                # TODO use frame padding from right template group
                if repre.get("frameStart") is not None:
                    frame_start_padding = int(
                        anatomy.templates["render"].get(
                            "frame_padding",
                            anatomy.templates["render"].get("padding")
                        )
                    )

                    index_frame_start = int(repre.get("frameStart"))

                # exception for slate workflow
                if index_frame_start and "slate" in instance.data["families"]:
                    index_frame_start -= 1

                dst_padding_exp = src_padding_exp
                dst_start_frame = None
                collection_start = list(src_collection.indexes)[0]
                for i in src_collection.indexes:
                    # TODO 1.) do not count padding in each index iteration
                    #   2.) do not count dst_padding from src_padding before
                    #       index_frame_start check
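                    # Illustrative example: with index_frame_start=1001 and
                    # frame_start_padding=4, source frames 0995-0997 map to
                    # destination frames 1001-1003; a "udim" representation
                    # keeps the original index, otherwise the destination
                    # numbering is rebased to start at 0.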
                    frame_number = i - collection_start
                    src_padding = src_padding_exp % i
                    src_file_name = "{0}{1}{2}".format(
                        src_head, src_padding, src_tail)

                    dst_padding = src_padding_exp % frame_number

                    if index_frame_start is not None:
                        dst_padding_exp = "%0{}d".format(frame_start_padding)
                        dst_padding = dst_padding_exp % (index_frame_start + frame_number)  # noqa: E501
                    elif repre.get("udim"):
                        dst_padding = int(i)

                    dst = "{0}{1}{2}".format(
                        dst_head, dst_padding, dst_tail
                    )
                    self.log.debug("destination: `{}`".format(dst))
                    src = os.path.join(stagingdir, src_file_name)
                    self.log.debug("source: {}".format(src))
                    instance.data["transfers"].append([src, dst])

                    published_files.append(dst)

                    # for adding first frame into db
                    if not dst_start_frame:
                        dst_start_frame = dst_padding

                # Store used frame value to template data
                if repre.get("frame"):
                    template_data["frame"] = dst_start_frame

                dst = "{0}{1}{2}".format(
                    dst_head, dst_start_frame, dst_tail
                )
                repre['published_path'] = dst

            else:
                # Single file
                #  _______
                # |      |\
                # |       |
                # |       |
                # |       |
                # |_______|
                #
                template_data.pop("frame", None)
                fname = files
                assert not os.path.isabs(fname), (
                    "Given file name is a full path"
                )
                template_data["representation"] = repre['ext']

                # Store used udim value to template data
                if repre.get("udim"):
                    template_data["udim"] = repre["udim"][0]
                src = os.path.join(stagingdir, fname)
                template_obj = anatomy.templates_obj[template_name]["path"]
                template_filled = template_obj.format_strict(template_data)
                repre_context = template_filled.used_values
                dst = os.path.normpath(template_filled)

                instance.data["transfers"].append([src, dst])

                published_files.append(dst)
                repre['published_path'] = dst
                self.log.debug("__ dst: {}".format(dst))

            if not instance.data.get("publishDir"):
                instance.data["publishDir"] = (
                    anatomy.templates_obj[template_name]["folder"]
                    .format_strict(template_data)
                )

            if repre.get("udim"):
                repre_context["udim"] = repre.get("udim")  # store list

            repre["publishedFiles"] = published_files

            for key in self.db_representation_context_keys:
                value = template_data.get(key)
                if not value:
                    continue
                repre_context[key] = template_data[key]

            # Use previous representation's id if there are any
            repre_id = None
            repre_name_low = repre["name"].lower()
            for _repre in existing_repres:
                # NOTE should we check lowered names?
                if repre_name_low == _repre["name"]:
                    repre_id = _repre["orig_id"]
                    break

            # Create a new id if existing representations do not match
            if repre_id is None:
                repre_id = ObjectId()

            data = repre.get("data") or {}
            data.update({'path': dst, 'template': template})
            representation = {
                "_id": repre_id,
                "schema": "openpype:representation-2.0",
                "type": "representation",
                "parent": version_id,
                "name": repre['name'],
                "data": data,
                "dependencies": instance.data.get("dependencies", "").split(),

                # Imprint shortcut to context
                # for performance reasons.
"context": repre_context } if repre.get("outputName"): representation["context"]["output"] = repre['outputName'] if sequence_repre and repre.get("frameStart") is not None: representation['context']['frame'] = ( dst_padding_exp % int(repre.get("frameStart")) ) # any file that should be physically copied is expected in # 'transfers' or 'hardlinks' if instance.data.get('transfers', False) or \ instance.data.get('hardlinks', False): # could throw exception, will be caught in 'process' # all integration to DB is being done together lower, # so no rollback needed self.log.debug("Integrating source files to destination ...") self.integrated_file_sizes.update(self.integrate(instance)) self.log.debug("Integrated files {}". format(self.integrated_file_sizes)) # get 'files' info for representation and all attached resources self.log.debug("Preparing files information ...") representation["files"] = self.get_files_info( instance, self.integrated_file_sizes) self.log.debug("__ representation: {}".format(representation)) destination_list.append(dst) self.log.debug("__ destination_list: {}".format(destination_list)) instance.data['destination_list'] = destination_list representations.append(representation) published_representations[repre_id] = { "representation": representation, "anatomy_data": template_data, "published_files": published_files } self.log.debug("__ representations: {}".format(representations)) # reset transfers for next representation # instance.data['transfers'] is used as a global variable # in current codebase instance.data['transfers'] = list(orig_transfers) # Remove old representations if there are any (before insertion of new) if existing_repres: repre_ids_to_remove = [] for repre in existing_repres: repre_ids_to_remove.append(repre["_id"]) legacy_io.delete_many({"_id": {"$in": repre_ids_to_remove}}) for rep in instance.data["representations"]: self.log.debug("__ rep: {}".format(rep)) legacy_io.insert_many(representations) instance.data["published_representations"] = ( published_representations ) # self.log.debug("Representation: {}".format(representations)) self.log.info("Registered {} items".format(len(representations))) def integrate(self, instance): """ Move the files. Through `instance.data["transfers"]` Args: instance: the instance to integrate Returns: integrated_file_sizes: dictionary of destination file url and its size in bytes """ # store destination url and size for reporting and rollback integrated_file_sizes = {} transfers = list(instance.data.get("transfers", list())) for src, dest in transfers: if os.path.normpath(src) != os.path.normpath(dest): dest = self.get_dest_temp_url(dest) self.copy_file(src, dest) # TODO needs to be updated during site implementation integrated_file_sizes[dest] = os.path.getsize(dest) # Produce hardlinked copies # Note: hardlink can only be produced between two files on the same # server/disk and editing one of the two will edit both files at once. # As such it is recommended to only make hardlinks between static files # to ensure publishes remain safe and non-edited. hardlinks = instance.data.get("hardlinks", list()) for src, dest in hardlinks: dest = self.get_dest_temp_url(dest) self.log.debug("Hardlinking file ... 
{} -> {}".format(src, dest)) if not os.path.exists(dest): self.hardlink_file(src, dest) # TODO needs to be updated during site implementation integrated_file_sizes[dest] = os.path.getsize(dest) return integrated_file_sizes def copy_file(self, src, dst): """ Copy given source to destination Arguments: src (str): the source file which needs to be copied dst (str): the destination of the sourc file Returns: None """ src = os.path.normpath(src) dst = os.path.normpath(dst) self.log.debug("Copying file ... {} -> {}".format(src, dst)) dirname = os.path.dirname(dst) try: os.makedirs(dirname) except OSError as e: if e.errno == errno.EEXIST: pass else: self.log.critical("An unexpected error occurred.") six.reraise(*sys.exc_info()) # copy file with speedcopy and check if size of files are simetrical while True: if not shutil._samefile(src, dst): copyfile(src, dst) else: self.log.critical( "files are the same {} to {}".format(src, dst) ) os.remove(dst) try: shutil.copyfile(src, dst) self.log.debug("Copying files with shutil...") except OSError as e: self.log.critical("Cannot copy {} to {}".format(src, dst)) self.log.critical(e) six.reraise(*sys.exc_info()) if str(getsize(src)) in str(getsize(dst)): break def hardlink_file(self, src, dst): dirname = os.path.dirname(dst) try: os.makedirs(dirname) except OSError as e: if e.errno == errno.EEXIST: pass else: self.log.critical("An unexpected error occurred.") six.reraise(*sys.exc_info()) create_hard_link(src, dst) def get_subset(self, project_name, asset, instance): subset_name = instance.data["subset"] subset = get_subset_by_name(project_name, subset_name, asset["_id"]) if subset is None: self.log.info("Subset '%s' not found, creating ..." % subset_name) self.log.debug("families. %s" % instance.data.get('families')) self.log.debug( "families. %s" % type(instance.data.get('families'))) family = instance.data.get("family") families = [] if family: families.append(family) for _family in (instance.data.get("families") or []): if _family not in families: families.append(_family) _id = legacy_io.insert_one({ "schema": "openpype:subset-3.0", "type": "subset", "name": subset_name, "data": { "families": families }, "parent": asset["_id"] }).inserted_id subset = get_subset_by_id(project_name, _id) # QUESTION Why is changing of group and updating it's # families in 'get_subset'? self._set_subset_group(instance, subset["_id"]) # Update families on subset. families = [instance.data["family"]] families.extend(instance.data.get("families", [])) legacy_io.update_many( {"type": "subset", "_id": ObjectId(subset["_id"])}, {"$set": {"data.families": families}} ) return subset def _set_subset_group(self, instance, subset_id): """ Mark subset as belonging to group in DB. Uses Settings > Global > Publish plugins > IntegrateAssetNew Args: instance (dict): processed instance subset_id (str): DB's subset _id """ # Fist look into instance data subset_group = instance.data.get("subsetGroup") if not subset_group: subset_group = self._get_subset_group(instance) if subset_group: legacy_io.update_many({ 'type': 'subset', '_id': ObjectId(subset_id) }, {'$set': {'data.subsetGroup': subset_group}}) def _get_subset_group(self, instance): """Look into subset group profiles set by settings. Attribute 'subset_grouping_profiles' is defined by OpenPype settings. """ # Skip if 'subset_grouping_profiles' is empty if not self.subset_grouping_profiles: return None # QUESTION # - is there a chance that task name is not filled in anatomy # data? # - should we use context task in that case? 
        anatomy_data = instance.data["anatomyData"]
        task_name = None
        task_type = None
        if "task" in anatomy_data:
            task_name = anatomy_data["task"]["name"]
            task_type = anatomy_data["task"]["type"]

        filtering_criteria = {
            "families": instance.data["family"],
            "hosts": instance.context.data["hostName"],
            "tasks": task_name,
            "task_types": task_type
        }
        matching_profile = filter_profiles(
            self.subset_grouping_profiles,
            filtering_criteria
        )
        # Skip if there is no matching profile
        if not matching_profile:
            return None

        filled_template = None
        template = matching_profile["template"]
        fill_pairs = (
            ("family", filtering_criteria["families"]),
            ("task", filtering_criteria["tasks"]),
            ("host", filtering_criteria["hosts"]),
            ("subset", instance.data["subset"]),
            ("renderlayer", instance.data.get("renderlayer"))
        )

        fill_pairs = prepare_template_data(fill_pairs)
        try:
            filled_template = StringTemplate.format_strict_template(
                template, fill_pairs
            )
        except (KeyError, TemplateUnsolved):
            keys = []
            if fill_pairs:
                keys = fill_pairs.keys()
            msg = "Subset grouping failed. " \
                  "Only {} are expected in Settings".format(','.join(keys))
            self.log.warning(msg)

        return filled_template

    def create_version(self, subset, version_number, data=None):
        """Prepare the version document to be stored in the database.

        Args:
            subset (dict): the registered subset of the asset
            version_number (int): the version number

        Returns:
            dict: collection of data to create a version
        """
        return {
            "schema": "openpype:version-3.0",
            "type": "version",
            "parent": subset["_id"],
            "name": version_number,
            "data": data
        }

    def create_version_data(self, context, instance):
        """Create the data collection for the version.

        Args:
            context: the current context
            instance: the current instance being published

        Returns:
            dict: the required information with instance.data as key
        """
        families = []
        current_families = instance.data.get("families", list())
        instance_family = instance.data.get("family", None)
        if instance_family is not None:
            families.append(instance_family)
        families += current_families

        # create relative source path for DB
        source = instance.data.get("source")
        if not source:
            source = context.data["currentFile"]
            anatomy = instance.context.data["anatomy"]
            source = self.get_rootless_path(anatomy, source)
        self.log.debug("Source: {}".format(source))

        version_data = {
            "families": families,
            "time": context.data["time"],
            "author": context.data["user"],
            "source": source,
            "comment": instance.data["comment"],
            "machine": context.data.get("machine"),
            "fps": context.data.get(
                "fps", instance.data.get("fps")
            )
        }

        intent_value = instance.context.data.get("intent")
        if intent_value and isinstance(intent_value, dict):
            intent_value = intent_value.get("value")

        if intent_value:
            version_data["intent"] = intent_value

        # Include optional data if present in instance data
        optionals = [
            "frameStart", "frameEnd", "step",
            "handleEnd", "handleStart", "sourceHashes"
        ]
        for key in optionals:
            if key in instance.data:
                version_data[key] = instance.data[key]

        return version_data

    def main_family_from_instance(self, instance):
        """Returns main family of entered instance."""
        family = instance.data.get("family")
        if not family:
            family = instance.data["families"][0]
        return family

    def get_rootless_path(self, anatomy, path):
        """Return the path without its absolute, host-specific portion
        (eg. 'c:\\' or '/opt/..') if possible.

        This information is host dependent and shouldn't be captured.

        Example:
            'c:/projects/MyProject1/Assets/publish...' >
            '{root}/MyProject1/Assets...'
        Args:
            anatomy: anatomy part from instance
            path: path (absolute)
        Returns:
            path: modified path if possible, or unmodified path
                (with a warning logged)
        """
        success, rootless_path = (
            anatomy.find_root_template_from_path(path)
        )
        if success:
            path = rootless_path
        else:
            self.log.warning((
                "Could not find root path for remapping \"{}\"."
                " This may cause issues on farm."
            ).format(path))
        return path

    def get_files_info(self, instance, integrated_file_sizes):
        """Prepare the 'files' portion for attached resources and main asset.

        Combines records from the 'transfers' and 'hardlinks' parts of the
        instance. All attached resources should be added, currently without
        Context info.

        Arguments:
            instance: the current instance being published
            integrated_file_sizes: dictionary of destination path (absolute)
                and its file size
        Returns:
            output_resources: array of dictionaries to be added to the
                'files' key in representation
        """
        resources = list(instance.data.get("transfers", []))
        resources.extend(list(instance.data.get("hardlinks", [])))

        self.log.debug("get_resource_files_info.resources:{}".format(
            resources))

        output_resources = []
        anatomy = instance.context.data["anatomy"]
        for _src, dest in resources:
            path = self.get_rootless_path(anatomy, dest)
            dest = self.get_dest_temp_url(dest)
            file_hash = source_hash(dest)
            if self.TMP_FILE_EXT and \
                    ',{}'.format(self.TMP_FILE_EXT) in file_hash:
                file_hash = file_hash.replace(
                    ',{}'.format(self.TMP_FILE_EXT), '')

            file_info = self.prepare_file_info(
                path,
                integrated_file_sizes[dest],
                file_hash,
                instance=instance)
            output_resources.append(file_info)

        return output_resources

    def get_dest_temp_url(self, dest):
        """Add the TMP_FILE_EXT suffix to denote a temporary file.

        Temporary files are renamed after successful registration into the DB
        and a full copy to the destination.

        Arguments:
            dest: destination url of published file (absolute)
        Returns:
            dest: destination path + '.TMP_FILE_EXT'
        """
        if self.TMP_FILE_EXT and '.{}'.format(self.TMP_FILE_EXT) not in dest:
            dest += '.{}'.format(self.TMP_FILE_EXT)
        return dest

    def prepare_file_info(self, path, size=None, file_hash=None,
                          sites=None, instance=None):
        """Prepare information for one file (asset or resource).

        Arguments:
            path: destination url of published file (rootless)
            size (optional): size of file in bytes
            file_hash (optional): hash of file for synchronization validation
            sites (optional): array of published locations,
                e.g. [{'name': 'studio', 'created_dt': date}];
                by default keys like 'studio', 'site1', 'gdrive1' are expected
            instance (dict, optional): to get collected settings
        Returns:
            rec: dictionary with filled info
        """
        local_site = 'studio'  # default
        remote_site = None
        always_accesible = []
        sync_project_presets = None

        rec = {
            "_id": ObjectId(),
            "path": path
        }
        if size:
            rec["size"] = size

        if file_hash:
            rec["hash"] = file_hash

        if sites:
            rec["sites"] = sites
        else:
            system_sync_server_presets = (
                instance.context.data["system_settings"]
                                     ["modules"]
                                     ["sync_server"])
            log.debug("system_sett:: {}".format(system_sync_server_presets))

            if system_sync_server_presets["enabled"]:
                sync_project_presets = (
                    instance.context.data["project_settings"]
                                         ["global"]
                                         ["sync_server"])

            if sync_project_presets and sync_project_presets["enabled"]:
                local_site, remote_site = self._get_sites(
                    sync_project_presets)
                always_accesible = sync_project_presets["config"]. \
                    get("always_accessible_on", [])
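            # Each record appended to rec["sites"] below is a small dict,
            # e.g. {"name": "studio", "created_dt": datetime.now()} for the
            # local site, or just {"name": "gdrive"} (no 'created_dt') for
            # sites that are only marked for later synchronization; the site
            # names here are illustrative.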
\ get("always_accessible_on", []) already_attached_sites = {} meta = {"name": local_site, "created_dt": datetime.now()} rec["sites"] = [meta] already_attached_sites[meta["name"]] = meta["created_dt"] if sync_project_presets and sync_project_presets["enabled"]: if remote_site and \ remote_site not in already_attached_sites.keys(): # add remote meta = {"name": remote_site.strip()} rec["sites"].append(meta) already_attached_sites[meta["name"]] = None # add alternative sites rec, already_attached_sites = self._add_alternative_sites( system_sync_server_presets, already_attached_sites, rec) # add skeleton for site where it should be always synced to for always_on_site in set(always_accesible): if always_on_site not in already_attached_sites.keys(): meta = {"name": always_on_site.strip()} rec["sites"].append(meta) already_attached_sites[meta["name"]] = None log.debug("final sites:: {}".format(rec["sites"])) return rec def _get_sites(self, sync_project_presets): """Returns tuple (local_site, remote_site)""" local_site_id = get_local_site_id() local_site = sync_project_presets["config"]. \ get("active_site", "studio").strip() if local_site == 'local': local_site = local_site_id remote_site = sync_project_presets["config"].get("remote_site") if remote_site == 'local': remote_site = local_site_id return local_site, remote_site def _add_alternative_sites(self, system_sync_server_presets, already_attached_sites, rec): """Loop through all configured sites and add alternatives. See SyncServerModule.handle_alternate_site """ conf_sites = system_sync_server_presets.get("sites", {}) alt_site_pairs = self._get_alt_site_pairs(conf_sites) already_attached_keys = list(already_attached_sites.keys()) for added_site in already_attached_keys: real_created = already_attached_sites[added_site] for alt_site in alt_site_pairs.get(added_site, []): if alt_site in already_attached_sites.keys(): continue meta = {"name": alt_site} # alt site inherits state of 'created_dt' if real_created: meta["created_dt"] = real_created rec["sites"].append(meta) already_attached_sites[meta["name"]] = real_created return rec, already_attached_sites def _get_alt_site_pairs(self, conf_sites): """Returns dict of site and its alternative sites. 
        If `site` has an alternative site, the alternative site also has
        `site` as its alternative site.

        Args:
            conf_sites (dict)
        Returns:
            (dict): {'site': [alternative sites]...}
        """
        alt_site_pairs = defaultdict(list)
        for site_name, site_info in conf_sites.items():
            alt_sites = set(site_info.get("alternative_sites", []))
            alt_site_pairs[site_name].extend(alt_sites)

            for alt_site in alt_sites:
                alt_site_pairs[alt_site].append(site_name)

        for site_name, alt_sites in alt_site_pairs.items():
            sites_queue = deque(alt_sites)
            while sites_queue:
                alt_site = sites_queue.popleft()

                # safety against wrong config
                # {"SFTP": {"alternative_site": "SFTP"}}
                if alt_site == site_name or alt_site not in alt_site_pairs:
                    continue

                for alt_alt_site in alt_site_pairs[alt_site]:
                    if (
                        alt_alt_site != site_name
                        and alt_alt_site not in alt_sites
                    ):
                        alt_sites.append(alt_alt_site)
                        sites_queue.append(alt_alt_site)

        return alt_site_pairs

    def handle_destination_files(self, integrated_file_sizes, mode):
        """Clean up or finalize destination files.

        Called when an error happened during integration to the DB or to
        disk, OR called to rename uploaded files from their temporary name
        to the final one once publishing succeeded.

        Arguments:
            integrated_file_sizes: dictionary with file urls as keys and
                sizes as values
            mode: 'remove' - clean files,
                  'finalize' - rename files, removing the TMP_FILE_EXT
                      suffix that denotes a temporary file
        """
        if integrated_file_sizes:
            for file_url, _file_size in integrated_file_sizes.items():
                if not os.path.exists(file_url):
                    self.log.debug(
                        "File {} was not found.".format(file_url)
                    )
                    continue

                try:
                    if mode == 'remove':
                        self.log.debug("Removing file {}".format(file_url))
                        os.remove(file_url)
                    if mode == 'finalize':
                        new_name = re.sub(
                            r'\.{}$'.format(self.TMP_FILE_EXT),
                            '',
                            file_url
                        )

                        if os.path.exists(new_name):
                            self.log.debug(
                                "Overwriting file {} to {}".format(
                                    file_url, new_name
                                )
                            )
                            shutil.copy(file_url, new_name)
                            os.remove(file_url)
                        else:
                            self.log.debug(
                                "Renaming file {} to {}".format(
                                    file_url, new_name
                                )
                            )
                            os.rename(file_url, new_name)
                except OSError:
                    self.log.error("Cannot {} file {}".format(mode, file_url),
                                   exc_info=True)
                    six.reraise(*sys.exc_info())