Sync Server - added multisite support

One provider could have multiple different sites,
each with different account.
This commit is contained in:
Petr Kalis 2020-10-22 13:31:24 +02:00
parent 59753ddb1b
commit 6670af63e0
5 changed files with 163 additions and 70 deletions

View file

@ -2,9 +2,47 @@ Synchronization server
--------------------- ---------------------
This server is scheduled at start of Pype, it periodically checks avalon DB This server is scheduled at start of Pype, it periodically checks avalon DB
for 'representation' records which have in their files.sites record with for 'representation' records which have in their files.sites record with
name: 'gdrive' without field 'created_dt'. name: 'gdrive' (or any other site name from 'gdrive.json') without
This denotes that this representation should be sync to GDrive. field 'created_dt'.
This denotes that this representation should be synced to GDrive.
Records like these are created by IntegrateNew process based on configuration. Records like these are created by IntegrateNew process based on configuration.
Leave 'config.json.remote_site' empty for not synchronizing at all.
One provider could have multiple sites. (GDrive implementation is 'a provider',
target folder on it is 'a site')
Quick HOWTOs:
-------------
I want to start syncing my newly published files:
------------------------------------------------
Get credentials for service account, share target folder on Gdrive with it
Set path to stored credentials file in gdrive.json
Set name of site, root folder in gdrive.json
Update config.json/remote_site to name of site you set in previous step
Start Pype and publish
My published file is not syncing:
--------------------------------
Check that representation record contains for all 'files.site' skeleton in
format: {name: "MY_CONFIGURED_REMOTE_SITE"}
Check if that record doesn't have already 'created_dt' filled. That would
denote that file was synced but someone might have removed it on remote
site.
If that records contains field "error", check that "tries" field doesn't
contain same value as threshold in config.json.retry_cnt. If it does fix
the problem mentioned in 'error' field, delete 'tries' field.
I want to sync my already published files:
-----------------------------------------
Configure your Pype for syncing (see first section of Howtos).
Manually add skeleton {name: "MY_CONFIGURED_REMOTE_SITE"} to all
representation.files.sites:
db.getCollection('MY_PROJECT').update({type:"representation"},
{$set:{"files.$[].sites.MY_CONFIGURED_REMOTE_SITE" : {}}}, true, true)
Needed configuration: Needed configuration:
-------------------- --------------------
@ -16,16 +54,23 @@ pype-config/presets/config.json:
could be same as 'local_id' if user is working could be same as 'local_id' if user is working
from home without connection to studio from home without connection to studio
infrastructure infrastructure
"remote_site": "gdrive" -- key for site to synchronize to (currently only "remote_site": "gdrive" -- key for site to synchronize to. Must match to site
'gdrive' implemented, but could be any provider configured in 'gdrive.json'.
implemented in 'pype/modules/sync_server') Used in IntegrateNew to prepare skeleton for
syncing in the representation record.
Leave empty if no syncing is wanted.
This is a general configuration, 'local_id', 'active_site' and 'remote_site'
will be set and changed by some GUI in the future.
pype-config/presets/gdrive.json: pype-config/presets/gdrive.json:
"credentials_url": "/my_secret_folder/credentials.json", "gdrive": { - site name, must be unique
-- path to credentials for service account "credentials_url": "/my_secret_folder/credentials.json",
"root": { -- "root": "/My Drive" in simple scenario, this could be for -- path to credentials for service account
multiroot projects "root": { -- "root": "/My Drive" in simple scenario, config here for
"root_one": "/My Drive/work_folder", -- multiroot projects
"root_tow": "/My Drive/publish_folder" "root_one": "/My Drive/work_folder",
"root_tow": "/My Drive/publish_folder"
}
} }

View file

@ -22,19 +22,30 @@ class GDriveHandler(AbstractProvider):
structure is build in constructor to map folder paths to folder ids, structure is build in constructor to map folder paths to folder ids,
which are used in API. Building of this tree might be expensive and which are used in API. Building of this tree might be expensive and
slow and should be run only when necessary. Currently is set to slow and should be run only when necessary. Currently is set to
lazy creation, created only after first call when necessary lazy creation, created only after first call when necessary.
Configuration for provider is in pype-config/presets/gdrive.json
Example of config:
"gdrive": { - site name
"credentials_url": "/my_secret_folder/credentials.json",
"root": { - could be "root": "/My Drive" for single root
"root_one": "/My Drive",
"root_two": "/My Drive/different_folder"
}
}
""" """
FOLDER_STR = 'application/vnd.google-apps.folder' FOLDER_STR = 'application/vnd.google-apps.folder'
def __init__(self, tree=None): def __init__(self, site_name, tree=None):
self.presets = None self.presets = None
self.active = False self.active = False
try: self.site_name = site_name
self.presets = config.get_presets()["sync_server"]["gdrive"]
except KeyError: self.presets = self.get_presets().get(site_name, None)
log.info(("Sync Server: There are no presets for Gdrive " + if not self.presets:
"provider."). log.info("Sync Server: There are no presets for {}.".
format(str(self.presets))) format(site_name))
return return
if not os.path.exists(self.presets["credentials_url"]): if not os.path.exists(self.presets["credentials_url"]):
@ -501,6 +512,24 @@ class GDriveHandler(AbstractProvider):
return False return False
return file[0] return file[0]
@classmethod
def get_presets(cls):
"""
Get presets for this provider
Returns:
(dictionary) of configured sites
"""
provider_presets = None
try:
provider_presets = config.get_presets()["sync_server"]["gdrive"]
except KeyError:
log.info(("Sync Server: There are no presets for Gdrive " +
"provider.").
format(str(provider_presets)))
return
log.info("Provider_presets::{}".format(provider_presets))
return provider_presets
def _handle_q(self, q, trashed=False): def _handle_q(self, q, trashed=False):
""" API list call contain trashed and hidden files/folder by default. """ API list call contain trashed and hidden files/folder by default.
Usually we dont want those, must be included in query explicitly. Usually we dont want those, must be included in query explicitly.
@ -520,6 +549,6 @@ class GDriveHandler(AbstractProvider):
if __name__ == '__main__': if __name__ == '__main__':
gd = GDriveHandler() gd = GDriveHandler('gdrive')
print(gd.root) print(gd.root)
print(gd.get_tree()) print(gd.get_tree())

View file

@ -14,33 +14,41 @@ class ProviderFactory:
enum. enum.
""" """
def __init__(self): def __init__(self):
self.providers = {} self.providers = {} # {'PROVIDER_LABEL: {cls, int},..}
self.creators = {}
def register_provider(self, provider, creator, batch_limit): def register_provider(self, provider, creator, batch_limit):
""" """
Provide all necessary information for one specific remote provider Provide all necessary information for one specific remote provider
:param provider: <string> - name of provider Args:
:param creator: <class> - class implementing AbstractProvider provider (string): name of provider
:param batch_limit: <int> - number of files that could be processed in creator (class): class implementing AbstractProvider
batch_limit (int): number of files that could be processed in
one loop (based on provider API quota) one loop (based on provider API quota)
:return: modifies self.providers Returns:
modifies self.providers and self.sites
""" """
self.providers[provider] = (creator, batch_limit) self.providers[provider] = (creator, batch_limit)
def get_provider(self, provider, tree=None): def get_provider(self, provider, site_name, tree=None):
""" """
Returns new instance of provider client. Returns new instance of provider client for specific site.
One provider could have multiple sites.
'tree' is used for injecting already created memory structure, 'tree' is used for injecting already created memory structure,
without it constructor of provider would need to calculate it without it constructor of provider would need to calculate it
from scratch, which could be expensive. from scratch, which could be expensive.
:param provider: <string> 'gdrive','S3' Args:
:param tree: <dictionary> - folder paths to folder id structure provider (string): 'gdrive','S3'
:return: <implementation of AbstractProvider> site_name (string): descriptor of site, different service accounts
must have different site name
tree (dictionary): - folder paths to folder id structure
Returns:
(implementation of AbstractProvider)
""" """
creator_info = self._get_creator_info(provider) creator_info = self._get_creator_info(provider)
site = creator_info[0](site_name, tree) # call init
return creator_info[0](tree) return site
def get_provider_batch_limit(self, provider): def get_provider_batch_limit(self, provider):
""" """
@ -50,8 +58,9 @@ class ProviderFactory:
(For example 'gdrive' has 1000 queries for 100 sec, one file could (For example 'gdrive' has 1000 queries for 100 sec, one file could
be multiple queries (one for each level of path + check if file be multiple queries (one for each level of path + check if file
exists) exists)
:param provider: <string> 'gdrive','S3' Args:
:return: provider (string): 'gdrive','S3'
Returns:
""" """
info = self._get_creator_info(provider) info = self._get_creator_info(provider)
return info[1] return info[1]
@ -60,8 +69,9 @@ class ProviderFactory:
""" """
Collect all necessary info for provider. Currently only creator Collect all necessary info for provider. Currently only creator
class and batch limit class and batch limit
:param provider: Args:
:return: provider (string): 'gdrive' etc
Returns:
""" """
creator_info = self.providers.get(provider) creator_info = self.providers.get(provider)
if not creator_info: if not creator_info:

View file

@ -1,5 +1,4 @@
from pype.api import config, Logger from pype.api import config, Logger
from pypeapp.lib.anatomy import Roots
from pype.lib import timeit from pype.lib import timeit
import threading import threading
@ -93,7 +92,6 @@ class SyncServer():
self.lock = threading.Lock() self.lock = threading.Lock()
self.connection = AvalonMongoDB() self.connection = AvalonMongoDB()
log.debug("connection {}".format(self.connection))
try: try:
self.presets = config.get_presets()["sync_server"]["config"] self.presets = config.get_presets()["sync_server"]["config"]
@ -108,11 +106,13 @@ class SyncServer():
self.remote_site = self.presets["remote_site"] self.remote_site = self.presets["remote_site"]
# try to activate providers, need to have valid credentials # try to activate providers, need to have valid credentials
self.active_provider_names = [] self.active_sites = []
for provider in lib.factory.providers.keys(): for provider in lib.factory.providers.keys():
handler = lib.factory.get_provider(provider) for site in lib.factory.providers[provider][0].get_presets().\
if handler.is_active(): keys():
self.active_provider_names.append(provider) handler = lib.factory.get_provider(provider, site)
if handler.is_active():
self.active_sites.append((provider, site))
@property @property
def active_site(self): def active_site(self):
@ -268,8 +268,8 @@ class SyncServer():
return SyncStatus.DO_UPLOAD return SyncStatus.DO_UPLOAD
else: else:
_, local_rec = self._get_provider_rec( _, local_rec = self._get_provider_rec(
sites, sites,
self.presets["active_site"]) or {} self.presets["active_site"]) or {}
if not local_rec or not local_rec.get("created_dt"): if not local_rec or not local_rec.get("created_dt"):
tries = self._get_tries_count_from_rec(local_rec) tries = self._get_tries_count_from_rec(local_rec)
@ -281,7 +281,8 @@ class SyncServer():
return SyncStatus.DO_NOTHING return SyncStatus.DO_NOTHING
async def upload(self, file, representation, provider_name, tree=None): async def upload(self, file, representation, provider_name, site_name,
tree=None):
""" """
Upload single 'file' of a 'representation' to 'provider'. Upload single 'file' of a 'representation' to 'provider'.
Source url is taken from 'file' portion, where {root} placeholder Source url is taken from 'file' portion, where {root} placeholder
@ -292,10 +293,12 @@ class SyncServer():
from GDrive), 'created_dt' - time of upload from GDrive), 'created_dt' - time of upload
Args: Args:
file <dictionary>: of file from representation in Mongo file (dictionary): of file from representation in Mongo
representation <dictionary>: of representation representation (dictionary): of representation
provider_name <string>: gdrive, gdc etc. provider_name (string): gdrive, gdc etc.
tree <dictionary>: injected memory structure for performance site_name (string): site on provider, single provider(gdrive) could
have multiple sites (different accounts, credentials)
tree (dictionary): injected memory structure for performance
""" """
# create ids sequentially, upload file in parallel later # create ids sequentially, upload file in parallel later
@ -303,7 +306,7 @@ class SyncServer():
# this part modifies structure on 'remote_site', only single # this part modifies structure on 'remote_site', only single
# thread can do that at a time, upload/download to prepared # thread can do that at a time, upload/download to prepared
# structure should be run in parallel # structure should be run in parallel
handler = lib.factory.get_provider(provider_name, tree) handler = lib.factory.get_provider(provider_name, site_name, tree)
remote_file = self._get_remote_file_path(file, remote_file = self._get_remote_file_path(file,
handler.get_roots_config() handler.get_roots_config()
) )
@ -315,7 +318,7 @@ class SyncServer():
if not folder_id: if not folder_id:
err = "Folder {} wasn't created. Check permissions.".\ err = "Folder {} wasn't created. Check permissions.".\
format(target_folder) format(target_folder)
raise NotADirectoryError(err) raise NotADirectoryError(err)
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
@ -326,7 +329,8 @@ class SyncServer():
True) True)
return file_id return file_id
async def download(self, file, representation, provider_name, tree=None): async def download(self, file, representation, provider_name,
site_name, tree=None):
""" """
Downloads file to local folder denoted in representation.Context. Downloads file to local folder denoted in representation.Context.
@ -334,13 +338,15 @@ class SyncServer():
file (dictionary) : info about processed file file (dictionary) : info about processed file
representation (dictionary): repr that 'file' belongs to representation (dictionary): repr that 'file' belongs to
provider_name (string): 'gdrive' etc provider_name (string): 'gdrive' etc
site_name (string): site on provider, single provider(gdrive) could
have multiple sites (different accounts, credentials)
tree (dictionary): injected memory structure for performance tree (dictionary): injected memory structure for performance
Returns: Returns:
(string) - 'name' of local file (string) - 'name' of local file
""" """
with self.lock: with self.lock:
handler = lib.factory.get_provider(provider_name, tree) handler = lib.factory.get_provider(provider_name, site_name, tree)
remote_file = self._get_remote_file_path(file, remote_file = self._get_remote_file_path(file,
handler.get_roots_config() handler.get_roots_config()
) )
@ -411,7 +417,9 @@ class SyncServer():
source_file = file.get("path", "") source_file = file.get("path", "")
log.debug("File {source_file} process {status} {error_str}". log.debug("File {source_file} process {status} {error_str}".
format(status, source_file, error_str)) format(status=status,
source_file=source_file,
error_str=error_str))
def tray_start(self): def tray_start(self):
""" """
@ -421,7 +429,7 @@ class SyncServer():
Returns: Returns:
None None
""" """
if self.presets and self.active_provider_names: if self.presets and self.active_sites:
self.sync_server_thread.start() self.sync_server_thread.start()
else: else:
log.debug("No presets or active providers. " + log.debug("No presets or active providers. " +
@ -612,11 +620,10 @@ class SyncServer():
local_root (string): value of {root} for local projects local_root (string): value of {root} for local projects
Returns: Returns:
<string> - absolute path on local system (string) - absolute path on local system
""" """
if not local_root: if not local_root:
raise ValueError("Unknown local root for file {}") raise ValueError("Unknown local root for file {}")
roots = Roots().default_roots()
path = file.get("path", "") path = file.get("path", "")
return path.format(**{"root": local_root}) return path.format(**{"root": local_root})
@ -631,7 +638,6 @@ class SyncServer():
Returns: Returns:
(string) - absolute path on remote location (string) - absolute path on remote location
""" """
log.debug("root_config::{}".format(root_config))
if isinstance(root_config, str): if isinstance(root_config, str):
root_config = {'root': root_config} root_config = {'root': root_config}
@ -720,8 +726,9 @@ class SyncServerThread(threading.Thread):
# upload process can find already uploaded file and # upload process can find already uploaded file and
# reuse same id # reuse same id
processed_file_path = set() processed_file_path = set()
for provider in self.module.active_provider_names: for active_site in self.module.active_sites:
handler = lib.factory.get_provider(provider) provider, site = active_site
handler = lib.factory.get_provider(provider, site)
limit = lib.factory.get_provider_batch_limit(provider) limit = lib.factory.get_provider_batch_limit(provider)
# first call to get_provider could be expensive, its # first call to get_provider could be expensive, its
# building folder tree structure in memory # building folder tree structure in memory
@ -743,15 +750,16 @@ class SyncServerThread(threading.Thread):
tree = handler.get_tree() tree = handler.get_tree()
limit -= 1 limit -= 1
task = asyncio.create_task( task = asyncio.create_task(
self.module.upload(file, self.module.upload(file,
sync, sync,
provider, provider,
tree)) site,
tree))
task_files_to_process.append(task) task_files_to_process.append(task)
# store info for exception handling # store info for exception handling
files_processed_info.append((file, files_processed_info.append((file,
sync, sync,
provider)) site))
processed_file_path.add(file_path) processed_file_path.add(file_path)
if status == SyncStatus.DO_DOWNLOAD: if status == SyncStatus.DO_DOWNLOAD:
tree = handler.get_tree() tree = handler.get_tree()
@ -760,6 +768,7 @@ class SyncServerThread(threading.Thread):
self.module.download(file, self.module.download(file,
sync, sync,
provider, provider,
site,
tree)) tree))
task_files_to_process.append(task) task_files_to_process.append(task)
@ -771,11 +780,11 @@ class SyncServerThread(threading.Thread):
log.debug("Sync tasks count {}". log.debug("Sync tasks count {}".
format(len(task_files_to_process))) format(len(task_files_to_process)))
files_created = await asyncio.gather( files_created = await asyncio.gather(
*task_files_to_process, *task_files_to_process,
return_exceptions=True) return_exceptions=True)
for file_id, info in zip(files_created, for file_id, info in zip(files_created,
files_processed_info): files_processed_info):
file, representation, provider = info file, representation, site = info
error = None error = None
if isinstance(file_id, BaseException): if isinstance(file_id, BaseException):
error = str(file_id) error = str(file_id)
@ -783,7 +792,7 @@ class SyncServerThread(threading.Thread):
self.module.update_db(file_id, self.module.update_db(file_id,
file, file,
representation, representation,
provider, site,
error) error)
duration = time.time() - start_time duration = time.time() - start_time

View file

@ -30,7 +30,7 @@ class WebSocketServer():
WebSocketServer._instance = self WebSocketServer._instance = self
self.client = None self.client = None
self.handlers = {} self.handlers = {}
port = 8099
websocket_url = os.getenv("WEBSOCKET_URL") websocket_url = os.getenv("WEBSOCKET_URL")
if websocket_url: if websocket_url:
parsed = urllib.parse.urlparse(websocket_url) parsed = urllib.parse.urlparse(websocket_url)