Project packager: Backup and restore can store only database (#4879)

* added helper functions to client mongo api

* pack and unpack project functions can work without project files

* added flag argument to pack project command to zip only project files

* unpack project has also only project argument

* Fix extractions
This commit is contained in:
Jakub Trllo 2023-04-27 23:41:00 +02:00 committed by GitHub
parent dc527e3c5e
commit 4107874eb9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 394 additions and 119 deletions

View file

@ -415,11 +415,12 @@ def repack_version(directory):
@main.command()
@click.option("--project", help="Project name")
@click.option(
"--dirpath", help="Directory where package is stored", default=None
)
def pack_project(project, dirpath):
"--dirpath", help="Directory where package is stored", default=None)
@click.option(
"--dbonly", help="Store only Database data", default=False, is_flag=True)
def pack_project(project, dirpath, dbonly):
"""Create a package of project with all files and database dump."""
PypeCommands().pack_project(project, dirpath)
PypeCommands().pack_project(project, dirpath, dbonly)
@main.command()
@ -427,9 +428,11 @@ def pack_project(project, dirpath):
@click.option(
"--root", help="Replace root which was stored in project", default=None
)
def unpack_project(zipfile, root):
@click.option(
"--dbonly", help="Store only Database data", default=False, is_flag=True)
def unpack_project(zipfile, root, dbonly):
"""Create a package of project with all files and database dump."""
PypeCommands().unpack_project(zipfile, root)
PypeCommands().unpack_project(zipfile, root, dbonly)
@main.command()

View file

@ -5,6 +5,12 @@ import logging
import pymongo
import certifi
from bson.json_util import (
loads,
dumps,
CANONICAL_JSON_OPTIONS
)
if sys.version_info[0] == 2:
from urlparse import urlparse, parse_qs
else:
@ -15,6 +21,49 @@ class MongoEnvNotSet(Exception):
pass
def documents_to_json(docs):
    """Serialize mongo document/s to a json string.

    The bson canonical json options are used for the serialization.

    Args:
        docs (Union[list[dict[str, Any]], dict[str, Any]]): Document/s to
            convert to json string.

    Returns:
        str: Json string with mongo documents.
    """

    json_string = dumps(docs, json_options=CANONICAL_JSON_OPTIONS)
    return json_string
def load_json_file(filepath):
    """Load mongo documents from a json file.

    Content of the file is parsed with 'bson.json_util.loads'.

    Args:
        filepath (str): Path to a json file.

    Returns:
        Union[dict[str, Any], list[dict[str, Any]]]: Loaded content from a
            json file.

    Raises:
        ValueError: When the passed path does not exist.
    """

    if not os.path.exists(filepath):
        raise ValueError("Path {} was not found".format(filepath))

    with open(filepath, "r") as stream:
        content = stream.read()
    # 'read' already returns one string, re-joining its characters is
    #   only wasted work
    return loads(content)
def get_project_database_name():
    """Name of the mongo database where projects are stored.

    Value of environment variable 'AVALON_DB' is used. When the variable is
    not set or is empty then "avalon" is returned.

    Returns:
        str: Name of database where projects are.
    """

    db_name = os.environ.get("AVALON_DB")
    if db_name:
        return db_name
    return "avalon"
def _decompose_url(url):
"""Decompose mongo url to basic components.
@ -210,12 +259,102 @@ class OpenPypeMongoConnection:
return mongo_client
def get_project_database():
db_name = os.environ.get("AVALON_DB") or "avalon"
return OpenPypeMongoConnection.get_mongo_client()[db_name]
# ------ Helper Mongo functions ------
# Functions can be helpful with custom tools to backup/restore mongo state.
# Not meant as API functionality that should be used in production codebase!
def get_collection_documents(database_name, collection_name, as_json=False):
    """Query all documents from a collection.

    Args:
        database_name (str): Name of database where to look for collection.
        collection_name (str): Name of collection to query.
        as_json (Optional[bool]): Return documents converted to a json
            string instead of a list. Default: 'False'

    Returns:
        Union[list[dict[str, Any]], str]: Queried documents.
    """

    mongo_client = OpenPypeMongoConnection.get_mongo_client()
    collection = mongo_client[database_name][collection_name]
    # Empty filter -> every document in the collection
    documents = list(collection.find({}))
    if not as_json:
        return documents
    return documents_to_json(documents)
def get_project_connection(project_name):
def store_collection(filepath, database_name, collection_name):
    """Store collection documents to a json file.

    All documents of the collection are queried as a json string and
    written to the file. Existing file is overwritten.

    Args:
        filepath (str): Path to a json file where documents will be stored.
        database_name (str): Name of database where to look for collection.
        collection_name (str): Name of collection to store.
    """

    # Make sure directory for output file exists
    # - 'dirname' is an empty string for a bare filename (file in current
    #   working directory) and 'os.makedirs' would fail on it
    dirpath = os.path.dirname(filepath)
    if dirpath and not os.path.isdir(dirpath):
        os.makedirs(dirpath)

    content = get_collection_documents(database_name, collection_name, True)
    with open(filepath, "w") as stream:
        stream.write(content)
def replace_collection_documents(docs, database_name, collection_name):
    """Replace all documents in a collection with passed documents.

    Warnings:
        All existing documents in collection will be removed if there are any.

    Args:
        docs (Union[list[dict[str, Any]], dict[str, Any]]): New documents.
            A single document passed as a dictionary is handled as a list
            with one document. When empty, the collection is only dropped.
        database_name (str): Name of database where to look for collection.
        collection_name (str): Name of collection where new documents are
            uploaded.
    """

    # Json file may contain only a single document
    if isinstance(docs, dict):
        docs = [docs]

    client = OpenPypeMongoConnection.get_mongo_client()
    database = client[database_name]
    if collection_name in database.list_collection_names():
        database.drop_collection(collection_name)

    # 'insert_many' does not accept empty list of documents
    #   - restoring an empty collection must not crash after the drop
    if not docs:
        return

    col = database[collection_name]
    col.insert_many(docs)
def restore_collection(filepath, database_name, collection_name):
    """Replace content of a collection with documents from a json file.

    Warnings:
        All existing documents in collection will be removed if there are any.

    Args:
        filepath (str): Path to a json file with documents.
        database_name (str): Name of database where to look for collection.
        collection_name (str): Name of collection where loaded documents are
            uploaded.
    """

    replace_collection_documents(
        load_json_file(filepath), database_name, collection_name
    )
def get_project_database(database_name=None):
    """Database object where project collections are.

    Args:
        database_name (Optional[str]): Custom name of database. Result of
            'get_project_database_name' is used when not passed.

    Returns:
        pymongo.database.Database: Database where project collections are
            stored.
    """

    db_name = database_name or get_project_database_name()
    mongo_client = OpenPypeMongoConnection.get_mongo_client()
    return mongo_client[db_name]
def get_project_connection(project_name, database_name=None):
"""Direct access to mongo collection.
We're trying to avoid using direct access to mongo. This should be used
@ -223,13 +362,83 @@ def get_project_connection(project_name):
api calls for that.
Args:
project_name(str): Project name for which collection should be
project_name (str): Project name for which collection should be
returned.
database_name (Optional[str]): Custom name of database.
Returns:
pymongo.Collection: Collection realated to passed project.
pymongo.collection.Collection: Collection related to passed project.
"""
if not project_name:
raise ValueError("Invalid project name {}".format(str(project_name)))
return get_project_database()[project_name]
return get_project_database(database_name)[project_name]
def get_project_documents(project_name, database_name=None):
    """Query all documents from a project collection.

    Args:
        project_name (str): Name of project.
        database_name (Optional[str]): Name of mongo database where to look for
            project. Result of 'get_project_database_name' is used when not
            passed.

    Returns:
        list[dict[str, Any]]: Documents in project collection.
    """

    db_name = database_name or get_project_database_name()
    return get_collection_documents(db_name, project_name)
def store_project_documents(project_name, filepath, database_name=None):
    """Store project documents to a file as json string.

    Args:
        project_name (str): Name of project to store.
        filepath (str): Path to a json file where output will be stored.
        database_name (Optional[str]): Name of mongo database where to look for
            project. Result of 'get_project_database_name' is used when not
            passed.
    """

    db_name = database_name or get_project_database_name()
    store_collection(filepath, db_name, project_name)
def replace_project_documents(project_name, docs, database_name=None):
    """Replace project documents in mongo with passed documents.

    Warnings:
        Existing project collection is removed if exists in mongo.

    Args:
        project_name (str): Name of project.
        docs (list[dict[str, Any]]): Documents to restore.
        database_name (Optional[str]): Name of mongo database where project
            collection will be created. Result of
            'get_project_database_name' is used when not passed.
    """

    db_name = database_name or get_project_database_name()
    replace_collection_documents(docs, db_name, project_name)
def restore_project_documents(project_name, filepath, database_name=None):
    """Replace project documents in mongo with documents from a json file.

    Warnings:
        Existing project collection is removed if exists in mongo.

    Args:
        project_name (str): Name of project.
        filepath (str): Path to a json file with project documents.
        database_name (Optional[str]): Name of mongo database where project
            collection will be created. Result of
            'get_project_database_name' is used when not passed.
    """

    db_name = database_name or get_project_database_name()
    restore_collection(filepath, db_name, project_name)

View file

@ -1,16 +1,19 @@
"""These lib functions are primarily for development purposes.
"""These lib functions are for development purposes.
WARNING: This is not meant for production data.
WARNING:
This is not meant for production data. Please don't write code which is
dependent on functionality here.
Goal is to be able create package of current state of project with related
documents from mongo and files from disk to zip file and then be able recreate
the project based on the zip.
Goal is to be able to create package of current state of project with related
documents from mongo and files from disk to zip file and then be able
to recreate the project based on the zip.
This gives the ability to create a project where changes and tests can be done.
Keep in mind that to be able create a package of project has few requirements.
Possible requirement should be listed in 'pack_project' function.
Keep in mind that creating a package of a project has a few requirements.
Possible requirements should be listed in the 'pack_project' function.
"""
import os
import json
import platform
@ -19,16 +22,12 @@ import shutil
import datetime
import zipfile
from bson.json_util import (
loads,
dumps,
CANONICAL_JSON_OPTIONS
from openpype.client.mongo import (
load_json_file,
get_project_connection,
replace_project_documents,
store_project_documents,
)
from openpype.client import (
get_project,
get_whole_project,
)
from openpype.pipeline import AvalonMongoDB
DOCUMENTS_FILE_NAME = "database"
METADATA_FILE_NAME = "metadata"
@ -43,7 +42,52 @@ def add_timestamp(filepath):
return new_base + ext
def pack_project(project_name, destination_dir=None):
def get_project_document(project_name, database_name=None):
    """Query the project document from a project collection.

    Function 'get_project' from client api cannot be used as it does not allow
    to change which 'database_name' is used.

    Args:
        project_name (str): Name of project.
        database_name (Optional[str]): Name of mongo database where to look for
            project.

    Returns:
        Union[dict[str, Any], None]: Project document or None.
    """

    project_collection = get_project_connection(project_name, database_name)
    project_filter = {"type": "project"}
    return project_collection.find_one(project_filter)
def _pack_files_to_zip(zip_stream, source_path, root_path):
    """Add all files found under a directory to a zip stream.

    Files are stored in the archive under 'PROJECT_FILES_DIR' followed by
    their path relative to 'root_path'.

    Args:
        zip_stream (zipfile.ZipFile): Stream to a zipfile.
        source_path (str): Path to a directory which is walked for files.
        root_path (str): Path to a directory which is used for calculation
            of relative path.
    """

    for dirpath, _dirnames, filenames in os.walk(source_path):
        for filename in filenames:
            src_path = os.path.join(dirpath, filename)
            # TODO add one more folder
            relative_path = os.path.relpath(src_path, root_path)
            zip_stream.write(
                src_path, os.path.join(PROJECT_FILES_DIR, relative_path)
            )
def pack_project(
project_name,
destination_dir=None,
only_documents=False,
database_name=None
):
"""Make a package of a project with mongo documents and files.
This function has few restrictions:
@ -52,13 +96,18 @@ def pack_project(project_name, destination_dir=None):
"{root[...]}/{project[name]}"
Args:
project_name(str): Project that should be packaged.
destination_dir(str): Optional path where zip will be stored. Project's
root is used if not passed.
project_name (str): Project that should be packaged.
destination_dir (Optional[str]): Optional path where zip will be
stored. Project's root is used if not passed.
only_documents (Optional[bool]): Pack only Mongo documents and skip
files.
database_name (Optional[str]): Custom database name from which is
project queried.
"""
print("Creating package of project \"{}\"".format(project_name))
# Validate existence of project
project_doc = get_project(project_name)
project_doc = get_project_document(project_name, database_name)
if not project_doc:
raise ValueError("Project \"{}\" was not found in database".format(
project_name
@ -119,12 +168,7 @@ def pack_project(project_name, destination_dir=None):
temp_docs_json = s.name
# Query all project documents and store them to temp json
docs = list(get_whole_project(project_name))
data = dumps(
docs, json_options=CANONICAL_JSON_OPTIONS
)
with open(temp_docs_json, "w") as stream:
stream.write(data)
store_project_documents(project_name, temp_docs_json, database_name)
print("Packing files into zip")
# Write all to zip file
@ -133,16 +177,10 @@ def pack_project(project_name, destination_dir=None):
zip_stream.write(temp_metadata_json, METADATA_FILE_NAME + ".json")
# Add database documents
zip_stream.write(temp_docs_json, DOCUMENTS_FILE_NAME + ".json")
# Add project files to zip
for root, _, filenames in os.walk(project_source_path):
for filename in filenames:
filepath = os.path.join(root, filename)
# TODO add one more folder
archive_name = os.path.join(
PROJECT_FILES_DIR,
os.path.relpath(filepath, root_path)
)
zip_stream.write(filepath, archive_name)
if not only_documents:
_pack_files_to_zip(zip_stream, project_source_path, root_path)
print("Cleaning up")
# Cleanup
@ -152,80 +190,30 @@ def pack_project(project_name, destination_dir=None):
print("*** Packing finished ***")
def unpack_project(path_to_zip, new_root=None):
"""Unpack project zip file to recreate project.
def _unpack_project_files(unzip_dir, root_path, project_name):
"""Move project files from unarchived temp folder to new root.
Unpack is skipped if source files are not available in the zip. That can
happen if nothing was published yet or only documents were stored to
package.
Args:
path_to_zip(str): Path to zip which was created using 'pack_project'
function.
new_root(str): Optional way how to set different root path for unpacked
project.
unzip_dir (str): Location where zip was unzipped.
root_path (str): Path to new root.
project_name (str): Name of project.
"""
print("Unpacking project from zip {}".format(path_to_zip))
if not os.path.exists(path_to_zip):
print("Zip file does not exists: {}".format(path_to_zip))
src_project_files_dir = os.path.join(
unzip_dir, PROJECT_FILES_DIR, project_name
)
# Skip if files are not in the zip
if not os.path.exists(src_project_files_dir):
return
tmp_dir = tempfile.mkdtemp(prefix="unpack_")
print("Zip is extracted to temp: {}".format(tmp_dir))
with zipfile.ZipFile(path_to_zip, "r") as zip_stream:
zip_stream.extractall(tmp_dir)
metadata_json_path = os.path.join(tmp_dir, METADATA_FILE_NAME + ".json")
with open(metadata_json_path, "r") as stream:
metadata = json.load(stream)
docs_json_path = os.path.join(tmp_dir, DOCUMENTS_FILE_NAME + ".json")
with open(docs_json_path, "r") as stream:
content = stream.readlines()
docs = loads("".join(content))
low_platform = platform.system().lower()
project_name = metadata["project_name"]
source_root = metadata["root"]
root_path = source_root[low_platform]
# Drop existing collection
dbcon = AvalonMongoDB()
database = dbcon.database
if project_name in database.list_collection_names():
database.drop_collection(project_name)
print("Removed existing project collection")
print("Creating project documents ({})".format(len(docs)))
# Create new collection with loaded docs
collection = database[project_name]
collection.insert_many(docs)
# Skip change of root if is the same as the one stored in metadata
if (
new_root
and (os.path.normpath(new_root) == os.path.normpath(root_path))
):
new_root = None
if new_root:
print("Using different root path {}".format(new_root))
root_path = new_root
project_doc = get_project(project_name)
roots = project_doc["config"]["roots"]
key = tuple(roots.keys())[0]
update_key = "config.roots.{}.{}".format(key, low_platform)
collection.update_one(
{"_id": project_doc["_id"]},
{"$set": {
update_key: new_root
}}
)
# Make sure root path exists
if not os.path.exists(root_path):
os.makedirs(root_path)
src_project_files_dir = os.path.join(
tmp_dir, PROJECT_FILES_DIR, project_name
)
dst_project_files_dir = os.path.normpath(
os.path.join(root_path, project_name)
)
@ -241,8 +229,83 @@ def unpack_project(path_to_zip, new_root=None):
))
shutil.move(src_project_files_dir, dst_project_files_dir)
def unpack_project(
    path_to_zip, new_root=None, database_only=None, database_name=None
):
    """Unpack project zip file to recreate project.

    Project documents stored in the zip replace the project collection in
    mongo. Project files are moved to the project root unless only database
    is unpacked or the zip does not contain them.

    Nothing happens (only a message is printed) when the zip file does not
    exist.

    Args:
        path_to_zip (str): Path to zip which was created using 'pack_project'
            function.
        new_root (Optional[str]): Optional way how to set different root path
            for unpacked project.
        database_only (Optional[bool]): Unpack only database from zip.
        database_name (Optional[str]): Name of database where project will be
            recreated.
    """

    if database_only is None:
        database_only = False

    print("Unpacking project from zip {}".format(path_to_zip))
    if not os.path.exists(path_to_zip):
        print("Zip file does not exists: {}".format(path_to_zip))
        return

    tmp_dir = tempfile.mkdtemp(prefix="unpack_")
    print("Zip is extracted to temp: {}".format(tmp_dir))
    # Temp directory must be removed even if the unpack crashes
    try:
        with zipfile.ZipFile(path_to_zip, "r") as zip_stream:
            if database_only:
                # Extract only files required to recreate mongo documents
                for filename in (
                    "{}.json".format(METADATA_FILE_NAME),
                    "{}.json".format(DOCUMENTS_FILE_NAME),
                ):
                    zip_stream.extract(filename, tmp_dir)
            else:
                zip_stream.extractall(tmp_dir)

        metadata_json_path = os.path.join(
            tmp_dir, METADATA_FILE_NAME + ".json"
        )
        with open(metadata_json_path, "r") as stream:
            metadata = json.load(stream)

        docs_json_path = os.path.join(tmp_dir, DOCUMENTS_FILE_NAME + ".json")
        docs = load_json_file(docs_json_path)

        low_platform = platform.system().lower()
        project_name = metadata["project_name"]
        source_root = metadata["root"]
        root_path = source_root[low_platform]

        # Drop existing collection and create it again with loaded docs
        replace_project_documents(project_name, docs, database_name)
        print("Creating project documents ({})".format(len(docs)))

        # Skip change of root if is the same as the one stored in metadata
        if (
            new_root
            and (os.path.normpath(new_root) == os.path.normpath(root_path))
        ):
            new_root = None

        if new_root:
            print("Using different root path {}".format(new_root))
            root_path = new_root

            # Project document must come from the same database where the
            #   update happens
            project_doc = get_project_document(project_name, database_name)
            roots = project_doc["config"]["roots"]
            key = tuple(roots.keys())[0]
            update_key = "config.roots.{}.{}".format(key, low_platform)
            collection = get_project_connection(project_name, database_name)
            collection.update_one(
                {"_id": project_doc["_id"]},
                {"$set": {
                    update_key: new_root
                }}
            )

        _unpack_project_files(tmp_dir, root_path, project_name)

    finally:
        # Cleanup
        print("Cleaning up")
        shutil.rmtree(tmp_dir)

    print("*** Unpack finished ***")

View file

@ -353,12 +353,12 @@ class PypeCommands:
version_packer = VersionRepacker(directory)
version_packer.process()
def pack_project(self, project_name, dirpath):
def pack_project(self, project_name, dirpath, database_only):
from openpype.lib.project_backpack import pack_project
pack_project(project_name, dirpath)
pack_project(project_name, dirpath, database_only)
def unpack_project(self, zip_filepath, new_root):
def unpack_project(self, zip_filepath, new_root, database_only):
from openpype.lib.project_backpack import unpack_project
unpack_project(zip_filepath, new_root)
unpack_project(zip_filepath, new_root, database_only)