"""Performance testing script for MongoDB 'files' lookups.

Could be deleted later, no real functionality.

Origin: commit a908e2867ec0ac0f1b3e11cbc8b21965cae2207b,
``pype/tests/test_mongo_performance.py``.
"""
import logging
import os
import random
import re
import timeit
from pprint import pprint

import pytest

# The MongoDB driver is imported defensively: the pure document builders
# (`get_files*`) stay importable without it, while anything that touches the
# database fails with a clear error in `TestPerformance.__init__`.
try:
    import bson
    import pymongo
except ImportError:
    bson = None
    pymongo = None


class TestPerformance:
    '''
        Class for testing performance of representations and their 'files'
        parts.

        The discussion is whether an embedded array:
                            'files' : [ {'_id': '1111', 'path': '...'},
                                        {'_id': ...}]
                     OR documents:
                            'files' : {
                                        '1111': {'path': '...'},
                                        '2222': {'path': '...'}
                                      }
                     is faster.

        Results reported by the author: without an additional partial index
        the document version is 3x faster; with the index the array version
        is 50x faster than the document version.

        Partial index, something like:
        db.getCollection('performance_test').createIndex
            ({'files._id': 1},
            {partialFilterExpression: {'files': {'$exists': true}}})
        The author could not create it this way and had to create it
        manually in Compass.
        NOTE(review): the original snippet spelled the option
        'partialFilterExpresion'; that may be why it failed - unverified.
    '''

    # Despite its name this is a benchmark helper with a constructor, not a
    # pytest test case; keep pytest from trying to collect it.
    __test__ = False

    MONGO_URL = 'mongodb://localhost:27017'
    MONGO_DB = 'performance_test'
    MONGO_COLLECTION = 'performance_test'

    # Path of a dummy file; 'letter' distinguishes the three files of one
    # representation, 'index' is the record number.
    FILE_PATH_TEMPLATE = (
        "c:/Test/Assets/Cylinder/publish/workfile/workfileLookdev/v001/"
        "test_Cylinder{letter}_workfileLookdev_v{index:03}.mb"
    )

    # Kept for backward compatibility only; every instance rebinds its own
    # list in __init__, so this shared list is never mutated.
    inserted_ids = []

    def __init__(self, version='array'):
        '''
            Connect to MongoDB and select the collection for 'version'.

            :param version: 'array' - files as embedded array,
                            anything else ('doc') - files as document;
                            the latter uses the '<collection>_doc' collection
            :raises ImportError: if the pymongo driver is not installed
        '''
        if pymongo is None:
            raise ImportError(
                'pymongo (with bson) is required to run the performance test')

        self.client = pymongo.MongoClient(self.MONGO_URL)
        self.db = self.client[self.MONGO_DB]

        self.version = version

        self.collection_name = self.MONGO_COLLECTION
        if self.version != 'array':
            self.collection_name = self.MONGO_COLLECTION + '_doc'

        self.collection = self.db[self.collection_name]

        self.ids = []  # file ids (as strings) queried by run()
        self.inserted_ids = []  # file ids created by prepare()

    def prepare(self, no_of_records=100000):
        '''
            Purge the collection and fill it with 'no_of_records'
            representations, each with a 'files' segment of three files.
            The shape of 'files' depends on the 'version' given to the
            constructor ('array' or 'doc').

            All generated file ids are stored in one extra bookkeeping
            document under the 'inserted_id' key so run() can pick them up.

            :param no_of_records: number of representations to insert
            :return: None
        '''
        print('Purging {} collection'.format(self.collection_name))
        self.collection.delete_many({})

        # The collection was just purged, ids from an earlier prepare() call
        # would point to records that no longer exist.
        self.inserted_ids = []

        parent_id = bson.objectid.ObjectId()

        insert_recs = []
        for i in range(no_of_records):
            file_ids = [bson.objectid.ObjectId() for _ in range(3)]
            self.inserted_ids.extend(file_ids)
            insert_recs.append(
                self._build_representation(i, parent_id, file_ids))

        print('Prepared {} records in {} collection'.format(
            no_of_records, self.collection_name))

        # insert_many() rejects an empty list of documents
        if insert_recs:
            self.collection.insert_many(insert_recs)

        self.collection.insert_one({"inserted_id": self.inserted_ids})
        print('-' * 50)

    def run(self, queries=1000, loops=3):
        '''
            Run X 'queries' that are searching the collection, Y 'loops'
            times, and print the duration of each loop together with the
            running count of successful lookups.

            :param queries: how many times to do ..find_one(...) per loop
            :param loops: how many times to repeat the batch of queries
            :raises RuntimeError: if the collection holds no file ids,
                                  i.e. prepare() was never run on it
            :return: None
        '''
        print('Testing version {} on {}'.format(self.version,
                                                self.collection_name))

        # Read the ids straight from the bookkeeping document written by
        # prepare(); they are stored as ObjectIds, while the 'files' segments
        # hold their string form.
        bookkeeping = self.collection.find_one(
            {"inserted_id": {"$exists": True}}) or {}
        self.ids = [str(file_id)
                    for file_id in bookkeeping.get("inserted_id", [])]
        if not self.ids:
            raise RuntimeError(
                'No inserted ids found in {} collection, run prepare() first'
                .format(self.collection_name))

        found_cnt = 0
        for _ in range(loops):
            start = timeit.default_timer()
            for _ in range(queries):
                val = random.choice(self.ids)
                if self.collection.find_one(self._build_query(val)):
                    found_cnt += 1

            end = timeit.default_timer()
            print('duration per loop {}'.format(end - start))
            print("found_cnt {}".format(found_cnt))

    def get_files(self, mode, i, file_id, file_id2, file_id3):
        '''
            Wrapper to decide if 'array' or document version should be used
            :param mode: 'array'|'doc' (anything but 'array' means document)
            :param i: step number
            :param file_id: ObjectId of first dummy file
            :param file_id2: ObjectId of second dummy file
            :param file_id3: ObjectId of third dummy file
            :return: list of file dicts for 'array', dict keyed by file id
                     otherwise
        '''
        if mode == 'array':
            return self.get_files_array(i, file_id, file_id2, file_id3)
        return self.get_files_doc(i, file_id, file_id2, file_id3)

    def get_files_array(self, i, file_id, file_id2, file_id3):
        '''
            Build 'files' as an embedded array: a list of three file dicts,
            each carrying its id (as string) under the '_id' key.
        '''
        return [
            self._file_entry(letter, i, current_id)
            for letter, current_id in zip("ABC", (file_id, file_id2, file_id3))
        ]

    def get_files_doc(self, i, file_id, file_id2, file_id3):
        '''
            Build 'files' as a document: a dict of three file dicts keyed by
            the file id (as string); the entries themselves carry no '_id'.
        '''
        return {
            '{}'.format(current_id): self._file_entry(letter, i)
            for letter, current_id in zip("ABC", (file_id, file_id2, file_id3))
        }

    def _file_entry(self, letter, i, file_id=None):
        '''Build one dummy file dict; '_id' is included only if given.'''
        entry = {"path": self.FILE_PATH_TEMPLATE.format(letter=letter,
                                                        index=i)}
        if file_id is not None:
            entry["_id"] = '{}'.format(file_id)
        entry["hash"] = "temphash"
        entry["sites"] = ["studio"]
        entry["size"] = 87236
        return entry

    def _build_query(self, val):
        '''Build the find_one() filter locating file id 'val' (a string).'''
        if self.version == 'array':
            # prepared for partial index, without 'files': exists
            # it won't engage
            return {'files': {"$exists": True}, 'files._id': val}
        return {"files.{}".format(val): {"$exists": True}}

    def _build_representation(self, i, parent_id, file_ids):
        '''Build the i-th dummy representation document.'''
        return {
            "files": self.get_files(self.version, i, *file_ids),
            "context": {
                "subset": "workfileLookdev",
                "username": "petrk",
                "task": "lookdev",
                "family": "workfile",
                "hierarchy": "Assets",
                "project": {"code": "test", "name": "Test"},
                "version": 1,
                "asset": "Cylinder",
                "representation": "mb",
                "root": "C:/projects"
            },
            "dependencies": [],
            "name": "mb",
            "parent": {"oid": '{}'.format(parent_id)},
            "data": {
                "path": "C:\\projects\\Test\\Assets\\Cylinder\\publish\\workfile\\workfileLookdev\\v001\\test_Cylinder_workfileLookdev_v001.mb",
                "template": "{root}\\{project[name]}\\{hierarchy}\\{asset}\\publish\\{family}\\{subset}\\v{version:0>3}\\{project[code]}_{asset}_{subset}_v{version:0>3}<_{output}><.{frame:0>4}>.{representation}"
            },
            "type": "representation",
            "schema": "pype:representation-2.0"
        }


def main():
    '''Benchmark the 'array' version, then the 'doc' version.'''
    tp = TestPerformance('array')
    tp.prepare()  # enable to prepare data
    tp.run(1000, 3)

    print('-' * 50)

    tp = TestPerformance('doc')
    tp.prepare()  # enable to prepare data
    tp.run(1000, 3)


if __name__ == '__main__':
    main()