From a908e2867ec0ac0f1b3e11cbc8b21965cae2207b Mon Sep 17 00:00:00 2001
From: "petr.kalis" <petr.kalis@gmail.com>
Date: Fri, 12 Jun 2020 20:42:58 +0200
Subject: [PATCH] Performance testing script

Could be deleted later, no real functionality
---
 pype/tests/test_mongo_performance.py | 232 +++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 pype/tests/test_mongo_performance.py

diff --git a/pype/tests/test_mongo_performance.py b/pype/tests/test_mongo_performance.py
new file mode 100644
index 0000000000..6aa8e2ca43
--- /dev/null
+++ b/pype/tests/test_mongo_performance.py
@@ -0,0 +1,232 @@
+import pytest
+import logging
+from pprint import pprint
+import os
+import re
+import random
+import timeit
+
+import pymongo
+import bson
+
+class TestPerformance():
+    '''
+        Class for testing performance of representations and their 'files' parts.
+        The question is whether an embedded array:
+                            'files' : [ {'_id': '1111', 'path':'....'},
+                                        {'_id'...}]
+                     OR documents:
+                            'files' : {
+                                            '1111': {'path':'....'},
+                                            '2222': {'path':'...'}
+                                        }
+                     is faster.
+
+        Current results: without an additional partial index the document version is 3x faster.
+            With the index the array version is 50x faster than the document version.
+
+        Partial index, something like:
+        db.getCollection('performance_test').createIndex
+            ({'files._id': 1},
+            {partialFilterExpression: {'files': {'$exists': true}}})
+        NOTE: this did not work for me, had to create it manually in Compass
+
+    '''
+
+    MONGO_URL = 'mongodb://localhost:27017'
+    MONGO_DB = 'performance_test'
+    MONGO_COLLECTION = 'performance_test'
+
+    inserted_ids = []
+
+    def __init__(self, version='array'):
+        '''
+            Create the Mongo client and select the collection for 'version'.
+
+        :param version: 'array' - files as embedded array (base collection),
+                        anything else, e.g. 'doc' - as document ('_doc' suffix)
+        '''
+        self.client = pymongo.MongoClient(self.MONGO_URL)
+        self.db = self.client[self.MONGO_DB]
+        self.collection_name = self.MONGO_COLLECTION
+
+        self.version = version
+
+        if self.version != 'array':
+            self.collection_name = self.MONGO_COLLECTION + '_doc'
+
+        self.collection = self.db[self.collection_name]
+
+        self.ids = [] # ids that run() samples its queries from
+        self.inserted_ids = [] # file ids generated by prepare()
+
+    def prepare(self, no_of_records=100000):
+        '''
+            Purge the collection and insert 'no_of_records' representations,
+            each with a 'files' segment shaped by 'version' ('array' or 'doc').
+        :param no_of_records: number of representation documents to insert
+        '''
+        print('Purging {} collection'.format(self.collection_name))
+        self.collection.delete_many({})
+
+        id = bson.objectid.ObjectId() # one dummy parent id shared by all records
+
+        insert_recs = []
+        for i in range(no_of_records):
+            file_id =  bson.objectid.ObjectId()
+            file_id2 =  bson.objectid.ObjectId()
+            file_id3 =  bson.objectid.ObjectId()
+
+            self.inserted_ids.extend([file_id, file_id2, file_id3])
+
+            document = {"files": self.get_files(self.version, i,
+                                                file_id, file_id2, file_id3)
+                        ,
+                        "context": {
+                            "subset": "workfileLookdev",
+                            "username": "petrk",
+                            "task": "lookdev",
+                            "family": "workfile",
+                            "hierarchy": "Assets",
+                            "project": {"code": "test", "name": "Test"},
+                            "version": 1,
+                            "asset": "Cylinder",
+                            "representation": "mb",
+                            "root": "C:/projects"
+                        },
+                        "dependencies": [],
+                        "name": "mb",
+                        "parent": {"oid": '{}'.format(id)},
+                        "data": {
+                                    "path": "C:\\projects\\Test\\Assets\\Cylinder\\publish\\workfile\\workfileLookdev\\v001\\test_Cylinder_workfileLookdev_v001.mb",
+                                    "template": "{root}\\{project[name]}\\{hierarchy}\\{asset}\\publish\\{family}\\{subset}\\v{version:0>3}\\{project[code]}_{asset}_{subset}_v{version:0>3}<_{output}><.{frame:0>4}>.{representation}"
+                        },
+                        "type": "representation",
+                        "schema": "pype:representation-2.0"
+            }
+
+            insert_recs.append(document)
+
+        print('Prepared {} records in {} collection'.format(no_of_records, self.collection_name))
+        id = self.collection.insert_many(insert_recs) # insert result is not used afterwards
+        # TODO refactor: store the ids so that run() does not need the ugly regex
+        self.collection.insert_one({"inserted_id" : self.inserted_ids})
+        print('-' * 50)
+
+    def run(self, queries=1000, loops=3):
+        '''
+            Time 'queries' random find_one lookups, repeated 'loops' times.
+        :param queries: how many find_one(...) calls are made per loop
+        :param loops: how many timed loops of 'queries' lookups to run
+        :return: None, duration and found count are printed per loop
+        '''
+        print('Testing version {} on {}'.format(self.version, self.collection_name))
+
+        inserted_ids = list(self.collection.find({"inserted_id":{"$exists":True}}))
+        import re
+        self.ids = re.findall("'[0-9a-z]*'", str(inserted_ids)) # NOTE(review): probably also matches the helper document's own '_id' - confirm
+
+        import time
+
+        found_cnt = 0 # never reset, so it is cumulative across all loops
+        for _ in range(loops):
+            start = time.time()
+            for i in range(queries):
+                val = random.choice(self.ids)
+                val = val.replace("'",'') # strip the quotes captured by the regex
+                #print(val)
+                if (self.version == 'array'):
+                    # 'files': exists is part of the query for the partial index,
+                    # without it the index won't engage
+                    found = self.collection.find_one({'files': {"$exists": True},
+                                                      'files._id': "{}".format(val)})
+                else:
+                    key = "files.{}".format(val)
+                    found = self.collection.find_one({key: {"$exists": True}})
+                if found:
+                    found_cnt += 1
+
+            end = time.time()
+            print('duration per loop {}'.format(end - start))
+            print("found_cnt {}".format(found_cnt))
+
+    def get_files(self, mode, i, file_id, file_id2, file_id3):
+        '''
+            Wrapper to decide if 'array' or document version should be used
+        :param mode: 'array' for the array version, anything else for document
+        :param i: step number, passed on to be formatted into the dummy paths
+        :param file_id: ObjectId of first dummy file
+        :param file_id2: ObjectId of second dummy file
+        :param file_id3: ObjectId of third dummy file
+        :return: list of file dicts for 'array' mode, otherwise dict of files
+        '''
+        if mode == 'array':
+            return self.get_files_array(i, file_id, file_id2, file_id3)
+        else:
+            return self.get_files_doc(i, file_id, file_id2, file_id3)
+
+    def get_files_array(self, i, file_id, file_id2, file_id3):
+        return [ # three dummy file entries, each id stored as a string under '_id'
+                                    {
+                                          "path":"c:/Test/Assets/Cylinder/publish/workfile/workfileLookdev/v001/test_CylinderA_workfileLookdev_v{0:03}.mb".format(i),
+                                          "_id": '{}'.format(file_id),
+                                          "hash":"temphash",
+                                          "sites":["studio"],
+                                          "size":87236
+                                     },
+                                    {
+                                        "path": "c:/Test/Assets/Cylinder/publish/workfile/workfileLookdev/v001/test_CylinderB_workfileLookdev_v{0:03}.mb".format(
+                                            i),
+                                        "_id": '{}'.format(file_id2),
+                                        "hash": "temphash",
+                                        "sites": ["studio"],
+                                        "size": 87236
+                                    },
+                                    {
+                                        "path": "c:/Test/Assets/Cylinder/publish/workfile/workfileLookdev/v001/test_CylinderC_workfileLookdev_v{0:03}.mb".format(
+                                            i),
+                                        "_id": '{}'.format(file_id3),
+                                        "hash": "temphash",
+                                        "sites": ["studio"],
+                                        "size": 87236
+                                    }
+
+                                ]
+
+
+    def get_files_doc(self, i, file_id, file_id2, file_id3):
+        ret = {} # maps stringified file id -> file metadata (no '_id' key inside)
+        ret['{}'.format(file_id)] = {
+            "path": "c:/Test/Assets/Cylinder/publish/workfile/workfileLookdev/v001/test_CylinderA_workfileLookdev_v{0:03}.mb".format(
+                i),
+            "hash": "temphash",
+            "sites": ["studio"],
+            "size": 87236
+        }
+
+
+        ret['{}'.format(file_id2)] = {
+                                        "path": "c:/Test/Assets/Cylinder/publish/workfile/workfileLookdev/v001/test_CylinderB_workfileLookdev_v{0:03}.mb".format(i),
+                                        "hash": "temphash",
+                                        "sites": ["studio"],
+                                        "size": 87236
+                                    }
+        ret['{}'.format(file_id3)] = {
+                                        "path": "c:/Test/Assets/Cylinder/publish/workfile/workfileLookdev/v001/test_CylinderC_workfileLookdev_v{0:03}.mb".format(i),
+                                        "hash": "temphash",
+                                        "sites": ["studio"],
+                                        "size": 87236
+                                    }
+
+        return ret
+
+if __name__ == '__main__':
+    tp = TestPerformance('array')
+    tp.prepare() # purges and regenerates the test data; disable to reuse existing data
+    tp.run(1000, 3)
+
+    print('-'*50)
+
+    tp = TestPerformance('doc')
+    tp.prepare() # purges and regenerates the test data; disable to reuse existing data
+    tp.run(1000, 3)