From 3facd293188b3f3d5c149187a6365b1c4de18880 Mon Sep 17 00:00:00 2001 From: Jakub Jezek Date: Wed, 1 Apr 2020 15:17:33 +0200 Subject: [PATCH] feat(premiere): updating pysync in vendors --- pype/vendor/pysync.py | 632 ++++++++++++++---------------------------- 1 file changed, 204 insertions(+), 428 deletions(-) diff --git a/pype/vendor/pysync.py b/pype/vendor/pysync.py index 5c42b63482..14a6dda34c 100644 --- a/pype/vendor/pysync.py +++ b/pype/vendor/pysync.py @@ -1,440 +1,216 @@ -#!/usr/bin/env python -""" -A Python implementation of rsync +#!/usr/local/bin/python3 +# https://github.com/snullp/pySync/blob/master/pySync.py -This is a demonstration implementation of the rsync algorithm in Python. It is -not fast and is not optimised. The primary aim is to provide a simple example -implementation of the algorithm for reference, so code clarity is more important -than performance. Ideas have been liberaly taken from libhsync, xdelta and -rsync. +import sys +import shutil +import os +import time +import configparser +from os.path import ( + getsize, + getmtime, + isfile, + isdir, + join, + abspath, + expanduser, + realpath +) +import logging - $Id: pysync.py 1.21 Sat, 18 Oct 2003 00:17:54 +1000 abo $ -Author : Donovan Baarda -License : LGPL -Download: ftp://minkirri.apana.org.au/pub/python +log = logging.getLogger(__name__) -Requires: sys, zlib, types, md4sum - rollsum (included md4sum-alike rollsum wrapper) +ignoreFiles = ("Thumbs.db", ".DS_Store") -Usage: - # Low level API signature calculation - sig=calcsig(oldfile) - - # Low level API rsync style incremental delta calc from sig and newdata - delta=rdeltaobj(sig) - # or for xdelta style incremental delta calc from oldfile and newdata - # delta=xdeltaobj(oldfile) - incdelta=delta.calcdelta(newdata) - : - incdelta=delta.flush() +# this feature is not yet implemented +ignorePaths = [] - # Low level API applying incremental delta to oldfile to get newdata - patch=patchobj(oldfile) - newdata=patch.calcpatch(incdelta) - : +if os.name == 'nt': + # msvcrt can't function correctly in IDLE + if 'idlelib.run' in sys.modules: + print("Please don't run this script in IDLE.") + sys.exit(0) + import msvcrt - # High level API - sig=calcsig(oldfile) # create a sig object - delta=calcrdelta(sig,newfile) # create a rdelta object - delta=calcxdelta(oldfile,newfile) # create a xdelta object - calcpatch(oldfile,delta,newfile) # apply a delta object - - # File level API - stats=filesig(oldfile,sigfile) # create sigfile - stats=filerdelta(sigfile,newfile,diffile) # create a rdelta diffile - stats=filexdelta(oldfile,newfile,diffile) # create a xdelta diffile - stats=filepatch(oldfile,diffile,newfile) # apply a diffile - -Where: - sig - a signature object - delta - a delta object - stats - a statistics object that can be printed - newdata - the target incremental data sequence - incdelta - the incremental delta list - oldfile - the source file - newfile - the target file - sigfile - the signature file - diffile - the delta file - -a delta is implemented as a list containing a sequence of (context) -compressed insert strings and (offset,length) match pairs. - -A signature is a (length, blocksize, sigtable) tuple, where length and blocksize -are integers. The sigtable is implemented as a rollsum keyed dictionary of -md4sum keyed dictionaries containing offsets. -ie sigtable[rollsum][md4sum]=offset - -Note rsync uses md4sums because they are faster than md5sums, but -python doesn't have a built in md4sum wrapper. I use an md4 module -based on the libmd RSA md4 implementation and a modified md5module.c - -thoughts on using zlib to compress deltas; - -1) compress the whole instruction stream -2) compress the inserts only using Z_SYNC_FLUSH to delimit and put - inserts into the instruction stream. -3) compress everything using Z_SYNC_FLUSH to delimit boundaries, inserting - only output for inserts into the instruction stream (rsync?) -4) compress the insert stream without Z_SYNC_FLUSH and put offset/lengths in - instruction stream, sending compressed inserts seperately (xdelta?) - -it depends on how zlib performs with heaps of Z_SYNC_FLUSH's. If it hurts -performance badly, then 4 is best. Otherwise, it would pay to see if zlib -improves compression with inserted context data not included in the output -stream. - -My tests on zlib suggest that syncs do hurt a little, but dispite that -including context by compressing _all_ the data, not just the deltas, gives -the best compression. Unfortunately this has extra load on applying patches -because it requires all data to be compressed to supply the compression -stream for the missing context info for decompression. - -thoughts on instruction stream; - -use fixed length and put only offsets into instruction stream for matches, -put inserts directly into the instruction stream. - -use source/offset/length in the instruction stream, and make the inserts a -seperate source (xdelta). - -by putting offset/length in the instruction stream rather than just block id's -the instruction stream becomes more generic... anything that can generate -offset/lengths can generate patches... possibly more optimal ones than rsync -(ie, xdelta's largest possible match type tricks). - -Including a source along with offset/length means multiple sources can be used -for a single patch (like xdelta), though this can be fudged by appending sources -into one long stream. - -""" -# psyco is a python accelerator which speeds up pysync by 33% -try: - import psyco - psyco.profile() -except: - pass -from zlib import * -from types import TupleType,StringType -import md4,rollsum - -# the default block size used throughout. This is carefuly chosen to try and -# avoid the zlib decompressor sync bug which strikes at about 16K -BLOCK_SIZE=8192 - -# the various supported flush modes. -R_SYNC_FLUSH=Z_SYNC_FLUSH -R_FINISH=Z_FINISH - -def calcsig(oldfile,blocksize=BLOCK_SIZE): - "Calculates and returns a signature" - offset=0 - sigtable={} - data=oldfile.read(blocksize) - while data: - sum=md4.new(data).digest() - sig=rollsum.new(data).digest() - try: - sigtable[sig][sum]=offset - except KeyError: - sigtable[sig]={} - sigtable[sig][sum]=offset - offset=offset+len(data) - data=oldfile.read(blocksize) - return (offset,blocksize,sigtable) - -class rdeltaobj: - "Incremental delta calculation class for deltas from signature to newfile" - def __init__(self,(length,blocksize,sigtable)): - self.length = length - self.blocksize = blocksize - self.sigtable = sigtable - self.data = "" # the yet to be processed data - self.pos = 0 # the position processed up to in data - self.sig = None # the rollsum sig of the next data block - self.last = None # the last processed delta match/miss - self.delta = [] # the delta list calculated thus far - self.comp = compressobj(9) # the delta zlib compressor object - def _compress(self): - "compress and return up to pos, adjusting data and pos" - data=buffer(self.data,0,self.pos) - self.data,self.pos=buffer(self.data,self.pos),0 - return self.comp.compress(data) - def _flush(self,mode=R_SYNC_FLUSH): - "compress, flush, and return up to pos, adjusting data and pos" - return self._compress()+self.comp.flush(mode) - def _findmatch(self): - "return a match tuple, or raise KeyError if there isn't one" - # get the rollsum digest, calculating sig if needed - try: - sig=self.sig.digest() - except AttributeError: - self.sig=rollsum.new(buffer(self.data,self.pos,self.blocksize)) - sig=self.sig.digest() - # get the matching offset, if it exists, otherwise raise KeyError - sumtable=self.sigtable[sig] - sum=md4.new(buffer(self.data,self.pos,self.blocksize)) - return sumtable[sum.digest()],self.sig.count - def _appendmatch(self,(offset,length)): - "append a match to delta" - # if last was a match that can be extended, extend it - if type(self.last)==TupleType and self.last[0]+self.last[1]==offset: - self.last=(self.last[0],self.last[1]+length) + def flush_input(str, set=None): + if not set: + while msvcrt.kbhit(): + ch = msvcrt.getch() + if ch == '\xff': + print("msvcrt is broken, this is weird.") + sys.exit(0) + return input(str) else: - # else appendflush the last value - self._appendflush(R_SYNC_FLUSH) - # make this match the new last - self.last=(offset,length) - # increment pos and compress the matched data for context - self.pos=self.pos+length - self._compress() - def _appendmiss(self,length): - "append a miss to delta" - if type(self.last)!=StringType: - # if last was not a miss, appendflush the last value - self._appendflush(R_SYNC_FLUSH) - # make this miss the new last - self.last="" - # increment pos and compress if greater than blocksize - self.pos=self.pos+length - #if self.pos >= self.blocksize: - # self.last=self.last+self._compress() - def _appendflush(self,mode=R_FINISH): - "append a flush to delta" - if type(self.last)==StringType: - self.delta.append(self.last+self._flush(mode)) - elif self.last: - self.delta.append(self.last) - self._flush(mode) - self.last=None - def calcdelta(self,newdata): - "incrementaly calculates and returns a delta list" - self.data=self.data+newdata - while self.pos+self.blocksize 0: + os.read(sys.stdin.fileno(), 4096) + return input(str) else: - inserts=inserts+1 - insert_length=insert_length + len(i) - return """delta stats -segments: %i -matches : %i %i -inserts : %i %i -""" % (len(delta),matches,match_length,inserts,insert_length) + return set -if __name__ == "__main__": - import os - from sys import argv,stdin,stdout,stderr,exit - def openarg(argno,mode='rb'): - if (len(argv) <= argno) or (argv[argno] == '-'): - if 'r' in mode: return stdin - return stdout - return open(argv[argno],mode) - - if len(argv)>=2 and argv[1]=="signature": - oldfile,sigfile=openarg(2,'rb'),openarg(3,'wb') - stats=filesig(oldfile,sigfile,1024) - stderr.write(str(stats)) - elif len(argv)>=3 and argv[1]=="rdelta": - sigfile,newfile,diffile=openarg(2,'rb'),openarg(3,'rb'),openarg(4,'wb') - stats=filerdelta(sigfile,newfile,diffile) - stderr.write(str(stats)) - elif len(argv)>=3 and argv[1]=="xdelta": - oldfile,newfile,diffile=openarg(2,'rb'),openarg(3,'rb'),openarg(4,'wb') - stats=filexdelta(oldfile,newfile,diffile) - stderr.write(str(stats)) - elif len(argv)>=3 and argv[1]=="patch": - oldfile,diffile,newfile=openarg(2,'rb'),openarg(3,'rb'),openarg(4,'wb') - stats=filepatch(oldfile,diffile,newfile) - stderr.write(str(stats)) +def compare(fa, fb, options_input=[]): + if isfile(fa) == isfile(fb): + if isdir(fa): + walktree(fa, fb, options_input) + elif isfile(fa): + if getsize(fa) != getsize(fb) \ + or int(getmtime(fa)) != int(getmtime(fb)): + log.info(str((fa, ': size=', getsize(fa), 'mtime=', + time.asctime(time.localtime(getmtime(fa)))))) + log.info(str((fb, ': size=', getsize(fb), 'mtime=', + time.asctime(time.localtime(getmtime(fb)))))) + if getmtime(fa) > getmtime(fb): + act = '>' + else: + act = '<' + + set = [i for i in options_input if i in [">", "<"]][0] + + s = flush_input('What to do?(>,<,r,n)[' + act + ']', set=set) + if len(s) > 0: + act = s[0] + if act == '>': + shutil.copy2(fa, fb) + elif act == '<': + shutil.copy2(fb, fa) + elif act == 'r': + if isdir(fa): + shutil.rmtree(fa) + elif isfile(fa): + os.remove(fa) + else: + log.info(str(('Remove: Skipping', fa))) + if isdir(fb): + shutil.rmtree(fb) + elif isfile(fb): + os.remove(fb) + else: + log.info(str(('Remove: Skipping', fb))) + + else: + log.debug(str(('Compare: Skipping non-dir and non-file', fa))) else: - print """ -Usage: - %s signature [ []] - ... generates signature file from - - %s rdelta [ []] - ... generates rdelta file for from - - %s xdelta [ []] - ... generates xdelta file for from - - %s patch [ []] - ... applies delta file to to generate - -Where file parameters ommitted or specified as '-' indicate standard -input or output as appropriate. -""" % ((os.path.basename(argv[0]),) * 4) - exit(1) + log.error(str(('Error:', fa, ',', fb, 'have different file type'))) + + +def copy(fa, fb, options_input=[]): + set = [i for i in options_input if i in ["y"]][0] + s = flush_input('Copy ' + fa + ' to another side?(r,y,n)[y]', set=set) + if len(s) > 0: + act = s[0] + else: + act = 'y' + if act == 'y': + if isdir(fa): + shutil.copytree(fa, fb) + elif isfile(fa): + shutil.copy2(fa, fb) + else: + log.debug(str(('Copy: Skipping ', fa))) + elif act == 'r': + if isdir(fa): + shutil.rmtree(fa) + elif isfile(fa): + os.remove(fa) + else: + log.debug(str(('Remove: Skipping ', fa))) + + +stoentry = [] +tarentry = [] + + +def walktree(source, target, options_input=[]): + srclist = os.listdir(source) + tarlist = os.listdir(target) + if '!sync' in srclist: + return + if '!sync' in tarlist: + return + # files in source dir... + for f in srclist: + if f in ignoreFiles: + continue + spath = join(source, f) + tpath = join(target, f) + if spath in ignorePaths: + continue + if spath in stoentry: + # just in case target also have this one + if f in tarlist: + del tarlist[tarlist.index(f)] + continue + + # if also exists in target dir + if f in tarlist: + del tarlist[tarlist.index(f)] + compare(spath, tpath, options_input) + + # exists in source dir only + else: + copy(spath, tpath, options_input) + + # exists in target dir only + set = [i for i in options_input if i in ["<"]] + + for f in tarlist: + if f in ignoreFiles: + continue + spath = join(source, f) + tpath = join(target, f) + if tpath in ignorePaths: + continue + if tpath in tarentry: + continue + if set: + copy(tpath, spath, options_input) + else: + print("REMOVING: {}".format(f)) + if os.path.isdir(tpath): + shutil.rmtree(tpath) + else: + os.remove(tpath) + print("REMOVING: {}".format(f)) + + +if __name__ == '__main__': + stoconf = configparser.RawConfigParser() + tarconf = configparser.RawConfigParser() + stoconf.read("pySync.ini") + tarconf.read(expanduser("~/.pysync")) + stoname = stoconf.sections()[0] + tarname = tarconf.sections()[0] + + # calculate storage's base folder + if stoconf.has_option(stoname, 'BASE'): + stobase = abspath(stoconf.get(stoname, 'BASE')) + stoconf.remove_option(stoname, 'BASE') + else: + stobase = os.getcwd() + + # same, for target's base folder + if tarconf.has_option(tarname, 'BASE'): + tarbase = abspath(tarconf.get(tarname, 'BASE')) + tarconf.remove_option(tarname, 'BASE') + else: + tarbase = expanduser('~/') + + print("Syncing between", stoname, "and", tarname) + sto_content = {x: realpath(join(stobase, stoconf.get(stoname, x))) + for x in stoconf.options(stoname)} + tar_content = {x: realpath(join(tarbase, tarconf.get(tarname, x))) + for x in tarconf.options(tarname)} + stoentry = [sto_content[x] for x in sto_content] + tarentry = [tar_content[x] for x in tar_content] + + for folder in sto_content: + if folder in tar_content: + print('Processing', folder) + walktree(sto_content[folder], tar_content[folder], options_input) + print("Done.")