Optimize validate dir/zip logic

- Opt out early if file list differs, no need to check checksums if file list differs
- Remove "checksums" from the set instead of a list
This commit is contained in:
Roy Nieterau 2022-01-29 14:12:22 +01:00
parent c6eb14ab62
commit 50d1f8c4be

View file

@ -973,9 +973,19 @@ class BootstrapRepos:
for line in checksums_data.split("\n") if line
]
# get list of files in zip minus `checksums` file itself
# and turn in to set to compare against list of files
# from checksum file. If difference exists, something is
# wrong
files_in_zip = set(zip_file.namelist())
files_in_zip.remove("checksums")
files_in_checksum = {file[1] for file in checksums}
diff = files_in_zip.difference(files_in_checksum)
if diff:
return False, f"Missing files {diff}"
# calculate and compare checksums in the zip file
for file in checksums:
file_name = file[1]
for file_checksum, file_name in checksums:
if platform.system().lower() == "windows":
file_name = file_name.replace("/", "\\")
h = hashlib.sha256()
@ -983,21 +993,9 @@ class BootstrapRepos:
h.update(zip_file.read(file_name))
except FileNotFoundError:
return False, f"Missing file [ {file_name} ]"
if h.hexdigest() != file[0]:
if h.hexdigest() != file_checksum:
return False, f"Invalid checksum on {file_name}"
# get list of files in zip minus `checksums` file itself
# and turn in to set to compare against list of files
# from checksum file. If difference exists, something is
# wrong
files_in_zip = zip_file.namelist()
files_in_zip.remove("checksums")
files_in_zip = set(files_in_zip)
files_in_checksum = {file[1] for file in checksums}
diff = files_in_zip.difference(files_in_checksum)
if diff:
return False, f"Missing files {diff}"
return True, "All ok"
@staticmethod
@ -1011,16 +1009,22 @@ class BootstrapRepos:
tuple(line.split(":"))
for line in checksums_data.split("\n") if line
]
files_in_dir = [
# compare file list against list of files from checksum file.
# If difference exists, something is wrong and we invalidate directly
files_in_dir = set(
file.relative_to(path).as_posix()
for file in path.iterdir() if file.is_file()
]
)
files_in_dir.remove("checksums")
files_in_dir = set(files_in_dir)
files_in_checksum = {file[1] for file in checksums}
for file in checksums:
file_name = file[1]
diff = files_in_dir.difference(files_in_checksum)
if diff:
return False, f"Missing files {diff}"
# calculate and compare checksums
for file_checksum, file_name in checksums:
if platform.system().lower() == "windows":
file_name = file_name.replace("/", "\\")
try:
@ -1028,11 +1032,8 @@ class BootstrapRepos:
except FileNotFoundError:
return False, f"Missing file [ {file_name} ]"
if file[0] != current:
if file_checksum != current:
return False, f"Invalid checksum on {file_name}"
diff = files_in_dir.difference(files_in_checksum)
if diff:
return False, f"Missing files {diff}"
return True, "All ok"