Refactor upload retry logic to handle only transient network issues and improve error handling

This commit is contained in:
Aleks Berland 2025-08-26 09:55:47 -04:00
parent a0f6a3f379
commit 32c022cd4d

View file

@ -9,6 +9,10 @@ from ayon_core.pipeline.publish import (
PublishXmlValidationError, PublishXmlValidationError,
get_publish_repre_path, get_publish_repre_path,
) )
from requests import exceptions as req_exc
# Narrow retryable failures to transient network issues
RETRYABLE_EXCEPTIONS = (req_exc.Timeout, req_exc.ConnectionError)
class IntegrateAYONReview(pyblish.api.InstancePlugin): class IntegrateAYONReview(pyblish.api.InstancePlugin):
@ -47,7 +51,7 @@ class IntegrateAYONReview(pyblish.api.InstancePlugin):
if "webreview" not in repre_tags: if "webreview" not in repre_tags:
continue continue
# exclude representations with are going to be published on farm # exclude representations going to be published on farm
if "publish_on_farm" in repre_tags: if "publish_on_farm" in repre_tags:
continue continue
@ -120,7 +124,8 @@ class IntegrateAYONReview(pyblish.api.InstancePlugin):
to guide the user to retry publish. to guide the user to retry publish.
""" """
last_error = None last_error = None
for attempt in range(1, max_retries + 1): for attempt in range(max_retries):
attempt_num = attempt + 1
try: try:
ayon_con.upload_file( ayon_con.upload_file(
endpoint, endpoint,
@ -129,30 +134,36 @@ class IntegrateAYONReview(pyblish.api.InstancePlugin):
request_type=RequestTypes.post, request_type=RequestTypes.post,
) )
return return
except Exception as exc: # noqa: BLE001 - bubble after retries except RETRYABLE_EXCEPTIONS as exc:
last_error = exc last_error = exc
# Log and retry with backoff if attempts remain # Log and retry with backoff if attempts remain
if attempt < max_retries: if attempt_num < max_retries:
wait = backoff_seconds * (2 ** (attempt - 1)) wait = backoff_seconds * (2 ** attempt)
self.log.warning( self.log.warning(
f"Review upload failed (attempt {attempt}/{max_retries}): {exc}. " "Review upload failed (attempt %s/%s). Retrying in %ss...",
f"Retrying in {wait}s..." attempt_num, max_retries, wait,
exc_info=True,
) )
try: try:
time.sleep(wait) time.sleep(wait)
except Exception: # Sleep errors are highly unlikely; continue except Exception:
pass pass
else: else:
# Exhausted retries - raise a user-friendly validation error with help break
raise PublishXmlValidationError( except Exception:
self, # Non-retryable failures bubble up immediately
( raise
"Upload of reviewable timed out or failed after multiple attempts. "
"Please try publishing again." # Exhausted retries - raise a user-friendly validation error with help
), raise PublishXmlValidationError(
key="upload_timeout", self,
formatting_data={ (
"file": repre_path, "Upload of reviewable timed out or failed after multiple attempts."
"error": str(last_error), " Please try publishing again."
}, ),
) key="upload_timeout",
formatting_data={
"file": repre_path,
"error": str(last_error),
},
)