From fcb38b81cfab1f435363d13bb017e816baae7dc4 Mon Sep 17 00:00:00 2001 From: Jakub Trllo Date: Tue, 15 Feb 2022 12:14:39 +0100 Subject: [PATCH 1/5] fix loading of unused chars in xml format --- openpype/lib/transcoding.py | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/openpype/lib/transcoding.py b/openpype/lib/transcoding.py index 36f6858a78..5caea32ee1 100644 --- a/openpype/lib/transcoding.py +++ b/openpype/lib/transcoding.py @@ -30,6 +30,38 @@ INT_TAGS = { "deep", "subimages", } + +XML_UNUSED_CHARS = { + "�", + "", + "", + "", + "", + "", + "", + "", + "", + " ", + " ", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" +} # Regex to parse array attributes ARRAY_TYPE_REGEX = re.compile(r"^(int|float|string)\[\d+\]$") @@ -191,6 +223,14 @@ def parse_oiio_xml_output(xml_string, logger=None): if not xml_string: return output + # Fix values with ampresand (lazy fix) + # - ElementTree can't handle all escaped values with ampresand + # e.g. 
"" + for unused_char in XML_UNUSED_CHARS: + if unused_char in xml_string: + new_char = unused_char.replace("&", "&") + xml_string = xml_string.replace(unused_char, new_char) + if logger is None: logger = logging.getLogger("OIIO-xml-parse") From 7e1203ea5144e0983768941e70431fec54e2c6dd Mon Sep 17 00:00:00 2001 From: Jakub Trllo Date: Tue, 15 Feb 2022 17:02:41 +0100 Subject: [PATCH 2/5] use regex rather then explicit values --- openpype/lib/transcoding.py | 45 +++++++++---------------------------- 1 file changed, 10 insertions(+), 35 deletions(-) diff --git a/openpype/lib/transcoding.py b/openpype/lib/transcoding.py index 5caea32ee1..cc31016bb2 100644 --- a/openpype/lib/transcoding.py +++ b/openpype/lib/transcoding.py @@ -31,37 +31,9 @@ INT_TAGS = { "subimages", } -XML_UNUSED_CHARS = { - "�", - "", - "", - "", - "", - "", - "", - "", - "", - " ", - " ", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "" -} +CHAR_REF_REGEX_DECIMAL = re.compile(r"&#[0-9]+;") +CHAR_REF_REGEX_HEX = re.compile(r"&#x[0-9a-zA-Z]+;") + # Regex to parse array attributes ARRAY_TYPE_REGEX = re.compile(r"^(int|float|string)\[\d+\]$") @@ -226,10 +198,13 @@ def parse_oiio_xml_output(xml_string, logger=None): # Fix values with ampresand (lazy fix) # - ElementTree can't handle all escaped values with ampresand # e.g. 
"&#01;" - for unused_char in XML_UNUSED_CHARS: - if unused_char in xml_string: - new_char = unused_char.replace("&", "&amp;") - xml_string = xml_string.replace(unused_char, new_char) + matches = ( + set(CHAR_REF_REGEX_HEX.findall(xml_string)) + | set(CHAR_REF_REGEX_DECIMAL.findall(xml_string)) + ) + for match in matches: + new_value = match.replace("&", "&amp;") + xml_string = xml_string.replace(match, new_value) if logger is None: logger = logging.getLogger("OIIO-xml-parse") From 646eb2e5199e27a38362fb814ebcb58119013f43 Mon Sep 17 00:00:00 2001 From: Jakub Trllo Date: Tue, 15 Feb 2022 17:24:32 +0100 Subject: [PATCH 3/5] use single regex --- openpype/lib/transcoding.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/openpype/lib/transcoding.py b/openpype/lib/transcoding.py index cc31016bb2..4b1c721428 100644 --- a/openpype/lib/transcoding.py +++ b/openpype/lib/transcoding.py @@ -31,8 +31,7 @@ INT_TAGS = { "subimages", } -CHAR_REF_REGEX_DECIMAL = re.compile(r"&#[0-9]+;") -CHAR_REF_REGEX_HEX = re.compile(r"&#x[0-9a-zA-Z]+;") +XML_CHAR_REF_REGEX_HEX = re.compile(r"&#x?[0-9a-fA-F]+;") # Regex to parse array attributes ARRAY_TYPE_REGEX = re.compile(r"^(int|float|string)\[\d+\]$") @@ -196,12 +195,9 @@ def parse_oiio_xml_output(xml_string, logger=None): return output # Fix values with ampresand (lazy fix) - # - ElementTree can't handle all escaped values with ampresand + # - oiiotool exports invalid xml which ElementTree can't handle # e.g. 
"&#01;" - matches = ( - set(CHAR_REF_REGEX_HEX.findall(xml_string)) - | set(CHAR_REF_REGEX_DECIMAL.findall(xml_string)) - ) + matches = XML_CHAR_REF_REGEX_HEX.findall(xml_string) for match in matches: new_value = match.replace("&", "&amp;") xml_string = xml_string.replace(match, new_value) From 7a81e8dc1cac135d8ecfc921a8389a303895b744 Mon Sep 17 00:00:00 2001 From: Jakub Trllo <43494761+iLLiCiTiT@users.noreply.github.com> Date: Tue, 15 Feb 2022 17:41:02 +0100 Subject: [PATCH 4/5] Added warning comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ondřej Samohel <33513211+antirotor@users.noreply.github.com> --- openpype/lib/transcoding.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openpype/lib/transcoding.py b/openpype/lib/transcoding.py index 4b1c721428..00945a4471 100644 --- a/openpype/lib/transcoding.py +++ b/openpype/lib/transcoding.py @@ -197,6 +197,8 @@ def parse_oiio_xml_output(xml_string, logger=None): # Fix values with ampresand (lazy fix) # - oiiotool exports invalid xml which ElementTree can't handle # e.g. "&#01;" + # WARNING: this will affect even valid character entities. If you need those values correctly, this must + # take care of valid character ranges. 
See https://github.com/pypeclub/OpenPype/pull/2729 matches = XML_CHAR_REF_REGEX_HEX.findall(xml_string) for match in matches: new_value = match.replace("&", "&amp;") From 985a6c89954b711951ec02eb17c403b9872eb7f1 Mon Sep 17 00:00:00 2001 From: Jakub Trllo Date: Tue, 15 Feb 2022 17:42:10 +0100 Subject: [PATCH 5/5] hound fix --- openpype/lib/transcoding.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openpype/lib/transcoding.py b/openpype/lib/transcoding.py index 00945a4471..e89fa6331e 100644 --- a/openpype/lib/transcoding.py +++ b/openpype/lib/transcoding.py @@ -197,8 +197,9 @@ def parse_oiio_xml_output(xml_string, logger=None): # Fix values with ampresand (lazy fix) # - oiiotool exports invalid xml which ElementTree can't handle # e.g. "&#01;" - # WARNING: this will affect even valid character entities. If you need those values correctly, this must - # take care of valid character ranges. See https://github.com/pypeclub/OpenPype/pull/2729 + # WARNING: this will affect even valid character entities. If you need + # those values correctly, this must take care of valid character ranges. + # See https://github.com/pypeclub/OpenPype/pull/2729 matches = XML_CHAR_REF_REGEX_HEX.findall(xml_string) for match in matches: new_value = match.replace("&", "&amp;")