Branch
Hash :
d9a258be
Author :
Date :
2025-05-31T16:27:54
[perf] Add hb-draw-compare and hb-svg-compare
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
import re
import math
def tokenize_path_data(d_attribute):
    """
    Split an SVG path 'd' attribute string into a list of tokens.

    Tokens are either:
    - single-letter path commands (M, m, L, l, C, c, etc.), kept as strings;
    - numeric values, converted to float.

    Numbers may carry an optional sign, may omit the digits on either side
    of the decimal point ('.5', '5.'), and may use scientific notation
    ('1e-3'). Anything that is neither a command letter nor a number
    (whitespace, commas, stray characters) is skipped.

    Returns a list of strings/floats,
    e.g. ['M', 10.0, 20.0, 'L', 30.5, 40.7, 'Z'].
    """
    # Regex explanation:
    #   ([MmZzLlHhVvCcSsQqTtAa])        => one of the valid path command letters
    #   |                               => OR
    #   ([+-]?(?:\d+\.?\d*|\.\d+)       => optional sign, then either digits with
    #                                      an optional fraction, or a fraction
    #                                      with no leading digit (".5")
    #    (?:[eE][+-]?\d+)?)             => optional exponent
    # Inner groups are non-capturing so each match exposes exactly two groups.
    token_pattern = re.compile(
        r'([MmZzLlHhVvCcSsQqTtAa])'
        r'|([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)'
    )

    tokens = []
    for match in token_pattern.finditer(d_attribute):
        command_letter, number_string = match.groups()
        if command_letter:
            # It's a path command like 'M', 'L', etc.
            tokens.append(command_letter)
        else:
            # It's a numeric value
            tokens.append(float(number_string))
    return tokens
def extract_paths_from_svg(svg_data):
    """
    Parse SVG markup and tokenize the 'd' attribute of every <path> element.

    Only <path> elements qualified with the standard SVG namespace
    (http://www.w3.org/2000/svg) below the root are considered, in document
    order. Paths whose 'd' attribute is missing or empty are skipped.

    Returns a list with one token list per retained path.
    """
    svg_root = ET.fromstring(svg_data)

    # Note: un-namespaced <path> tags are NOT matched by this query. If your
    # SVGs carry no default namespace, search for './/path' instead.
    d_values = (
        elem.get('d')
        for elem in svg_root.findall('.//{http://www.w3.org/2000/svg}path')
    )
    return [tokenize_path_data(d_value) for d_value in d_values if d_value]
def compare_token_lists(tokens_a, tokens_b):
    """
    Compare two lists of path tokens (command strings and floats).

    Returns the largest absolute difference between numeric tokens at the
    same position (0 if there are no numeric tokens), or None when the lists
    do not line up: different lengths, different command letters at the same
    position, or tokens of differing kinds at the same position.
    """
    if len(tokens_a) != len(tokens_b):
        return None  # Different lengths => not a match

    largest = 0
    for left, right in zip(tokens_a, tokens_b):
        if isinstance(left, str) and isinstance(right, str):
            # Commands must be the very same letter (case included).
            if left != right:
                return None
            continue

        if not (isinstance(left, float) and isinstance(right, float)):
            # Mixed kinds at the same position => structural mismatch.
            return None

        delta = abs(left - right)
        largest = max(largest, delta)

    return largest
def compare_svg_files(svg_file_1, svg_file_2):
    """
    Compare the <path> geometry of two SVG files.

    Returns:
    - 0 if the two files have identical contents;
    - None if the files have a different number of <path> elements, or if any
      pair of corresponding paths differs in structure (as reported by
      compare_token_lists);
    - otherwise, the max difference between respective numeric values in
      corresponding paths.

    Errors from opening or reading either file propagate to the caller.
    """
    # Use context managers so both handles are closed promptly; this function
    # is called once per pair and may run over a large number of files.
    with open(svg_file_1) as file_1:
        svg_data_1 = file_1.read()
    with open(svg_file_2) as file_2:
        svg_data_2 = file_2.read()

    # If contents match exactly, return 0 without parsing anything
    if svg_data_1 == svg_data_2:
        return 0

    paths1 = extract_paths_from_svg(svg_data_1)
    paths2 = extract_paths_from_svg(svg_data_2)

    # Check that we have the same number of <path> elements
    if len(paths1) != len(paths2):
        return None

    # Compare each path token list, tracking the worst numeric deviation
    max_diff = 0
    for tokens1, tokens2 in zip(paths1, paths2):
        ret = compare_token_lists(tokens1, tokens2)
        if ret is None:
            return None
        max_diff = max(max_diff, ret)
    return max_diff
if __name__ == "__main__":
    # Command-line driver: reads pairs of SVG file paths from stdin (one
    # whitespace-separated pair per line) and prints a tab-separated line for
    # every pair that differs by more than TOLERANCE.
    import sys

    if '--help' in sys.argv[1:]:
        print("Usage: hb-svg-compare TOLERANCE < file_with_svg_pairs.txt")
        sys.exit(1)

    # Optional first argument: largest numeric difference that is still
    # considered a match. Defaults to 0 (exact numeric equality).
    tolerance = 0
    if len(sys.argv) > 1:
        tolerance = float(sys.argv[1])

    # Read all lines of two SVG file paths from stdin and compare
    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Blank / whitespace-only line (e.g. trailing newline): nothing
            # to compare, and unpacking it would raise ValueError.
            continue
        svg1, svg2 = fields

        diff = compare_svg_files(svg1, svg2)
        if diff is None:
            # Structural mismatch: no numeric difference to report
            diff = "DIFF"
        elif diff <= tolerance:
            continue
        else:
            diff = f"{diff:.5f}"
        print(f"{diff}\t{svg1}\t{svg2}")
        # Flush stdout to make sure output is immediate
        sys.stdout.flush()