From 716c6cc6e67fe34fd7659aaac2f17ffe8bbfea80 Mon Sep 17 00:00:00 2001 From: Alexander Bocken Date: Tue, 17 Feb 2026 16:05:51 +0100 Subject: [PATCH] fix: use python3 for emoji codepoint extraction in font subsetting grep -oP '.' splits multi-byte emoji into individual bytes when the locale is not UTF-8 (e.g. CI runners with LANG=C), causing pyftsubset to fail on invalid codepoints. --- scripts/subset-emoji-font.sh | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/scripts/subset-emoji-font.sh b/scripts/subset-emoji-font.sh index b3ad34b..0280ffc 100755 --- a/scripts/subset-emoji-font.sh +++ b/scripts/subset-emoji-font.sh @@ -29,18 +29,11 @@ fi EMOJIS="β˜€βœβ„πŸŒ·πŸ‚πŸŽ„πŸ‡πŸ½πŸ₯«πŸ›’πŸ›πŸš†βš‘πŸŽ‰πŸ€πŸ’Έβ€πŸ–€βœ…βŒπŸš€βš βœ¨πŸ”„πŸ“‹πŸ–ΌπŸ“–πŸ€–πŸŒπŸ”πŸ”πŸš«" # ──────────────────────────────────────────────────────────────────── -# Build Unicode codepoint list from the emoji string -UNICODES="" -for char in $(echo "$EMOJIS" | grep -oP '.'); do - code=$(printf 'U+%04X' "'$char") - if [ -n "$UNICODES" ]; then - UNICODES="$UNICODES,$code" - else - UNICODES="$code" - fi -done +# Build Unicode codepoint list from the emoji string (Python for reliable Unicode handling) +UNICODES=$(python3 -c "print(','.join(f'U+{ord(c):04X}' for c in '$EMOJIS'))") +GLYPH_COUNT=$(python3 -c "print(len('$EMOJIS'))") -echo "Subsetting NotoColorEmoji with $(echo "$EMOJIS" | grep -oP '.' | wc -l) glyphs..." +echo "Subsetting NotoColorEmoji with $GLYPH_COUNT glyphs..." # Subset to TTF pyftsubset "$SRC_FONT" \