From 716c6cc6e67fe34fd7659aaac2f17ffe8bbfea80 Mon Sep 17 00:00:00 2001
From: Alexander Bocken <alexander@bocken.org>
Date: Tue, 17 Feb 2026 16:05:51 +0100
Subject: [PATCH] fix: use python3 for emoji codepoint extraction in font
 subsetting

grep -oP '.' splits multi-byte emoji into individual bytes when the
locale is not UTF-8 (e.g. CI runners with LANG=C), causing pyftsubset
to fail on invalid codepoints.
---
 scripts/subset-emoji-font.sh | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/scripts/subset-emoji-font.sh b/scripts/subset-emoji-font.sh
index b3ad34b..0280ffc 100755
--- a/scripts/subset-emoji-font.sh
+++ b/scripts/subset-emoji-font.sh
@@ -29,18 +29,11 @@ fi
 EMOJIS="☀✝❄🌷🍂🎄🐇🍽🥫🛒🛍🚆⚡🎉🤝💸❤🖤✅❌🚀⚠✨🔄📋🖼📖🤖🌐🔐🔍🚫"
 # ────────────────────────────────────────────────────────────────────
 
-# Build Unicode codepoint list from the emoji string
-UNICODES=""
-for char in $(echo "$EMOJIS" | grep -oP '.'); do
-	code=$(printf 'U+%04X' "'$char")
-	if [ -n "$UNICODES" ]; then
-		UNICODES="$UNICODES,$code"
-	else
-		UNICODES="$code"
-	fi
-done
+# Build the codepoint list in Python; EMOJIS is passed as argv (not spliced into the code) and decoded as UTF-8 explicitly, so the result does not depend on the locale
+UNICODES=$(python3 -c 'import os, sys; print(",".join(f"U+{ord(c):04X}" for c in os.fsencode(sys.argv[1]).decode("utf-8")))' "$EMOJIS")
+GLYPH_COUNT=$(python3 -c 'import os, sys; print(len(os.fsencode(sys.argv[1]).decode("utf-8")))' "$EMOJIS")
 
-echo "Subsetting NotoColorEmoji with $(echo "$EMOJIS" | grep -oP '.' | wc -l) glyphs..."
+echo "Subsetting NotoColorEmoji with $GLYPH_COUNT glyphs..."
 
 # Subset to TTF
 pyftsubset "$SRC_FONT" \