From cdd50117dd8095d308957c6b7168af985f36f936 Mon Sep 17 00:00:00 2001
From: Alexander Bocken <alexander@bocken.org>
Date: Sat, 2 Jan 2021 20:53:03 +0100
Subject: [PATCH] yt fallback uses xml parser now

---
 .local/bin/tools/ripper | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/.local/bin/tools/ripper b/.local/bin/tools/ripper
index 4c3d094..20c977c 100755
--- a/.local/bin/tools/ripper
+++ b/.local/bin/tools/ripper
@@ -49,9 +49,15 @@ for channel_id in $IDs; do
 	#Fallback to legacy mode if API quota is exceeded
 	if [ "$(echo "$json" | jq '."error"."errors"[]."reason"')" = '"quotaExceeded"' ];then
 		echo "YT API Quota exceeded, using fallback"
-		lynx --dump --nonumbers -listonly "https://www.youtube.com/channel/$channel_id" | grep 'videos.xml' | xargs curl -s > "${channel_id}.xml"
-
-		#| grep -oE 'yt:video:[^\s\t ]{11,15}' | perl -pe 's/^yt:video:([^ \t\s]*)$/https:\/\/www\.youtube\.com\/watch\?v=\1/' | sed 's|</id||' | grep -vf "$BLACKLIST" >> /tmp/todownload$$ #TODO: Use an actual xml parser instead of regexp
+		lynx --dump --nonumbers -listonly "https://www.youtube.com/channel/$channel_id" | grep 'videos.xml' | xargs curl -s > /tmp/"${channel_id}.xml"
+		#Print every videoId in the feed; iter(tag) matches the tag exactly and skips comment nodes
+		python -c "from lxml import etree
+file=\"/tmp/${channel_id}.xml\"
+root = etree.parse(file)
+for el in root.iter('{http://www.youtube.com/xml/schemas/2015}videoId'):
+	print(el.text)" |
+			sed 's/^/https:\/\/www\.youtube\.com\/watch\?v=/' | grep -vf "$BLACKLIST" >> /tmp/todownload$$
+		rm -f "/tmp/${channel_id}.xml"
 	else
 		echo "$json" | jq '."items"[].id."videoId"' | tr -d '"' | grep -v '^null$'| sed 's/^/https:\/\/www\.youtube\.com\/watch\?v=/' | grep -vf "$BLACKLIST" >> /tmp/todownload$$
 	fi