From cdd50117dd8095d308957c6b7168af985f36f936 Mon Sep 17 00:00:00 2001
From: Alexander Bocken
Date: Sat, 2 Jan 2021 20:53:03 +0100
Subject: [PATCH] yt fallback uses xml parser now

---
NOTE(review): line breaks and tab indentation were reconstructed from a
whitespace-collapsed copy of this patch; confirm them against the target file.
NOTE(review): the removed "sed 's|" line looks truncated in this copy, and the
post-image blob id on the "index" line predates the reviewed additions below.

 .local/bin/tools/ripper | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/.local/bin/tools/ripper b/.local/bin/tools/ripper
index 4c3d094..20c977c 100755
--- a/.local/bin/tools/ripper
+++ b/.local/bin/tools/ripper
@@ -49,9 +49,19 @@ for channel_id in $IDs; do
 	#Fallback to legacy mode if API quota is exceeded
 	if [ "$(echo "$json" | jq '."error"."errors"[]."reason"')" = '"quotaExceeded"' ];then
 		echo "YT API Quota exceeded, using fallback"
-		lynx --dump --nonumbers -listonly "https://www.youtube.com/channel/$channel_id" | grep 'videos.xml' | xargs curl -s > "${channel_id}.xml"
-
-		#| grep -oE 'yt:video:[^\s\t ]{11,15}' | perl -pe 's/^yt:video:([^ \t\s]*)$/https:\/\/www\.youtube\.com\/watch\?v=\1/' | sed 's|> /tmp/todownload$$ #TODO: Use an actual xml parser instead of regexp
+		#Download the channel's Atom feed (the videos.xml link on the channel page)
+		#into a mktemp file instead of a guessable /tmp/CHANNEL_ID.xml path
+		feed_xml="$(mktemp)"
+		lynx --dump --nonumbers -listonly "https://www.youtube.com/channel/$channel_id" | grep 'videos.xml' | xargs curl -s > "$feed_xml"
+		#Print the text of every yt:videoId element; iter(tag) compares tags exactly
+		#and skips comment nodes, and the path is passed via argv, not spliced into code
+		python -c "import sys
+from lxml import etree
+feed = etree.parse(sys.argv[1])
+for el in feed.iter('{http://www.youtube.com/xml/schemas/2015}videoId'):
+	print(el.text)" "$feed_xml" |
+			sed 's/^/https:\/\/www\.youtube\.com\/watch\?v=/' | grep -vf "$BLACKLIST" >> /tmp/todownload$$
+		rm -f "$feed_xml"
 	else
 		echo "$json" | jq '."items"[].id."videoId"' | tr -d '"' | grep -v '^null$'| sed 's/^/https:\/\/www\.youtube\.com\/watch\?v=/' | grep -vf "$BLACKLIST" >> /tmp/todownload$$
 	fi