dotfiles/.local/bin/threadwatcher

113 lines
4.3 KiB
Plaintext
Raw Normal View History

2021-04-07 15:23:34 +02:00
#!/bin/bash
#A script that interacts with 4chan's API to check threads for media to download.
#It uses the file name used by the uploader.
#consider using it in a cronjob intermittently with something like
#*/10 * * * * /home/<yourname>/.local/bin/threadwatcher scan
2021-04-07 16:48:07 +02:00
# Storage layout:
#   THREADWATCHER_DIR  data directory (honours XDG_DATA_HOME)
#   URLFILE            watch list, one "<thread url><TAB><download dir>" per line
#   TMP_URLFILE        scratch copy of the watch list that scan fills with the
#                      threads that are still alive, then moves over URLFILE
THREADWATCHER_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/4chan_watcher"
URLFILE="$THREADWATCHER_DIR/threads"
mkdir -p -- "$THREADWATCHER_DIR" || exit 1
[ -f "$URLFILE" ] || touch -- "$URLFILE"
# mktemp gives an unpredictable name (a fixed /tmp/...$$ path can be pre-created
# or symlinked by another local user) and guarantees the file exists, so the
# final mv in scan also works when no thread survived the scan.
TMP_URLFILE="$(mktemp "${TMPDIR:-/tmp}/4chan_thread_watcher_tmp.XXXXXX")" || exit 1
# Remove the scratch file on every exit path; after a successful scan it has
# already been moved away and rm -f is a no-op.
trap 'rm -f -- "$TMP_URLFILE"' EXIT
2021-04-08 21:57:51 +02:00
2021-04-07 15:23:34 +02:00
# Walks the watch list and downloads every attachment that is missing locally
# or whose MD5 differs from the checksum reported by the API.
# Threads answering 404 are dropped from the watch list; every other thread
# (including ones that could not be fetched this time) is kept.
# Globals:   URLFILE (read, then replaced), TMP_URLFILE (written, then moved)
# Arguments: none
# Outputs:   progress messages on stdout, fetch problems on stderr
# Exits:     1 when no connectivity is detected or no temp file can be made,
#            0 when the watch list is empty
scan(){
    local line url dl_location json_url board content_file http_code
    local files filename master_location correct_md5 filelocation file_url
    local local_md5 running_dls

    # Connectivity probe: two IP targets first so it works without DNS.
    ping -q -c 1 1.1.1.1 > /dev/null ||
        ping -q -c 1 1.0.0.1 > /dev/null ||
        ping -q -c 1 example.org > /dev/null ||
        { echo "No internet connection detected."; exit 1; }

    if [ -s "$URLFILE" ]; then
        echo "scanning threads..."
    else
        echo "no threads to watch over currently"
        exit
    fi

    content_file="$(mktemp)" || { echo "could not create a temporary file" >&2; exit 1; }
    # Start from an empty survivor list so the final mv always has a source,
    # even when every watched thread turned out to be gone.
    : > "$TMP_URLFILE"

    # "|| [ -n ... ]" also processes a last line that lacks a trailing newline.
    while read -r line || [ -n "$line" ]; do
        [ -n "$line" ] || continue
        running_dls=0
        url="$(printf '%s\n' "$line" | cut -f1)"
        echo "scanning $url"
        dl_location="$(printf '%s\n' "$line" | cut -f2)"
        echo "downloading to $dl_location"
        json_url="$(printf '%s\n' "$url" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')"

        # Truncate first: curl may leave the previous thread's data in place
        # when a request produces no body at all.
        : > "$content_file"
        http_code="$(curl -s -L -o "$content_file" -w '%{http_code}' "$json_url")"
        case "$http_code" in
            200)
                printf '%s\n' "$line" >> "$TMP_URLFILE"
                ;;
            404)
                echo "Thread $url not found ($dl_location) deleting from cached list of threads to watch"
                continue
                ;;
            *)
                # Transient failure (timeout, 5xx, ...): do not forget the thread.
                echo "Could not fetch $url (HTTP status $http_code), keeping it on the watch list" >&2
                printf '%s\n' "$line" >> "$TMP_URLFILE"
                continue
                ;;
        esac

        if ! mkdir -p -- "$dl_location"; then
            echo "cannot create $dl_location, skipping $url" >&2
            continue
        fi

        # One line per attachment: "<no>_<uploader name><ext> TAB <tim><ext> TAB <md5>".
        # The uploader name is emitted JSON-encoded and then stripped of double
        # quotes, which is the naming scheme already used for files on disk.
        files="$(jq -r '.posts[]
            | select(has("filename"))
            | [("\(.no)_\(.filename)\(.ext)" | tojson), "\(.tim)\(.ext)", .md5]
            | join("\t")' < "$content_file" | tr -d '"')"

        board="$(printf '%s\n' "$url" | cut -d '/' -f4)"

        # Fed through a here-string rather than a pipe so the loop runs in this
        # shell: the background wget jobs stay children of this shell and the
        # "wait" after the loop really blocks until they are done.
        while IFS=$'\t' read -r filename master_location correct_md5; do
            # A thread without attachments yields a single empty line.
            if [ -z "$filename" ] || [ -z "$master_location" ]; then
                continue
            fi
            # Uploader-chosen name: no spaces, and no "/" so it cannot point
            # outside of dl_location.
            filename="${filename// /_}"
            filename="${filename//\//_}"
            filelocation="$dl_location/$filename"
            file_url="https://i.4cdn.org/$board/$master_location"

            if [ -f "$filelocation" ]; then
                # The API reports MD5 as base64 of the binary digest.
                local_md5="$(openssl dgst -md5 -binary "$filelocation" | openssl enc -base64)"
                if [ "$local_md5" = "$correct_md5" ]; then
                    continue
                fi
                rm -- "$filelocation"
                echo "removed $filename because of incorrect checksum, redownloading."
            fi

            # Create the file right away, in posting order, before the
            # asynchronous download starts (presumably to keep timestamps in
            # thread order).
            touch -- "$filelocation"

            #limit concurrent dls
            if [ "$running_dls" -gt 25 ]; then
                wait
                running_dls=0
            fi
            wget -q -O "$filelocation" "$file_url" &
            echo downloading "$filelocation"
            running_dls=$((running_dls + 1))
        done <<< "$files"
        wait
    done < "$URLFILE"

    rm -f -- "$content_file"
    mv -- "$TMP_URLFILE" "$URLFILE"
}
2021-04-14 18:10:55 +02:00
# Prints an absolute form of a download path.
# Arguments: $1 - path; used unchanged when it starts with "/", otherwise it
#                 is taken relative to $HOME (not to the current directory)
# Outputs:   the resulting path on stdout
# Globals:   HOME (read); no global variable is modified
makepathabsolute(){
    local target
    case "$1" in
        /*) target="$1" ;;
        *)  target="$HOME/$1" ;;
    esac
    printf '%s\n' "$target"
}
2021-04-07 15:23:34 +02:00
# Command dispatch.
#   add URL DL_LOCATION  watch URL (or offer to relocate it if already watched),
#                        then scan
#   scan                 download new media for all watched threads
#   list                 print the watch list with $HOME shortened to ~
#   edit                 open the watch list in $EDITOR (vim by default)
#   clean                print the watch list, then empty it
case "$1" in
    "add")
        if [ -z "$2" ]; then
            printf "Incorrect usage.\n\tthreadwatcher [add URL DL_LOCATION] [scan] [list] [edit] [clean]\n" >&2
            exit 1
        fi
        dl_location="$(makepathabsolute "$3")"

        # Look the URL up by exact string comparison; it is full of regex
        # metacharacters (".", "?") and must not be used as a pattern.
        already_watched=0
        dl_location_already=""
        while IFS= read -r watched_line || [ -n "$watched_line" ]; do
            case "$watched_line" in
                "$2"$'\t'*)
                    already_watched=1
                    dl_location_already="${watched_line#*$'\t'}"
                    dl_location_already="${dl_location_already%%$'\t'*}"
                    break
                    ;;
            esac
        done < "$URLFILE"

        if [ "$already_watched" -eq 1 ]; then
            notify-send "threadwatcher" "Thread already being watched. currently downloads to $dl_location_already"
            choice="$(dmenuinput "If you want to change location of thread to download, please insert new directory now:")"
            # Test the raw answer: makepathabsolute turns "" into "$HOME/",
            # so its result is never empty.
            [ -z "$choice" ] && exit
            new_location="$(makepathabsolute "$choice")"

            if [ "$new_location" != "$dl_location_already" ]; then
                # Rewrite only the entry of this thread, leaving every other
                # line untouched even if it shares a path prefix.
                updated_list="$(mktemp "$URLFILE.XXXXXX")" || exit 1
                while IFS= read -r watched_line || [ -n "$watched_line" ]; do
                    case "$watched_line" in
                        "$2"$'\t'*) printf '%s\t%s\n' "$2" "$new_location" ;;
                        *) printf '%s\n' "$watched_line" ;;
                    esac
                done < "$URLFILE" > "$updated_list"
                mv -- "$updated_list" "$URLFILE"

                mkdir -p -- "$new_location" || exit 1
                for downloaded in "$dl_location_already"/*; do
                    # An unmatched glob stays literal; skip it.
                    [ -e "$downloaded" ] || continue
                    mv -- "$downloaded" "$new_location"/
                done
            fi
            dl_location="$new_location"
            notify-send "threadwatcher" "already downloaded files moved to $new_location. New files will also be downloaded there"
        else
            printf "%s\t%s\n" "$2" "$dl_location" | tee -ai "$URLFILE"
            echo "added $2 to threadwatcher list. Downloading to $dl_location"
        fi
        echo "dl_location:$dl_location"
        scan
        ;;
    "scan")
        scan
        ;;
    "list")
        printf "Thread:\t\t\t\t\t\tDownload location:\n"
        sed "s|$HOME|~|" "$URLFILE"
        ;;
    "clean")
        echo "Watchlist used up to now:"
        cat "$URLFILE"
        echo "Deleting..."
        rm "$URLFILE"
        touch "$URLFILE"
        ;;
    "edit")
        # EDITOR is left unquoted on purpose so values such as "code -w" work.
        ${EDITOR:-vim} "$URLFILE"
        ;;
    *)
        printf "Incorrect usage.\n\tthreadwatcher [add URL DL_LOCATION] [scan] [list] [edit] [clean]\n" >&2
        exit 1
        ;;
esac