threadwatcher/threadwatcher

205 lines
8.1 KiB
Bash
Executable File

#!/bin/bash
# threadwatcher: talks to the 4chan API and downloads the media attached to
# the threads on a watch list.
# Downloaded files keep the name chosen by the uploader, prefixed with the
# post number so that identical upload names stay distinguishable.
# Meant to be run periodically, for example from a cronjob such as
#*/10 * * * * /usr/bin/threadwatcher scan

# Data directory, the watch list stored in it, and a per-process scratch
# copy of the watch list ($$ is the PID of this script).
THREADWATCHER_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/threadwatcher"
URLFILE="$THREADWATCHER_DIR/threads"
TMP_URLFILE="/tmp/4chan_thread_watcher_tmp$$"

# First run: make sure the data directory and an (empty) watch list exist.
if [ ! -d "$THREADWATCHER_DIR" ]; then
  mkdir -p "$THREADWATCHER_DIR"
fi
if [ ! -f "$URLFILE" ]; then
  touch "$URLFILE"
fi

# Cronjob notifications: exported so that notify-send and dmenu can reach the
# user's session bus and display when started without a desktop environment.
DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus"
DISPLAY=:0.0
export DBUS_SESSION_BUS_ADDRESS DISPLAY

# Usage text shown by "help" and on incorrect invocation.
help="threadwatcher [add URL DL_LOCATION] [list] [edit] [clean] [help]
add URL DL_LOCATION
downloads specified thread to given location. Paths can be relative to HOME or absolute.
list lists all currently watched URLs and where they are downloading to
edit open threads file in \$EDITOR/vim to manually edit.
clean deletes threads file. This will not delete already downloaded material.
help display this help and exit."
# Personal prompt script, embedded here as a function for portability.
# Shows a No/Yes menu through dmenu.
# Arguments: $1 - question displayed as the dmenu prompt
#            $2 - optional command line to run when "Yes" is chosen
# Returns:   non-zero unless "Yes" was chosen; otherwise the status of $2
#            (0 when $2 is empty)
prompt(){
  local choice
  choice="$(printf "No\\nYes" | dmenu -i -n -p "$1" -nb darkred -sb red -sf white -nf gray )"
  if [ "$choice" != "Yes" ]; then
    return 1
  fi
  # Intentionally unquoted: $2 is a command string that must be word-split.
  $2
}
# Prints the absolute form of a download path.
# Arguments: $1 - path; used as-is when it starts with "/", otherwise it is
#                 taken to be relative to HOME
# Outputs:   the absolute path on stdout
# Note: the result is only printed; no shell variable of the caller is
# modified, and the prefix test is a shell pattern (no grep process, and no
# line-based matching that could misjudge a multi-line argument).
makepathabsolute(){
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "$HOME/$1" ;;
  esac
}
# Scans every watched thread and downloads attached media that is missing
# locally or whose MD5 differs from the checksum reported by the API.
# Threads whose JSON can no longer be fetched (while 4chan itself is still
# reachable) are dropped from the watch list.
# Globals:   URLFILE (read and rewritten), TMP_URLFILE (scratch list),
#            HOME (read, to abbreviate paths in messages)
# Outputs:   progress messages on stdout; a desktop notification for every
#            thread that is gone
# Exits:     0 when the watch list is empty, 1 when 4chan cannot be reached
#            or no temporary file can be created. On every such exit the
#            watch list is left untouched and the lock file is removed.
scan(){
  local lock_file=/tmp/threadwatcher.lock
  local content_file threads line url dl_location json_url thread_title
  local files_json files file_line filename master_location filelocation
  local correct_md5 slave_md5 board file_url running_dls

  [ -f "$lock_file" ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
    echo "Threadwatcher already scanning... waiting for it to finish before rescanning."
  while [ -f "$lock_file" ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
    sleep 1
  done
  #Create lock file to stop override of URLFILE while scanning
  touch "$lock_file"
  if ! ping -q -c 1 4channel.org > /dev/null; then
    echo "Cannot connect to 4chan."
    rm -f -- "$lock_file"
    exit 1
  fi
  if [ "$(wc -l < "$URLFILE")" -gt 0 ]; then
    echo "Scanning threads..."
  else
    echo "No threads to watch over currently."
    rm -f -- "$lock_file"
    exit 0
  fi
  # Unpredictable scratch file for the JSON of the thread being processed.
  content_file="$(mktemp)" || {
    echo "Cannot create temporary file."
    rm -f -- "$lock_file"
    exit 1
  }
  # Start from an empty scratch list so the final tac works even when no
  # thread survives the scan.
  : > "$TMP_URLFILE"
  #tac used to prioritize newly added threads.
  threads="$(tac "$URLFILE")"
  # Both loops read from here-strings instead of pipelines: a pipeline would
  # run the loop body in a subshell, where "exit" only leaves the subshell and
  # "wait" cannot see downloads started by a nested subshell.
  while read -r line; do
    [ -n "$line" ] || continue
    running_dls=0
    url="$(echo "$line" | cut -f1)"
    dl_location="$(echo "$line" | cut -f2)"
    json_url="$(echo "$url" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')"
    curl -s -L "$json_url" | jq . > "$content_file"
    thread_title="$(jq '.posts[0].sub' < "$content_file" | tr -d '"')"
    echo "$url ($thread_title) $(echo "$dl_location" | sed "s|$HOME|~|")"
    if [ -z "$(<"$content_file")" ]; then
      #check for internet again, in case something has changed during the download process
      if ping -q -c 1 4channel.org > /dev/null; then
        echo "Thread $url not found ($dl_location) deleting from cached list of threads to watch"
        notify-send "threadwatcher" "Thread downloading to $(echo "$dl_location" | sed "s|$HOME|~|") is complete now."
        continue
      fi
      # Connection lost: abort without rewriting URLFILE, otherwise every
      # thread that has not been processed yet would be dropped from the list.
      echo "Cannot connect to 4chan."
      rm -f -- "$content_file" "$TMP_URLFILE" "$lock_file"
      exit 1
    fi
    echo "$line" >> "$TMP_URLFILE"
    mkdir -p "$dl_location"
    files_json="$(jq '.posts[] | if has("filename") then {filename: "\(.no)_\(.filename)\(.ext)", location: "\(.tim)\(.ext)", md5: .md5} else empty end ' < "$content_file")"
    #pastes together a multiline var using process substitution with
    #layout: filename location md5
    #only real reason for bash here with process substitution
    #(gets messy with lots of temp files otherwise)
    files="$(paste <(paste <(echo "$files_json" | jq '.filename' | tr -d '"') <(echo "$files_json" | jq '.location' | tr -d '"')) <(echo "$files_json" | jq '.md5' | tr -d '"'))"
    # The board name is the same for every file of a thread.
    board="$(echo "$url" | cut -d '/' -f4)"
    while read -r file_line; do
      # A thread without attachments yields a single empty line.
      [ -n "$file_line" ] || continue
      #TODO: better cleanup like in booksplitter?
      filename="$(echo "$file_line" | cut -f1 | tr ' ' '_')"
      master_location="$(echo "$file_line" | cut -f2 | tr -d '"')"
      filelocation="$dl_location/$filename"
      correct_md5="$(echo "$file_line" | cut -f3)"
      file_url="https://i.4cdn.org/$board/$master_location"
      if [ -f "$filelocation" ]; then
        slave_md5="$(openssl dgst -md5 -binary "$filelocation" | openssl enc -base64)"
        # Already downloaded and intact: nothing to do.
        [ "$correct_md5" = "$slave_md5" ] && continue
        rm "$filelocation"
        echo "[-] $filename because of incorrect checksum, redownloading."
      fi
      #limit concurrent dls
      if [ "$running_dls" -gt 25 ]; then
        wait
        running_dls=0
      fi
      wget -q -O "$filelocation" "$file_url" &
      echo "[+] $filelocation"
      ((running_dls=running_dls+1))
    done <<< "$files"
    # Let the downloads of this thread finish before moving on.
    wait
  done <<< "$threads"
  # TMP_URLFILE was filled in reverse order; tac restores the original order.
  tac "$TMP_URLFILE" > "$URLFILE"
  rm -f -- "$TMP_URLFILE" "$content_file"
  rm -f -- "$lock_file"
}
# Adds a thread to the watch list. If the thread is already watched with a
# different download directory, asks (via prompt) whether to switch, updates
# the list entry of that thread only and moves the files already downloaded.
# Globals:   URLFILE (read and modified), HOME (read)
# Arguments: $1 - thread URL
#            $2 - download directory, absolute or relative to HOME
# Outputs:   status messages on stdout, desktop notifications
# Exits:     0 when the location is unchanged or the user declines the move;
#            also exits when no internet connection is detected before moving
add() {
  local lock_file=/tmp/threadwatcher.lock
  local dl_location dl_location_already new_location
  local json_url filenames filename list_copy

  dl_location="$(makepathabsolute "$2")"
  # Exact string comparison on the first tab-separated field: the URL is not
  # interpreted as a regular expression.
  if TW_URL="$1" awk -F '\t' 'BEGIN { missing = 1 } $1 == ENVIRON["TW_URL"] { missing = 0; exit } END { exit missing }' "$URLFILE"; then
    dl_location_already="$(TW_URL="$1" awk -F '\t' '$1 == ENVIRON["TW_URL"] { print $2; exit }' "$URLFILE")"
    notify-send "threadwatcher" "Thread already being watched. currently downloads to $(echo "$dl_location_already" | sed "s|$HOME|~|")"
    if [ "$dl_location" = "$dl_location_already" ]; then
      echo "Already downloading thread to same location, exiting..."
      exit 0
    fi
    prompt "Do you want to change download directory to $2?" || exit 0
    new_location="$dl_location"
    [ -n "$new_location" ] || exit 0
    # Wait for last scan to finish in case of quick successive additions.
    # Otherwise there is a potential loss of threads
    [ -f "$lock_file" ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
      echo "Threadwatcher currently scanning. Waiting for it to finish before adding new thread and rescanning."
    while [ -f "$lock_file" ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
      sleep 1
    done
    # Replace the download directory of this thread only. A textual
    # substitution over the whole file would also rewrite other threads whose
    # directory merely contains the old path.
    list_copy="$(mktemp)" || { echo "Cannot create temporary file."; exit 1; }
    if TW_URL="$1" TW_NEW="$new_location" awk 'BEGIN { FS = OFS = "\t" } $1 == ENVIRON["TW_URL"] { $2 = ENVIRON["TW_NEW"] } { print }' "$URLFILE" > "$list_copy"; then
      cat "$list_copy" > "$URLFILE"
    fi
    rm -f -- "$list_copy"
    ## Move already downloaded files to new location
    ping -q -c 1 1.1.1.1 > /dev/null || ping -q -c 1 1.0.0.1 > /dev/null || ping -q -c 1 4channel.org > /dev/null || { echo "No internet connection detected."; exit ;}
    mkdir -p "$new_location"
    json_url="$(echo "$1" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')"
    # Local file names are "<post no>_<uploader name><ext>" with quotes
    # removed and spaces replaced by underscores.
    filenames="$(curl -s -L "$json_url" | jq '.posts[] | if has("filename") then "\(.no)_\(.filename)\(.ext)" else empty end' | tr -d '"' | tr ' ' '_')"
    while IFS= read -r filename; do
      # An empty name would make mv move the whole old directory.
      [ -n "$filename" ] || continue
      # Files that were never downloaded have nothing to move.
      [ -e "$dl_location_already/$filename" ] || continue
      mv -v -- "$dl_location_already/$filename" "$new_location"
    done <<< "$filenames"
    rmdir --ignore-fail-on-non-empty "$dl_location_already"
    notify-send "threadwatcher" "already downloaded files moved to $new_location. New files will also be downloaded there"
  else
    # Wait for last scan to finish in case of quick successive additions.
    # Otherwise there is a potential loss of threads
    [ -f "$lock_file" ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
      echo "Threadwatcher currently scanning. Waiting for it to finish before adding new thread and rescanning."
    while [ -f "$lock_file" ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
      sleep 1
    done
    printf "%s\t%s\n" "$1" "$dl_location" | tee -ai "$URLFILE"
    echo "added $1 to threadwatcher list. Downloading to $dl_location"
  fi
  echo "dl_location:$dl_location"
}
# Command dispatch: the first argument selects the action.
case "$1" in
  add)
    # Register the thread, then scan right away.
    add "$2" "$3"
    scan
    ;;
  scan)
    scan
    ;;
  list)
    # Print the watch list with HOME abbreviated to ~.
    printf "Thread:\t\t\t\t\t\tDownload location:\n"
    sed "s|$HOME|~|" "$URLFILE"
    ;;
  clean)
    echo "Watchlist used up to now:"
    cat "$URLFILE"
    if ! prompt "Do you want to stop watching over all current threads?"; then
      exit 0
    fi
    echo "Deleting..."
    # Do not pull the list away from under a scan that is still running.
    if [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; then
      echo "Threadwatcher currently scanning. Waiting for it to finish before deleting file"
    fi
    until [ ! -f /tmp/threadwatcher.lock ] || ! [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
      sleep 1
    done
    # Replace the list with an empty one; downloaded files are not touched.
    rm "$URLFILE"
    touch "$URLFILE"
    ;;
  edit)
    # Unquoted on purpose so that EDITOR may carry arguments.
    ${EDITOR:-vim} "$URLFILE"
    ;;
  help)
    echo "$help"
    ;;
  *)
    echo "Incorrect usage. Correct usage:"
    echo "$help" && exit 1
    ;;
esac