205 lines
8.1 KiB
Bash
Executable File
205 lines
8.1 KiB
Bash
Executable File
#!/bin/bash
|
|
#A script that interacts with 4chan's API to check threads for media to download.
|
|
#It uses the file name used by the uploader.
|
|
#(and adds post no. to distinguish possible duplicate file names)
|
|
#consider using it in a cronjob intermittently with something like
|
|
#*/10 * * * * /usr/bin/threadwatcher scan
|
|
#Locations used throughout the script. The data directory follows
#XDG_DATA_HOME when it is set and falls back to ~/.local/share otherwise.
THREADWATCHER_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/threadwatcher"
URLFILE="${THREADWATCHER_DIR}/threads"
TMP_URLFILE="/tmp/4chan_thread_watcher_tmp$$"

#Make sure the data directory and the (possibly empty) watch list exist.
if [ ! -d "$THREADWATCHER_DIR" ]; then
  mkdir -p "$THREADWATCHER_DIR"
fi
if [ ! -f "$URLFILE" ]; then
  touch "$URLFILE"
fi

#Cronjob Notifications
#Export the session bus address and display so that notify-send/dmenu can be
#used when the script is started from cron.
DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus"
DISPLAY=":0.0"
export DBUS_SESSION_BUS_ADDRESS DISPLAY
|
|
|
|
#Usage text printed by the "help" subcommand and on incorrect usage.
#Lists every subcommand the dispatcher accepts, including "scan".
help="threadwatcher [add URL DL_LOCATION] [scan] [list] [edit] [clean] [help]

add URL DL_LOCATION
        downloads specified thread to given location. Paths can be relative to HOME or absolute.
scan    checks all watched threads and downloads new media.
list    lists all currently watched URLs and where they are downloading to
edit    open threads file in \$EDITOR/vim to manually edit.
clean   deletes threads file. This will not delete already downloaded material.
help    display this help and exit."
|
|
|
|
#included personal prompt script here as function for portability.
#######################################
# Asks a No/Yes question through dmenu.
# Arguments: $1 - question shown as the dmenu prompt
#            $2 - optional command line to run when "Yes" is chosen
# Returns:   1 unless "Yes" was chosen; otherwise the status of $2
#            (0 when no command was given)
#######################################
prompt() {
  local answer
  answer="$(printf "No\\nYes" | dmenu -i -n -p "$1" -nb darkred -sb red -sf white -nf gray )"
  [ "$answer" = "Yes" ] || return 1
  #Deliberately unquoted: $2 may carry a command together with its arguments.
  $2
}
|
|
|
|
#######################################
# Turns a download location into an absolute path.
# Paths that already start with "/" are returned unchanged; anything else is
# taken to be relative to HOME.
# Globals:   HOME (read)
# Arguments: $1 - absolute path, or path relative to HOME
# Outputs:   the absolute path on stdout
#######################################
makepathabsolute() {
  #Kept local so that calling the function never clobbers a caller's variable.
  local path="$1"
  case "$path" in
    /*) printf '%s\n' "$path" ;;
    *) printf '%s\n' "$HOME/$path" ;;
  esac
}
|
|
|
|
#######################################
# Scans every watched thread and downloads media that is missing locally or
# whose checksum does not match the one reported by the API.
# Threads whose JSON can no longer be fetched (while 4channel.org is still
# reachable) are reported as complete and dropped from the watch list.
#
# Both loops run in the main shell (no "cmd | while"), so that
#   - "exit" on a lost connection really aborts the script and leaves the
#     watch list untouched instead of overwriting it with a partial list,
#   - "wait" really waits for the background downloads before the lock file
#     is released.
# The surviving entries are collected in an array, so no temp file is needed.
#
# Globals:   URLFILE (read, rewritten), HOME (read)
# Arguments: none
# Outputs:   progress messages on stdout, desktop notification per dead thread
# Returns:   exits 1 when 4chan is unreachable, exits 0 when nothing is watched
#######################################
scan() {
  local -r lock_file=/tmp/threadwatcher.lock
  local -r max_running_dls=25
  local -a watched=() kept=()
  local i line url dl_location json_url content thread_title
  local files_json files file_line board running_dls
  local filename master_location filelocation correct_md5 slave_md5 file_url

  [ -f "$lock_file" ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
    echo "Threadwatcher already scanning... waiting for it to finish before rescanning."
  while [ -f "$lock_file" ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
    sleep 1
  done
  #Create lock file to stop override of URLFILE while scanning
  touch "$lock_file"

  ping -q -c 1 4channel.org > /dev/null || {
    echo "Cannot connect to 4chan."
    rm -f "$lock_file"
    exit 1
  }
  #Any non-empty line counts as a watched thread.
  if grep -q . "$URLFILE"; then
    echo "Scanning threads..."
  else
    echo "No threads to watch over currently."
    rm -f "$lock_file"
    exit 0
  fi

  mapfile -t watched < "$URLFILE"
  #Walk the list backwards to prioritize newly added threads.
  for ((i = ${#watched[@]} - 1; i >= 0; i--)); do
    line="${watched[i]}"
    #Blank lines (e.g. left over from manual editing) are silently dropped.
    [ -n "$line" ] || continue
    running_dls=0
    url="$(cut -f1 <<< "$line")"
    dl_location="$(cut -f2 <<< "$line")"
    mkdir -p "$dl_location"
    json_url="$(echo "$url" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')"
    #Empty when the download failed or the answer was not valid JSON.
    content="$(curl -s -L "$json_url" | jq .)"
    thread_title="$(jq '.posts[0].sub' <<< "$content" | tr -d '"')"
    echo "$url ($thread_title) $(echo "$dl_location" | sed "s|$HOME|~|")"

    if [ -z "$content" ]; then
      #check for internet again, in case something has changed during the download process
      if ping -q -c 1 4channel.org > /dev/null; then
        echo "Thread $url not found ($dl_location) deleting from cached list of threads to watch"
        notify-send "threadwatcher" "Thread downloading to $(echo "$dl_location" | sed "s|$HOME|~|") is complete now."
      else
        echo "Cannot connect to 4chan."
        #URLFILE has not been touched yet, so no thread is lost by aborting.
        rm -f "$lock_file"
        exit 1
      fi
      continue
    fi
    #Thread is alive: keep it. Prepending restores the original file order.
    kept=("$line" "${kept[@]}")

    files_json="$(jq '.posts[] | if has("filename") then {filename: "\(.no)_\(.filename)\(.ext)", location: "\(.tim)\(.ext)", md5: .md5} else empty end ' <<< "$content")"
    #pastes together a multiline var using process substitution with
    #layout: filename location md5
    files="$(paste <(paste <(echo "$files_json" | jq '.filename' | tr -d '"') <(echo "$files_json" | jq '.location' | tr -d '"')) <(echo "$files_json" | jq '.md5' | tr -d '"'))"
    board="$(echo "$url" | cut -d '/' -f4)"

    while IFS= read -r file_line; do
      #A thread without media yields one empty line; nothing to download.
      [ -n "$file_line" ] || continue
      filename="$(cut -f1 <<< "$file_line" | tr ' ' '_')"
      master_location="$(cut -f2 <<< "$file_line" | tr -d '"')"
      correct_md5="$(cut -f3 <<< "$file_line")"
      filelocation="$dl_location/$filename"
      file_url="https://i.4cdn.org/$board/$master_location"

      if [ -f "$filelocation" ]; then
        #Reset first so a checksum from a previous file is never reused.
        slave_md5=""
        slave_md5="$(openssl dgst -md5 -binary "$filelocation" | openssl enc -base64)"
        #Already downloaded and intact.
        [ "$correct_md5" = "$slave_md5" ] && continue
        rm "$filelocation"
        echo "[-] $filename because of incorrect checksum, redownloading."
      fi

      #limit concurrent dls
      if [ "$running_dls" -gt "$max_running_dls" ]; then
        wait
        running_dls=0
      fi
      wget -q -O "$filelocation" "$file_url" &
      echo "[+] $filelocation"
      running_dls=$((running_dls + 1))
    done <<< "$files"
    #Let this thread's downloads finish before moving on to the next one.
    wait
  done

  #Write back only the threads that are still alive.
  if [ "${#kept[@]}" -gt 0 ]; then
    printf '%s\n' "${kept[@]}" > "$URLFILE"
  else
    : > "$URLFILE"
  fi
  rm -f "$lock_file"
}
|
|
|
|
#######################################
# Starts watching a thread, or changes the download location of a thread that
# is already being watched (after confirmation through prompt) and moves the
# files downloaded so far to the new location.
#
# The thread URL and the paths are only ever compared/assigned as plain
# strings (awk with ENVIRON), never interpolated into a regular expression,
# and only the entry of the given thread is rewritten.
#
# Globals:   URLFILE (read, written), HOME (read)
# Arguments: $1 - thread URL
#            $2 - download location, absolute or relative to HOME
# Outputs:   status messages on stdout, usage errors on stderr, notifications
# Returns:   exits 1 on missing arguments; exits 0 when the thread is already
#            downloading to that location, when the change is declined or
#            when no internet connection is available for moving files
#######################################
add() {
  local dl_location dl_location_already new_location
  local json_url files filename updated_list

  if [ -z "$1" ] || [ -z "$2" ]; then
    echo "add needs a thread URL and a download location." >&2
    exit 1
  fi

  dl_location="$(makepathabsolute "$2")"

  #awk exits 0 only when a line whose first column equals the URL exists.
  if dl_location_already="$(THREAD_URL="$1" awk -F'\t' '
      NF > 1 && $1 == ENVIRON["THREAD_URL"] { print $2; found = 1; exit }
      END { exit !found }' "$URLFILE")"; then
    notify-send "threadwatcher" "Thread already being watched. currently downloads to $(echo "$dl_location_already" | sed "s|$HOME|~|")"

    if [ "$dl_location" = "$dl_location_already" ]; then
      echo "Already downloading thread to same location, exiting..."
      exit 0
    fi

    prompt "Do you want to change download directory to $2?" || exit 0
    new_location="$dl_location"

    # Wait for last scan to finish in case of quick successive additions.
    # Otherwise there is a potential loss of threads
    [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
      echo "Threadwatcher currently scanning. Waiting for it to finish before adding new thread and rescanning."
    while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
      sleep 1
    done

    #Rewrite the location column of this thread only; other threads that
    #happen to download to the same directory keep their entry.
    updated_list="$(THREAD_URL="$1" NEW_LOCATION="$new_location" awk -F'\t' '
      BEGIN { OFS = "\t" }
      NF > 1 && $1 == ENVIRON["THREAD_URL"] { $2 = ENVIRON["NEW_LOCATION"] }
      { print }' "$URLFILE")" || exit 1
    printf '%s\n' "$updated_list" > "$URLFILE"

    ## Move already downloaded files to new location
    #The API is needed to learn which file names belong to this thread.
    ping -q -c 1 1.1.1.1 > /dev/null || ping -q -c 1 1.0.0.1 > /dev/null || ping -q -c 1 4channel.org > /dev/null || { echo "No internet connection detected."; exit ;}
    mkdir -p "$new_location"
    json_url="$(echo "$1" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')"
    #Same naming scheme as used when downloading: <post no>_<name><ext>,
    #quotes stripped and spaces replaced by underscores.
    files="$(curl -s -L "$json_url" |
      jq '.posts[] | if has("filename") then "\(.no)_\(.filename)\(.ext)" else empty end' |
      tr -d '"' | tr ' ' '_')"
    while IFS= read -r filename; do
      #An empty name would make mv move the whole old directory.
      [ -n "$filename" ] || continue
      #Not downloaded yet: nothing to move.
      [ -e "$dl_location_already/$filename" ] || continue
      mv -v -- "$dl_location_already/$filename" "$new_location"
    done <<< "$files"
    rmdir --ignore-fail-on-non-empty "$dl_location_already"
    notify-send "threadwatcher" "already downloaded files moved to $new_location. New files will also be downloaded there"

  else
    # Wait for last scan to finish in case of quick successive additions.
    # Otherwise there is a potential loss of threads
    [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
      echo "Threadwatcher currently scanning. Waiting for it to finish before adding new thread and rescanning."
    while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
      sleep 1
    done

    printf "%s\t%s\n" "$1" "$dl_location" | tee -ai "$URLFILE"
    echo "added $1 to threadwatcher list. Downloading to $dl_location"
  fi
  echo "dl_location:$dl_location"
}
|
|
|
|
#Entry point: the first argument selects the subcommand.
case "$1" in
  add)
    #Register the thread, then fetch its media right away.
    add "$2" "$3"
    scan
    ;;
  scan)
    scan
    ;;
  list)
    printf "Thread:\t\t\t\t\t\tDownload location:\n"
    sed "s|$HOME|~|" "$URLFILE"
    ;;
  clean)
    echo "Watchlist used up to now:"
    cat "$URLFILE"
    prompt "Do you want to stop watching over all current threads?" || exit 0
    echo "Deleting..."
    #Do not pull the list away from under a scan that is still running.
    if [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; then
      echo "Threadwatcher currently scanning. Waiting for it to finish before deleting file"
    fi
    while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
      sleep 1
    done

    rm "$URLFILE"
    touch "$URLFILE"
    ;;
  edit)
    #Unquoted on purpose so that EDITOR may carry its own arguments.
    ${EDITOR:-vim} "$URLFILE"
    ;;
  help)
    echo "$help"
    ;;
  *)
    echo "Incorrect usage. Correct usage:"
    echo "$help" && exit 1
    ;;
esac
|