From 13771dcd92e6fd94fa383f19ba6c5b91b45feb84 Mon Sep 17 00:00:00 2001 From: Alexander Bocken Date: Sat, 29 May 2021 16:25:36 +0200 Subject: [PATCH] script and installer added --- Makefile | 19 ++++++ threadwatcher | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 Makefile create mode 100755 threadwatcher diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4a4a116 --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +# threadwatcher +# See LICENSE file for copyright and license details. + +VERSION = 1.0 + +# paths +DESTDIR= ~/.local/bin +SRC = threadwatcher + + +install: + mkdir -p $(DESTDIR) + cp -f $(SRC) $(DESTDIR)/ + chmod 755 $(DESTDIR)/$(SRC) + +uninstall: + rm -f $(DESTDIR)/$(SRC) + +.PHONY: install uninstall diff --git a/threadwatcher b/threadwatcher new file mode 100755 index 0000000..1d441a8 --- /dev/null +++ b/threadwatcher @@ -0,0 +1,171 @@ +#!/bin/bash +#A script that interacts with 4chans API to checks for media to download out of threads. +#It uses the file name used by the uploader. +#(and adds post no. to distinguish possible duplicate file names) +#consider using it in a cronjob intermittently with something like +#*/10 * * * * /home//.local/bin/threadwatcher scan +THREADWATCHER_DIR=${XDG_DATA_HOME:-$HOME/.local/share}/4chan_watcher +URLFILE="$THREADWATCHER_DIR/threads" +TMP_URLFILE=/tmp/4chan_thread_watcher_tmp$$ + +[ -d "$THREADWATCHER_DIR" ] || mkdir -p "$THREADWATCHER_DIR" +[ -f "$URLFILE" ] || touch "$URLFILE" + +#Cronjob Notifications +DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/$(id -u)/bus +export DBUS_SESSION_BUS_ADDRESS +export DISPLAY=:0.0 + +#included personal prompt script here as function for portability. +prompt(){ + [ "$(printf "No\\nYes" | dmenu -i -n -p "$1" -nb darkred -sb red -sf white -nf gray )" = "Yes" ] && $2 +} + +scan(){ + while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do + sleep 1 + done + #Create lock file to stop override of URLFILE while scanning + touch /tmp/threadwatcher.lock + ping -q -c 1 1.1.1.1 > /dev/null || ping -q -c 1 1.0.0.1 > /dev/null || ping -q -c 1 example.org || { echo "No internet connection detected."; exit ;} + if [ $(wc -l < "$URLFILE") -gt 0 ]; then + echo "scanning threads..." + else + echo "no threads to watch over currently" + exit + fi + # READS URLFILE + while read -r line; do + running_dls=0 + url="$(echo "$line" | cut -f1)" + dl_location="$(echo "$line" | cut -f2)" + mkdir -p "$dl_location" + json_url="$(echo "$url" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')" + curl -s -L "$json_url" | jq . > /tmp/content$$ + thread_title="$(jq '.posts[0].sub' < /tmp/content$$ | tr -d '"')" + echo "scanning $url ($thread_title)" + echo "downloading to $dl_location" + if [ -z "$( /dev/null || ping -q -c 1 1.0.0.1 > /dev/null || ping -q -c 1 example.org + then + echo "Thread $url not found ($dl_location) deleting from cached list of threads to watch" + notify-send "threadwatcher" "Thread downloading to $(echo "$dl_location" | sed "s|$HOME|~|") is complete now." + else + echo "No internet connection detected." + exit + fi + continue + else + echo "$line" >> "$TMP_URLFILE" + mkdir -p "$dl_location" + fi + files_json="$(jq '.posts[] | if has("filename") then {filename: "\(.no)_\(.filename)\(.ext)", location: "\(.tim)\(.ext)", md5: .md5} else null end ' < /tmp/content$$ | grep -vE '^null$')" + rm /tmp/content$$ + #pastes together a multiline var using process substitution with + #layout: filename location md5 + #only reason for bash here with process substitution + files="$(paste <(paste <(echo "$files_json" | jq '.filename' | tr -d '"') <(echo "$files_json" | jq '.location' | tr -d '"')) <(echo "$files_json" | jq '.md5' | tr -d '"'))" + echo "$files" | while read -r file_line; do + #better cleanup like in booksplitter? + filename="$(echo "$file_line" | cut -f1 | tr ' ' '_')" + master_location="$(echo "$file_line" | cut -f2 | tr -d '"')" + filelocation="$dl_location/$filename" + correct_md5="$(echo "$file_line" | cut -f3)" + [ -f "$filelocation" ] && slave_md5="$(openssl dgst -md5 -binary "$filelocation" | openssl enc -base64)" + board="$(echo "$url" | cut -d '/' -f4)" + file_url="https://i.4cdn.org/$board/$master_location" + if [ -f "$filelocation" ] && [ "$correct_md5" = "$slave_md5" ]; then + true + else + if [ "$correct_md5" != "$slave_md5" ] && [ -f "$filelocation" ]; then + rm "$filelocation" + echo "removed $filename because of incorrect checksum, redownloading." + fi + [ -f "$filelocation" ] || + touch "$filelocation" #to keep atime order correct? + #limit concurrent dls + if [ $running_dls -gt 25 ]; then + wait + running_dls=0 + fi + wget -q -O "$filelocation" "$file_url" & + echo downloading "$filelocation" + ((running_dls=running_dls+1)) + fi + done + wait + done < "$URLFILE" + mv "$TMP_URLFILE" "$URLFILE" + rm /tmp/threadwatcher.lock +} + +makepathabsolute(){ + if echo "$1" | grep -qE '^/'; then + dl_location="$1" + else + dl_location="$HOME/$1" + fi + echo "$dl_location" +} + + +case "$1" in + "add") dl_location="$(makepathabsolute "$3")" + if grep -qP "^$2\t" "$URLFILE"; then + dl_location_already="$(grep -P "^$2\t" "$URLFILE" | cut -f2)" + notify-send "threadwatcher" "Thread already being watched. currently downloads to $(echo "$dl_location_already" | sed "s|$HOME|~|")" + prompt "Do you want to change download directory to $3?" && + new_location="$dl_location" || + exit 0 + [ -z "$new_location" ] && exit + sed -i "s|$dl_location_already|$new_location|" "$URLFILE" + + ## Move already downloaded files to new location + ping -q -c 1 1.1.1.1 > /dev/null || ping -q -c 1 1.0.0.1 > /dev/null || ping -q -c 1 example.org || { echo "No internet connection detected."; exit ;} + mkdir -p "$new_location" + url="$2" + json_url="$(echo "$url" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')" + curl -s -L "$json_url" | jq . > /tmp/content$$ + files_json="$(jq '.posts[] | if has("filename") then {filename: "\(.no)_\(.filename)\(.ext)", location: "\(.tim)\(.ext)"} else empty end ' < /tmp/content$$)" + rm /tmp/content$$ + #only reason for bash here with process substitution + files="$(paste <(echo "$files_json" | jq '.filename' | tr -d '"') <(echo "$files_json" | jq '.location' | tr -d '"'))" + echo "$files" | while read -r file_line; do + filename="$(echo "$file_line" | cut -f1 | tr ' ' '_')" + mv -v "$dl_location_already/$filename" "$new_location" + done + rmdir --ignore-fail-on-non-empty "$dl_location_already" + notify-send "threadwatcher" "already downloaded files moved to $new_location. New files will also be downloaded there" + + else + printf "%s\t%s\n" "$2" "$dl_location" | tee -ai "$URLFILE" + echo "added $2 to threadwatcher list. Downloading to $dl_location" + fi + echo "dl_location:$dl_location" + # Wait for last scan to finish in case of quick successive additions. + # Otherwise there is a potential loss of threads + scan;; + "scan") scan;; + "list") printf "Thread:\t\t\t\t\t\tDownload location:\n" + sed "s|$HOME|~|" "$URLFILE";; + "clean") + echo "Watchlist used up to now:" + cat "$URLFILE" + prompt "Do you want to stop watching over all current threads?" || exit 0 + echo "Deleting..." + rm "$URLFILE" + touch "$URLFILE";; + "edit") ${EDITOR:-vim} "$URLFILE";; + *)echo "Incorrect usage. Correct usage: +threadwatcher [add URL DL_LOCATION] [list] [edit] [clean] + +add URL DL_LOCATION + downloads specified thread to given locaton. Paths can be relative to HOME or absolute. +list + lists all currently watched URLs and where they are downloading to +edit + open threads file in \$EDITOR/vim to manually edit error +clean + deletes threads file. This will not delete already downloaded material." && exit 1;; +esac