#!/bin/bash
#A script that interacts with 4chans API to checks for media to download out of threads.
#It uses the file name used by the uploader.
#(and adds post no. to distinguish possible duplicate file names)
#consider using it in a cronjob intermittently with something like
#*/10 * * * * /usr/bin/threadwatcher scan

# Data directory: honour $TW_THREADWATCHER_DIR when set and non-empty,
# otherwise fall back to the XDG data dir (default ~/.local/share).
THREADWATCHER_DIR="${TW_THREADWATCHER_DIR:-${XDG_DATA_HOME:-$HOME/.local/share}/threadwatcher}"
URLFILE="$THREADWATCHER_DIR/threads"
AUTOPRUNE=${TW_AUTOPRUNE:-true} #automatically removes ended thread from threads list. Unreliable internet connections can mess up threads list.
AUTODEDUPE=${TW_AUTODEDUPE:-false} #automatically remove dupes after each scanned thread
# Per-process scratch copy of the threads list ($$ keeps instances apart).
TMP_URLFILE=/tmp/4chan_thread_watcher_tmp$$

# Make sure the data directory and the threads list exist before any command runs.
mkdir -p "$THREADWATCHER_DIR"
[ -f "$URLFILE" ] || touch "$URLFILE"

#Cronjob Notifications
# Point notify-send at the user's session bus and display so it also works
# when this script runs from cron (which has neither set).
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/$(id -u)/bus
export DBUS_SESSION_BUS_ADDRESS
export DISPLAY=:0.0

help="threadwatcher [add URL DL_LOCATION] [list] [edit] [clean] [help]

add URL DL_LOCATION
	downloads specified thread to given location. Paths can be relative to \$HOME or absolute.
list	lists all currently watched URLs and where they are downloading to
edit	open threads file in \$EDITOR/vim to manually edit.
clean	deletes threads file. This will not delete already downloaded material.
prune	manually prune list of threads. Deletes all finished threads from list.
dedupe	[DIR]
	remove all duplicate files from current download directories. (Comparing sha512 sums)
	If no DIR is given, will check all currently downloading directories.
help	display this help and exit."
#included personal prompt script here as function for portability.
 | 
						|
prompt(){
 | 
						|
	#use dmenu if installed
 | 
						|
	if [ -n  "$(which dmenu)" ]; then
 | 
						|
		# assumes a few dmenu patches to be installed, remove flags as needed
 | 
						|
		[ "$(printf "No\\nYes" | dmenu -i -n -p "$1" -nb darkred -sb red -sf white -nf gray )" = "Yes" ] && $2
 | 
						|
	#show terminal prompt otherwise
 | 
						|
	else
 | 
						|
		printf "%s (y/N):" "$1"
 | 
						|
		read -r choice
 | 
						|
		if [ "$choice" = "y" ]; then
 | 
						|
			$2
 | 
						|
		else
 | 
						|
			exit
 | 
						|
		fi
 | 
						|
	fi
 | 
						|
}
 | 
						|
 | 
						|
# Normalise a path argument: a path starting with '/' is kept as-is,
# anything else is taken as relative to $HOME. Result is written to stdout.
makepathabsolute(){
	# A case pattern does the prefix test in-process instead of forking
	# echo | grep for every call.
	case "$1" in
		/*) dl_location="$1" ;;
		*) dl_location="$HOME/$1" ;;
	esac
	echo "$dl_location"
}
# Core command: walk every watched thread in $URLFILE, fetch its JSON from
# the 4chan API, and download attachments that are missing or fail their md5
# check. Threads that 404 are pruned (when AUTOPRUNE=true); survivors are
# collected in $TMP_URLFILE and written back to $URLFILE at the end.
# NOTE(review): variables set inside the piped while-loops live in subshells,
# which is why results are passed through $TMP_URLFILE rather than variables.
scan(){
	# If another instance holds the lock, wait for it before touching URLFILE.
	[ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
		echo "Threadwatcher already scanning... waiting for it to finish before rescanning."
	while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
		sleep 1
	done
	#Create lock file to stop override of URLFILE
	touch /tmp/threadwatcher.lock
	# Bail out early when 4chan is unreachable.
	ping -q -c 1 4channel.org > /dev/null|| { echo "Cannot connect to 4chan."; exit 1;}
	if [ "$(wc -l < "$URLFILE")" -gt 0 ]; then
		echo "Scanning threads..."
	else
		echo "No threads to watch over currently."
		exit
	fi

	#tac used to prioritze newly added threads.
	tac "$URLFILE" | while read -r line; do
		running_dls=0
		# threads file layout: URL <tab> download-directory
		url="$(echo "$line" | cut -f1)"
		dl_location="$(echo "$line" | cut -f2)"
		mkdir -p "$dl_location"
		# boards.4chan(nel).org/... -> a.4cdn.org/....json (the JSON API host)
		json_url="$(echo "$url" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')"
		curl -s -L "$json_url" | jq . > /tmp/content$$
		thread_title="$(jq '.posts[0].sub' < /tmp/content$$ | tr -d '"')"
		# shellcheck disable=SC2001
		echo "$url ($thread_title) $(echo "$dl_location" | sed "s|$HOME|~|")"
		# Empty response: thread is gone (finished) or the network dropped.
		if [ -z "$(</tmp/content$$)" ]; then
			#check for internet again, in case something has changed during the download process
			if ping -q -c 1 4channel.org > /dev/null
			then
				if [ "$AUTOPRUNE" = "true" ]; then
					echo "Thread $url not found ($dl_location) pruning from cached list of threads to watch"
					# shellcheck disable=SC2001
					notify-send "threadwatcher" "Thread downloading to $(echo "$dl_location" | sed "s|$HOME|~|") is complete now."
				fi
			else
				echo "Cannot connect to 4chan."
				exit 1
			fi
			# Without autoprune the dead thread stays in the list for next time.
			[ "$AUTOPRUNE" = 'false' ] &&
				echo "$line" >> "$TMP_URLFILE"
			continue
		else
			# Thread still alive: keep watching it.
			echo "$line" >> "$TMP_URLFILE"
			mkdir -p "$dl_location"
		fi
		# One JSON object per post with an attachment: filename becomes
		# "<post no>_<uploader name><ext>", location is the CDN object name.
		files_json="$(jq '.posts[] | if has("filename") then {filename: "\(.no)_\(.filename)\(.ext)", location: "\(.tim)\(.ext)", md5: .md5} else empty end ' < /tmp/content$$ )"
		rm /tmp/content$$
		#pastes together a multiline var using process substitution with
		#layout: filename location md5
		#only real reason for bash here with process substitution
		#(gets messy with lots of temp files otherwise)
		files="$(paste <(paste <(echo "$files_json" | jq '.filename' | tr -d '"') <(echo "$files_json" | jq '.location' | tr -d '"')) <(echo "$files_json" | jq '.md5' | tr -d '"'))"
		echo "$files" | while read -r file_line; do
			#TODO: better cleanup like in booksplitter?
			filename="$(echo "$file_line" | cut -f1 | tr ' ' '_' |
				perl -C -MHTML::Entities -pe 'decode_entities($_);')" #recode html entities as UTF-8
			master_location="$(echo "$file_line" | cut -f2 | tr -d '"')"
			filelocation="$dl_location/$filename"
			correct_md5="$(echo "$file_line" | cut -f3)"
			# md5 of an already-downloaded copy, base64-encoded to match the
			# API's .md5 field format.
			[ -f "$filelocation" ] && slave_md5="$(openssl dgst -md5 -binary "$filelocation" | openssl enc -base64)"
			board="$(echo "$url" | cut -d '/' -f4)"
			file_url="https://i.4cdn.org/$board/$master_location"
			if [ -f "$filelocation" ]  && [ "$correct_md5" = "$slave_md5" ]; then
				true # file already present and intact - nothing to do
			else
				# Present but corrupt (or partially downloaded): redo it.
				if [ "$correct_md5" != "$slave_md5" ] && [ -f "$filelocation" ]; then
					rm "$filelocation"
					echo "[-] $filename because of incorrect checksum, redownloading."
				fi
				#limit concurrent dls
				if [ $running_dls -gt 25 ]; then
					wait
					running_dls=0
				fi
				# Download in the background; the counter above caps parallelism.
				curl -s -o "$filelocation" "$file_url" &
				echo "[+] $filelocation"
				((running_dls=running_dls+1))
			fi
		done
		# Barrier: let all downloads for this thread finish before moving on.
		wait
		if [ "$AUTODEDUPE" = "true" ]; then
			dedupe "$dl_location"
		fi
	done
	# tac again so the file keeps its original (oldest-first) order.
	tac "$TMP_URLFILE" > "$URLFILE"
	cleanup
}
# Like scan() but only removes finished (404'd) threads from $URLFILE
# without downloading anything. Always prunes, regardless of AUTOPRUNE.
prune(){
	# If another instance holds the lock, wait for it before touching URLFILE.
	[ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
	echo "Threadwatcher already scanning... waiting for it to finish before pruning"
	while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
		sleep 1
	done
	#Create lock file to stop override of URLFILE
	touch /tmp/threadwatcher.lock
	# Unlike scan(), release the lock before exiting on network failure.
	ping -q -c 1 4channel.org > /dev/null|| { echo "Cannot connect to 4chan."; cleanup && exit 1;}
	if [ "$(wc -l < "$URLFILE")" -gt 0 ]; then
		echo "Pruning threads..."
	else
		echo "No threads to prune."
		cleanup && exit
	fi

	#tac used to prioritze newly added threads.
	tac "$URLFILE" | while read -r line; do
		# threads file layout: URL <tab> download-directory
		url="$(echo "$line" | cut -f1)"
		dl_location="$(echo "$line" | cut -f2)"
		# boards.4chan(nel).org/... -> a.4cdn.org/....json (the JSON API host)
		json_url="$(echo "$url" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')"
		curl -s -L "$json_url" | jq . > /tmp/content$$
		thread_title="$(jq '.posts[0].sub' < /tmp/content$$ | tr -d '"')"
		# shellcheck disable=SC2001
		echo "$url ($thread_title) $(echo "$dl_location" | sed "s|$HOME|~|")"
		# Empty response: thread is gone (finished) or the network dropped.
		if [ -z "$(</tmp/content$$)" ]; then
			#check for internet again, in case something has changed during the download process
			if ping -q -c 1 4channel.org > /dev/null
			then
				echo "Thread $url not found ($dl_location) pruning from cached list of threads to watch"
				# shellcheck disable=SC2001
				notify-send "threadwatcher" "Thread downloading to $(echo "$dl_location" | sed "s|$HOME|~|") is complete now."
			else
				echo "Cannot connect to 4chan."
				cleanup && exit 1
			fi
			# Pruned: simply do not copy the line into $TMP_URLFILE.
			continue
		else
			echo "Thread still running!"
			echo "$line" >> "$TMP_URLFILE"
		fi
		rm /tmp/content$$
	done
	# tac again so the file keeps its original (oldest-first) order.
	tac "$TMP_URLFILE" > "$URLFILE"
	cleanup
}
# Delete files with identical content (same sha512) from a directory.
# With an argument: dedupe only that directory. Without: dedupe every
# download directory currently listed in $URLFILE.
dedupe(){
	if [ -n "$1" ]; then
		dirs="$1"
		echo "Deduping $1 manually..."
	else
		if [ "$(wc -l < "$URLFILE")" -gt 0 ]; then
			echo "Deduping download directories..."
		else
			echo "No download directories to dedupe."
			cleanup && exit
		fi
		#tac used to prioritze newly added threads.
		# awk '!seen[$0]++' keeps only the first occurrence of each directory.
		dirs="$( tac "$URLFILE" | cut -f2 | awk '!seen[$0]++' )"
	fi
	echo "$dirs" | while read -r dir; do
		echo "Generating checksums for $dir ..."
		#sort explanation: list roughly "human readable" names before just number strings.
		#thus this will result in probably better file names
		# awk 'seen[$1]++' emits every line whose hash (field 1) was already
		# seen, i.e. all duplicates except the first (kept) copy.
		duplicates="$( sha512sum "$dir"/* | sort -r -t _ -k 2h | awk 'seen[$1]++' )"
		if [ -n "$duplicates" ]; then
			echo "Deleting duplicates..."
			# Strip the "hash  " prefix of sha512sum output, leaving the path
			# (cut on spaces; download filenames have spaces replaced by '_').
			echo "$duplicates" | cut -d' ' -f1,2 --complement |
				while read -r file; do
					# Reformat rm -v output ("removed 'x'") to "[-] x".
					rm -v "$file" | sed 's/^removed/[-]/' | tr -d "'"
				done
			echo "$(echo "$duplicates" | wc -l) duplicate files have been deleted in $dir"
		fi
	done
}
# Register a thread to watch: $1 = thread URL, $2 = download location
# (absolute, or relative to $HOME). If the thread is already watched and a
# different location is given, offers to move the existing download there.
add() {
	dl_location="$(makepathabsolute "$2")"
		# URLFILE layout is "URL<tab>dir"; -P gives a literal \t in the pattern.
		if grep -qP "^$1\t" "$URLFILE"; then
			dl_location_already="$(grep -P "^$1\t" "$URLFILE" | cut -f2)"
			# shellcheck disable=SC2001
			notify-send "threadwatcher" "Thread already being watched. currently downloads to $(echo "$dl_location_already" | sed "s|$HOME|~|")"

			if [ "$dl_location" != "$dl_location_already" ]; then
				# prompt() either runs the no-op assignment or exits the script.
				prompt "Do you want to change download directory to $2?" &&
					new_location="$dl_location"
				[ -z "$new_location" ] && exit
				# Wait for last scan to finish in case of quick successive additions.
				# Otherwise there is a potential loss of threads
				[ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
					echo "Threadwatcher currently scanning. Waiting for it to finish before adding new thread and rescanning."
				while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
					sleep 1
				done

				# Point the thread's entry at the new directory.
				sed -i "s|$dl_location_already|$new_location|" "$URLFILE"
			else
				echo "Already downloading thread to same location, exiting..."
				exit 0
			fi

			## Move already downloaded files to new location
			# Try two public resolvers first so a DNS-only outage is distinguishable.
			ping -q -c 1 1.1.1.1 > /dev/null || ping -q -c 1 1.0.0.1 > /dev/null || ping -q -c 1 4channel.org > /dev/null || { echo "No internet connection detected."; exit ;}
			mkdir -p "$new_location"
			url="$1"
			# boards.4chan(nel).org/... -> a.4cdn.org/....json (the JSON API host)
			json_url="$(echo "$url" | sed -E 's/boards\.(4chan|4channel)/a.4cdn/; s/$/.json/')"
			curl -s -L "$json_url" | jq . > /tmp/content$$
			files_json="$(jq '.posts[] | if has("filename") then {filename: "\(.no)_\(.filename)\(.ext)", location: "\(.tim)\(.ext)"} else empty end ' < /tmp/content$$)"
			rm /tmp/content$$
			#only reason for bash here with process substitution
			files="$(paste <(echo "$files_json" | jq '.filename' | tr -d '"') <(echo "$files_json" | jq '.location' | tr -d '"'))"
			echo "$files" | while read -r file_line; do
				# Recreate the on-disk name the same way scan() builds it.
				filename="$(echo "$file_line" | cut -f1 | tr ' ' '_' |
					perl -C -MHTML::Entities -pe 'decode_entities($_);')"
				mv -v "$dl_location_already/$filename" "$new_location"
			done
			# Remove the old directory only if nothing else is left in it.
			rmdir --ignore-fail-on-non-empty "$dl_location_already"
			notify-send "threadwatcher" "already downloaded files moved to $new_location. New files will also be downloaded there"

		else
			# Wait for last scan to finish in case of quick successive additions.
			# Otherwise there is a potential loss of threads
			[ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
				echo "Threadwatcher currently scanning. Waiting for it to finish before adding new thread and rescanning."
			while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
				sleep 1
			done

			# Append the new entry and echo it to the user at the same time.
			printf "%s\t%s\n" "$1" "$dl_location" | tee -ai "$URLFILE"
			echo "added $1 to threadwatcher list. Downloading to $dl_location"
		fi
}
# Release the scan lock and drop the per-process scratch threads file.
# Safe to call when neither exists (-f suppresses the error).
cleanup(){
	rm -f /tmp/threadwatcher.lock "$TMP_URLFILE"
}
case "$1" in
 | 
						|
	"add") 	add "$2" "$3"
 | 
						|
		scan;;
 | 
						|
	"scan") scan;;
 | 
						|
	"list") printf "Thread:\t\t\t\t\t\tDownload location:\n"
 | 
						|
		sed "s|$HOME|~|" "$URLFILE";;
 | 
						|
	"clean")
 | 
						|
		echo "Watchlist used up to now:"
 | 
						|
		cat "$URLFILE"
 | 
						|
		prompt "Do you want to stop watching over all current threads?" || exit 0
 | 
						|
		echo "Deleting..."
 | 
						|
		[ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ] &&
 | 
						|
			echo "Threadwatcher currently scanning. Waiting for it to finish before deleting file"
 | 
						|
		while [ -f /tmp/threadwatcher.lock ] && [ "$(pgrep -c threadwatcher)" -gt 1 ]; do
 | 
						|
			sleep 1
 | 
						|
		done
 | 
						|
 | 
						|
		rm "$URLFILE"
 | 
						|
		touch "$URLFILE";;
 | 
						|
	"edit") #kill all other threadwatcher instances sine they could want to change $URLFILE
 | 
						|
		pgrep threadwatcher | grep -v "^$$$" | xargs -r kill
 | 
						|
		${EDITOR:-vim} "$URLFILE";;
 | 
						|
	"prune") prune;;
 | 
						|
	"dedupe") dedupe "$2";;
 | 
						|
	"help") echo "$help";;
 | 
						|
	*)echo "Incorrect usage. Correct usage:"
 | 
						|
		echo "$help" && exit 1;;
 | 
						|
esac
 |