14 lines
		
	
	
		
			990 B
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			14 lines
		
	
	
		
			990 B
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/bin/bash
 | 
						|
# Downloads readings from nihongo.monash.edu, looking up each kanji by its EUC-JP code.
 | 
						|
#######################################
# Extract one reading row from the kanji table markup.
# Prints the line found two lines below every line matching the label,
# with the surrounding <td><b> ... </b></td> wrapper removed.
# Arguments: $1 - label to search for (e.g. "ON reading")
# Inputs:    stdin - normalized <table> markup, one element per line
# Outputs:   the reading text on stdout (nothing if the label is absent)
#######################################
extract_reading() {
	awk -v label="$1" '
		hit && NR == hit + 2 { print }
		$0 ~ label { hit = NR }
	' | sed 's/<td><b>//; s/<\/b><\/td>//'
}

# Scratch file for the downloaded page; a unique name avoids clobbering a
# file called raw_html in the working directory, and the trap removes it
# on every exit path (including interruption).
raw_html="$(mktemp)" || { printf 'error: mktemp failed\n' >&2; exit 1; }
trap 'rm -f -- "$raw_html"' EXIT

# Each line of index_kanji is "<index><TAB><EUC-JP code>". The extra
# [[ -n ... ]] test keeps the final line when the file lacks a trailing newline.
while read -r line || [[ -n "$line" ]]; do
	# First and second tab-separated fields. Like cut, a line without any
	# tab yields the whole line for both fields.
	index="${line%%$'\t'*}"
	rest="${line#*$'\t'}"
	eucjp="${rest%%$'\t'*}"

	url="http://nihongo.monash.edu/cgi-bin/wwwjdic?1MKJ${eucjp}#"
	if ! curl -s "$url" > "$raw_html"; then
		# Still emit the row below so output stays aligned with the input.
		printf 'warning: download failed for %s (%s)\n' "$index" "$url" >&2
	fi

	# Normalize and select the tables once, then reuse for all three readings.
	tables="$(hxnormalize -i 0 -x "$raw_html" | hxselect 'table')"
	on="$(printf '%s\n' "$tables" | extract_reading 'ON reading')"
	kun="$(printf '%s\n' "$tables" | extract_reading 'KUN reading')"
	nanori="$(printf '%s\n' "$tables" | extract_reading 'Nanori reading')"

	# Readings arrive space-separated; join them with the ideographic comma
	# and drop the dangling separator left before each tab / at end of line.
	# $'...' turns \t into a literal tab so the script does not depend on
	# GNU sed's \t escape.
	printf '%s\t%s\t%s\t%s\n' "$index" "$on" "$kun" "$nanori" \
		| sed $'s/ /、/g; s/、\t/\t/g; s/、$//'
done < index_kanji
 |