readings generator script added
This commit is contained in:
		
							
								
								
									
										
											BIN
										
									
								
								euctabletxt.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								euctabletxt.txt
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										15
									
								
								readings_generator/compounds_script
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										15
									
								
								readings_generator/compounds_script
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,15 @@
 | 
			
		||||
#!/bin/bash
#
# Downloads readings from nihongo.monash (WWWJDIC) using the EUC-JP encoding
# of the kanji in question.
#
# Input:  ./index_kanji, one kanji per line, tab-separated:
#           field 1 - index, copied unchanged to the output
#           field 2 - string appended verbatim to the lookup URL
#                     (presumably the percent-encoded EUC-JP bytes of the
#                     kanji, e.g. %B4%D6 -- confirm against index_kanji)
# Output: one line per input line on stdout, tab-separated:
#           index, ON reading, KUN reading, Nanori reading
#         A reading is empty when the page could not be fetched or does not
#         contain that label; a warning goes to stderr on fetch failure.
# Needs:  curl, hxnormalize and hxselect (html-xml-utils), awk, sed.

set -u

readonly INPUT_FILE='index_kanji'
readonly BASE_URL='http://nihongo.monash.edu/cgi-bin/wwwjdic?1MKJ'

# Scratch directory; global (not local) so the EXIT trap can still see it.
TMP_DIR=''

# Prints a message to stderr and aborts the script.
die() {
	printf '%s: %s\n' "${0##*/}" "$*" >&2
	exit 1
}

#######################################
# Prints the line located two lines below each line containing a label,
# with the surrounding <td><b> ... </b></td> markup removed.
# Arguments: $1 - literal label text (e.g. "ON reading")
#            $2 - file holding the normalized <table> markup
# Outputs:   the extracted reading(s) on stdout, nothing if the label is absent
#######################################
extract_reading() {
	local label=$1
	local table_file=$2

	# target is the line number to print; -1 means "no label seen yet".
	awk -v label="$label" '
		BEGIN { target = -1 }
		NR == target { print }
		index($0, label) { target = NR + 2 }
	' "$table_file" | sed 's/<td><b>//; s/<\/b><\/td>//'
}

main() {
	local tool
	for tool in curl hxnormalize hxselect awk sed; do
		command -v "$tool" >/dev/null || die "required tool not found: $tool"
	done
	[[ -r "$INPUT_FILE" ]] || die "cannot read input file: $INPUT_FILE"

	# Private scratch space instead of a fixed ./raw_html, removed on any exit.
	TMP_DIR=$(mktemp -d) || die 'mktemp failed'
	trap 'rm -rf -- "$TMP_DIR"' EXIT
	local raw_html="$TMP_DIR/raw_html"
	local table="$TMP_DIR/table"

	local line index rest eucjp url on kun nanori
	# IFS= keeps leading/trailing tabs; the || clause keeps a final line that
	# has no trailing newline.
	while IFS= read -r line || [[ -n "$line" ]]; do
		# Same fields as `cut -f1` / `cut -f2`: a line without any tab yields
		# the whole line for both.
		index=${line%%$'\t'*}
		rest=${line#*$'\t'}
		eucjp=${rest%%$'\t'*}

		url="${BASE_URL}${eucjp}#"
		if ! curl -fsS "$url" > "$raw_html"; then
			printf '%s: failed to fetch %s (index %s)\n' \
				"${0##*/}" "$url" "$index" >&2
			# Still emit a row with empty readings so the output stays
			# line-aligned with the input.
			: > "$raw_html"
		fi

		# Normalize and select the tables once; all three readings are then
		# extracted from the same result.
		hxnormalize -i 0 -x "$raw_html" | hxselect 'table' > "$table"

		on=$(extract_reading 'ON reading' "$table")
		kun=$(extract_reading 'KUN reading' "$table")
		nanori=$(extract_reading 'Nanori reading' "$table")

		printf '%s\t%s\t%s\t%s\n' "$index" "$on" "$kun" "$nanori"
	done < "$INPUT_FILE"
}

main "$@"
 | 
			
		||||
 | 
			
		||||
#curl -s http://nihongo.monash.edu/cgi-bin/wwwjdic?1MKJ%B4%D6# | hxnormalize -i 0 -x | hxselect 'table' | awk 'BEGIN {printout=100000000000}; NR==printout+2 {print $0}; /ON reading/ {printout=NR}' | sed 's/<td><b>//; s/<\/b><\/td>//'
 | 
			
		||||
							
								
								
									
										2201
									
								
								readings_generator/index_kanji
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2201
									
								
								readings_generator/index_kanji
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2201
									
								
								readings_generator/readings
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2201
									
								
								readings_generator/readings
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user