readings generator script added
This commit is contained in:
commit
6cc5d7a501
BIN
euctabletxt.txt
Normal file
BIN
euctabletxt.txt
Normal file
Binary file not shown.
15
readings_generator/compounds_script
Executable file
15
readings_generator/compounds_script
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
# Downloads kanji readings from the WWWJDIC server at nihongo.monash.edu,
# looking each kanji up by its EUC-JP code.
#
# Usage:   compounds_script [INDEX_FILE]
# Input:   INDEX_FILE (default: index_kanji in the current directory), one
#          kanji per line as "<index><TAB><EUC-JP code>". The code is pasted
#          verbatim into the query URL.
# Output:  one line per input line on stdout:
#          "<index><TAB><ON><TAB><KUN><TAB><Nanori>". A reading that could not
#          be found or fetched is left empty; problems are reported on stderr.
# Needs:   curl, hxnormalize, hxselect, awk, sed.

# Scratch file holding the page currently being processed. Created with
# mktemp in main() and removed by the EXIT trap, so an interrupted run does
# not leave a stray file behind.
raw_html=''

# Removes the scratch file, if one was created.
cleanup() {
  if [[ -n "$raw_html" ]]; then
    rm -f -- "$raw_html"
  fi
}

#######################################
# Pulls one reading out of the lookup table markup.
# Arguments: $1 - label text to look for, e.g. "ON reading" (literal match)
# Inputs:    stdin - table markup as produced by
#            `hxnormalize -i 0 -x | hxselect 'table'`
# Outputs:   the line located two lines below a line containing the label,
#            with the first "<td><b>" and "</b></td>" removed; prints nothing
#            when the label never appears.
#######################################
extract_reading() {
  awk -v label="$1" '
    BEGIN { wanted = -1 }
    NR == wanted { print }
    index($0, label) { wanted = NR + 2 }
  ' | sed 's/<td><b>//; s/<\/b><\/td>//'
}

#######################################
# Reads the index file and prints one tab-separated row of readings per line.
# Arguments: $1 - optional path to the index file (default: index_kanji)
# Returns:   0 on completion, 1 if the index file, a required tool or the
#            scratch file is unavailable.
#######################################
main() {
  local input="${1:-index_kanji}"
  local index eucjp url tables on kun nanori tool

  if [[ ! -r "$input" ]]; then
    printf '%s: cannot read index file: %s\n' "${0##*/}" "$input" >&2
    return 1
  fi

  for tool in curl hxnormalize hxselect; do
    if ! command -v "$tool" >/dev/null; then
      printf '%s: required tool not found: %s\n' "${0##*/}" "$tool" >&2
      return 1
    fi
  done

  if ! raw_html="$(mktemp)"; then
    printf '%s: could not create a temporary file\n' "${0##*/}" >&2
    return 1
  fi
  trap cleanup EXIT

  # The `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
  while IFS=$'\t' read -r index eucjp _ || [[ -n "$index" ]]; do
    tables=''
    if [[ -z "$eucjp" ]]; then
      printf '%s: no EUC-JP code for index "%s"; readings left empty\n' \
        "${0##*/}" "$index" >&2
    else
      url="http://nihongo.monash.edu/cgi-bin/wwwjdic?1MKJ${eucjp}#"
      # -f turns HTTP errors into a failure instead of parsing an error page;
      # -sS stays quiet on success but still reports why a fetch failed.
      if curl -fsS "$url" > "$raw_html"; then
        # Normalize and select the tables once; all three readings are
        # extracted from this same text.
        tables="$(hxnormalize -i 0 -x "$raw_html" | hxselect 'table')"
      else
        printf '%s: download failed for index "%s" (%s)\n' \
          "${0##*/}" "$index" "$url" >&2
      fi
    fi

    on="$(extract_reading 'ON reading' <<< "$tables")"
    kun="$(extract_reading 'KUN reading' <<< "$tables")"
    nanori="$(extract_reading 'Nanori reading' <<< "$tables")"

    printf '%s\t%s\t%s\t%s\n' "$index" "$on" "$kun" "$nanori"
  done < "$input"
}

main "$@"
|
||||
|
||||
#curl -s http://nihongo.monash.edu/cgi-bin/wwwjdic?1MKJ%B4%D6# | hxnormalize -i 0 -x | hxselect 'table' | awk 'BEGIN {printout=100000000000}; NR==printout+2 {print $0}; /ON reading/ {printout=NR}' | sed 's/<td><b>//; s/<\/b><\/td>//'
|
2201
readings_generator/index_kanji
Normal file
2201
readings_generator/index_kanji
Normal file
File diff suppressed because it is too large
Load Diff
2201
readings_generator/readings
Normal file
2201
readings_generator/readings
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user