#!/bin/bash
#
# Prints one tab-separated record per kanji number:
#   number, ON readings, composites, composite kana, composite meanings
# Multiple values inside a column are joined with '|'.
#
# Input files (resolved relative to the current working directory):
#   kanji_ref     - tab-separated; field 1 is the kanji number, field 2 the kanji
#   composite_ref - tab-separated; field 1 looks like "L-<1..4 digits>", field 2
#                   starts with the kanji, fields 3-6 feed the output columns
kanji_ref="Kanji.txt"
composite_ref="composites"

#######################################
# Print the number of '|' characters contained in a string.
# Arguments: $1 - string to inspect
# Outputs:   the count as a decimal number on stdout
#######################################
countpipes() {
	# Locals keep direct (non-subshell) calls from clobbering caller variables.
	local text="$1"
	# Delete every character that is not a pipe; what remains has length == count.
	local pipes_only="${text//[^|]/}"
	printf '%s\n' "${#pipes_only}"
}

#######################################
# Join one tab-separated field of the rows read from stdin into a single
# '|'-separated string, keeping only the first occurrence of each value.
# Arguments: $1 - 1-based field number
# Outputs:   the joined string on stdout (no trailing '|')
#######################################
join_unique_field() {
	cut -f"$1" | awk '!seen[$0]++' | tr '\n' '|' | sed 's/|$//'
}

# For every kanji number, collect the matching rows of the composites file and
# print them as one TSV record. Formatting warnings are written to stdout as
# well, interleaved with the records.
for i in {1..2200}; do
	kanji="$(grep -P "^$i\t" "$kanji_ref" | cut -f2)"
	# With an empty kanji the pattern below degrades to "^L-\d{1,4}\t" and
	# matches every composites row, so skip numbers absent from the kanji file.
	if [[ -z "$kanji" ]]; then
		continue
	fi

	regexp_composites="^L-\d{1,4}\t$kanji"
	composites_line="$(grep -P "$regexp_composites" "$composite_ref")"

	# ON readings are deliberately NOT de-duplicated: one entry per matched row.
	ON_readings="$(printf '%s\n' "$composites_line" | cut -f3 | tr '\n' '|' | sed 's/|$//')"
	if [[ -z "$ON_readings" ]]; then
		# No composites found for this kanji.
		continue
	fi

	composites="$(printf '%s\n' "$composites_line" | join_unique_field 4)"
	composites_kana="$(printf '%s\n' "$composites_line" | join_unique_field 5)"
	composites_meaning="$(printf '%s\n' "$composites_line" | join_unique_field 6)"

	# CHECK FOR CORRECT FORMATTING
	if [[ -z "$composites" ]]; then
		echo "missing composites for no. $i"
	fi
	if [[ -z "$composites_kana" ]]; then
		echo "missing composites kana no. $i"
	fi
	# Square brackets in the kana column are reported as a probable column mix-up.
	if [[ "$composites_kana" == *'['* || "$composites_kana" == *']'* ]]; then
		echo "composite likely in composites_kana for no.$i"
		echo "composite_kana: $composites_kana"
	fi
	if [[ -z "$composites_meaning" ]]; then
		echo "missing composite meaning for $i"
	fi

	# All four columns must hold the same number of '|'-separated entries.
	on_count="$(countpipes "$ON_readings")"
	comp_count="$(countpipes "$composites")"
	kana_count="$(countpipes "$composites_kana")"
	meaning_count="$(countpipes "$composites_meaning")"
	if [[ "$on_count" != "$comp_count" ||
		"$comp_count" != "$kana_count" ||
		"$kana_count" != "$meaning_count" ]]; then
		echo "line-mismatch for kanji no.$i"
	fi

	# WANTED OUTPUT
	printf '%s\t%s\t%s\t%s\t%s\n' "$i" "$ON_readings" "$composites" "$composites_kana" "$composites_meaning"
done