Added pronunciation scripts

This commit is contained in:
Alexander Bocken 2021-11-01 17:00:05 +01:00
commit c37c5f4975
Signed by: Alexander
GPG Key ID: 1D237BE83F9B05E8
4 changed files with 2387 additions and 0 deletions

45
pronounciation/audio Executable file
View File

@ -0,0 +1,45 @@
#!/bin/sh
# Download Merriam-Webster pronunciation audio for a list of words.
#
# Usage: audio NO WORDS
#   NO     identifier used as the output filename prefix and in the reports
#   WORDS  newline-separated list of words to look up
#
# For every word the collegiate dictionary API is queried, the first audio
# filename and the first written pronunciation ("mw") are extracted, and the
# audio is saved as "<NO>_<word>.ogg" in the current directory.
#
# Output (stdout), per successfully downloaded word:
#   <NO>\t<audio file>
#   <NO>\t<written pronunciation>
# Words without downloadable audio are appended to ./not_found as
#   <NO>\t<word>
#
# Requires: pass (API key stored at GRE/merriam_webster_api_key), curl, jq.

ext="ogg"
api_base="https://www.dictionaryapi.com/api/v3/references/collegiate/json"
media_base="https://media.merriam-webster.com/audio/prons/en/us"

# Print a short usage message to stderr; returns 2 so that it can double as
# the exit status of the script.
usage() {
  printf 'usage: %s NO WORDS\n' "${0##*/}" >&2
  return 2
}

# Print the media-server subdirectory for the audio base filename $1:
# "bix" / "gg" for those prefixes, "number" for a leading digit or
# punctuation character, otherwise the first character of the name.
audio_subdir() {
  case "$1" in
    bix*) printf 'bix\n' ;;
    gg*) printf 'gg\n' ;;
    [0-9_.,]*) printf 'number\n' ;;
    # ${1#?} is $1 without its first character; stripping that suffix
    # leaves exactly the first character.
    *) printf '%s\n' "${1%"${1#?}"}" ;;
  esac
}

# Append "<no>\t<word>" ($1, $2) to the not_found list.
mark_not_found() {
  printf '%s\t%s\n' "$1" "$2" >> not_found
}

# Process every word in $2 (one per line) under the identifier $1.
# Returns non-zero if the API key cannot be read or the API answers with
# something that is not JSON.
main() {
  no="$1"
  if ! key="$(pass GRE/merriam_webster_api_key)"; then
    printf 'audio: could not read the API key from pass\n' >&2
    return 1
  fi

  printf '%s\n' "$2" | while read -r word; do
    [ -n "$word" ] || continue

    # Words may contain spaces or punctuation ("nest egg"), so they have to
    # be percent-encoded before being placed in the URL path.
    encoded_word="$(jq -rn --arg w "$word" '$w | @uri')"
    response="$(curl -Ls "$api_base/$encoded_word?key=$key")"

    # Skip this word if the API refused the request.
    case "$response" in
      *"Access Denied"*) continue ;;
    esac

    # Anything that is not JSON (e.g. a plain-text error) is fatal.
    # "exit" only leaves the pipeline subshell; its status becomes the
    # status of the loop and therefore of main.
    if ! printf '%s\n' "$response" | jq . > /dev/null 2>&1; then
      printf 'audio: invalid JSON in API response for "%s"\n' "$word" >&2
      exit 1
    fi

    # The "?" suffixes keep jq going past entries that have no "hwi.prs"
    # (or are plain suggestion strings); "// empty" drops null values.
    file="$(printf '%s\n' "$response" \
      | jq -r '.[] | .hwi? | .prs? | .[]? | .sound? | .audio? // empty' \
      | head -n1)"
    pronounciation="$(printf '%s\n' "$response" \
      | jq -r '.[] | .hwi? | .prs? | .[]? | .mw? // empty' \
      | head -n1)"

    # No audio listed for this word: nothing to download.
    if [ -z "$file" ]; then
      mark_not_found "$no" "$word"
      continue
    fi

    subdir="$(audio_subdir "$file")"
    fileloc="${no}_${word}.$ext"
    url="$media_base/$ext/$subdir/$file.$ext"

    # -f makes curl fail on HTTP errors instead of saving the error page.
    if curl -Lsf "$url" -o "$fileloc" \
      && ! grep -q "Access Denied" "$fileloc"; then
      printf '%s\t%s\n' "$no" "$fileloc"
      printf '%s\t%s\n' "$no" "$pronounciation"
    else
      rm -f -- "$fileloc"
      mark_not_found "$no" "$word"
    fi
  done
}

if [ "$#" -lt 2 ]; then
  usage
else
  main "$@"
fi

445
pronounciation/not_found Normal file
View File

@ -0,0 +1,445 @@
2029 being resigned
1976 libel
1968 equipoise
1800 pileous
1788 mise en scene
1783 lucubrate
1729 cant
1714 acarpous
1708 nest egg
1638 jocose vs. jocund
1611 inexorable
1565 ingenuous vs. genial
1358 depraved vs. deprived
1318 ad hominen
1260 edge out
394 at loggerheads
349 aseptic
1669 baseness
1547 stint
1536 reprobate
1454 akantha
1389 extra part 2 of 2.
1441 mony
1436 ious
1409 se
1366 vol
1387 equi
1383 de
1352 urb
1417 uni
1447 tude
1321 sist
1327 sta-
1300 phyt
1326 spir
1296 narr
1324 soph
1287 plic
1286 phon
1275 ocul
1269 mand
1268 myo
2029 being resigned
1800 pileous
1788 mise en scene
1783 lucubrate
1729 cant
1714 acarpous
1708 nest egg
1638 jocose vs. jocund
1611 inexorable
1565 ingenuous vs. genial
1358 depraved vs. deprived
1318 ad hominen
1260 edge out
394 at loggerheads
1669 baseness
1547 stint
1536 reprobate
1454 akantha
1389 extra part 2 of 2.
1441 mony
1436 ious
1409 se
1366 vol
2029 being resigned
1800 pileous
1788 mise en scene
1783 lucubrate
1729 cant
1714 acarpous
1708 nest egg
1638 jocose vs. jocund
1611 inexorable
1565 ingenuous vs. genial
1358 depraved vs. deprived
1318 ad hominen
1260 edge out
394 at loggerheads
1669 baseness
1547 stint
1536 reprobate
1454 akantha
1389 extra part 2 of 2.
1441 mony
1436 ious
1409 se
1366 vol
1387 equi
1383 de
1352 urb
1417 uni
1447 tude
1321 sist
1327 sta-
1300 phyt
1326 spir
1296 narr
1324 soph
1287 plic
1286 phon
1275 ocul
1269 mand
1268 myo
1410 ipso facto
1267 mut
1258 sub-, su-, sou, sous
1244 histo
1241 helic
1220 for
1311 rupt
1207 fer
1205 equ
1177 clin
1158 bas
1306 pyr
1293 prot
1291 poten
586 phob
1283 petr
1276 oper
1274 nov
1397 non
1396 neo
1272 neg
1271 nav
1261 migr
1257 ment
1252 magn
1249 lact
1246 juven
1230 junct
1245 jud
1229 ject
1248 iso
1227 hydr
1242 heli
1239 gon
1203 dur
1198 dign
1195 derm
1384 demi
1193 dec
1190 cruc
1189 chrys
1175 chron
1174 chrom
1188 cert
1187 cerebr
1185 cata
1170 carn
1169 capit
1167 cand
1163 brev
1144 aper
1141 anim
1153 ambul
1138 alt
1137 agr
1067 ger
1066 viv
1065 ver
1106 vene
1069 vac
724 utilis
709 tirade (diatribe)
1063 theo
1103 tempor
702 tele
689 subter
1062 somn
1101 sequ
1100 sent
1099 secut
1061 sci
657 riparia
634 publicus
608 pre
599 polein
583 philo
580 phage (see roots section also)
1094 pathy
563 pater
1057 omni
546 olig
1055 nat
523 naïv
1091 multi
1089 medi
769 to mar
990 malaproprism
479 liber
475 legis (lex)
400 idios
398 ideo
1050 grat
369 geo
359 frater
350 floundering
342 fervid, fervent
340 federis
1080 fect
1047 eu
292 epi
1046 dys
254 dynasthai
1045 dol
1077 dict
215 dia
214 di
1075 cycl
1074 cosm
174 corporal
166 constitutus
839 coffers
1073 cis (sometimes scis)
1380 circum
150 cide
126 caco
76 archaios
60 anthrop
1038 ambi
27 agere
5 ac
5 ac
27 agere
1038 ambi
60 anthrop
76 archaios
126 caco
150 cide
1380 circum
1073 cis (sometimes scis)
839 coffers
166 constitutus
174 corporal
1074 cosm
1075 cycl
214 di
215 dia
1077 dict
1045 dol
254 dynasthai
1046 dys
292 epi
1047 eu
1080 fect
340 federis
342 fervid, fervent
350 floundering
359 frater
369 geo
1050 grat
398 ideo
400 idios
475 legis (lex)
479 liber
990 malaproprism
769 to mar
1089 medi
1091 multi
523 naïv
1055 nat
546 olig
1057 omni
563 pater
1094 pathy
580 phage (see roots section also)
583 philo
599 polein
608 pre
634 publicus
657 riparia
1061 sci
1099 secut
1100 sent
1101 sequ
1062 somn
689 subter
702 tele
1103 tempor
1063 theo
709 tirade (diatribe)
724 utilis
1069 vac
1106 vene
1065 ver
1066 viv
1067 ger
1137 agr
1138 alt
1153 ambul
1141 anim
1144 aper
1163 brev
1167 cand
1169 capit
1170 carn
1185 cata
1187 cerebr
1188 cert
1174 chrom
1175 chron
1189 chrys
1190 cruc
1193 dec
1384 demi
1195 derm
1198 dign
1203 dur
1239 gon
1242 heli
1227 hydr
1248 iso
1229 ject
1245 jud
1230 junct
1246 juven
1249 lact
1252 magn
1257 ment
1261 migr
1271 nav
1272 neg
1396 neo
1397 non
1274 nov
1276 oper
1283 petr
586 phob
1291 poten
1293 prot
1306 pyr
1158 bas
1177 clin
1205 equ
1207 fer
1311 rupt
1220 for
1241 helic
1244 histo
1258 sub-, su-, sou, sous
1267 mut
1410 ipso facto
1268 myo
1269 mand
1275 ocul
1286 phon
1287 plic
1324 soph
1296 narr
1326 spir
1300 phyt
1327 sta-
1321 sist
1447 tude
1417 uni
1352 urb
1383 de
1387 equi
1366 vol
1409 se
1436 ious
1441 mony
1389 extra part 2 of 2.
1454 akantha
1536 reprobate
1547 stint
1669 baseness
394 at loggerheads
1260 edge out
1318 ad hominen
1358 depraved vs. deprived
1565 ingenuous vs. genial
1611 inexorable
1638 jocose vs. jocund
1708 nest egg
1714 acarpous
1729 cant
1783 lucubrate
1788 mise en scene
1800 pileous
2029 being resigned
1366 vol
1409 se
1436 ious
1441 mony
1389 extra part 2 of 2.
1454 akantha
1536 reprobate
1547 stint
1669 baseness
394 at loggerheads
1260 edge out
1318 ad hominen
1358 depraved vs. deprived
1565 ingenuous vs. genial
1611 inexorable
1638 jocose vs. jocund
1708 nest egg
1714 acarpous
1729 cant
1783 lucubrate
1788 mise en scene
1800 pileous
2029 being resigned
1268 myo
1269 mand
1275 ocul
1286 phon
1287 plic
1324 soph
1296 narr
1326 spir
1300 phyt
1327 sta-
1321 sist
1447 tude
1417 uni
1352 urb
1383 de
1387 equi
1366 vol
1409 se
1436 ious
1441 mony
1389 extra part 2 of 2.
1454 akantha
1536 reprobate
1547 stint
1669 baseness
394 at loggerheads
1260 edge out
1318 ad hominen
1358 depraved vs. deprived
1565 ingenuous vs. genial
1611 inexorable
1638 jocose vs. jocund
1708 nest egg
1714 acarpous
1729 cant
1783 lucubrate
1788 mise en scene
1800 pileous
2029 being resigned

1880
pronounciation/to_dl Normal file

File diff suppressed because it is too large Load Diff

17
pronounciation/wrapper Executable file
View File

@ -0,0 +1,17 @@
#!/bin/bash
# Drive ./audio over every entry of ./to_dl, last line first.
#
# to_dl holds one entry per line: "<no>\t<word>" (further tab-separated
# fields are ignored). An entry is skipped when "<no>_<word>.ogg" already
# exists in the current directory or when the pair is already listed in
# ./not_found; otherwise ./audio is invoked with the number and the word.

#######################################
# Check whether an entry is already recorded in ./not_found.
# Arguments: $1 - entry number, $2 - word
# Returns:   0 if the exact line "<no>\t<word>" is present, non-zero
#            otherwise (including when not_found does not exist yet).
#######################################
is_known_failure() {
  local no=$1 word=$2
  [[ -f not_found ]] || return 1
  # -F: the word is data, not a regex ("extra part 2 of 2.", "legis (lex)").
  # -x: match the whole line, so 29 does not match 2029.
  # $'\t' is a real tab; "\t" inside a grep pattern is not.
  grep -qxF -- "${no}"$'\t'"${word}" not_found
}

#######################################
# Walk to_dl in reverse order and download whatever is still missing.
# Returns: 1 if to_dl cannot be read, 0 otherwise.
#######################################
main() {
  local line no rest word
  if [[ ! -r to_dl ]]; then
    printf 'wrapper: to_dl not found or unreadable\n' >&2
    return 1
  fi

  while read -r line; do
    # Field 1 and field 2 of the tab-separated line (same result as
    # cut -f1 / cut -f2, without forking per line).
    no=${line%%$'\t'*}
    rest=${line#*$'\t'}
    word=${rest%%$'\t'*}

    printf '%s\n' "${no}_${word}.ogg"
    if [[ -f "${no}_${word}.ogg" ]]; then
      printf '%s\n' "$word already downloaded"
      continue
    elif is_known_failure "$no" "$word"; then
      printf '%s\n' "we seem to have issues with this word, check manually"
      continue
    fi

    # stdin is detached so the child can never eat the remaining entries;
    # a failure for one word must not stop the run.
    ./audio "$no" "$word" < /dev/null || true
  done < <(tac to_dl)
  return 0
}

main "$@"