From ff97886534692f6322d54b419a07204100af2a90 Mon Sep 17 00:00:00 2001 From: Alexander Bocken Date: Fri, 14 Aug 2020 00:49:50 +0200 Subject: [PATCH] updated footnote_scripts to final version --- footnote_scripts/1_create_footnotes | 20 ++++++-- footnote_scripts/3_integrate | 8 +-- footnote_scripts/books | 75 +++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 8 deletions(-) create mode 100644 footnote_scripts/books diff --git a/footnote_scripts/1_create_footnotes b/footnote_scripts/1_create_footnotes index 7a58a91..d0f6eca 100644 --- a/footnote_scripts/1_create_footnotes +++ b/footnote_scripts/1_create_footnotes @@ -1,7 +1,19 @@ #!/bin/zsh #first script used to create a file with nicer formatting, here just for Genesis -for chapter in {0..50}; do - for i in {0..50}; do - cat Gen_$chapter.html | grep "fnm$i" | tr '\n' '@' | perl -pe "s/
(.*?)<\/div>/Genesis\tGen\t1\t$chapter\t\1\t\2\*/g" | perl -pe "s///g" | perl -pe "s/
.*?<\/sup> (.*?)<\/div>/\1/" | tr '@' '\n' +awk 'BEGIN{FS="\t"} {print $1}' books > tmp_book_files +while read book_file +do + book_short="$(grep $book_file books | awk 'BEGIN{FS="\t"} {print $3}')" + long_book="$(grep $book_file books | awk 'BEGIN{FS="\t"} {print $2}')" + book_no="$(grep $book_file books | awk 'BEGIN{FS="\t"} {print $4}')" + chapters=$( ls all_books/$book_file* | wc -l ) + for chapter in {1..$chapters} + do + for i in {1..100} + do + cat all_books/"$book_file"_$chapter.html | grep "fnm$i" | tr '\n' '@' | perl -pe "s/
(.*?)<\/div>/$long_book\t$book_short\t$book_no\t$chapter\t\1\t*/g" | perl -pe "s///g" | perl -pe "s/
.*?<\/sup> (.*?)<\/div>/\1/" | tr '@' '\n' + done done -done +done < tmp_book_files +#format of books: file book_short book_long book_no +#Still leaves some html formatting in there for some reason, needs additional stripping diff --git a/footnote_scripts/3_integrate b/footnote_scripts/3_integrate index 829654d..bc55d93 100644 --- a/footnote_scripts/3_integrate +++ b/footnote_scripts/3_integrate @@ -2,15 +2,15 @@ #Adds the footnotes in the right location in the larger tsv file while read line; do start_of_line="$(echo "$line" | awk 'BEGIN{FS="\t"}{printf("%s\t%s\t%d\t%d\t%d\t\n",$1,$2,$3,$4,$5)}')" - if grep -q "$start_of_line" notes_formatted; then - count=$( grep "$start_of_line" notes_formatted | wc -l ) + if grep -q "$start_of_line" formatted_all_footnotes; then + count=$( grep "$start_of_line" formatted_all_footnotes | wc -l ) printf '%s' "$line" for i in {1..$count}; do printf '*' done printf '\n' - grep "$start_of_line" notes_formatted + grep "$start_of_line" formatted_all_footnotes else echo "$line" fi -done