# Implements structured JSON output with a hierarchical schema including book
# metadata, chapters, verses, and footnotes. All existing flags (-F, -g, -L)
# are respected in JSON mode.
# (581 lines, 16 KiB, Awk)
BEGIN {
	# Input records are tab-separated. Columns as used by this script:
	#   $1 book name          $2 book abbreviation   $3 book number
	#   $4 chapter number (0 marks introduction text)
	#   $5 verse number       $6 Latin text
	#   $7 German text, or a footnote number on footnote rows
	#   $8 footnote text (footnote rows only)
	FS = "\t"

	# Output width: 120 columns unless ALLIOLI_MAX_WIDTH holds only digits.
	MAX_WIDTH = (ENVIRON["ALLIOLI_MAX_WIDTH"] ~ /^[0-9]+$/) ? int(ENVIRON["ALLIOLI_MAX_WIDTH"]) : 120

	# For reference lookups, parse the query once up front; the per-record
	# rules then only compare against the parsed pieces in p[].
	# (cmd and ref are not assigned in this script; presumably they are
	# supplied by the caller, e.g. via -v.)
	if (cmd == "ref") {
		mode = parseref(ref, p)
		p["book"] = cleanbook(p["book"])
	}
}
# "list" command: print every book once, keyed by its abbreviation, in the
# order the books first appear in the input.
cmd == "list" && !($2 in seen_books) {
	seen_books[$2] = 1
	printf("%s (%s)\n", $1, $2)
}
# Parse a reference string into arr[] and return the kind of reference.
#
# Accepted forms:
#   1.  <book>
#   2.  <book>:?<chapter>
#   3.  <book>:?<chapter>:<verse>
#   3a. <book>:?<chapter>:<verse>[,<verse>]...
#   4.  <book>:?<chapter>-<chapter>
#   5.  <book>:?<chapter>:<verse>-<verse>
#   6.  <book>:?<chapter>:<verse>-<chapter>:<verse>
#   7.  /<search>
#   8.  <book>/search
#   9.  <book>:?<chapter>/search
#
# Keys written to arr (only those that apply): "book", "chapter", "verse",
# "chapter_end", "verse_end", "search", and for form 3a one
# arr["verse", <n>] = 1 entry per listed verse (arr["verse"] is removed).
#
# Returns "exact" (1, 2, 3), "exact_set" (3a), "range" (4, 5),
# "range_ext" (6), "search" (7, 8, 9) or "unknown" when ref fits no form.
# The string is consumed left to right; after each step `ref` holds only the
# part that has not been parsed yet.
function parseref(ref, arr) {
	# --- book name, or a bare search -------------------------------------
	if (match(ref, "^[1-9]?[a-zA-ZäüöÄÜÖ ]+")) {
		# 1, 2, 3, 3a, 4, 5, 6, 8, 9
		arr["book"] = substr(ref, 1, RLENGTH)
		ref = substr(ref, RLENGTH + 1)
	} else if (match(ref, "^/")) {
		# 7
		arr["search"] = substr(ref, 2)
		return "search"
	} else {
		return "unknown"
	}

	# --- chapter ---------------------------------------------------------
	if (match(ref, "^:?[1-9]+[0-9]*")) {
		# 2, 3, 3a, 4, 5, 6, 9
		if (sub("^:", "", ref)) {
			# RLENGTH still counts the colon that sub() just removed.
			arr["chapter"] = int(substr(ref, 1, RLENGTH - 1))
			ref = substr(ref, RLENGTH)
		} else {
			arr["chapter"] = int(substr(ref, 1, RLENGTH))
			ref = substr(ref, RLENGTH + 1)
		}
	} else if (match(ref, "^/")) {
		# 8
		arr["search"] = substr(ref, 2)
		return "search"
	} else if (ref == "") {
		# 1
		return "exact"
	} else {
		return "unknown"
	}

	# --- verse, chapter range, or search within a chapter ----------------
	if (match(ref, "^:[1-9]+[0-9]*")) {
		# 3, 3a, 5, 6
		arr["verse"] = int(substr(ref, 2, RLENGTH - 1))
		ref = substr(ref, RLENGTH + 1)
	} else if (match(ref, "^-[1-9]+[0-9]*$")) {
		# 4
		arr["chapter_end"] = int(substr(ref, 2))
		return "range"
	} else if (match(ref, "^/")) {
		# 9
		arr["search"] = substr(ref, 2)
		return "search"
	} else if (ref == "") {
		# 2
		return "exact"
	} else {
		return "unknown"
	}

	# --- what follows the first verse ------------------------------------
	if (match(ref, "^-[1-9]+[0-9]*$")) {
		# 5
		arr["verse_end"] = int(substr(ref, 2))
		return "range"
	} else if (match(ref, "^-[1-9]+[0-9]*")) {
		# 6 -- anchored at the start: the substr() below assumes the match
		# begins at position 1, so an unanchored pattern could store a bogus
		# chapter_end for malformed input such as ",5-7".
		arr["chapter_end"] = int(substr(ref, 2, RLENGTH - 1))
		ref = substr(ref, RLENGTH + 1)
	} else if (ref == "") {
		# 3
		return "exact"
	} else if (match(ref, "^,[1-9]+[0-9]*")) {
		# 3a -- switch from the single arr["verse"] to a set of verses
		arr["verse", arr["verse"]] = 1
		delete arr["verse"]
		do {
			arr["verse", substr(ref, 2, RLENGTH - 1)] = 1
			ref = substr(ref, RLENGTH + 1)
		} while (match(ref, "^,[1-9]+[0-9]*"))

		if (ref != "") {
			return "unknown"
		}

		return "exact_set"
	} else {
		return "unknown"
	}

	# --- end verse of a cross-chapter range ------------------------------
	if (match(ref, "^:[1-9]+[0-9]*$")) {
		# 6
		arr["verse_end"] = int(substr(ref, 2))
		return "range_ext"
	} else {
		return "unknown"
	}
}
# Normalise a book name for comparison: strip all spaces and lower-case it.
function cleanbook(book) {
	gsub(/ +/, "", book)
	return tolower(book)
}
# Decide whether a record's book matches the (already normalised) query.
# A match is any of: normalised name equals query, normalised abbreviation
# equals query, or the normalised name starts with query.
# Returns the normalised book name on a match; otherwise nothing is
# returned, which callers treat as false.
function bookmatches(book, bookabbr, query) {
	book = cleanbook(book)
	if (book == query || cleanbook(bookabbr) == query || substr(book, 1, length(query)) == query) {
		return book
	}
}
# Print one verse text, word-wrapped.
#
#   verse  text to print (the caller has already printed the "c:v\t" prefix)
#
# Remaining parameters are locals. Honours two environment switches (any
# value other than "" or "0" enables them):
#   ALLIOLI_NOFOOTNOTES  strip superscript footnote markers from the text
#   ALLIOLI_NOLINEWRAP   print the text on one line, no wrapping
# When wrapping, a line holds at most MAX_WIDTH - 8 characters and
# continuation lines start with a tab.
function printverse(verse, word_count, characters_printed,    words, i) {
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		# Alternation instead of a bracket expression: in an awk that treats
		# strings as bytes, [⁰¹²…] would be a set of single bytes and could
		# strip bytes out of other multibyte characters.
		gsub(/(⁰|¹|²|³|⁴|⁵|⁶|⁷|⁸|⁹)+/, "", verse)
	}

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s\n", verse)
		return
	}

	word_count = split(verse, words, " ")
	for (i = 1; i <= word_count; i++) {
		# Wrap before a word that would not fit (counting its leading space).
		if (characters_printed + length(words[i]) + (characters_printed > 0 ? 1 : 0) > MAX_WIDTH - 8) {
			printf("\n\t")
			characters_printed = 0
		}
		if (characters_printed > 0) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
}
# Print a verse as two word-wrapped columns: Latin on the left, German on
# the right, separated by " | ".
#
#   latin, german  the two texts; all further parameters are locals.
#
# Environment switches (any value other than "" or "0" enables them):
#   ALLIOLI_NOFOOTNOTES  strip superscript footnote markers from the German
#   ALLIOLI_NOLINEWRAP   print "latin | german" on a single line
# Each column is int((MAX_WIDTH - 10) / 2) characters wide. A word longer
# than the column is placed on a line of its own (and overflows the column)
# instead of stalling the loop.
function printverse_bilingual(latin, german, latin_words, german_words, latin_count, german_count, latin_idx, german_idx, col_width, latin_chars, german_chars, latin_line, german_line,    word) {
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		# Alternation instead of a bracket expression: in an awk that treats
		# strings as bytes, [⁰¹²…] would be a set of single bytes and could
		# strip bytes out of other multibyte characters.
		gsub(/(⁰|¹|²|³|⁴|⁵|⁶|⁷|⁸|⁹)+/, "", german)
	}

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s | %s\n", latin, german)
		return
	}

	# Column width is half the total width, minus separators
	col_width = int((MAX_WIDTH - 10) / 2)

	latin_count = split(latin, latin_words, " ")
	german_count = split(german, german_words, " ")
	latin_idx = 1
	german_idx = 1

	# Emit one output row per iteration until both texts are used up.
	while (latin_idx <= latin_count || german_idx <= german_count) {
		latin_line = ""
		german_line = ""
		latin_chars = 0
		german_chars = 0

		# Fill the Latin cell. The first word of a row is always accepted,
		# otherwise an over-long word would never be consumed and the outer
		# loop would never terminate.
		while (latin_idx <= latin_count) {
			word = latin_words[latin_idx]
			if (latin_chars > 0 && latin_chars + length(word) + 1 > col_width) {
				break
			}
			if (latin_chars > 0) {
				latin_line = latin_line " "
				latin_chars++
			}
			latin_line = latin_line word
			latin_chars += length(word)
			latin_idx++
		}

		# Fill the German cell the same way.
		while (german_idx <= german_count) {
			word = german_words[german_idx]
			if (german_chars > 0 && german_chars + length(word) + 1 > col_width) {
				break
			}
			if (german_chars > 0) {
				german_line = german_line " "
				german_chars++
			}
			german_line = german_line word
			german_chars += length(word)
			german_idx++
		}

		# Left column is padded to col_width so the separator lines up.
		printf("\t%-*s | %s\n", col_width, latin_line, german_line)
	}
}
# Print one paragraph of introduction text, word-wrapped at MAX_WIDTH.
#
#   verse  paragraph text (the caller has already printed a leading tab)
#
# Remaining parameters are locals. With ALLIOLI_NOLINEWRAP set (to anything
# other than "" or "0") the text is printed on a single line and the
# function returns immediately. Otherwise, after printing, the global flag
# printed_intrudction is set to 1. The flag's spelling is kept as is because
# processline() reads it under that name.
function printintroductionpar(verse, word_count, characters_printed,    words, i) {
	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s\n", verse)
		return
	}

	word_count = split(verse, words, " ")
	# The first line starts after the caller's tab, counted as 8 columns.
	characters_printed = 8
	for (i = 1; i <= word_count; i++) {
		if (characters_printed + length(words[i]) + (characters_printed > 0 ? 1 : 0) > MAX_WIDTH) {
			printf("\n")
			characters_printed = 0
		}
		# i != 1: the counter starts above zero on the first line only to
		# account for the indent, so no separator goes before the first word.
		if (i != 1 && characters_printed > 0) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
	printed_intrudction = 1
}
# Convert every ASCII digit in num to its Unicode superscript form.
# Characters that are not digits are copied through unchanged.
# Everything after `num` is a local, so the function no longer overwrites
# the globals result, len, i and digit.
function to_superscript_num(num,    sup, out, n, k, ch) {
	# sup[d + 1] is the superscript for digit d.
	split("⁰ ¹ ² ³ ⁴ ⁵ ⁶ ⁷ ⁸ ⁹", sup, " ")

	out = ""
	n = length(num)
	for (k = 1; k <= n; k++) {
		ch = substr(num, k, 1)
		out = out ((ch ~ /^[0-9]$/) ? sup[ch + 1] : ch)
	}
	return out
}
# Print one footnote, prefixed with its number in superscript.
#
#   footnote_num  footnote number (converted with to_superscript_num)
#   footnote      footnote text
#
# Remaining parameters are locals. Behaviour by environment switch (enabled
# by any value other than "" or "0"):
#   ALLIOLI_NOFOOTNOTES  print nothing
#   ALLIOLI_NOLINEWRAP   print "\t\t<num><text>" on one line
# Otherwise a footnote shorter than MAX_WIDTH - 17 characters is pushed to
# the right with leading spaces; longer ones start after two tabs and are
# word-wrapped with tab-indented continuation lines.
function printfootnote(footnote_num, footnote, word_count, characters_printed, sup_num,    words, i, pad) {
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		return
	}

	sup_num = to_superscript_num(footnote_num)

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("\t\t%s%s\n", sup_num, footnote)
		return
	}

	# Short footnote: right-align on a single line.
	if (length(footnote) < MAX_WIDTH - 17) {
		pad = MAX_WIDTH - length(footnote) - 1
		for (i = 1; i <= pad; i++) {
			printf(" ")
		}
		printf("%s%s\n", sup_num, footnote)
		return
	}

	# Long footnote: wrap word by word.
	word_count = split(footnote, words, " ")
	printf("\t\t%s", sup_num)
	# The first line already holds two tabs plus the number, counted as 17.
	characters_printed = 17
	for (i = 1; i <= word_count; i++) {
		if (characters_printed + length(words[i]) + (characters_printed > 0 ? 1 : 0) > MAX_WIDTH - 8) {
			printf("\n\t")
			characters_printed = 0
		}
		# i != 1: the counter starts above zero only because of the indent,
		# so no space is printed between the number and the first word.
		if (i != 1 && characters_printed > 0) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
}
# Handle one selected input record.
#
# In JSON mode (ALLIOLI_JSON_OUTPUT set to anything but "" or "0") nothing
# is printed here: the record is filed into the json_* globals and the END
# block produces the output. In text mode the record is printed right away.
# Either way outputted_records is incremented.
#
# Row kinds are told apart by columns 6-8:
#   footnote      $6 empty, $7 all digits, at least 8 fields
#   introduction  chapter ($4) is 0 and $6 empty
#   verse         $6 (Latin) and/or $7 (German) hold text
function processline() {
	if (ENVIRON["ALLIOLI_JSON_OUTPUT"] != "" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "0") {
		# Book metadata comes from the first record collected.
		if (json_book_name == "") {
			json_book_name = $1
			json_book_abbr = $2
			json_book_num = $3
		}

		if ($6 == "" && $7 ~ /^[0-9]+$/ && NF >= 8) {
			# Footnote: keep the text plus the arrival order of its number.
			json_footnotes[$4, $5, $7] = $8
			json_footnote_count[$4, $5]++
			json_footnote_nums[$4, $5, json_footnote_count[$4, $5]] = $7
		} else if ($4 == 0 && $6 == "") {
			# Introduction paragraphs are joined with single spaces.
			json_intro = (json_intro == "") ? $7 : (json_intro " " $7)
		} else if ($6 != "" || ($7 != "" && $7 !~ /^[0-9]+$/)) {
			# Verse text in either language.
			json_latin[$4, $5] = $6
			json_german[$4, $5] = $7

			# First sighting of this verse: append it to its chapter's list.
			if (!(($4, $5) in json_verse_seen)) {
				json_verse_seen[$4, $5] = 1
				json_verse_count[$4]++
				json_verses[$4, json_verse_count[$4]] = $5
			}

			# First sighting of this chapter: append it to the chapter list.
			if (!($4 in json_chapter_seen)) {
				json_chapter_seen[$4] = 1
				json_chapter_total++
				json_chapters[json_chapter_total] = $4
			}
		}

		outputted_records++
		return
	}

	# ---- text mode ----

	# Leave a gap between an introduction and the first real chapter.
	if (printed_intrudction && $4 != 0) {
		printf("\n\n")
		printed_intrudction = 0
	}

	# Print the book title whenever the book changes.
	if (last_book_printed != $2) {
		print $1
		last_book_printed = $2
	}

	if ($6 == "" && $7 ~ /^[0-9]+$/ && NF >= 8) {
		# Footnote row
		printfootnote($7, $8)
	} else if ($4 == 0 && $6 == "") {
		# Introduction paragraph
		printf("\t")
		printintroductionpar($7)
	} else if ($6 != "" && $7 != "") {
		# Both languages present; the language switches pick what is shown.
		if (ENVIRON["ALLIOLI_ONLY_LATIN"] != "" && ENVIRON["ALLIOLI_ONLY_LATIN"] != "0") {
			printf("%d:%d\t", $4, $5)
			printverse($6)
		} else if (ENVIRON["ALLIOLI_ONLY_GERMAN"] != "" && ENVIRON["ALLIOLI_ONLY_GERMAN"] != "0") {
			printf("%d:%d\t", $4, $5)
			printverse($7)
		} else {
			# Side by side
			printf("%d:%d", $4, $5)
			printverse_bilingual($6, $7)
		}
	} else if ($6 == "" && $7 != "" && $7 !~ /^[0-9]+$/) {
		# Only German text available
		printf("%d:%d\t", $4, $5)
		printverse($7)
	} else if ($6 != "" && $7 == "") {
		# Only Latin text available
		printf("%d:%d\t", $4, $5)
		printverse($6)
	}

	outputted_records++
}
# Record selection for cmd == "ref". `mode` and p[] come from parseref() in
# the BEGIN block; each rule below handles one mode and hands matching
# records to processline().

# exact: whole book, one chapter, or one verse (unset parts match anything).
cmd == "ref" && mode == "exact" &&
bookmatches($1, $2, p["book"]) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
(p["verse"] == "" || $5 == p["verse"]) {
	processline()
}

# exact_set: a list of verses within one chapter.
cmd == "ref" && mode == "exact_set" &&
bookmatches($1, $2, p["book"]) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
p["verse", $5] {
	processline()
}

# range: either a chapter span, or a verse span inside one chapter.
cmd == "ref" && mode == "range" &&
bookmatches($1, $2, p["book"]) &&
((p["chapter_end"] == "" && $4 == p["chapter"]) || ($4 >= p["chapter"] && $4 <= p["chapter_end"])) &&
(p["verse"] == "" || $5 >= p["verse"]) &&
(p["verse_end"] == "" || $5 <= p["verse_end"]) {
	processline()
}

# range_ext: from chapter:verse to chapter_end:verse_end. The four
# alternatives are: tail of the first chapter, any chapter strictly between,
# head of the last chapter, and the case where both ends are in one chapter.
cmd == "ref" && mode == "range_ext" &&
bookmatches($1, $2, p["book"]) &&
(($4 == p["chapter"] && $5 >= p["verse"] && p["chapter"] != p["chapter_end"]) ||
 ($4 > p["chapter"] && $4 < p["chapter_end"]) ||
 ($4 == p["chapter_end"] && $5 <= p["verse_end"] && p["chapter"] != p["chapter_end"]) ||
 (p["chapter"] == p["chapter_end"] && $4 == p["chapter"] && $5 >= p["verse"] && $5 <= p["verse_end"])) {
	processline()
}

# search: case-insensitive regex match, optionally limited to a book and
# chapter.
# NOTE(review): only column 6 (Latin) is searched; rows that carry text in
# column 7 alone can never match -- confirm this is intended.
cmd == "ref" && mode == "search" &&
(p["book"] == "" || bookmatches($1, $2, p["book"])) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
match(tolower($6), tolower(p["search"])) {
	processline()
}
# Escape s so it can be placed between double quotes in JSON output.
# Backslash and double quote get a leading backslash; tab, newline and
# carriage return become \t, \n and \r. The string is walked one character
# at a time rather than with gsub(), because the treatment of backslashes in
# gsub() replacement text is not the same in every awk implementation.
function json_escape(s,    out, n, k, c) {
	out = ""
	n = length(s)
	for (k = 1; k <= n; k++) {
		c = substr(s, k, 1)
		if (c == "\\" || c == "\"") {
			out = out "\\" c
		} else if (c == "\t") {
			out = out "\\t"
		} else if (c == "\n") {
			out = out "\\n"
		} else if (c == "\r") {
			out = out "\\r"
		} else {
			out = out c
		}
	}
	return out
}

# Print the members "chapter" and "verses" for one collected chapter, each
# line prefixed with `indent`. No newline is printed after the closing "]"
# so the caller decides what follows. Verses are emitted in ascending
# numeric order. Reads the json_* arrays filled by processline() and the
# only_latin / only_german / no_footnotes flags set in the END block.
function json_print_chapter(chapter, indent,    n, a, b, tmp, order, v_idx, verse_num, latin_text, german_text, show_latin, show_german, sep, fn_total, f_idx, fn_num) {
	# Copy the verse numbers and sort them numerically (exchange sort; awk
	# has no portable built-in sort).
	n = json_verse_count[chapter]
	for (a = 1; a <= n; a++) {
		order[a] = json_verses[chapter, a]
	}
	for (a = 1; a <= n; a++) {
		for (b = a + 1; b <= n; b++) {
			if (order[a] + 0 > order[b] + 0) {
				tmp = order[a]
				order[a] = order[b]
				order[b] = tmp
			}
		}
	}

	printf("%s\"chapter\": %d,\n", indent, chapter)
	printf("%s\"verses\": [\n", indent)

	for (v_idx = 1; v_idx <= n; v_idx++) {
		verse_num = order[v_idx]
		latin_text = json_latin[chapter, verse_num]
		german_text = json_german[chapter, verse_num]
		if (no_footnotes) {
			# Alternation rather than a bracket expression so the pattern is
			# also correct in an awk that treats strings as bytes.
			gsub(/(⁰|¹|²|³|⁴|⁵|⁶|⁷|⁸|⁹)+/, "", german_text)
		}

		# A single-language flag always emits that language (even if empty);
		# without flags only non-empty texts are emitted. only_latin wins if
		# both flags are set.
		show_latin = only_latin || (!only_german && latin_text != "")
		show_german = !only_latin && (only_german || german_text != "")

		printf("%s  {\n", indent)
		printf("%s    \"verse\": %d,\n", indent, verse_num)

		# Members are written separator-first so no trailing comma can occur.
		printf("%s    \"text\": {", indent)
		sep = "\n"
		if (show_latin) {
			printf("%s%s      \"latin\": \"%s\"", sep, indent, json_escape(latin_text))
			sep = ",\n"
		}
		if (show_german) {
			printf("%s%s      \"german\": \"%s\"", sep, indent, json_escape(german_text))
			sep = ",\n"
		}
		if (sep != "\n") {
			printf("\n%s    ", indent)
		}
		printf("}")

		fn_total = json_footnote_count[chapter, verse_num]
		if (!no_footnotes && fn_total > 0) {
			printf(",\n%s    \"footnotes\": [\n", indent)
			for (f_idx = 1; f_idx <= fn_total; f_idx++) {
				fn_num = json_footnote_nums[chapter, verse_num, f_idx]
				printf("%s      {\n", indent)
				printf("%s        \"number\": %d,\n", indent, fn_num)
				printf("%s        \"text\": \"%s\"\n", indent, json_escape(json_footnotes[chapter, verse_num, fn_num]))
				printf("%s      }%s\n", indent, (f_idx < fn_total) ? "," : "")
			}
			printf("%s    ]", indent)
		}

		printf("\n%s  }%s\n", indent, (v_idx < n) ? "," : "")
	}

	printf("%s]", indent)
}

# Final output.
#
# JSON mode (cmd == "ref" and ALLIOLI_JSON_OUTPUT set to anything but "" or
# "0"): emit one JSON object built from the data processline() collected:
#   "book"          name / abbreviation / number of the first collected row
#   "introduction"  joined introduction text, when any was collected
#   "chapter" + "verses"   when exactly one chapter was collected
#   "chapters": [ {"chapter", "verses"}, ... ]   when several were collected
# Each verse carries "verse", "text" (latin and/or german, following
# ALLIOLI_ONLY_LATIN / ALLIOLI_ONLY_GERMAN) and, unless ALLIOLI_NOFOOTNOTES
# is set, "footnotes". If nothing matched, "Unknown reference: <ref>" is
# printed and awk exits with status 1.
#
# Text mode: only report "Unknown reference: <ref>" when nothing matched.
END {
	if (cmd == "ref" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "0") {
		if (outputted_records == 0) {
			print "Unknown reference: " ref
			exit 1
		}

		only_latin = (ENVIRON["ALLIOLI_ONLY_LATIN"] != "" && ENVIRON["ALLIOLI_ONLY_LATIN"] != "0")
		only_german = (ENVIRON["ALLIOLI_ONLY_GERMAN"] != "" && ENVIRON["ALLIOLI_ONLY_GERMAN"] != "0")
		no_footnotes = (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0")

		# Every top-level member after "book" is written with a leading
		# ",\n", so the object stays valid whichever members are present.
		print "{"
		printf("  \"book\": {\n")
		printf("    \"name\": \"%s\",\n", json_escape(json_book_name))
		printf("    \"abbreviation\": \"%s\",\n", json_escape(json_book_abbr))
		printf("    \"number\": %d\n", json_book_num)
		printf("  }")

		# Separate real chapters from chapter 0. Introduction text is shown
		# whenever some was collected, or when chapter 0 is in the list.
		json_has_intro = (json_intro != "")
		json_out_total = 0
		for (c_idx = 1; c_idx <= json_chapter_total; c_idx++) {
			if (json_chapters[c_idx] == 0) {
				json_has_intro = 1
			} else {
				json_out_chapters[++json_out_total] = json_chapters[c_idx]
			}
		}

		if (json_has_intro) {
			printf(",\n  \"introduction\": \"%s\"", json_escape(json_intro))
		}

		if (json_out_total == 1) {
			# Single chapter: flat "chapter"/"verses" members.
			printf(",\n")
			json_print_chapter(json_out_chapters[1], "  ")
		} else if (json_out_total > 1) {
			# Several chapters: repeating the flat keys would give duplicate
			# member names, so wrap them in an array instead.
			printf(",\n  \"chapters\": [\n")
			for (c_idx = 1; c_idx <= json_out_total; c_idx++) {
				printf("    {\n")
				json_print_chapter(json_out_chapters[c_idx], "      ")
				printf("\n    }%s\n", (c_idx < json_out_total) ? "," : "")
			}
			printf("  ]")
		}

		printf("\n}\n")
		exit
	}

	# Normal text mode
	if (cmd == "ref" && outputted_records == 0) {
		print "Unknown reference: " ref
	}
}