Files
allioli/allioli.awk
Alexander Bocken 7ef782dda2 Add JSON output format for programmatic access
Implements structured JSON output with hierarchical schema including
book metadata, chapters, verses, and footnotes. All existing flags
(-F, -g, -L) are respected in JSON mode.
2025-12-16 18:49:32 +01:00

581 lines
16 KiB
Awk

BEGIN {
    # Input is tab-separated. Leading columns of every record:
    #   $1 book name      $2 book abbreviation   $3 book number
    #   $4 chapter number $5 verse number        $6 verse text
    FS = "\t"

    # Output width defaults to 120 columns; ALLIOLI_MAX_WIDTH overrides it
    # only when it consists purely of decimal digits.
    MAX_WIDTH = (ENVIRON["ALLIOLI_MAX_WIDTH"] ~ /^[0-9]+$/) ? int(ENVIRON["ALLIOLI_MAX_WIDTH"]) : 120

    # A reference query is parsed once, before any input is read: `mode`
    # holds parseref()'s verdict and `p` the pieces it extracted, with the
    # book name normalized the same way bookmatches() normalizes its inputs.
    if (cmd == "ref") {
        mode = parseref(ref, p)
        p["book"] = cleanbook(p["book"])
    }
}
# "list" command: print each book once, as "Name (Abbreviation)", the first
# time its abbreviation ($2) shows up in the input.
cmd == "list" && !($2 in seen_books) {
    seen_books[$2] = 1
    printf("%s (%s)\n", $1, $2)
}
# parseref(ref, arr): split a reference string into its components.
#
# The string is consumed left to right; each stage matches a prefix of what
# is left in `ref`, records it in `arr`, and cuts it off.
#
# Keys written to `arr` (only those present in the reference):
#   "book", "chapter", "verse", "chapter_end", "verse_end", "search"
#   arr["verse", N] = 1 for every verse N of a comma list; in that case
#   the plain arr["verse"] entry is deleted.
#
# Returns the lookup mode as a string:
#   "exact", "exact_set", "range", "range_ext", "search", or "unknown"
#   when the string fits none of the forms below.
#
# Chapter and verse numbers must start with a digit 1-9, so a literal 0 is
# never accepted as a chapter or verse.
function parseref(ref, arr) {
# Accepted forms (the numbers are reused in the branch comments below):
# 1. <book>
# 2. <book>:?<chapter>
# 3. <book>:?<chapter>:<verse>
# 3a. <book>:?<chapter>:<verse>[,<verse>]...
# 4. <book>:?<chapter>-<chapter>
# 5. <book>:?<chapter>:<verse>-<verse>
# 6. <book>:?<chapter>:<verse>-<chapter>:<verse>
# 7. /<search>
# 8. <book>/search
# 9. <book>:?<chapter>/search

# Stage 1: book name (optional leading digit, then letters/spaces) or a bare search.
if (match(ref, "^[1-9]?[a-zA-ZäüöÄÜÖ ]+")) {
# 1, 2, 3, 3a, 4, 5, 6, 8, 9
arr["book"] = substr(ref, 1, RLENGTH)
ref = substr(ref, RLENGTH + 1)
} else if (match(ref, "^/")) {
# 7
arr["search"] = substr(ref, 2)
return "search"
} else {
return "unknown"
}
# Stage 2: chapter number, optionally introduced by ':'.
if (match(ref, "^:?[1-9]+[0-9]*")) {
# 2, 3, 3a, 4, 5, 6, 9
# sub() strips the optional ':' from ref; RLENGTH still counts it, so the
# offsets in the first branch are one less than in the second.
if (sub("^:", "", ref)) {
arr["chapter"] = int(substr(ref, 1, RLENGTH - 1))
ref = substr(ref, RLENGTH)
} else {
arr["chapter"] = int(substr(ref, 1, RLENGTH))
ref = substr(ref, RLENGTH + 1)
}
} else if (match(ref, "^/")) {
# 8
arr["search"] = substr(ref, 2)
return "search"
} else if (ref == "") {
# 1
return "exact"
} else {
return "unknown"
}
# Stage 3: what follows the chapter - a verse, a chapter range, or a search.
if (match(ref, "^:[1-9]+[0-9]*")) {
# 3, 3a, 5, 6
arr["verse"] = int(substr(ref, 2, RLENGTH - 1))
ref = substr(ref, RLENGTH + 1)
} else if (match(ref, "^-[1-9]+[0-9]*$")) {
# 4
arr["chapter_end"] = int(substr(ref, 2))
return "range"
} else if (match(ref, "^/")) {
# 9
arr["search"] = substr(ref, 2)
return "search"
} else if (ref == "") {
# 2
return "exact"
} else {
return "unknown"
}
# Stage 4: what follows the verse - a verse range, the start of a
# chapter:verse end point, nothing, or a comma list of further verses.
if (match(ref, "^-[1-9]+[0-9]*$")) {
# 5
arr["verse_end"] = int(substr(ref, 2))
return "range"
# NOTE(review): unlike its sibling patterns this one has no ^ anchor, while
# the substr() offsets below assume the match starts at position 1.
} else if (match(ref, "-[1-9]+[0-9]*")) {
# 6
arr["chapter_end"] = int(substr(ref, 2, RLENGTH - 1))
ref = substr(ref, RLENGTH + 1)
} else if (ref == "") {
# 3
return "exact"
} else if (match(ref, "^,[1-9]+[0-9]*")) {
# 3a
# Switch from the single "verse" entry to a set keyed by ("verse", N).
arr["verse", arr["verse"]] = 1
delete arr["verse"]
do {
arr["verse", substr(ref, 2, RLENGTH - 1)] = 1
ref = substr(ref, RLENGTH + 1)
} while (match(ref, "^,[1-9]+[0-9]*"))
# Anything left after the last ",<verse>" makes the reference invalid.
if (ref != "") {
return "unknown"
}
return "exact_set"
} else {
return "unknown"
}
# Stage 5: only form 6 gets here; the remainder must be exactly ":<verse>".
if (match(ref, "^:[1-9]+[0-9]*$")) {
# 6
arr["verse_end"] = int(substr(ref, 2))
return "range_ext"
} else {
return "unknown"
}
}
# cleanbook(book): normalize a book name or abbreviation for comparison.
# Returns the input lowercased with every space character removed.
function cleanbook(book) {
    gsub(/ /, "", book)
    return tolower(book)
}
# bookmatches(book, bookabbr, query): does a record's book match the query?
#
#   book, bookabbr  raw name and abbreviation from the record; both are
#                   normalized with cleanbook() before comparing
#   query           book part of the reference, already normalized by the caller
#
# Accepts, in this order: the full name, the abbreviation, or any name that
# starts with the query. Returns the normalized book name on a match and
# falls off the end (uninitialized, hence false) otherwise. An empty query
# is a prefix of every name.
function bookmatches(book, bookabbr, query) {
    book = cleanbook(book)
    if (book == query || cleanbook(bookabbr) == query || substr(book, 1, length(query)) == query) {
        return book
    }
}
# printverse(verse): print one verse's text, word-wrapped, to stdout.
#
# Only `verse` is meant to be passed by callers; every other parameter is a
# local variable (awk's only way to declare locals). `words` and `i` are
# declared here so the scratch array and loop index no longer overwrite
# same-named globals.
#
# Environment switches (active when set to anything other than "" or "0"):
#   ALLIOLI_NOFOOTNOTES  strip superscript footnote markers from the text
#   ALLIOLI_NOLINEWRAP   print the text on a single line, unwrapped
#
# Wrapped lines are limited to MAX_WIDTH - 8 characters; continuation lines
# start with a tab.
function printverse(verse, word_count, characters_printed,    words, i) {
    # Remove superscript footnote numbers if footnotes are disabled
    if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
        gsub(/[⁰¹²³⁴⁵⁶⁷⁸⁹]+/, "", verse)
    }
    if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
        printf("%s\n", verse)
        return
    }
    word_count = split(verse, words, " ")
    for (i = 1; i <= word_count; i++) {
        # Break the line when this word (plus a separating space, if the
        # line already has content) would exceed the budget.
        if (characters_printed + length(words[i]) + (characters_printed > 0 ? 1 : 0) > MAX_WIDTH - 8) {
            printf("\n\t")
            characters_printed = 0
        }
        if (characters_printed > 0) {
            printf(" ")
            characters_printed++
        }
        printf("%s", words[i])
        characters_printed += length(words[i])
    }
    printf("\n")
}
# printverse_bilingual(latin, german): print a verse as two word-wrapped
# columns, Latin on the left and German on the right, separated by " | ".
#
# Only `latin` and `german` are meant to be passed by callers; all other
# parameters are local variables. `word` is declared here so it no longer
# leaks into the global namespace.
#
# Environment switches (active when set to anything other than "" or "0"):
#   ALLIOLI_NOFOOTNOTES  strip superscript footnote markers from the German text
#   ALLIOLI_NOLINEWRAP   print "latin | german" on a single line
#
# Each column is int((MAX_WIDTH - 10) / 2) characters wide (at least 1).
# A word longer than the column is placed alone on its own line and allowed
# to overflow; without that rule neither index would ever advance past it
# and the output loop would never finish.
function printverse_bilingual(latin, german, latin_words, german_words, latin_count, german_count, latin_idx, german_idx, col_width, latin_chars, german_chars, latin_line, german_line, word) {
    # Remove superscript footnote numbers if footnotes are disabled
    if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
        gsub(/[⁰¹²³⁴⁵⁶⁷⁸⁹]+/, "", german)
    }
    if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
        printf("%s | %s\n", latin, german)
        return
    }
    # Column width is half the total width, minus separators
    col_width = int((MAX_WIDTH - 10) / 2)
    # A very small MAX_WIDTH would give a zero or negative width; keep it
    # positive so the padding in the printf below stays meaningful.
    if (col_width < 1) {
        col_width = 1
    }
    # Split into words
    latin_count = split(latin, latin_words, " ")
    german_count = split(german, german_words, " ")
    latin_idx = 1
    german_idx = 1
    # Print both columns line by line
    while (latin_idx <= latin_count || german_idx <= german_count) {
        latin_line = ""
        german_line = ""
        latin_chars = 0
        german_chars = 0
        # Build Latin line
        while (latin_idx <= latin_count) {
            word = latin_words[latin_idx]
            # Only break when the line already holds a word: the first word
            # of a line is always accepted, even if it is too wide.
            if (latin_chars > 0 && latin_chars + length(word) + 1 > col_width) {
                break
            }
            if (latin_chars > 0) {
                latin_line = latin_line " "
                latin_chars++
            }
            latin_line = latin_line word
            latin_chars += length(word)
            latin_idx++
        }
        # Build German line
        while (german_idx <= german_count) {
            word = german_words[german_idx]
            if (german_chars > 0 && german_chars + length(word) + 1 > col_width) {
                break
            }
            if (german_chars > 0) {
                german_line = german_line " "
                german_chars++
            }
            german_line = german_line word
            german_chars += length(word)
            german_idx++
        }
        # Print the line with padding
        printf("\t%-*s | %s\n", col_width, latin_line, german_line)
    }
}
# printintroductionpar(verse): print one paragraph of a book introduction,
# word-wrapped at MAX_WIDTH.
#
# Only `verse` is meant to be passed by callers; the remaining parameters are
# local variables. `words` and `i` are declared here so they no longer
# overwrite same-named globals.
#
# The first line starts with 8 columns already accounted for, because the
# caller in this file prints a tab before calling; continuation lines start
# at column 0.
#
# Side effect: sets the global `printed_intrudction` to 1 on the wrapped
# path (the spelling is what processline() tests, so it must stay as is).
# NOTE(review): with ALLIOLI_NOLINEWRAP set the function returns before
# setting that flag - confirm whether that is intended.
function printintroductionpar(verse, word_count, characters_printed,    words, i) {
    if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
        printf("%s\n", verse)
        return
    }
    word_count = split(verse, words, " ")
    characters_printed = 8 # account for the indent at the beginning of the paragraph
    for (i = 1; i <= word_count; i++) {
        if (characters_printed + length(words[i]) + (characters_printed > 0 ? 1 : 0) > MAX_WIDTH) {
            printf("\n")
            characters_printed = 0
        }
        # The i != 1 test is needed because characters_printed starts above
        # zero on the first line only; no space goes before the first word.
        if (i != 1 && characters_printed > 0) {
            printf(" ")
            characters_printed++
        }
        printf("%s", words[i])
        characters_printed += length(words[i])
    }
    printf("\n")
    printed_intrudction = 1
}
# to_superscript_num(num): return `num` with each decimal digit replaced by
# its Unicode superscript form; any other character is copied unchanged.
#
# Only `num` is meant to be passed by callers. result, len, i, digit and sup
# are local variables: previously the first four were undeclared, so every
# call overwrote globals of those names.
function to_superscript_num(num,    result, len, i, digit, sup) {
    # sup[1] .. sup[10] hold the superscripts for 0 .. 9
    split("⁰ ¹ ² ³ ⁴ ⁵ ⁶ ⁷ ⁸ ⁹", sup, " ")
    result = ""
    len = length(num)
    for (i = 1; i <= len; i++) {
        digit = substr(num, i, 1)
        if (digit ~ /^[0-9]$/) {
            result = result sup[digit + 1]
        } else {
            result = result digit
        }
    }
    return result
}
# printfootnote(footnote_num, footnote): print one footnote beneath its verse.
#
# Only the first two parameters are meant to be passed by callers; the rest
# are local variables. `words` and `i` are declared here so they no longer
# overwrite same-named globals.
#
# Behaviour:
#   - prints nothing when ALLIOLI_NOFOOTNOTES is set (not "" and not "0")
#   - with ALLIOLI_NOLINEWRAP set: two tabs, the superscript number, the text
#   - text shorter than MAX_WIDTH - 17: padded with leading spaces
#     (MAX_WIDTH - length - 1 of them) so it sits toward the right margin
#   - otherwise: two tabs and the number, then the text word-wrapped with
#     tab-indented continuation lines
function printfootnote(footnote_num, footnote, word_count, characters_printed, sup_num,    words, i) {
    if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
        return
    }
    # Convert footnote number to superscript
    sup_num = to_superscript_num(footnote_num)
    if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
        printf("\t\t%s%s\n", sup_num, footnote)
        return
    }
    if (length(footnote) < MAX_WIDTH - 17) {
        # Short footnote: a single line, pushed right with spaces.
        for (i = 1; i <= MAX_WIDTH - length(footnote) - 1; i++) {
            printf(" ")
        }
        printf("%s%s\n", sup_num, footnote)
        return
    }
    word_count = split(footnote, words, " ")
    printf("\t\t%s", sup_num)
    # Account for the indent at the start of a multi-line footnote
    # (2 tabs + sup_num).
    characters_printed = 17
    for (i = 1; i <= word_count; i++) {
        if (characters_printed + length(words[i]) + (characters_printed > 0 ? 1 : 0) > MAX_WIDTH - 8) {
            printf("\n\t")
            characters_printed = 0
        }
        # No space before the very first word: characters_printed starts
        # above zero, so the i != 1 test is what suppresses it.
        if (i != 1 && characters_printed > 0) {
            printf(" ")
            characters_printed++
        }
        printf("%s", words[i])
        characters_printed += length(words[i])
    }
    printf("\n")
}
# processline(): handle the current input record once a selection rule has
# accepted it. Increments the global `outputted_records` on every call.
#
# Record layout used here (tab-separated fields):
#   $1 book name, $2 abbreviation, $3 book number, $4 chapter, $5 verse,
#   $6 Latin text, $7 German text or footnote number, $8 footnote text
#
# JSON mode (ALLIOLI_JSON_OUTPUT set, not "0"): nothing is printed; the
# record is stored in the json_* globals for the END block to emit.
# Text mode: the record is printed immediately as a footnote, introduction
# paragraph, bilingual verse, or single-language verse.
function processline() {
    # JSON mode: collect data instead of printing
    if (ENVIRON["ALLIOLI_JSON_OUTPUT"] != "" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "0") {
        # Store book info (will be used in END block)
        if (json_book_name == "") {
            json_book_name = $1
            json_book_abbr = $2
            json_book_num = $3
        }
        if ($6 == "" && $7 ~ /^[0-9]+$/ && NF >= 8) {
            # Footnote: keep the text and the order of numbers per verse
            json_footnotes[$4, $5, $7] = $8
            json_footnote_nums[$4, $5, ++json_footnote_count[$4, $5]] = $7
        } else if ($4 == 0 && $6 == "") {
            # Introduction (chapter 0): paragraphs are joined with a space
            if (json_intro == "") {
                json_intro = $7
            } else {
                json_intro = json_intro " " $7
            }
            # Register chapter 0 in the chapter list. The END block only
            # emits the introduction when it meets chapter 0 there, so
            # without this the collected text was never output.
            if (!json_chapter_seen[$4]) {
                json_chapter_seen[$4] = 1
                json_chapters[++json_chapter_total] = $4
            }
        } else if ($6 != "" || ($7 != "" && $7 !~ /^[0-9]+$/)) {
            # Verse with content
            json_latin[$4, $5] = $6
            json_german[$4, $5] = $7
            # Track unique verses per chapter
            if (!json_verse_seen[$4, $5]) {
                json_verse_seen[$4, $5] = 1
                json_verses[$4, ++json_verse_count[$4]] = $5
            }
            # Track chapters
            if (!json_chapter_seen[$4]) {
                json_chapter_seen[$4] = 1
                json_chapters[++json_chapter_total] = $4
            }
        }
        outputted_records++
        return
    }
    # Normal text output mode
    # Blank lines separate a printed introduction from the first real chapter.
    if (printed_intrudction && $4 != 0) {
        printf("\n\n")
        printed_intrudction = 0
    }
    # Print the book title once, when the book changes.
    if (last_book_printed != $2) {
        print $1
        last_book_printed = $2
    }
    # Determine line type based on column structure
    if ($6 == "" && $7 ~ /^[0-9]+$/ && NF >= 8) {
        # Footnote (column 6 empty, column 7 is a number, column 8 present)
        printfootnote($7, $8)
    } else if ($4 == 0 && $6 == "") {
        # Introduction (chapter 0, column 6 empty, column 7 is text)
        printf("\t")
        printintroductionpar($7)
    } else if ($6 != "" && $7 != "") {
        # Bilingual verse (both column 6 and 7 have text)
        if (ENVIRON["ALLIOLI_ONLY_LATIN"] != "" && ENVIRON["ALLIOLI_ONLY_LATIN"] != "0") {
            # Show only Latin
            printf("%d:%d\t", $4, $5)
            printverse($6)
        } else if (ENVIRON["ALLIOLI_ONLY_GERMAN"] != "" && ENVIRON["ALLIOLI_ONLY_GERMAN"] != "0") {
            # Show only German
            printf("%d:%d\t", $4, $5)
            printverse($7)
        } else {
            # Show both side-by-side
            printf("%d:%d", $4, $5)
            printverse_bilingual($6, $7)
        }
    } else if ($6 == "" && $7 != "" && $7 !~ /^[0-9]+$/) {
        # German-only verse (column 6 empty, column 7 has text, not a footnote)
        printf("%d:%d\t", $4, $5)
        printverse($7)
    } else if ($6 != "" && $7 == "") {
        # Latin-only verse (column 6 has text, column 7 empty)
        printf("%d:%d\t", $4, $5)
        printverse($6)
    }
    outputted_records++
}
# Record selection for cmd == "ref". `mode` and `p` come from parseref() in
# the BEGIN block; each rule hands matching records to processline().

# "exact": a whole book, one chapter, or one verse, depending on which of
# chapter / verse the reference supplied.
cmd == "ref" && mode == "exact" &&
bookmatches($1, $2, p["book"]) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
(p["verse"] == "" || $5 == p["verse"]) {
    processline()
}

# "exact_set": a comma list of verses, stored as p["verse", N] entries.
cmd == "ref" && mode == "exact_set" &&
bookmatches($1, $2, p["book"]) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
p["verse", $5] {
    processline()
}

# "range": either a chapter range (chapter .. chapter_end) or a verse range
# inside one chapter (verse .. verse_end, chapter_end unset).
cmd == "ref" && mode == "range" &&
bookmatches($1, $2, p["book"]) &&
((p["chapter_end"] == "" && $4 == p["chapter"]) ||
 ($4 >= p["chapter"] && $4 <= p["chapter_end"])) &&
(p["verse"] == "" || $5 >= p["verse"]) &&
(p["verse_end"] == "" || $5 <= p["verse_end"]) {
    processline()
}

# "range_ext": chapter:verse through chapter_end:verse_end. The four
# alternatives are: tail of the first chapter, whole chapters in between,
# head of the last chapter, and the case where both ends share a chapter.
cmd == "ref" && mode == "range_ext" &&
bookmatches($1, $2, p["book"]) &&
(($4 == p["chapter"] && $5 >= p["verse"] && p["chapter"] != p["chapter_end"]) ||
 ($4 > p["chapter"] && $4 < p["chapter_end"]) ||
 ($4 == p["chapter_end"] && $5 <= p["verse_end"] && p["chapter"] != p["chapter_end"]) ||
 (p["chapter"] == p["chapter_end"] && $4 == p["chapter"] && $5 >= p["verse"] && $5 <= p["verse_end"])) {
    processline()
}

# "search": case-insensitive regular-expression match against column 6,
# optionally restricted to a book and a chapter.
cmd == "ref" && mode == "search" &&
(p["book"] == "" || bookmatches($1, $2, p["book"])) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
match(tolower($6), tolower(p["search"])) {
    processline()
}
# json_escape_string(s): return `s` made safe for use between the double
# quotes of a JSON string: backslash and double quote get a backslash in
# front, a carriage return becomes \r. Tabs and newlines are not handled
# because they act as field/record separators in this script's input.
# Done character by character to stay clear of the awk-specific backslash
# rules of gsub() replacement strings.
function json_escape_string(s,    out, n, k, ch) {
    out = ""
    n = length(s)
    for (k = 1; k <= n; k++) {
        ch = substr(s, k, 1)
        if (ch == "\\" || ch == "\"") {
            out = out "\\" ch
        } else if (ch == "\r") {
            out = out "\\r"
        } else {
            out = out ch
        }
    }
    return out
}

# END: emit the JSON document collected by processline(), or - in text
# mode - report a reference that matched nothing.
#
# JSON mode honours ALLIOLI_ONLY_LATIN, ALLIOLI_ONLY_GERMAN and
# ALLIOLI_NOFOOTNOTES (each active when set and not "0"). When nothing
# matched it prints "Unknown reference: <ref>" and exits with status 1.
#
# NOTE(review): every chapter is written as its own "chapter"/"verses" pair
# directly inside the top-level object, with no separator after a chapter's
# closing "]". A selection spanning more than one chapter therefore does not
# form one valid JSON object. Left unchanged here because the fix alters the
# output schema.
END {
    # JSON output mode
    if (cmd == "ref" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "0") {
        if (outputted_records == 0) {
            print "Unknown reference: " ref
            exit 1
        }
        # Determine language flags
        only_latin = (ENVIRON["ALLIOLI_ONLY_LATIN"] != "" && ENVIRON["ALLIOLI_ONLY_LATIN"] != "0")
        only_german = (ENVIRON["ALLIOLI_ONLY_GERMAN"] != "" && ENVIRON["ALLIOLI_ONLY_GERMAN"] != "0")
        no_footnotes = (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0")
        # Start JSON output
        print "{"
        printf(" \"book\": {\n")
        printf(" \"name\": \"%s\",\n", json_escape_string(json_book_name))
        printf(" \"abbreviation\": \"%s\",\n", json_escape_string(json_book_abbr))
        printf(" \"number\": %d\n", json_book_num)
        printf(" },\n")
        # Output chapters
        for (c_idx = 1; c_idx <= json_chapter_total; c_idx++) {
            chapter = json_chapters[c_idx]
            # Handle introduction (chapter 0)
            if (chapter == 0) {
                printf(" \"introduction\": \"%s\"", json_escape_string(json_intro))
                if (json_chapter_total > 1) {
                    printf(",\n")
                } else {
                    printf("\n")
                }
                continue
            }
            # Regular chapter
            printf(" \"chapter\": %d,\n", chapter)
            printf(" \"verses\": [\n")
            # Copy this chapter's verse numbers, then sort them numerically
            delete sorted_verses
            for (v_idx = 1; v_idx <= json_verse_count[chapter]; v_idx++) {
                sorted_verses[v_idx] = json_verses[chapter, v_idx]
            }
            # Simple exchange sort; "+ 0" forces a numeric comparison
            for (i = 1; i <= json_verse_count[chapter]; i++) {
                for (j = i + 1; j <= json_verse_count[chapter]; j++) {
                    if (sorted_verses[i] + 0 > sorted_verses[j] + 0) {
                        temp = sorted_verses[i]
                        sorted_verses[i] = sorted_verses[j]
                        sorted_verses[j] = temp
                    }
                }
            }
            # Output verses in sorted order
            for (v_idx = 1; v_idx <= json_verse_count[chapter]; v_idx++) {
                verse_num = sorted_verses[v_idx]
                printf(" {\n")
                printf(" \"verse\": %d,\n", verse_num)
                # Text object
                printf(" \"text\": {")
                latin_text = json_latin[chapter, verse_num]
                german_text = json_german[chapter, verse_num]
                # Remove superscript markers if footnotes disabled
                if (no_footnotes) {
                    gsub(/[⁰¹²³⁴⁵⁶⁷⁸⁹]+/, "", german_text)
                }
                # Build the member list first and join with ",\n", so a verse
                # that has only one of the two languages cannot end in a
                # dangling comma.
                text_members = ""
                if (only_latin) {
                    text_members = sprintf(" \"latin\": \"%s\"", json_escape_string(latin_text))
                } else if (only_german) {
                    text_members = sprintf(" \"german\": \"%s\"", json_escape_string(german_text))
                } else {
                    # Both languages; an empty side is left out
                    if (latin_text != "") {
                        text_members = sprintf(" \"latin\": \"%s\"", json_escape_string(latin_text))
                    }
                    if (german_text != "") {
                        if (text_members != "") {
                            text_members = text_members ",\n"
                        }
                        text_members = text_members sprintf(" \"german\": \"%s\"", json_escape_string(german_text))
                    }
                }
                if (text_members != "") {
                    printf("\n%s\n", text_members)
                }
                printf(" }")
                # Footnotes array (if not disabled)
                if (!no_footnotes && json_footnote_count[chapter, verse_num] > 0) {
                    printf(",\n \"footnotes\": [\n")
                    for (f_idx = 1; f_idx <= json_footnote_count[chapter, verse_num]; f_idx++) {
                        fn_num = json_footnote_nums[chapter, verse_num, f_idx]
                        fn_text = json_footnotes[chapter, verse_num, fn_num]
                        printf(" {\n")
                        printf(" \"number\": %d,\n", fn_num)
                        printf(" \"text\": \"%s\"\n", json_escape_string(fn_text))
                        if (f_idx < json_footnote_count[chapter, verse_num]) {
                            printf(" },\n")
                        } else {
                            printf(" }\n")
                        }
                    }
                    printf(" ]\n")
                } else {
                    printf("\n")
                }
                # Close verse object
                if (v_idx < json_verse_count[chapter]) {
                    printf(" },\n")
                } else {
                    printf(" }\n")
                }
            }
            printf(" ]\n")
        }
        print "}"
        exit
    }
    # Normal text mode
    if (cmd == "ref" && outputted_records == 0) {
        print "Unknown reference: " ref
    }
}