Add JSON output format for programmatic access

Implements structured JSON output with hierarchical schema including
book metadata, chapters, verses, and footnotes. All existing flags
(-F, -g, -L) are respected in JSON mode.
This commit is contained in:
2025-12-16 18:49:27 +01:00
parent 6432d37df3
commit 7ef782dda2
3 changed files with 228 additions and 0 deletions

View File

@@ -329,6 +329,52 @@ function printfootnote(footnote_num, footnote, word_count, characters_printed
}
function processline() {
# JSON mode: nothing is printed per record. Each record is filed into the
# json_* globals below, which the END block reads to build the output.
if (ENVIRON["ALLIOLI_JSON_OUTPUT"] != "" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "0") {
	# The first record seen supplies the book header fields ($1, $2, $3).
	if (json_book_name == "") {
		json_book_name = $1
		json_book_abbr = $2
		json_book_num = $3
	}

	if ($6 == "" && NF >= 8 && $7 ~ /^[0-9]+$/) {
		# Footnote record: $6 is empty, $7 is the footnote number and $8 its text.
		# Numbers are also kept in arrival order per ($4, $5) so END can list them.
		json_footnote_count[$4, $5]++
		json_footnote_nums[$4, $5, json_footnote_count[$4, $5]] = $7
		json_footnotes[$4, $5, $7] = $8
	} else if ($6 == "" && $4 == 0) {
		# Introduction record (chapter 0, empty $6): join successive pieces
		# with a single space.
		json_intro = (json_intro == "" ? $7 : (json_intro " " $7))
	} else if (!($6 == "" && ($7 == "" || $7 ~ /^[0-9]+$/))) {
		# Verse record: $6 is non-empty, or $7 holds something other than a
		# bare number. $6 is stored as the Latin text, $7 as the German text.
		json_latin[$4, $5] = $6
		json_german[$4, $5] = $7

		# Remember each verse once per chapter, in order of first appearance.
		if (!(($4, $5) in json_verse_seen)) {
			json_verse_seen[$4, $5] = 1
			json_verse_count[$4]++
			json_verses[$4, json_verse_count[$4]] = $5
		}

		# Remember each chapter once, in order of first appearance.
		if (!($4 in json_chapter_seen)) {
			json_chapter_seen[$4] = 1
			json_chapter_total++
			json_chapters[json_chapter_total] = $4
		}
	}

	# Every record reaching JSON mode counts as output, whichever branch took it.
	outputted_records++
	return
}
# Normal text output mode
if (printed_intrudction && $4 != 0){
printf("\n\n")
printed_intrudction=0
@@ -401,6 +447,133 @@ cmd == "ref" && mode == "search" && (p["book"] == "" || bookmatches($1, $2, p["b
}
END {
# JSON output mode: print the data held in the json_* globals as one JSON
# object, then exit so the text-mode code further down never runs.
#
# Members of the object, in order:
#   "book"                  name / abbreviation / number
#   "introduction"          only when introduction text (or chapter 0) was seen
#   "chapter" + "verses"    when exactly one chapter matched
#   "chapters": [ ... ]     when several chapters matched; each element is an
#                           object with its own "chapter" and "verses"
#
# Several chapters used to be written as repeated "chapter"/"verses" members
# with no comma in between, which is not parseable JSON; the array form is used
# for that case only, so single-chapter output keeps its member names.
#
# Every string value is escaped for JSON (backslash and double quote) right
# before printing. Other control characters are not escaped here.
if (cmd == "ref" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "0") {
	if (outputted_records == 0) {
		print "Unknown reference: " ref
		exit 1
	}

	# Option flags: set and not "0" means enabled.
	only_latin = (ENVIRON["ALLIOLI_ONLY_LATIN"] != "" && ENVIRON["ALLIOLI_ONLY_LATIN"] != "0")
	only_german = (ENVIRON["ALLIOLI_ONLY_GERMAN"] != "" && ENVIRON["ALLIOLI_ONLY_GERMAN"] != "0")
	no_footnotes = (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0")

	# Chapter 0 is the introduction, not a real chapter. Count the real ones up
	# front so the layout (single chapter vs. "chapters" array) is known before
	# anything is printed. The introduction is emitted when text was collected
	# for it, or when chapter 0 appears in the chapter list.
	json_has_intro = (json_intro != "")
	json_real_chapters = 0
	for (c_idx = 1; c_idx <= json_chapter_total; c_idx++) {
		if (json_chapters[c_idx] == 0) {
			json_has_intro = 1
		} else {
			json_real_chapters++
		}
	}
	json_multi = (json_real_chapters > 1)
	# Extra indentation for lines nested inside the "chapters" array.
	json_pad = (json_multi ? "    " : "")

	# Book header. "\\\\&" is the gsub replacement for "a backslash followed by
	# the matched character", i.e. it prefixes each \ or " with a backslash.
	print "{"
	printf("  \"book\": {\n")
	json_str = json_book_name
	gsub(/[\\"]/, "\\\\&", json_str)
	printf("    \"name\": \"%s\",\n", json_str)
	json_str = json_book_abbr
	gsub(/[\\"]/, "\\\\&", json_str)
	printf("    \"abbreviation\": \"%s\",\n", json_str)
	printf("    \"number\": %d\n", json_book_num)
	# From here on every member prints its own leading ",\n", so no member
	# needs to know whether another one follows it.
	printf("  }")

	if (json_has_intro) {
		json_str = json_intro
		gsub(/[\\"]/, "\\\\&", json_str)
		printf(",\n  \"introduction\": \"%s\"", json_str)
	}

	if (json_multi) {
		printf(",\n  \"chapters\": [")
	}

	# Chapters are written in the order they were first seen.
	json_chapters_done = 0
	for (c_idx = 1; c_idx <= json_chapter_total; c_idx++) {
		chapter = json_chapters[c_idx]
		if (chapter == 0) {
			continue
		}

		if (json_multi) {
			printf("%s\n    {\n", (json_chapters_done > 0 ? "," : ""))
		} else {
			printf(",\n")
		}
		json_chapters_done++
		printf("%s  \"chapter\": %d,\n", json_pad, chapter)
		printf("%s  \"verses\": [\n", json_pad)

		# Copy this chapter's verse numbers and order them numerically.
		# Portable awk has no built-in sort; the lists are per-chapter and small.
		verse_total = json_verse_count[chapter] + 0
		delete sorted_verses
		for (v_idx = 1; v_idx <= verse_total; v_idx++) {
			sorted_verses[v_idx] = json_verses[chapter, v_idx]
		}
		for (i = 1; i <= verse_total; i++) {
			for (j = i + 1; j <= verse_total; j++) {
				if (sorted_verses[i] + 0 > sorted_verses[j] + 0) {
					temp = sorted_verses[i]
					sorted_verses[i] = sorted_verses[j]
					sorted_verses[j] = temp
				}
			}
		}

		for (v_idx = 1; v_idx <= verse_total; v_idx++) {
			verse_num = sorted_verses[v_idx]

			latin_text = json_latin[chapter, verse_num]
			gsub(/[\\"]/, "\\\\&", latin_text)

			german_text = json_german[chapter, verse_num]
			if (no_footnotes) {
				# Drop superscript footnote markers. Alternation is used instead
				# of a bracket expression: an awk that matches bytes rather than
				# characters would treat a bracket list of multi-byte characters
				# as a set of single bytes and could damage other UTF-8 text.
				gsub(/(⁰|¹|²|³|⁴|⁵|⁶|⁷|⁸|⁹)+/, "", german_text)
			}
			gsub(/[\\"]/, "\\\\&", german_text)

			# A language selected explicitly is always printed, even if empty.
			# Without a selection only non-empty texts are printed. If both
			# "only" flags are set, Latin wins.
			show_latin = (only_latin || (!only_german && latin_text != ""))
			show_german = (!only_latin && (only_german || german_text != ""))

			printf("%s    {\n", json_pad)
			printf("%s      \"verse\": %d,\n", json_pad, verse_num)
			printf("%s      \"text\": {", json_pad)
			# Separator-first printing keeps the object free of trailing commas
			# whichever combination of members is present.
			json_sep = "\n"
			if (show_latin) {
				printf("%s%s        \"latin\": \"%s\"", json_sep, json_pad, latin_text)
				json_sep = ",\n"
			}
			if (show_german) {
				printf("%s%s        \"german\": \"%s\"", json_sep, json_pad, german_text)
				json_sep = ",\n"
			}
			if (json_sep != "\n") {
				printf("\n%s      }", json_pad)
			} else {
				printf(" }")
			}

			# Footnotes, in the order they were collected (unless disabled).
			fn_total = json_footnote_count[chapter, verse_num] + 0
			if (!no_footnotes && fn_total > 0) {
				printf(",\n%s      \"footnotes\": [\n", json_pad)
				for (f_idx = 1; f_idx <= fn_total; f_idx++) {
					fn_num = json_footnote_nums[chapter, verse_num, f_idx]
					fn_text = json_footnotes[chapter, verse_num, fn_num]
					gsub(/[\\"]/, "\\\\&", fn_text)
					printf("%s        {\n", json_pad)
					printf("%s          \"number\": %d,\n", json_pad, fn_num)
					printf("%s          \"text\": \"%s\"\n", json_pad, fn_text)
					printf("%s        }%s\n", json_pad, (f_idx < fn_total ? "," : ""))
				}
				printf("%s      ]\n", json_pad)
			} else {
				printf("\n")
			}

			# Close the verse object; a comma follows all but the last one.
			printf("%s    }%s\n", json_pad, (v_idx < verse_total ? "," : ""))
		}

		printf("%s  ]", json_pad)
		if (json_multi) {
			printf("\n    }")
		}
	}

	if (json_multi) {
		printf("\n  ]")
	}
	printf("\n")
	print "}"
	exit
}
# Normal text mode
# The body runs only outside JSON mode: for cmd == "ref" with JSON output
# enabled, the branch above always leaves through exit. If no record was
# counted in outputted_records, report the reference as unknown.
# NOTE(review): the JSON branch uses "exit 1" for this case, while no exit
# status is set here in the visible lines — confirm against the rest of END.
if (cmd == "ref" && outputted_records == 0) {
print "Unknown reference: " ref
}