allioli/allioli.awk

BEGIN {
	# Input records are tab-separated. Columns as consumed by processline():
	#  $1 Book name
	#  $2 Book abbreviation
	#  $3 Book number
	#  $4 Chapter number (0 is treated as a book introduction)
	#  $5 Verse number
	#  $6 Latin text (empty on introduction and footnote rows)
	#  $7 German text, or the footnote number on footnote rows
	#  $8 Footnote text (footnote rows only)
	FS = "\t"

	# Output width defaults to 120 columns; ALLIOLI_MAX_WIDTH overrides it
	# only when it consists purely of decimal digits.
	MAX_WIDTH = (ENVIRON["ALLIOLI_MAX_WIDTH"] ~ /^[0-9]+$/) ? int(ENVIRON["ALLIOLI_MAX_WIDTH"]) : 120

	# For reference lookups the reference is parsed once, before any input
	# is read; the per-record rules then only consult `mode` and `p`.
	if (cmd == "ref") {
		mode = parseref(ref, p)
		p["book"] = cleanbook(p["book"])
	}
}

# "list" command: print each book once, as "Name (Abbreviation)", the first
# time its abbreviation is encountered in the input.
cmd == "list" && !($2 in seen_books) {
	seen_books[$2] = 1
	printf("%s (%s)\n", $1, $2)
}

# Parse a user-supplied reference string into its components.
#
# Parameters:
#   ref - the reference text, in one of the forms listed below
#   arr - array that receives the parsed parts; depending on the form it
#         gets "book", "chapter", "verse", "chapter_end", "verse_end",
#         "search", or (for verse lists) one ("verse", n) element per verse
#
# Returns the kind of reference that was recognised:
#   "exact"      forms 1, 2, 3
#   "exact_set"  form 3a
#   "range"      forms 4, 5
#   "range_ext"  form 6
#   "search"     forms 7, 8, 9
#   "unknown"    anything else (arr may be partially filled)
function parseref(ref, arr) {
	# 1. <book>
	# 2. <book>:?<chapter>
	# 3. <book>:?<chapter>:<verse>
	# 3a. <book>:?<chapter>:<verse>[,<verse>]...
	# 4. <book>:?<chapter>-<chapter>
	# 5. <book>:?<chapter>:<verse>-<verse>
	# 6. <book>:?<chapter>:<verse>-<chapter>:<verse>
	# 7. /<search>
	# 8. <book>/search
	# 9. <book>:?<chapter>/search

	# --- book (or a bare search) ---
	if (match(ref, "^[1-9]?[a-zA-ZäüöÄÜÖ ]+")) {
		# 1, 2, 3, 3a, 4, 5, 6, 8, 9
		arr["book"] = substr(ref, 1, RLENGTH)
		ref = substr(ref, RLENGTH + 1)
	} else if (match(ref, "^/")) {
		# 7
		arr["search"] = substr(ref, 2)
		return "search"
	} else {
		return "unknown"
	}

	# --- chapter ---
	if (match(ref, "^:?[1-9]+[0-9]*")) {
		# 2, 3, 3a, 4, 5, 6, 9
		# RLENGTH still includes the optional ":" that sub() strips, so the
		# offsets below are one shorter when a colon was present.
		if (sub("^:", "", ref)) {
			arr["chapter"] = int(substr(ref, 1, RLENGTH - 1))
			ref = substr(ref, RLENGTH)
		} else {
			arr["chapter"] = int(substr(ref, 1, RLENGTH))
			ref = substr(ref, RLENGTH + 1)
		}
	} else if (match(ref, "^/")) {
		# 8
		arr["search"] = substr(ref, 2)
		return "search"
	} else if (ref == "") {
		# 1
		return "exact"
	} else {
		return "unknown"
	}

	# --- verse, chapter range, or search within a chapter ---
	if (match(ref, "^:[1-9]+[0-9]*")) {
		# 3, 3a, 5, 6
		arr["verse"] = int(substr(ref, 2, RLENGTH - 1))
		ref = substr(ref, RLENGTH + 1)
	} else if (match(ref, "^-[1-9]+[0-9]*$")) {
		# 4
		arr["chapter_end"] = int(substr(ref, 2))
		return "range"
	} else if (match(ref, "^/")) {
		# 9
		arr["search"] = substr(ref, 2)
		return "search"
	} else if (ref == "") {
		# 2
		return "exact"
	} else {
		return "unknown"
	}

	# --- what follows the first verse ---
	if (match(ref, "^-[1-9]+[0-9]*$")) {
		# 5
		arr["verse_end"] = int(substr(ref, 2))
		return "range"
	} else if (match(ref, "^-[1-9]+[0-9]*")) {
		# 6
		# Anchored at the start: the substr() offsets below assume the match
		# begins at position 1. Unanchored, a "-N" further to the right would
		# match too and store a meaningless chapter_end.
		arr["chapter_end"] = int(substr(ref, 2, RLENGTH - 1))
		ref = substr(ref, RLENGTH + 1)
	} else if (ref == "") {
		# 3
		return "exact"
	} else if (match(ref, "^,[1-9]+[0-9]*")) {
		# 3a
		# Switch from a single arr["verse"] to a set keyed by ("verse", n).
		arr["verse", arr["verse"]] = 1
		delete arr["verse"]
		do {
			arr["verse", substr(ref, 2, RLENGTH - 1)] = 1
			ref = substr(ref, RLENGTH + 1)
		} while (match(ref, "^,[1-9]+[0-9]*"))

		# Anything left after the last ",<verse>" is a syntax error.
		if (ref != "") {
			return "unknown"
		}

		return "exact_set"
	} else {
		return "unknown"
	}

	# --- end verse of a cross-chapter range ---
	if (match(ref, "^:[1-9]+[0-9]*$")) {
		# 6
		arr["verse_end"] = int(substr(ref, 2))
		return "range_ext"
	} else {
		return "unknown"
	}
}

# Normalise a book name for comparison: remove every space and lower-case
# the result, so "1 Mose" and "1mose" compare equal.
function cleanbook(book) {
	gsub(/ +/, "", book)
	return tolower(book)
}

# Decide whether a record's book matches the (already normalised) query.
#
# A record matches when, after cleanbook() normalisation, the query equals
# the book name, equals the abbreviation, or is a prefix of the book name.
# Returns the normalised book name on a match; otherwise falls off the end
# and yields awk's uninitialised (false) value.
function bookmatches(book, bookabbr, query) {
	book = cleanbook(book)
	bookabbr = cleanbook(bookabbr)

	if (book == query || bookabbr == query || substr(book, 1, length(query)) == query) {
		return book
	}
}

# Print one verse text, word-wrapped to the configured width.
#
# Parameters:
#   verse - the verse text; the caller has already printed the
#           "<chapter>:<verse>\t" prefix on the current line
#
# Environment:
#   ALLIOLI_NOFOOTNOTES - when set and not "0", superscript footnote
#                         markers are stripped from the text
#   ALLIOLI_NOLINEWRAP  - when set and not "0", the text is printed on a
#                         single line without wrapping
#
# Continuation lines start with a tab; the usable width is MAX_WIDTH - 8
# to leave room for that tab. All working variables are locals so callers'
# loop indices are not clobbered.
function printverse(verse,    word_count, characters_printed, words, i, sep) {
	# Remove superscript footnote numbers if footnotes are disabled
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		gsub(/[⁰¹²³⁴⁵⁶⁷⁸⁹]+/, "", verse)
	}

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s\n", verse)
		return
	}

	word_count = split(verse, words, " ")
	characters_printed = 0
	for (i = 1; i <= word_count; i++) {
		# A separating space is only needed when the line already has text.
		sep = (characters_printed > 0) ? 1 : 0

		# Wrap only when the current line is non-empty: a word that does not
		# fit on an empty line will not fit on the next one either, and
		# wrapping would just emit an empty line before it.
		if (characters_printed > 0 && characters_printed + sep + length(words[i]) > MAX_WIDTH - 8) {
			printf("\n\t")
			characters_printed = 0
			sep = 0
		}
		if (sep) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
}

# Print a verse as two word-wrapped columns: Latin on the left, German on
# the right, separated by " | ".
#
# Parameters:
#   latin  - Latin text of the verse
#   german - German text of the verse
#
# Environment:
#   ALLIOLI_NOFOOTNOTES - when set and not "0", superscript footnote
#                         markers are stripped from the German text
#   ALLIOLI_NOLINEWRAP  - when set and not "0", both texts are printed on
#                         one line as "latin | german"
#
# Each output line starts with a tab. Every line receives at least one
# word per column that still has words left, even if that word is wider
# than the column; otherwise an over-long word (or a very small MAX_WIDTH)
# would stall the loop forever.
function printverse_bilingual(latin, german,    latin_words, german_words, latin_count, german_count, latin_idx, german_idx, col_width, latin_chars, german_chars, latin_line, german_line, word) {
	# Remove superscript footnote numbers if footnotes are disabled
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		gsub(/[⁰¹²³⁴⁵⁶⁷⁸⁹]+/, "", german)
	}

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s | %s\n", latin, german)
		return
	}

	# Column width is half the total width, minus separators.
	# Clamped to at least 1 so the padding width passed to printf stays
	# positive when MAX_WIDTH is very small.
	col_width = int((MAX_WIDTH - 10) / 2)
	if (col_width < 1) {
		col_width = 1
	}

	# Split into words
	latin_count = split(latin, latin_words, " ")
	german_count = split(german, german_words, " ")

	latin_idx = 1
	german_idx = 1

	# Print both columns line by line
	while (latin_idx <= latin_count || german_idx <= german_count) {
		latin_line = ""
		german_line = ""
		latin_chars = 0
		german_chars = 0

		# Build Latin line
		while (latin_idx <= latin_count) {
			word = latin_words[latin_idx]
			# Only a non-empty line can be "full"; the first word is always
			# accepted so that progress is guaranteed.
			if (latin_chars > 0 && latin_chars + 1 + length(word) > col_width) {
				break
			}
			if (latin_chars > 0) {
				latin_line = latin_line " "
				latin_chars++
			}
			latin_line = latin_line word
			latin_chars += length(word)
			latin_idx++
		}

		# Build German line (same rule as for the Latin column)
		while (german_idx <= german_count) {
			word = german_words[german_idx]
			if (german_chars > 0 && german_chars + 1 + length(word) > col_width) {
				break
			}
			if (german_chars > 0) {
				german_line = german_line " "
				german_chars++
			}
			german_line = german_line word
			german_chars += length(word)
			german_idx++
		}

		# Print the line, padding the Latin column to col_width
		printf("\t%-*s | %s\n", col_width, latin_line, german_line)
	}
}

# Print one paragraph of a book introduction, word-wrapped to MAX_WIDTH.
#
# Parameters:
#   verse - the paragraph text; the caller has already printed a leading
#           tab on the current line
#
# Environment:
#   ALLIOLI_NOLINEWRAP - when set and not "0", the text is printed on a
#                        single line without wrapping
#
# Side effect: sets the global flag printed_intrudction (spelling as used
# by processline()) so a separator can be printed before the next
# non-introduction record.
# NOTE(review): the flag is not set on the NOLINEWRAP path, so no separator
# follows the introduction in that mode - confirm whether that is intended.
function printintroductionpar(verse,    word_count, characters_printed, words, i, sep) {
	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s\n", verse)
		return
	}

	word_count = split(verse, words, " ")
	characters_printed = 8 # account for the tab the caller printed on the first line
	for (i = 1; i <= word_count; i++) {
		# The first word follows the tab directly, so it has no separating
		# space; count a space only when one will actually be printed.
		sep = (i != 1 && characters_printed > 0) ? 1 : 0

		if (characters_printed + sep + length(words[i]) > MAX_WIDTH) {
			printf("\n")
			characters_printed = 0
			sep = 0
		}
		if (sep) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
	printed_intrudction = 1
}

# Convert the decimal digits of num to Unicode superscript digits.
#
# Parameters:
#   num - number or string; each character "0".."9" is replaced by its
#         superscript form, every other character is copied unchanged
#
# Returns the converted string. All working variables are locals, so the
# caller's globals (notably the common loop index `i`) are left untouched.
function to_superscript_num(num,    sup, out, len, pos, digit) {
	# sup[d + 1] is the superscript form of digit d
	split("⁰ ¹ ² ³ ⁴ ⁵ ⁶ ⁷ ⁸ ⁹", sup, " ")

	out = ""
	len = length(num)
	for (pos = 1; pos <= len; pos++) {
		digit = substr(num, pos, 1)
		out = out (digit ~ /^[0-9]$/ ? sup[digit + 1] : digit)
	}
	return out
}

# Print one footnote, prefixed with its number in superscript.
#
# Parameters:
#   footnote_num - footnote number (decimal digits)
#   footnote     - footnote text
#
# Environment:
#   ALLIOLI_NOFOOTNOTES - when set and not "0", nothing is printed
#   ALLIOLI_NOLINEWRAP  - when set and not "0", the footnote is printed on
#                         one line after two tabs
#
# Layout with wrapping enabled:
#   - a short footnote (text shorter than MAX_WIDTH - 17) is right-aligned
#     so that marker plus text end at column MAX_WIDTH;
#   - a longer footnote starts after two tabs and wraps at MAX_WIDTH - 8,
#     with continuation lines indented by one tab.
# The marker width is taken from the number of digits, so footnotes
# numbered 10 and above line up like single-digit ones.
function printfootnote(footnote_num, footnote,    word_count, characters_printed, sup_num, words, i, sep, digits, pad) {
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		return
	}

	# Convert footnote number to superscript
	sup_num = to_superscript_num(footnote_num)

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("\t\t%s%s\n", sup_num, footnote)
		return
	}

	# One superscript glyph is printed per digit of the footnote number.
	digits = length(footnote_num "")

	# Short footnote: right-align on a single line.
	if (length(footnote) < MAX_WIDTH - 17) {
		pad = MAX_WIDTH - length(footnote) - digits
		for (i = 1; i <= pad; i++) {
			printf(" ")
		}
		printf("%s%s\n", sup_num, footnote)
		return
	}

	# Long footnote: wrap word by word.
	word_count = split(footnote, words, " ")
	printf("\t\t%s", sup_num)
	characters_printed = 16 + digits # two tabs plus the superscript marker
	for (i = 1; i <= word_count; i++) {
		# The first word follows the marker directly, so it has no
		# separating space; count a space only when one will be printed.
		sep = (i != 1 && characters_printed > 0) ? 1 : 0

		if (characters_printed + sep + length(words[i]) > MAX_WIDTH - 8) {
			printf("\n\t")
			characters_printed = 0
			sep = 0
		}
		if (sep) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
}

# Handle one input record that matched the requested reference.
#
# In text mode the record is printed immediately (book heading, footnote,
# introduction paragraph or verse). In JSON mode (ALLIOLI_JSON_OUTPUT set
# and not "0") nothing is printed; the record is stored in the json_*
# globals for the END block. Either way outputted_records is incremented.
#
# Row types are told apart by column contents:
#   footnote     - $6 empty, $7 all digits, at least 8 fields ($8 = text)
#   introduction - chapter ($4) is 0 and $6 is empty ($7 = text)
#   verse        - $6 = Latin and/or $7 = German text
function processline() {
	if (ENVIRON["ALLIOLI_JSON_OUTPUT"] == "" || ENVIRON["ALLIOLI_JSON_OUTPUT"] == "0") {
		# ---- Normal text output ----

		# Separate a finished introduction from the first real chapter.
		if (printed_intrudction && $4 != 0) {
			printed_intrudction = 0
			printf("\n\n")
		}

		# Print the book name once, when the book changes.
		if (last_book_printed != $2) {
			last_book_printed = $2
			print $1
		}

		if ($6 == "" && $7 ~ /^[0-9]+$/ && NF >= 8) {
			# Footnote row
			printfootnote($7, $8)
		} else if ($4 == 0 && $6 == "") {
			# Introduction paragraph
			printf("\t")
			printintroductionpar($7)
		} else if ($6 == "") {
			# No Latin: print the German text if there is any (a bare
			# number in $7 is not verse text)
			if ($7 != "" && $7 !~ /^[0-9]+$/) {
				printf("%d:%d\t", $4, $5)
				printverse($7)
			}
		} else if ($7 == "") {
			# Latin without German - rare, but handled
			printf("%d:%d\t", $4, $5)
			printverse($6)
		} else if (ENVIRON["ALLIOLI_ONLY_LATIN"] != "" && ENVIRON["ALLIOLI_ONLY_LATIN"] != "0") {
			# Both languages present, Latin requested
			printf("%d:%d\t", $4, $5)
			printverse($6)
		} else if (ENVIRON["ALLIOLI_ONLY_GERMAN"] != "" && ENVIRON["ALLIOLI_ONLY_GERMAN"] != "0") {
			# Both languages present, German requested
			printf("%d:%d\t", $4, $5)
			printverse($7)
		} else {
			# Both languages side by side
			printf("%d:%d", $4, $5)
			printverse_bilingual($6, $7)
		}

		outputted_records++
		return
	}

	# ---- JSON mode: collect, the END block prints ----

	# The first matching record supplies the book metadata.
	if (json_book_name == "") {
		json_book_name = $1
		json_book_abbr = $2
		json_book_num = $3
	}

	if ($6 == "" && $7 ~ /^[0-9]+$/ && NF >= 8) {
		# Footnote: keep the text keyed by number, and remember the order
		# in which numbers appeared for this chapter/verse.
		json_footnotes[$4, $5, $7] = $8
		json_footnote_nums[$4, $5, ++json_footnote_count[$4, $5]] = $7
	} else if ($4 == 0 && $6 == "") {
		# Introduction: join paragraphs with single spaces.
		if (json_intro != "") {
			json_intro = json_intro " " $7
		} else {
			json_intro = $7
		}
	} else if ($6 != "" || ($7 != "" && $7 !~ /^[0-9]+$/)) {
		# Verse with text in at least one language.
		json_latin[$4, $5] = $6
		json_german[$4, $5] = $7

		# Record each verse once per chapter, in order of first appearance.
		if (!(($4, $5) in json_verse_seen)) {
			json_verse_seen[$4, $5] = 1
			json_verses[$4, ++json_verse_count[$4]] = $5
		}

		# Record each chapter once, in order of first appearance.
		if (!($4 in json_chapter_seen)) {
			json_chapter_seen[$4] = 1
			json_chapters[++json_chapter_total] = $4
		}
	}

	outputted_records++
}

# Per-record rules for the "ref" command. `mode` and `p` come from
# parseref() in BEGIN; exactly one rule applies to a given mode, and every
# matching record is handed to processline().

# exact: a whole book, a whole chapter, or a single verse.
cmd == "ref" && mode == "exact" && bookmatches($1, $2, p["book"]) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
(p["verse"] == "" || $5 == p["verse"]) {
	processline()
}

# exact_set: a list of verses within one chapter. Membership is tested
# with `in` so that looking up a verse number does not create an empty
# element in p for every record scanned.
cmd == "ref" && mode == "exact_set" && bookmatches($1, $2, p["book"]) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
(("verse", $5) in p) {
	processline()
}

# range: either a chapter range (chapter_end set) or a verse range within
# a single chapter (verse and verse_end set, chapter_end empty).
cmd == "ref" && mode == "range" && bookmatches($1, $2, p["book"]) &&
((p["chapter_end"] == "" && $4 == p["chapter"]) || ($4 >= p["chapter"] && $4 <= p["chapter_end"])) &&
(p["verse"] == "" || $5 >= p["verse"]) &&
(p["verse_end"] == "" || $5 <= p["verse_end"]) {
	processline()
}

# range_ext: from chapter:verse to chapter_end:verse_end. The four
# alternatives are: tail of the first chapter, any chapter strictly in
# between, head of the last chapter, and the degenerate case where start
# and end chapter are the same.
cmd == "ref" && mode == "range_ext" && bookmatches($1, $2, p["book"]) &&
(($4 == p["chapter"] && $5 >= p["verse"] && p["chapter"] != p["chapter_end"]) ||
($4 > p["chapter"] && $4 < p["chapter_end"]) ||
($4 == p["chapter_end"] && $5 <= p["verse_end"] && p["chapter"] != p["chapter_end"]) ||
(p["chapter"] == p["chapter_end"] && $4 == p["chapter"] && $5 >= p["verse"] && $5 <= p["verse_end"])) {
	processline()
}

# search: case-insensitive regular-expression match, optionally limited to
# a book and chapter.
# NOTE(review): only column 6 (the Latin text) is searched; German text in
# column 7 is never matched - confirm whether that is intended.
cmd == "ref" && mode == "search" &&
(p["book"] == "" || bookmatches($1, $2, p["book"])) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
match(tolower($6), tolower(p["search"])) {
	processline()
}

# Escape s for use inside a double-quoted JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Works character by character rather than with gsub(), because the
# treatment of backslashes in gsub() replacement text differs between awk
# implementations.
function json_escape(s,    out, len, pos, ch) {
	# Fast path: nothing to escape.
	if (s !~ /[\\"\t\r\n]/) {
		return s
	}

	out = ""
	len = length(s)
	for (pos = 1; pos <= len; pos++) {
		ch = substr(s, pos, 1)
		if (ch == "\\") out = out "\\\\"
		else if (ch == "\"") out = out "\\\""
		else if (ch == "\n") out = out "\\n"
		else if (ch == "\r") out = out "\\r"
		else if (ch == "\t") out = out "\\t"
		else out = out ch
	}
	return out
}

# Final output.
#
# JSON mode (cmd == "ref" and ALLIOLI_JSON_OUTPUT set and not "0") prints
# the data collected by processline():
#
#   {
#     "book": { "name", "abbreviation", "number" },
#     "introduction": "...",          (only if introduction text was collected)
#     "chapter": N, "verses": [...]   (exactly one chapter selected)
#     "chapters": [ { "chapter": N, "verses": [...] }, ... ]
#                                     (more than one chapter selected)
#   }
#
# A single-chapter result keeps the flat "chapter"/"verses" layout. Several
# chapters are wrapped in a "chapters" array, because repeating the same
# two keys inside one object does not produce usable JSON.
#
# In both modes "Unknown reference: <ref>" is printed when no record
# matched; JSON mode additionally exits with status 1.
END {
	# JSON output mode
	if (cmd == "ref" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "" && ENVIRON["ALLIOLI_JSON_OUTPUT"] != "0") {
		if (outputted_records == 0) {
			print "Unknown reference: " ref
			exit 1
		}

		# Determine language flags
		only_latin = (ENVIRON["ALLIOLI_ONLY_LATIN"] != "" && ENVIRON["ALLIOLI_ONLY_LATIN"] != "0")
		only_german = (ENVIRON["ALLIOLI_ONLY_GERMAN"] != "" && ENVIRON["ALLIOLI_ONLY_GERMAN"] != "0")
		no_footnotes = (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0")

		# Every top-level member after "book" is written with a leading
		# ",\n", so no member ever leaves a dangling comma behind.
		print "{"
		printf("  \"book\": {\n")
		printf("    \"name\": \"%s\",\n", json_escape(json_book_name))
		printf("    \"abbreviation\": \"%s\",\n", json_escape(json_book_abbr))
		printf("    \"number\": %d\n", json_book_num)
		printf("  }")

		# Introduction text is collected separately from the chapter list,
		# so it is emitted based on its own content.
		if (json_intro != "") {
			printf(",\n  \"introduction\": \"%s\"", json_escape(json_intro))
		}

		# Chapters to output; chapter 0 is the introduction, not a chapter.
		chapter_count = 0
		for (c_idx = 1; c_idx <= json_chapter_total; c_idx++) {
			if (json_chapters[c_idx] + 0 != 0) {
				chapter_list[++chapter_count] = json_chapters[c_idx]
			}
		}

		# With several chapters each one becomes an object inside
		# "chapters", indented four columns deeper than the flat layout.
		multi = (chapter_count > 1)
		pad = (multi ? "    " : "")
		if (multi) {
			printf(",\n  \"chapters\": [")
		}

		for (c_idx = 1; c_idx <= chapter_count; c_idx++) {
			chapter = chapter_list[c_idx]

			if (multi) {
				printf("%s\n    {\n", (c_idx > 1 ? "," : ""))
			} else {
				printf(",\n")
			}
			printf("%s  \"chapter\": %d,\n", pad, chapter)
			printf("%s  \"verses\": [\n", pad)

			# Sort verses numerically before output
			verse_total = json_verse_count[chapter] + 0
			delete sorted_verses
			for (v_idx = 1; v_idx <= verse_total; v_idx++) {
				sorted_verses[v_idx] = json_verses[chapter, v_idx]
			}
			# Simple exchange sort; "+ 0" forces numeric comparison
			for (i = 1; i <= verse_total; i++) {
				for (j = i + 1; j <= verse_total; j++) {
					if (sorted_verses[i] + 0 > sorted_verses[j] + 0) {
						temp = sorted_verses[i]
						sorted_verses[i] = sorted_verses[j]
						sorted_verses[j] = temp
					}
				}
			}

			# Output verses in sorted order
			for (v_idx = 1; v_idx <= verse_total; v_idx++) {
				verse_num = sorted_verses[v_idx]

				latin_text = json_latin[chapter, verse_num]
				german_text = json_german[chapter, verse_num]
				# Remove superscript markers if footnotes disabled
				if (no_footnotes) {
					gsub(/[⁰¹²³⁴⁵⁶⁷⁸⁹]+/, "", german_text)
				}

				# Collect the members of "text" first so that commas can be
				# placed between them regardless of which ones are present.
				text_count = 0
				if (only_latin) {
					text_members[++text_count] = "\"latin\": \"" json_escape(latin_text) "\""
				} else if (only_german) {
					text_members[++text_count] = "\"german\": \"" json_escape(german_text) "\""
				} else {
					# Both languages: include whichever is non-empty
					if (latin_text != "") {
						text_members[++text_count] = "\"latin\": \"" json_escape(latin_text) "\""
					}
					if (german_text != "") {
						text_members[++text_count] = "\"german\": \"" json_escape(german_text) "\""
					}
				}

				printf("%s    {\n", pad)
				printf("%s      \"verse\": %d,\n", pad, verse_num)

				# Text object
				printf("%s      \"text\": {", pad)
				for (t_idx = 1; t_idx <= text_count; t_idx++) {
					printf("%s\n%s        %s", (t_idx > 1 ? "," : ""), pad, text_members[t_idx])
				}
				if (text_count > 0) {
					printf("\n%s      ", pad)
				}
				printf("}")

				# Footnotes array (if not disabled)
				fn_total = (no_footnotes ? 0 : json_footnote_count[chapter, verse_num] + 0)
				if (fn_total > 0) {
					printf(",\n%s      \"footnotes\": [\n", pad)
					for (f_idx = 1; f_idx <= fn_total; f_idx++) {
						fn_num = json_footnote_nums[chapter, verse_num, f_idx]
						fn_text = json_footnotes[chapter, verse_num, fn_num]
						printf("%s        {\n", pad)
						printf("%s          \"number\": %d,\n", pad, fn_num)
						printf("%s          \"text\": \"%s\"\n", pad, json_escape(fn_text))
						printf("%s        }%s\n", pad, (f_idx < fn_total ? "," : ""))
					}
					printf("%s      ]\n", pad)
				} else {
					printf("\n")
				}

				# Close verse object
				printf("%s    }%s\n", pad, (v_idx < verse_total ? "," : ""))
			}

			printf("%s  ]", pad)
			if (multi) {
				printf("\n    }")
			}
		}

		if (multi) {
			printf("\n  ]")
		}
		printf("\n}\n")
		exit
	}

	# Normal text mode
	if (cmd == "ref" && outputted_records == 0) {
		print "Unknown reference: " ref
	}
}