# allioli/allioli.awk

BEGIN {
	# Input is tab-separated. Columns, as they are read by processline():
	#  $1 Book name
	#  $2 Book abbreviation
	#  $3 Book number
	#  $4 Chapter number (0 is treated as an introduction paragraph)
	#  $5 Verse number
	#  $6 Latin text
	#  $7 German text, or the footnote number on footnote lines
	#  $8 Footnote text (footnote lines only)
	FS = "\t"

	# Output width defaults to 100 columns. ALLIOLI_MAX_WIDTH may only
	# narrow it, and is ignored unless it consists solely of digits.
	MAX_WIDTH = 100
	if (ENVIRON["ALLIOLI_MAX_WIDTH"] ~ /^[0-9]+$/ && int(ENVIRON["ALLIOLI_MAX_WIDTH"]) < MAX_WIDTH) {
		MAX_WIDTH = int(ENVIRON["ALLIOLI_MAX_WIDTH"])
	}

	# cmd and ref are not assigned in this file; presumably the caller
	# supplies them (e.g. with -v) -- TODO confirm against the wrapper.
	# For a reference query, parse it once up front: mode selects which
	# pattern rule fires, p holds the parsed pieces.
	if (cmd == "ref") {
		mode = parseref(ref, p)
		p["book"] = cleanbook(p["book"])
	}
}

# "list" command: print every book once as "Name (Abbreviation)", in the
# order the books first appear in the input. seen_books is keyed by the
# abbreviation ($2).
cmd == "list" && !($2 in seen_books) {
	seen_books[$2] = 1
	printf("%s (%s)\n", $1, $2)
}

# parseref(ref, arr): parse a reference string and return the query mode.
#
# The reference is consumed left to right; each stage strips the part it
# recognised from the front of ref and stores it in arr:
#   arr["book"]        book name exactly as typed (not normalised here)
#   arr["chapter"]     first chapter (number)
#   arr["verse"]       first verse (number); removed again for verse lists
#   arr["verse", n]    set to 1 for every verse n of a verse list (form 3a)
#   arr["chapter_end"] last chapter of a range
#   arr["verse_end"]   last verse of a range
#   arr["search"]      search pattern (text after the "/")
#
# Return value:
#   "exact"      forms 1, 2, 3
#   "exact_set"  form 3a
#   "range"      forms 4, 5
#   "range_ext"  form 6
#   "search"     forms 7, 8, 9
#   "unknown"    anything else (arr may then be partially filled)
function parseref(ref, arr) {
	# 1. <book>
	# 2. <book>:?<chapter>
	# 3. <book>:?<chapter>:<verse>
	# 3a. <book>:?<chapter>:<verse>[,<verse>]...
	# 4. <book>:?<chapter>-<chapter>
	# 5. <book>:?<chapter>:<verse>-<verse>
	# 6. <book>:?<chapter>:<verse>-<chapter>:<verse>
	# 7. /<search>
	# 8. <book>/search
	# 9. <book>:?<chapter>/search

	# Stage 1: book name (optional leading digit, then letters/spaces),
	# or a bare "/search".
	if (match(ref, "^[1-9]?[a-zA-ZäüöÄÜÖ ]+")) {
		# 1, 2, 3, 3a, 4, 5, 6, 8, 9
		arr["book"] = substr(ref, 1, RLENGTH)
		ref = substr(ref, RLENGTH + 1)
	} else if (match(ref, "^/")) {
		# 7
		arr["search"] = substr(ref, 2)
		return "search"
	} else {
		return "unknown"
	}

	# Stage 2: chapter, optionally introduced by ":".
	if (match(ref, "^:?[1-9]+[0-9]*")) {
		# 2, 3, 3a, 4, 5, 6, 9
		# RLENGTH was measured with the ":" still present, so once sub()
		# has removed it the chapter is one character shorter.
		if (sub("^:", "", ref)) {
			arr["chapter"] = int(substr(ref, 1, RLENGTH - 1))
			ref = substr(ref, RLENGTH)
		} else {
			arr["chapter"] = int(substr(ref, 1, RLENGTH))
			ref = substr(ref, RLENGTH + 1)
		}
	} else if (match(ref, "^/")) {
		# 8
		arr["search"] = substr(ref, 2)
		return "search"
	} else if (ref == "") {
		# 1
		return "exact"
	} else {
		return "unknown"
	}

	# Stage 3: ":<verse>", "-<chapter>" (chapter range), "/search" or end.
	if (match(ref, "^:[1-9]+[0-9]*")) {
		# 3, 3a, 5, 6
		arr["verse"] = int(substr(ref, 2, RLENGTH - 1))
		ref = substr(ref, RLENGTH + 1)
	} else if (match(ref, "^-[1-9]+[0-9]*$")) {
		# 4
		arr["chapter_end"] = int(substr(ref, 2))
		return "range"
	} else if (match(ref, "^/")) {
		# 9
		arr["search"] = substr(ref, 2)
		return "search"
	} else if (ref == "") {
		# 2
		return "exact"
	} else {
		return "unknown"
	}

	# Stage 4: what follows the first verse.
	if (match(ref, "^-[1-9]+[0-9]*$")) {
		# 5
		arr["verse_end"] = int(substr(ref, 2))
		return "range"
	} else if (match(ref, "^-[1-9]+[0-9]*")) {
		# 6
		# The pattern must be anchored: the substr() offsets below assume
		# the match starts at position 1. Unanchored, input such as ",2-3"
		# was picked up here, stored a bogus chapter_end and never reached
		# the verse-list branch.
		arr["chapter_end"] = int(substr(ref, 2, RLENGTH - 1))
		ref = substr(ref, RLENGTH + 1)
	} else if (ref == "") {
		# 3
		return "exact"
	} else if (match(ref, "^,[1-9]+[0-9]*")) {
		# 3a
		# Turn the single verse into a set keyed by ("verse", n) and add
		# every further ",<verse>" to it.
		arr["verse", arr["verse"]] = 1
		delete arr["verse"]
		do {
			arr["verse", substr(ref, 2, RLENGTH - 1)] = 1
			ref = substr(ref, RLENGTH + 1)
		} while (match(ref, "^,[1-9]+[0-9]*"))

		# Anything left after the last verse makes the reference invalid.
		if (ref != "") {
			return "unknown"
		}

		return "exact_set"
	} else {
		return "unknown"
	}

	# Stage 5: only form 6 gets here; it must end with ":<verse>".
	if (match(ref, "^:[1-9]+[0-9]*$")) {
		# 6
		arr["verse_end"] = int(substr(ref, 2))
		return "range_ext"
	} else {
		return "unknown"
	}
}

# cleanbook(book): normalise a book name for comparison by removing every
# space and lower-casing the rest.
function cleanbook(book) {
	gsub(/ +/, "", book)
	return tolower(book)
}

# bookmatches(book, bookabbr, query): decide whether a record's book is the
# one the user asked for.
#
# book and bookabbr are normalised with cleanbook(); query is compared as
# given (the BEGIN block normalises it before any record is read). A record
# matches when the query equals the full name, equals the abbreviation, or
# is a prefix of the full name.
#
# Returns the normalised book name on a match and nothing (an empty, false
# value) otherwise, so the result is usable directly in a pattern.
function bookmatches(book, bookabbr, query) {
	book = cleanbook(book)
	if (book == query || cleanbook(bookabbr) == query || substr(book, 1, length(query)) == query) {
		return book
	}
}

# printverse(verse): print one verse text, word-wrapped.
#
# The caller has already printed the "chapter:verse<TAB>" label, so the
# first line continues after it and every continuation line starts with a
# tab. The per-line budget is MAX_WIDTH - 8 characters (8 presumably being
# the assumed tab width).
#
# Environment:
#   ALLIOLI_NOFOOTNOTES  set and not "0": strip superscript footnote markers
#   ALLIOLI_NOLINEWRAP   set and not "0": print the verse on a single line
#
# words and i are declared local so the function does not clobber globals
# of the same name.
function printverse(verse,    word_count, characters_printed, words, i) {
	# Remove superscript footnote numbers if footnotes are disabled
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		gsub(/[⁰¹²³⁴⁵⁶⁷⁸⁹]+/, "", verse)
	}

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s\n", verse)
		return
	}

	word_count = split(verse, words, " ")
	characters_printed = 0
	for (i = 1; i <= word_count; i++) {
		# Break only when the current line already holds something: a word
		# that is wider than the whole budget is printed on its own line
		# instead of being preceded by an empty one.
		if (characters_printed > 0 && characters_printed + length(words[i]) + 1 > MAX_WIDTH - 8) {
			printf("\n\t")
			characters_printed = 0
		}
		if (characters_printed > 0) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
}

# printverse_bilingual(latin, german): print a verse as two word-wrapped
# columns, Latin on the left and German on the right, separated by " | ".
#
# The caller has already printed the "chapter:verse" label without a
# trailing tab; every row printed here starts with a tab. Each column is
# int((MAX_WIDTH - 10) / 2) characters wide and the Latin column is padded
# to that width.
#
# Environment:
#   ALLIOLI_NOFOOTNOTES  set and not "0": strip superscript footnote markers
#                        from the German text
#   ALLIOLI_NOLINEWRAP   set and not "0": print "latin | german" on one line
function printverse_bilingual(latin, german,    latin_words, german_words, latin_count, german_count, latin_idx, german_idx, col_width, latin_chars, german_chars, latin_line, german_line, word) {
	# Remove superscript footnote numbers if footnotes are disabled
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		gsub(/[⁰¹²³⁴⁵⁶⁷⁸⁹]+/, "", german)
	}

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s | %s\n", latin, german)
		return
	}

	# Column width is half the total width, minus separators
	col_width = int((MAX_WIDTH - 10) / 2)

	# Split into words
	latin_count = split(latin, latin_words, " ")
	german_count = split(german, german_words, " ")

	latin_idx = 1
	german_idx = 1

	# Print both columns row by row until both texts are used up
	while (latin_idx <= latin_count || german_idx <= german_count) {
		latin_line = ""
		german_line = ""
		latin_chars = 0
		german_chars = 0

		# Build Latin line. The first word of a row is always accepted,
		# even if it is wider than the column: otherwise an over-long word
		# would never be consumed and this loop would print empty rows
		# forever.
		while (latin_idx <= latin_count) {
			word = latin_words[latin_idx]
			if (latin_chars > 0 && latin_chars + length(word) + 1 > col_width) {
				break
			}
			if (latin_chars > 0) {
				latin_line = latin_line " "
				latin_chars++
			}
			latin_line = latin_line word
			latin_chars += length(word)
			latin_idx++
		}

		# Build German line (same rule as above)
		while (german_idx <= german_count) {
			word = german_words[german_idx]
			if (german_chars > 0 && german_chars + length(word) + 1 > col_width) {
				break
			}
			if (german_chars > 0) {
				german_line = german_line " "
				german_chars++
			}
			german_line = german_line word
			german_chars += length(word)
			german_idx++
		}

		# Print the row, left column padded to col_width
		printf("\t%-*s | %s\n", col_width, latin_line, german_line)
	}
}

# printintroductionpar(verse): print one introduction paragraph,
# word-wrapped to MAX_WIDTH.
#
# The caller has already printed a leading tab, which is why the first
# line starts with 8 characters counted as used. Continuation lines are
# not indented.
#
# Side effect: sets the global printed_intrudction to 1 (the spelling is
# kept because processline() reads that exact name) so that a separator
# can be printed before the first regular verse.
# NOTE(review): the flag is not set on the ALLIOLI_NOLINEWRAP path, so no
# separator follows an introduction in that mode -- confirm this is
# intended.
#
# words and i are declared local so the function does not clobber globals
# of the same name.
function printintroductionpar(verse,    word_count, characters_printed, words, i) {
	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("%s\n", verse)
		return
	}

	word_count = split(verse, words, " ")
	characters_printed = 8 # account for the indent at the beginning of the paragraph
	for (i = 1; i <= word_count; i++) {
		# Break only when the current line already holds something, so a
		# word wider than MAX_WIDTH does not produce an extra empty line.
		if (characters_printed > 0 && characters_printed + length(words[i]) + 1 > MAX_WIDTH) {
			printf("\n")
			characters_printed = 0
		}
		# i != 1 is needed because characters_printed starts above zero
		# on the first line although no word has been printed yet.
		if (i != 1 && characters_printed > 0) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
	printed_intrudction = 1
}

# to_superscript_num(num): return num with every ASCII digit replaced by
# the corresponding Unicode superscript digit. Any other character is
# copied unchanged.
#
# All working variables are declared local; as globals they would
# overwrite a caller's i, len, result or digit.
function to_superscript_num(num,    result, len, i, digit, pos, sup) {
	# sup[1] .. sup[10] hold the superscripts for 0 .. 9. Splitting on
	# spaces avoids indexing into a multi-byte string.
	split("⁰ ¹ ² ³ ⁴ ⁵ ⁶ ⁷ ⁸ ⁹", sup, " ")

	result = ""
	len = length(num)
	for (i = 1; i <= len; i++) {
		digit = substr(num, i, 1)
		# index() yields 1..10 for "0".."9" and 0 for anything else
		pos = index("0123456789", digit)
		if (pos > 0) {
			result = result sup[pos]
		} else {
			result = result digit
		}
	}
	return result
}

# printfootnote(footnote_num, footnote): print one footnote, prefixed with
# its number in superscript.
#
# Layout:
#   - ALLIOLI_NOFOOTNOTES set and not "0": print nothing.
#   - ALLIOLI_NOLINEWRAP set and not "0": two tabs, number, text, one line.
#   - footnote shorter than MAX_WIDTH - 17: a single line, pushed to the
#     right with MAX_WIDTH - length(footnote) - 1 leading spaces.
#   - otherwise: word-wrapped; the first line starts with two tabs and the
#     number, continuation lines start with one tab.
#
# words and i are declared local so the function does not clobber globals
# of the same name.
function printfootnote(footnote_num, footnote,    word_count, characters_printed, sup_num, words, i, pad) {
	if (ENVIRON["ALLIOLI_NOFOOTNOTES"] != "" && ENVIRON["ALLIOLI_NOFOOTNOTES"] != "0") {
		return
	}

	# Convert footnote number to superscript
	sup_num = to_superscript_num(footnote_num)

	if (ENVIRON["ALLIOLI_NOLINEWRAP"] != "" && ENVIRON["ALLIOLI_NOLINEWRAP"] != "0") {
		printf("\t\t%s%s\n", sup_num, footnote)
		return
	}

	# Short footnote: one right-aligned line
	if (length(footnote) < MAX_WIDTH - 17) {
		pad = MAX_WIDTH - length(footnote) - 1
		for (i = 1; i <= pad; i++) {
			printf(" ")
		}
		printf("%s%s\n", sup_num, footnote)
		return
	}

	# Long footnote: word-wrap
	word_count = split(footnote, words, " ")
	printf("\t\t%s", sup_num)
	characters_printed = 17 # account for the indent of the first line (2 tabs + sup_num)
	for (i = 1; i <= word_count; i++) {
		# Break only when the current line already holds something, so a
		# word wider than the budget does not produce an extra empty line.
		if (characters_printed > 0 && characters_printed + length(words[i]) + 1 > MAX_WIDTH - 8) {
			printf("\n\t")
			characters_printed = 0
		}
		# i != 1 is needed because characters_printed starts above zero
		# on the first line although no word has been printed yet.
		if (i != 1 && characters_printed > 0) {
			printf(" ")
			characters_printed++
		}
		printf("%s", words[i])
		characters_printed += length(words[i])
	}
	printf("\n")
}

# processline(): print the current input record in the layout that fits
# its kind, and count it in outputted_records.
#
# Before the record itself:
#   - if an introduction was printed last and this record is not in
#     chapter 0, two newlines are printed as a separator;
#   - whenever the book abbreviation ($2) changes, the book name ($1) is
#     printed as a heading.
#
# Record kinds, tested in this order:
#   footnote      $6 empty, $7 all digits, at least 8 fields
#   introduction  chapter 0 and $6 empty
#   bilingual     $6 and $7 both non-empty (may be restricted to one
#                 language with ALLIOLI_ONLY_LATIN / ALLIOLI_ONLY_GERMAN)
#   German only   $6 empty, $7 non-empty and not all digits
#   Latin only    $6 non-empty, $7 empty
# A record matching none of these prints nothing but is still counted.
function processline(    latin, second) {
	latin = $6
	second = $7

	if (printed_intrudction && $4 != 0) {
		printf("\n\n")
		printed_intrudction = 0
	}
	if (last_book_printed != $2) {
		print $1
		last_book_printed = $2
	}

	# Counted up front so that every branch below can simply return.
	outputted_records++

	# Footnote: column 7 carries the number, column 8 the text
	if (latin == "" && second ~ /^[0-9]+$/ && NF >= 8) {
		printfootnote(second, $8)
		return
	}

	# Introduction paragraph (chapter 0, text in column 7)
	if ($4 == 0 && latin == "") {
		printf("\t")
		printintroductionpar(second)
		return
	}

	# Both languages present
	if (latin != "" && second != "") {
		if (ENVIRON["ALLIOLI_ONLY_LATIN"] != "" && ENVIRON["ALLIOLI_ONLY_LATIN"] != "0") {
			# Latin only on request
			printf("%d:%d\t", $4, $5)
			printverse(latin)
		} else if (ENVIRON["ALLIOLI_ONLY_GERMAN"] != "" && ENVIRON["ALLIOLI_ONLY_GERMAN"] != "0") {
			# German only on request
			printf("%d:%d\t", $4, $5)
			printverse(second)
		} else {
			# Side by side; the label carries no tab because each row
			# printed by printverse_bilingual() starts with one
			printf("%d:%d", $4, $5)
			printverse_bilingual(latin, second)
		}
		return
	}

	# German text only (and not a bare footnote number)
	if (latin == "" && second != "" && second !~ /^[0-9]+$/) {
		printf("%d:%d\t", $4, $5)
		printverse(second)
		return
	}

	# Latin text only - rare, but handled
	if (latin != "" && second == "") {
		printf("%d:%d\t", $4, $5)
		printverse(latin)
	}
}

# Record selection for cmd == "ref". mode (set once in BEGIN) picks exactly
# one of the rules below; p holds the parsed reference. In every rule an
# unset p[...] entry compares equal to "" and means "no restriction".

# exact: one book, optionally narrowed to a chapter and a verse
cmd == "ref" && mode == "exact" &&
bookmatches($1, $2, p["book"]) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
(p["verse"] == "" || $5 == p["verse"]) {
	processline()
}

# exact_set: one chapter, verses taken from the list p["verse", n]
cmd == "ref" && mode == "exact_set" &&
bookmatches($1, $2, p["book"]) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
p["verse", $5] {
	processline()
}

# range: either a chapter range, or a verse range inside one chapter
cmd == "ref" && mode == "range" &&
bookmatches($1, $2, p["book"]) &&
((p["chapter_end"] == "" && $4 == p["chapter"]) ||
($4 >= p["chapter"] && $4 <= p["chapter_end"])) &&
(p["verse"] == "" || $5 >= p["verse"]) &&
(p["verse_end"] == "" || $5 <= p["verse_end"]) {
	processline()
}

# range_ext: from chapter:verse to chapter_end:verse_end. The four
# alternatives are: tail of the first chapter, whole chapters in between,
# head of the last chapter, and start and end in the same chapter.
cmd == "ref" && mode == "range_ext" &&
bookmatches($1, $2, p["book"]) &&
(($4 == p["chapter"] && $5 >= p["verse"] && p["chapter"] != p["chapter_end"]) ||
($4 > p["chapter"] && $4 < p["chapter_end"]) ||
($4 == p["chapter_end"] && $5 <= p["verse_end"] && p["chapter"] != p["chapter_end"]) ||
(p["chapter"] == p["chapter_end"] && $4 == p["chapter"] && $5 >= p["verse"] && $5 <= p["verse_end"])) {
	processline()
}

# search: case-insensitive regex match, optionally limited to a book and
# a chapter.
# NOTE(review): only column 6 (the Latin text) is searched; German text in
# column 7 is never matched -- confirm this is intended.
cmd == "ref" && mode == "search" &&
(p["book"] == "" || bookmatches($1, $2, p["book"])) &&
(p["chapter"] == "" || $4 == p["chapter"]) &&
match(tolower($6), tolower(p["search"])) {
	processline()
}

END {
	# A reference query that selected no record at all is reported back
	# to the user together with the reference as it was given.
	if (cmd == "ref") {
		if (outputted_records == 0) {
			print "Unknown reference: " ref
		}
	}
}