# texindex.awk, generated by jrtangle from ti.twjr. # # Copyright 2014-2023 Free Software Foundation, Inc. # # This file is part of GNU Texinfo. # # Texinfo is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # Texinfo is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see . # ftrans.awk --- handle data file transitions # # user supplies beginfile() and endfile() functions # # Arnold Robbins, arnold@skeeve.com, Public Domain # November 1992 FNR == 1 { if (_filename_ != "") endfile(_filename_) _filename_ = FILENAME beginfile(FILENAME) } END { endfile(_filename_) } # join.awk --- join an array into a string # # Arnold Robbins, arnold@skeeve.com, Public Domain # May 1993 function join(array, start, end, sep, result, i) { if (sep == "") sep = " " else if (sep == SUBSEP) # magic value sep = "" result = array[start] for (i = start + 1; i <= end; i++) result = result sep array[i] return result } # quicksort --- C.A.R. Hoare's quick sort algorithm. See Wikipedia # or almost any algorithms or computer science text # Adapted from K&R-II, page 110 # function quicksort(data, left, right, compare, # parameters i, last, use_index, lt) # locals { if (left >= right) # do nothing if array contains fewer return # than two elements use_index = (compare == "index") quicksort_swap(data, left, int((left + right) / 2)) last = left for (i = left + 1; i <= right; i++) { lt = (use_index \ ? less_than(data, i, left) \ : key_compare(data[i], data[left]) < 0) if (lt) quicksort_swap(data, ++last, i) } quicksort_swap(data, left, last) quicksort(data, left, last - 1, compare) quicksort(data, last + 1, right, compare) } # quicksort_swap --- quicksort helper function, could be inline # function quicksort_swap(data, i, j, temp) { temp = data[i] data[i] = data[j] data[j] = temp } function del_array(a) { # Portable and faster than # for (i in a) # delete a[i] split("", a) } function check_split_null( n, a) { n = split("abcde", a, "") return (n == 5) } function char_split(string, array, n, i) { if (Can_split_null) return split(string, array, "") # do it the hard way del_array(array) n = length(string) for (i = 1; i <= n; i++) array[i] = substr(string, i, 1) return n } function fatal(format, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, cat) { cat = "cat 1>&2" # maximal portability printf(format, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) | cat close(cat) exit EXIT_FAILURE } function isupper(c) { return index("ABCDEFGHIJKLMNOPQRSTUVWXYZ", c) } function islower(c) { return index("abcdefghijklmnopqrstuvwxyz", c) } function isalpha(c) { return islower(c) || isupper(c) } function isdigit(c) { return index("0123456789", c) } function make_regexp(regexp, a, sep, n) { n = split(regexp, a, "%") if (Command_char == "\\") sep = Command_char Command_char else sep = Command_char return join(a, 1, n, sep) } function escape(regexp, a, n) { if (Command_char != "\\") return regexp n = split(regexp, a, "\\") if (n == 1) return regexp return join(a, 1, n, "\\\\") } function min(a, b) { return (a < b ? a : b) } function usage(exit_val) { printf(_"Usage: %s [OPTION]... FILE...\n", Invocation_name) print _"Generate a sorted index for each TeX output FILE." print _"Usually FILE... is specified as `foo.??' for a document `foo.texi'." print "" print _"Options:" print _" -h, --help display this help and exit" print _" --version display version information and exit" print _" -- end option processing" print "" print _"Email bug reports to bug-texinfo@gnu.org," print _"general questions and discussion to help-texinfo@gnu.org." print _"Texinfo home page: https://www.gnu.org/software/texinfo/" exit exit_val } function version() { print "texindex (GNU texinfo)", Texindex_version print "" printf _"Copyright (C) %s Free Software Foundation, Inc.\n", "2023" print _"License GPLv3+: GNU GPL version 3 or later " print _"This is free software: you are free to change and redistribute it." print _"There is NO WARRANTY, to the extent permitted by law." exit EXIT_SUCCESS } BEGIN { TRUE = 1 FALSE = 0 EXIT_SUCCESS = 0 EXIT_FAILURE = 1 Texindex_version = "7.1" if (! Invocation_name) { # provide fallback in case it's not passed in. Invocation_name = "texindex" } Can_split_null = check_split_null() TEXTDOMAIN = "texinfo" for (i = 1; i < ARGC; i++) { if (ARGV[i] == "-h" || ARGV[i] == "--help") { usage(EXIT_SUCCESS) } else if (ARGV[i] == "--version") { version() } else if (ARGV[i] == "-k" || ARGV[i] == "--keep") { # do nothing, backwards compatibility delete ARGV[i] } else if (ARGV[i] == "--") { delete ARGV[i] break } else if (ARGV[i] ~ /^--?.+/) { fatal(_"%s: unrecognized option `%s'\n" \ "Try `%s --help' for more information.\n", Invocation_name, ARGV[i], Invocation_name) # fatal() will do `exit EXIT_FAILURE' } else { break } } } function beginfile(filename) { Output_file = filename "s" # Reinitialize these for each input file del_array(Seen) del_array(Keys) del_array(Allkeys) del_array(Subkeys) del_array(Initials) del_array(Numfields) del_array(Primary) del_array(Secondary) del_array(Tertiary) del_array(See) del_array(See_count) del_array(Seealso) del_array(Seealso_count) del_array(Pagedata) del_array(Printed) Entries = 0 Do_initials = FALSE Prev_initial = "" Command_char = substr($0, 1, 1) if ((Command_char != "\\" && Command_char != "@") \ || substr($0, 2, 5) != "entry") fatal(_"%s is not a Texinfo index file\n", filename) Special_chars = "{}" Command_char Seeentry_re = make_regexp("%seeentry") Seealso_re = make_regexp("%seealso") Subentry_re = make_regexp(" *%subentry +") } function endfile(filename, i, prev_initial, initial) { # sort the entries quicksort(Keys, 1, Entries, "index") prev_initial = "" for (i = 1; i <= Entries; i++) { # deal with initial initial = Initials[Keys[i]] if (initial != prev_initial) { prev_initial = initial print_initial(initial) } write_index_entry(i) } close(Output_file) } { # Remove duplicates, which can happen if ($0 in Seen) next Seen[$0] = TRUE $0 = substr($0, 7) # remove leading \entry or @entry initial = extract_initial($0) numfields = field_split($0, fields, "{", "}", Command_char) if (numfields < 3 || numfields > 5) fatal(_"%s:%d: Bad entry; expected 3 to 5 fields, not %d\n", FILENAME, FNR, numfields) key = fields[1] pagenum = fields[2] primary_text = fields[3] secondary_text = (numfields > 3 ? fields[4] : "") tertiary_text = (numfields > 4 ? fields[5] : "") if (! (key in Allkeys)) { # first time we've seen this full line Keys[++Entries] = key Allkeys[key] = key Initials[key] = initial Numfields[key] = numfields - 2 # don't count sortkey, page number Primary[key] = primary_text if (secondary_text) Secondary[key] = secondary_text if (tertiary_text) Tertiary[key] = tertiary_text n = split(key, subparts, Subentry_re) for (i = 1; i <= n; i++) Subkeys[key, i] = subparts[i] if (pagenum ~ Seeentry_re) { See_count[key]++ See[key, See_count[key]] = pagenum } else if (pagenum ~ Seealso_re) { Seealso_count[key]++ Seealso[key, Seealso_count[key]] = pagenum } else Pagedata[key] = pagenum } else { # We've seen this key before: # Add to see or see also, or else add to list of pages. # In the latter case, make sure we've not seen this # page number before. (Shouldn't happen based on the # earlier removal of exact duplicates, but we could have # an identical key with different formatting of actual text. if (pagenum ~ Seeentry_re) { See_count[key]++ See[key, See_count[key]] = pagenum } else if (pagenum ~ Seealso_re) { Seealso_count[key]++ Seealso[key, Seealso_count[key]] = pagenum } else if (! (key in Pagedata)) { Pagedata[key] = pagenum } else if (Pagedata[key] != pagenum \ && Pagedata[key] !~ escape(", " pagenum "$")) { Pagedata[key] = Pagedata[key] ", " pagenum } } if (! Do_initials) { if (Prev_initial == "") Prev_initial = initial else if (initial != Prev_initial) Do_initials = TRUE } } function extract_initial(key, initial, nextchar, i, l, kchars) { l = char_split(key, kchars) if (l >= 3 && kchars[2] == "{") { bracecount = 1 i = 3 while (bracecount > 0 && i <= l) { if (kchars[i] == "{") bracecount++ else if (kchars[i] == "}") bracecount-- i++ } if (i > l) fatal(_"%s:%d: Bad key %s in record\n", FILENAME, FNR, key) initial = substr(key, 2, i - 2) } else if (kchars[2] == Command_char) { nextchar = kchars[3] if (initial == Command_char && index("{}", nextchar) > 0) initial = substr(key, 2, 3) else { initial = toupper(nextchar) } } else { initial = toupper(kchars[2]) } return initial } function field_split( \ record, fields, start, end, com_ch, # parameters chars, numchars, out, delim_count, i, j, k) # locals { del_array(fields) numchars = char_split(record, chars) j = 1 # index into fields k = 1 # index into out delim_count = 1 for (i = 2; i <= numchars; i++) { if (chars[i] == com_ch) { if (chars[i+1] == Command_char) { # input was @@ out[k++] = chars[i+1] out[k++] = chars[i+1] i++ } else if (index(Special_chars, chars[i+1]) != 0) { out[k++] = chars[i+1] i++ } else out[k++] = chars[i] } else if (chars[i] == start) { delim_count++ out[k++] = chars[i] } else if (chars[i] == end) { delim_count-- if (delim_count == 0) { fields[j++] = join(out, 1, k-1, SUBSEP) del_array(out) # reset for next time through k = 1 i++ if (i <= numchars && chars[i] != start) fatal(_"%s:%d: Bad entry; expected %s at column %d\n", FILENAME, FNR, start, i) delim_count = 1 } else out[k++] = chars[i] } else out[k++] = chars[i] } return j - 1 # num fields } function print_initial(initial) { if (! Do_initials) return if (index(Special_chars, initial) != 0) initial = Command_char initial printf("%cinitial {%s}\n", Command_char, initial) > Output_file } function less_than(data, l, r, left, right, nfields, cmp1, cmp2) { left = data[l] right = data[r] left_fields = Numfields[left] right_fields = Numfields[right] nfields = min(left_fields, right_fields) # At least one field, always check the first subkey cmp1 = key_compare(Subkeys[left, 1], Subkeys[right, 1]) if (cmp1 != 0) return cmp1 < 0 # cmp1 == 0: one side has 1 field, other side has 1 to 3 fields if (nfields == 1) return left_fields < right_fields # At least two fields, check second subkey cmp2 = key_compare(Subkeys[left, 2], Subkeys[right, 2]) if (cmp2 != 0) return cmp2 < 0 # cmp1 == 0, cmp2 == 0, one side has 2 fields, # other has 2 to 3 fields if (nfields == 2) return left_fields < right_fields # Three fields return key_compare(Subkeys[left, 3], Subkeys[right, 3]) < 0 } BEGIN { for (i = 0; i < 256; i++) { c = sprintf("%c", i) Ordval[c] = i # map character to value if (isdigit(c)) Ordval[c] += 512 } # Set things up such that 'a' < 'A' < 'b' < 'B' < ... i = Ordval["a"] j = Ordval["z"] newval = i + 512 for (; i <= j; i++) { c = sprintf("%c", i) if (islower(c)) { Ordval[c] = newval++ Ordval[toupper(c)] = newval++ } } } function key_compare(left, right, len_l, len_r, len, chars_l, chars_r) { len_l = length(left) len_r = length(right) len = (len_l < len_r ? len_l : len_r) char_split(left, chars_l) char_split(right, chars_r) for (i = 1; i <= len; i++) { if (isalpha(chars_l[i]) && isalpha(chars_r[i])) { # same char different case # upper case comes out last if (chars_l[i] != chars_r[i] && tolower(chars_l[i]) == tolower(chars_r[i])) { if (i != len \ && (isalpha(chars_l[i+1]) || isalpha(chars_r[i+1]))) continue # negative, zero, or positive return Ordval[chars_l[i]] - Ordval[chars_r[i]] } # same case, different char, # or different case, different char: # letter order wins if (Ordval[chars_l[i]] < Ordval[chars_r[i]]) return -1 if (Ordval[chars_l[i]] > Ordval[chars_r[i]]) return 1 # equal, keep going continue } # letter and something else, or two non-letters # letter order wins if (Ordval[chars_l[i]] < Ordval[chars_r[i]]) return -1 if (Ordval[chars_l[i]] > Ordval[chars_r[i]]) return 1 # equal, keep going } # equal so far, shorter one wins if (len_l < len_r) return -1 if (len_l > len_r) return 1 return 0 } function print_entry(key, entry_command, entry_text, count, see_entries, i) { if ((key, 1) in See) # at least one ``see'' print_see_entry(key, entry_command, entry_text, See_count, See) if (key in Pagedata) { # at least one page number if ((key, 1) in Seealso) { count = Seealso_count[key] # Copy the entries to a separate array for (i = 1; i <= count; i++) see_entries[i] = Seealso[key, i] # sort them quicksort(see_entries, 1, count, "string") # now add them to the page data for (i = 1; i <= count; i++) Pagedata[key] = Pagedata[key] ", " see_entries[i] } printf("%c%s{%s}{%s}\n", Command_char, entry_command, entry_text[key], Pagedata[key]) > Output_file Printed[key] = True # mark this key as printed } else if ((key, 1) in Seealso) { # at least one ``see also'' # Only ``see also'' entry, print it count = Seealso_count[key] # Copy the entries to a separate array for (i = 1; i <= count; i++) see_entries[i] = Seealso[key, i] # sort them quicksort(see_entries, 1, count, "string") printf("%c%s{%s}{", Command_char, entry_command, entry_text[key]) > Output_file # now add them to the page data for (i = 1; i <= count; i++) { printf("%s", see_entries[i]) > Output_file if (i != count) printf(", ") > Output_file } printf("}\n") > Output_file } } function print_see_entry(key, entry_command, entry_text, # parameters count_array, see_text_array, # parameters i, count, see_entries) # locals { count = count_array[key] if (count == 1) { # the easy case printf("%c%s{%s, %s}{}\n", Command_char, entry_command, entry_text[key], see_text_array[key, 1]) > Output_file return } # Otherwise, we need to sort the entries and then print them # Copy the entries to a separate array for (i = 1; i <= count; i++) see_entries[i] = see_text_array[key, i] # sort them quicksort(see_entries, 1, count, "string") # now print them for (i = 1; i <= count; i++) printf("%c%s{%s, %s}{}\n", Command_char, entry_command, entry_text[key], see_entries[i]) > Output_file } function write_index_entry(current, key) { key = Keys[current] # current sort key if (Numfields[key] == 1) { print_entry(key, "entry", Primary) } else if (Numfields[key] == 2) { if (! (Subkeys[key, 1] in Printed)) { printf("%centry{%s,}{}\n", Command_char, Primary[key]) > Output_file Printed[Subkeys[key, 1]] = True } print_entry(key, "secondary", Secondary) } else if (Numfields[key] == 3) { if (! (Subkeys[key, 1] in Printed)) { printf("%centry{%s,}{}\n", Command_char, Primary[key]) > Output_file Printed[Subkeys[key, 1]] = True } subkey = (Subkeys[key, 1] Command_char "subentry " Subkeys[key, 2]) if (! (subkey in Printed)) { printf("%csecondary{%s,}{}\n", Command_char, Secondary[key]) > Output_file Printed[subkey] = True } print_entry(key, "tertiary", Tertiary) } }