# Common.pm: definition of commands. Common code of other Texinfo modules.
#
# Copyright 2010-2023 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
#
# Original author: Patrice Dumas
# Parts (also from Patrice Dumas) come from texi2html.pl or texi2html.init.
package Texinfo::Common;
use strict;
# To check if there is no erroneous autovivification
#no autovivification qw(fetch delete exists store strict);
# for unicode/layer support in binmode
# for binmode documented as pushing :utf8 on top of :encoding
use 5.008001;
# to determine the null file
use Config;
use File::Spec;
# for find_encoding, resolve_alias
use Encode;
# debugging
use Carp qw(cluck confess);
# uncomment to check that settable commands are contained in global commands
#use List::Compare;
use Locale::Messages;
use Texinfo::Documentlanguages;
use Texinfo::Commands;
require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter);
%EXPORT_TAGS = ( 'all' => [ qw(
collect_commands_in_tree
collect_commands_list_in_tree
move_index_entries_after_items_in_tree
relate_index_entries_to_table_items_in_tree
protect_colon_in_tree
protect_comma_in_tree
protect_first_parenthesis
protect_node_after_label_in_tree
valid_customization_option
valid_tree_transformation
) ] );
@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
# This is where the Texinfo modules get access to __( without explicit
# import.
@EXPORT = qw(
__ __p
);
$VERSION = '7.1';
# i18n
# For the messages translations.
my $messages_textdomain = 'texinfo';
sub __($) {
my $msgid = shift;
return Locale::Messages::dgettext($messages_textdomain, $msgid);
}
sub __p($$) {
my $context = shift;
my $msgid = shift;
return Locale::Messages::dpgettext($messages_textdomain, $context, $msgid);
}
# For the in-document output strings translations in the texinfo_document
# textdomain.
#
# Return the string but do nothing else, used to mark strings to be translated
# in the Gettext framework.
# Can be used in other modules, in particular if the modules already overwrite
# gdt from Texinfo::Translations.
sub gdt($)
{
return $_[0];
}
# First argument is the translation context.
sub pgdt($$)
{
return $_[1];
}
# to be used when the context needs to be returned too
sub pgdt_context($$)
{
return [$_[0], $_[1]];
}
# determine the null devices
my $default_null_device = File::Spec->devnull();
our %null_device_file = (
$default_null_device => 1
);
# special case, djgpp recognizes both null devices
if ($Config{osname} eq 'dos' and $Config{osvers} eq 'djgpp') {
$null_device_file{'/dev/null'} = 1;
$null_device_file{'NUL'} = 1;
}
# Customization options
# variables not specific of Parser, used in other contexts. Spread over
# the different categories set below. The default values are in general
# the same as elsewhere, but occasionally may be specific of the Parser.
my %default_parser_common_customization = (
'INCLUDE_DIRECTORIES' => [ '.' ],
'documentlanguage' => undef, # not 'en' as it is better to specify that there is no
# need for translation since the strings are in english
# rather than ask for translations to en
'EXPANDED_FORMATS' => [],
'DEBUG' => 0, # if >= 10, tree is printed in texi2any.pl after parsing.
# If >= 100 tree is printed every line.
'FORMAT_MENU' => 'menu', # if not 'menu' no menu error related.
# next three related to file names encoding
'DOC_ENCODING_FOR_INPUT_FILE_NAME' => 1, # use document encoding for input file
# names encoding if set
'COMMAND_LINE_ENCODING' => undef, # encoding of command line strings
# used to decode file names for error message
'INPUT_FILE_NAME_ENCODING' => undef, # used for input file encoding
);
# Customization variables obeyed only by the parser, and the default values.
my %default_parser_specific_customization = (
'IGNORE_SPACE_AFTER_BRACED_COMMAND_NAME' => 1,
'CPP_LINE_DIRECTIVES' => 1, # handle cpp like synchronization lines
'MAX_MACRO_CALL_NESTING' => 100000, # max number of nested macro calls
);
# this serves both to set defaults and list customization variable
# valid for the parser.
# also used in util/txicustomvars
our %default_parser_customization_values = (%default_parser_common_customization,
%default_parser_specific_customization);
# @-commands that can be used multiple time in a document and default
# values. Associated with customization values too.
# also used in util/txicustomvars
our %document_settable_multiple_at_commands = (
'allowcodebreaks' => 'true',
'clickstyle' => '@arrow',
'codequotebacktick' => 'off',
'codequoteundirected' => 'off',
'contents' => 0,
'deftypefnnewline' => 'off',
'documentencoding' => 'utf-8',
'documentlanguage' => undef, # Documented as en, but no @documentlanguage
# really means that the language is not set.
# --document-language
'evenfooting' => undef,
'evenheading' => undef,
'everyfooting' => undef,
'everyheading' => undef,
# is N ems in TeX, 0.4 in.
'exampleindent' => 5,
'firstparagraphindent' => 'none',
'frenchspacing' => 'off',
'headings' => 'on',
'kbdinputstyle' => 'distinct',
'microtype' => undef,
'oddheading' => undef,
'oddfooting' => undef,
'paragraphindent' => 3,
'shortcontents' => 0,
'summarycontents' => 0,
'urefbreakstyle' => 'after',
'xrefautomaticsectiontitle' => 'off',
);
# @-commands that should be unique. Associated with customization values too.
# also used in util/txicustomvars
our %document_settable_unique_at_commands = (
'afivepaper' => undef,
'afourpaper' => undef,
'afourlatex' => undef,
'afourwide' => undef,
'bsixpaper' => undef,
# when passed through a customization variable, documentdescription
# should be already formatted for HTML. There is no default,
# what is determined to be the title is used if not set.
'documentdescription' => undef,
'evenfootingmarks' => undef,
'evenheadingmarks' => undef,
'everyfootingmarks' => 'bottom',
'everyheadingmarks' => 'bottom',
'fonttextsize' => 11,
'footnotestyle' => 'end', # --footnote-style
'novalidate' => 0, # --no-validate
'oddfootingmarks' => undef,
'oddheadingmarks' => undef,
'pagesizes' => undef,
'setchapternewpage' => 'on',
'setfilename' => undef,
'smallbook' => undef,
);
# check that settable commands are contained in global commands
# from command_data.txt
if (0) {
#if (1) {
my @global_unique_settable = keys(%document_settable_unique_at_commands);
my @global_unique_commands = keys(%Texinfo::Commands::global_unique_commands);
my $lcu = List::Compare->new(\@global_unique_settable, \@global_unique_commands);
# only in the first list
if (scalar($lcu->get_unique)) {
warn 'BUG: Unique settable not global: '.join(',',$lcu->get_unique)."\n";
}
if (scalar($lcu->get_complement)) {
print STDERR "REMARK: global_uniq commands not settable: "
.join(',', $lcu->get_complement)."\n";
}
my @global_multi_settable = keys(%document_settable_multiple_at_commands);
my @global_multi_commands = keys(%Texinfo::Commands::global_commands);
my $lcm = List::Compare->new(\@global_multi_settable, \@global_multi_commands);
if (scalar($lcm->get_unique)) {
warn 'BUG: Multi settable not global: '.join(',',$lcm->get_unique)."\n";
}
if (scalar($lcm->get_complement)) {
print STDERR "REMARK: global commands not settable: "
.join(',', $lcm->get_complement)."\n";
}
}
# a value corresponds to defaults that are the same for every output format
# otherwise undef is used
our %default_converter_command_line_options = (
'SPLIT_SIZE' => 300000, # --split-size
'FILLCOLUMN' => 72, # --fill-column
'NUMBER_SECTIONS' => 1, # --number-sections
'NUMBER_FOOTNOTES' => 1, # --number-footnotes
# only in HTML
'TRANSLITERATE_FILE_NAMES' => 1, # --transliterate-file-names
'SPLIT' => undef, # --split
'HEADERS' => 1, # --headers. Used to set diverse
# customization options in main program.
# Only directly used in HTML converter
'NODE_FILES' => undef, # --node-files. Depend on SPLIT
'VERBOSE' => undef, # --verbose
'OUTFILE' => undef, # --output If non split and not ending by /.
# Setting can be format dependent
'SUBDIR' => undef, # --output If split or ending by /.
# Setting can be format dependent
'ENABLE_ENCODING' => 1, # --disable-encoding/--enable-encoding.
# The option is directly used in
# Info/Plaintext, and used in diverse formats
# for index sorting and plain text output.
);
# used in main program, defaults documented in manual
my %default_main_program_command_line_options = (
'MACRO_EXPAND' => undef, # --macro-expand. Only for main program
# used in HTML only, called from main program
'INTERNAL_LINKS' => undef, # --internal-links
'ERROR_LIMIT' => 100, # --error-limit
'FORCE' => undef, # --force
'NO_WARN' => undef, # --no-warn
'SILENT' => undef, # --silent. Not used. For completeness
# following also set in converters
'FORMAT_MENU' => 'menu', # --headers. Modified by the format.
);
# used in main program, defaults documented in manual
# also used in util/txicustomvars
our %default_main_program_customization = (
'CHECK_NORMAL_MENU_STRUCTURE' => 0, # output warnings when node with
# automatic direction and directions in menu are not consistent
# with sectionning, and when node directions are not consistent
# with menu directions.
'CHECK_MISSING_MENU_ENTRY' => 1,
'DUMP_TREE' => undef,
'DUMP_TEXI' => undef,
'SHOW_BUILTIN_CSS_RULES' => 0,
'SORT_ELEMENT_COUNT' => undef,
'SORT_ELEMENT_COUNT_WORDS' => undef,
'TEXI2DVI' => 'texi2dvi',
'TREE_TRANSFORMATIONS' => undef,
);
# defaults for the main program. In general transmitted to converters as defaults
our %default_main_program_customization_options = (
%default_main_program_command_line_options, %default_main_program_customization);
# used in converters, default documented in manual
# also used in util/txicustomvars
our %default_converter_customization = (
'TOP_NODE_UP' => '(dir)', # up node of Top node default value
'BASEFILENAME_LENGTH' => 255 - 10,
'DOC_ENCODING_FOR_INPUT_FILE_NAME' => 1,
'DOC_ENCODING_FOR_OUTPUT_FILE_NAME' => 0,
# only used in HTML
'IMAGE_LINK_PREFIX' => undef,
'CASE_INSENSITIVE_FILENAMES' => 0,
'DEBUG' => 0,
# only used in HTML
'HANDLER_FATAL_ERROR_LEVEL' => 100,
'TEST' => 0,
'TEXTCONTENT_COMMENT' => undef, # in textcontent format
# used in TexinfoXML/SXML
# Reset by the main program, therefore this value is only used in converter
# tests that use the perl modules directly. Does not need to match with the
# documented value used in the main program, nor to be updated every time a
# DTD is released, to have a fixed value for the tests. However, it should
# be good to update from time to time to avoid test results that are not
# valid against their reported DTD.
'TEXINFO_DTD_VERSION' => '7.1',
);
# Some are for all converters, EXTENSION for instance, some for
# some converters, for example CLOSE_QUOTE_SYMBOL and many
# for HTML. Could be added to %default_converter_customization.
# Defaults are documented in manual and set in the various converters.
# used in util/txicustomvars
our @variable_string_settables = (
'AFTER_BODY_OPEN',
'AFTER_SHORT_TOC_LINES',
'AFTER_TOC_LINES',
'ASCII_DASHES_AND_QUOTES',
'ASCII_GLYPH',
'ASCII_PUNCTUATION',
'AUTO_MENU_DESCRIPTION_ALIGN_COLUMN',
'AUTO_MENU_MAX_WIDTH',
'AVOID_MENU_REDUNDANCY',
'BEFORE_SHORT_TOC_LINES',
'BEFORE_TOC_LINES',
'BIG_RULE',
'BODYTEXT',
'CLASS_BEGIN_USEPACKAGE', # for LaTeX
'COPIABLE_LINKS',
'CHAPTER_HEADER_LEVEL',
'CHECK_HTMLXREF',
'CLOSE_DOUBLE_QUOTE_SYMBOL',
'CLOSE_QUOTE_SYMBOL',
'COMMAND_LINE_ENCODING',
'COMPLEX_FORMAT_IN_TABLE',
'CONTENTS_OUTPUT_LOCATION',
'CONVERT_TO_LATEX_IN_MATH',
'DATE_IN_HEADER',
'DEFAULT_RULE',
'DEF_TABLE',
'DO_ABOUT',
'DOC_ENCODING_FOR_INPUT_FILE_NAME',
'DOC_ENCODING_FOR_OUTPUT_FILE_NAME',
'DOCTYPE',
'END_USEPACKAGE', # for LaTeX
'EPUB_CREATE_CONTAINER_FILE', # for ext/epub3.pm
'EPUB_KEEP_CONTAINER_FOLDER', # for ext/epub3.pm
'EXTENSION',
'EXTERNAL_CROSSREF_EXTENSION',
'EXTERNAL_CROSSREF_SPLIT',
'EXTERNAL_DIR',
'EXTRA_HEAD',
'FOOTNOTE_END_HEADER_LEVEL',
'FOOTNOTE_SEPARATE_HEADER_LEVEL',
'FRAMES',
'FRAMESET_DOCTYPE',
'HEADER_IN_TABLE',
'HIGHLIGHT_SYNTAX', # for ext/highlight_syntax.pm
'HIGHLIGHT_SYNTAX_DEFAULT_LANGUAGE', # for ext/highlight_syntax.pm
'HTML_MATH',
'HTML_ROOT_ELEMENT_ATTRIBUTES',
'HTMLXREF_FILE',
'HTMLXREF_MODE',
'ICONS',
'IMAGE_LINK_PREFIX',
'INDEX_ENTRY_COLON',
'INDEX_SPECIAL_CHARS_WARNING',
'INFO_JS_DIR',
'INFO_SPECIAL_CHARS_QUOTE',
'INFO_SPECIAL_CHARS_WARNING',
'IGNORE_REF_TO_TOP_NODE_UP',
'INLINE_CSS_STYLE',
'INPUT_FILE_NAME_ENCODING',
'JS_WEBLABELS',
'JS_WEBLABELS_FILE',
'LOCALE_ENCODING',
'L2H_CLEAN',
'L2H_FILE',
'L2H_HTML_VERSION',
'L2H_L2H',
'L2H_SKIP',
'L2H_TMP',
'MATHJAX_SCRIPT',
'MATHJAX_SOURCE',
'MAX_HEADER_LEVEL',
'MENU_ENTRY_COLON',
'MENU_SYMBOL',
'MESSAGE_ENCODING',
'MONOLITHIC',
'NO_CSS',
'NO_NUMBER_FOOTNOTE_SYMBOL',
'NO_CUSTOM_HTML_ATTRIBUTE',
'NODE_NAME_IN_INDEX',
'NODE_NAME_IN_MENU',
'NO_TOP_NODE_OUTPUT',
'NO_USE_SETFILENAME',
'OPEN_DOUBLE_QUOTE_SYMBOL',
'OPEN_QUOTE_SYMBOL',
'OUTPUT_CHARACTERS',
'OUTPUT_ENCODING_NAME',
'OUTPUT_FILE_NAME_ENCODING',
'OUTPUT_PERL_ENCODING',
'PACKAGE',
'PACKAGE_AND_VERSION',
'PACKAGE_NAME',
'PACKAGE_URL',
'PACKAGE_VERSION',
'PRE_BODY_CLOSE',
'PREFIX',
'PROGRAM',
'PROGRAM_NAME_IN_ABOUT',
'PROGRAM_NAME_IN_FOOTER',
'SECTION_NAME_IN_TITLE',
'SHORT_TOC_LINK_TO_TOC',
'SHOW_TITLE',
'SIMPLE_MENU',
'SORT_ELEMENT_COUNT',
'T4H_LATEX_CONVERSION',
'T4H_MATH_CONVERSION',
'T4H_TEX_CONVERSION',
'TEXI2HTML',
'TEXINFO_OUTPUT_FORMAT',
'TOC_LINKS',
'TOP_FILE',
'TOP_NODE_FILE_TARGET',
'TOP_NODE_UP_URL',
'USE_ACCESSKEY',
'USE_ISO',
'USE_LINKS',
'USE_NEXT_HEADING_FOR_LONE_NODE',
'USE_NODES',
'USE_NODE_DIRECTIONS',
'USE_NUMERIC_ENTITY',
'USE_REL_REV',
'USE_SETFILENAME_EXTENSION',
'USE_TITLEPAGE_FOR_TITLE',
'USE_UNIDECODE',
'USE_UP_NODE_FOR_ELEMENT_UP',
'USE_XML_SYNTAX',
'VERTICAL_HEAD_NAVIGATION',
'WORDS_IN_PAGE',
'XREF_USE_FLOAT_LABEL',
'XREF_USE_NODE_NAME_ARG',
);
# Not strings.
my @variable_other_settables = (
# Documented in the texi2any_api manual
'LINKS_BUTTONS', 'TOP_BUTTONS', 'SECTION_BUTTONS',
'CHAPTER_FOOTER_BUTTONS', 'SECTION_FOOTER_BUTTONS',
'NODE_FOOTER_BUTTONS',
'MISC_BUTTONS', 'CHAPTER_BUTTONS',
'ACTIVE_ICONS', 'PASSIVE_ICONS',
# set from command line.
# TODO not documented.
'CSS_FILES', # --css-include
'CSS_REFS', # --css-ref
'EXPANDED_FORMATS', # --if*
'INCLUDE_DIRECTORIES', # -I
);
our %document_settable_at_commands = (%document_settable_multiple_at_commands,
%document_settable_unique_at_commands);
my %valid_customization_options;
foreach my $var (keys(%document_settable_at_commands),
keys(%default_main_program_command_line_options),
keys(%default_converter_command_line_options),
keys(%default_main_program_customization),
keys(%default_parser_specific_customization),
keys(%default_converter_customization),
@variable_string_settables,
@variable_other_settables) {
$valid_customization_options{$var} = 1;
}
sub valid_customization_option($)
{
my $option = shift;
return $valid_customization_options{$option};
}
# not documented on purpose, should not be directly called in user-defined
# codes
sub add_valid_customization_option($)
{
my $option = shift;
if ($option =~ /^[A-Z][A-Z_]{2,}$/) {
$valid_customization_options{$option} = 1;
return 1;
}
return 0;
}
# Output formats
our %texinfo_output_formats;
foreach my $output_format_command ('info', 'plaintext',
grep {$Texinfo::Commands::block_commands{$_} eq 'format_raw'}
keys(%Texinfo::Commands::block_commands)) {
$texinfo_output_formats{$output_format_command} = $output_format_command;
}
# Tree transformations
my %valid_tree_transformations;
foreach my $valid_transformation ('simple_menus',
'fill_gaps_in_sectioning', 'move_index_entries_after_items',
'relate_index_entries_to_items',
'insert_nodes_for_sectioning_commands',
'complete_tree_nodes_menus',
'complete_tree_nodes_missing_menu',
'regenerate_master_menu',
'indent_menu_descriptions') {
$valid_tree_transformations{$valid_transformation} = 1;
}
sub valid_tree_transformation ($)
{
my $transformation = shift;
return 1 if (defined($transformation)
and $valid_tree_transformations{$transformation});
return 0;
}
# information on encodings
# in Texinfo up to 5.2, we presume that ISO-8859-1 was considered as
# the default encoding (although it had never been said explicitly in
# the manual, it is consistent with HTML output without encoding
# being the default for makeinfo output in earlier versions and being,
# at that time, considered as ISO-8859-1). The wording in the Texinfo
# manual implied that setting US-ASCII had no effect, a possible
# interpretation being that it was an alias for ISO-8859-1. Since
# ISO-8859-1 extends US-ASCII in a compatible way, this interpretation
# is valid. Also, as long as the same 8bit encoding is used for input and
# output, the precise 8bit encoding used to extend US-ASCII has no
# practical consequence, something consistent with past makeinfo supporting
# any 8bit encoding without documentencoding and also when US-ASCII was
# specified as encoding.
#
# To support old manuals in which US-ASCII can be specified although
# the encoding corresponds to any 8bit encoding compatible with ISO-8859-1,
# we convert US-ASCII as ISO-8859-1 to avoid errors for characters in
# ISO-8859-1 but not in US-ASCII.
our %encoding_name_conversion_map;
%encoding_name_conversion_map = (
'us-ascii' => 'iso-8859-1',
# The mapping to utf-8 is important for perl code, as it means using a strict
# conversion to utf-8 and not a lax conversion:
# https://perldoc.perl.org/perlunifaq#What's-the-difference-between-UTF-8-and-utf8?
# In more detail, we want to use utf-8 only for two different reasons
# 1) if input is malformed it is better to error out it as soon as possible
# 2) we do not want to have different behaviour and hard to find bugs
# depending on whether the user used @documentencoding utf-8
# or @documentencoding utf8. There is a warning with utf8, but
# we want to be clear in any case.
'utf8' => 'utf-8',
);
# information on @-commands
our %nobrace_symbol_text;
%nobrace_symbol_text = (
'*', "\n",
' ', ' ',
"\t", ' ',
"\n", ' ',
'-', '', # hyphenation hint
'|', '', # used in formatting commands @evenfooting and friends
'/', '',
':', '',
'!', '!',
'?', '?',
'.', '.',
'@', '@',
'}', '}',
'{', '{',
'&', '&',
'\\', '\\', # should only appear in math
);
our %def_map = (
# basic commands.
# 'arg' and 'argtype' are for everything appearing after the other
# arguments.
'defline', [ 'category', 'name', 'arg' ],
'deftypeline', [ 'category', 'type', 'name', 'argtype' ],
'deffn', [ 'category', 'name', 'arg' ],
'defvr', [ 'category', 'name' ],
'deftypefn', [ 'category', 'type', 'name', 'argtype' ],
'deftypeop', [ 'category', 'class' , 'type', 'name', 'argtype' ],
'deftypevr', [ 'category', 'type', 'name' ],
'defcv', [ 'category', 'class' , 'name' ],
'deftypecv', [ 'category', 'class' , 'type', 'name' ],
'defop', [ 'category', 'class' , 'name', 'arg' ],
'deftp', [ 'category', 'name', 'argtype' ],
# shortcuts
# The strings are marked to be translated in the parsers with type
# 'untranslated'.
'defun', {'deffn' => pgdt_context(
'category of functions for @defun',
'Function')},
# TRANSLATORS: category of macros for @defmac
'defmac', {'deffn' => gdt('Macro')},
# TRANSLATORS: category of special forms for @defspec
'defspec', {'deffn' => gdt('Special Form')},
'defvar', {'defvr' => pgdt_context(
'category of variables for @defvar',
'Variable')},
# TRANSLATORS: category of user-modifiable options for @defopt
'defopt', {'defvr' => gdt('User Option')},
'deftypefun', {'deftypefn' => pgdt_context(
'category of functions for @deftypefun',
'Function')},
'deftypevar', {'deftypevr' => pgdt_context(
'category of variables in typed languages for @deftypevar',
'Variable')},
'defivar', {'defcv' => pgdt_context(
'category of instance variables in object-oriented programming for @defivar',
'Instance Variable')},
'deftypeivar', {'deftypecv' => pgdt_context(
'category of instance variables with data type in object-oriented programming for @deftypeivar',
'Instance Variable')},
'defmethod', {'defop' => pgdt_context(
'category of methods in object-oriented programming for @defmethod',
'Method')},
'deftypemethod', {'deftypeop' => pgdt_context(
'category of methods with data type in object-oriented programming for @deftypemethod',
'Method')},
);
# Argument not metasyntactic variables only.
our %def_no_var_arg_commands;
our %def_aliases;
foreach my $def_command(keys %def_map) {
if (ref($def_map{$def_command}) eq 'HASH') {
my ($real_command) = keys (%{$def_map{$def_command}});
$def_aliases{$def_command} = $real_command;
$def_aliases{$def_command.'x'} = $real_command.'x';
}
$def_no_var_arg_commands{$def_command} = 1 if ($def_command =~ /^deftype/);
}
our %small_block_associated_command;
for my $cmd ('example', 'display', 'format', 'lisp', 'quotation',
'indentedblock') {
$small_block_associated_command{'small'.$cmd} = $cmd;
};
# Section and heading commands hierarchical levels
our %command_structuring_level = (
'top' => 0,
'part' => 0, # out of the main hierarchy
'chapter' => 1,
'majorheading' => 1, # same as chapheading
'unnumbered' => 1,
'centerchap' => 1, # like unnumbered
'chapheading' => 1,
'appendix' => 1,
'section' => 2,
'unnumberedsec' => 2,
'heading' => 2,
'appendixsec' => 2,
'appendixsection' => 2, # same as appendixsec
'subsection' => 3,
'unnumberedsubsec' => 3,
'subheading', => 3,
'appendixsubsec' => 3,
'subsubsection' => 4,
'unnumberedsubsubsec' => 4,
'subsubheading' => 4,
'appendixsubsubsec' => 4,
);
our %level_to_structuring_command;
{
my $sections = [ ];
my $appendices = [ ];
my $unnumbered = [ ];
my $headings = [ ];
# set levels for synonyms
$level_to_structuring_command{'appendixsection'} = $appendices;
$level_to_structuring_command{'majorheading'} = $headings;
$level_to_structuring_command{'centerchap'} = $unnumbered;
foreach my $command (keys (%command_structuring_level)) {
next if defined($level_to_structuring_command{$command});
if ($command =~ /^appendix/) {
$level_to_structuring_command{$command} = $appendices;
} elsif ($command =~ /^unnumbered/ or $command eq 'top') {
$level_to_structuring_command{$command} = $unnumbered;
} elsif ($command =~ /section$/ or $command eq 'chapter') {
$level_to_structuring_command{$command} = $sections;
} else {
# no mapping for part, it is outside of the main hierarchy
next if ($command eq 'part');
$level_to_structuring_command{$command} = $headings;
}
my $command_level = $command_structuring_level{$command};
if (defined($level_to_structuring_command{$command}->[$command_level])) {
die "$command: level_to_structuring_command already set to "
.$level_to_structuring_command{$command}->[$command_level]."\n";
}
$level_to_structuring_command{$command}->[$command_level]
= $command;
}
}
# %all_commands includes user-settable commands only.
# The internal commands are not in %all_commands.
# used in util/txicmdlist
our %all_commands;
foreach my $command (
keys(%Texinfo::Commands::block_commands),
keys(%Texinfo::Commands::brace_commands),
keys(%Texinfo::Commands::line_commands),
keys(%Texinfo::Commands::nobrace_commands),
) {
$all_commands{$command} = 1;
}
# brace commands that are not replaced with text.
our %non_formatted_brace_commands;
foreach my $non_formatted_brace_command ('anchor', 'caption',
'errormsg', 'hyphenation', 'shortcaption', 'sortas') {
$non_formatted_brace_commands{$non_formatted_brace_command} = 1;
}
# functions for main program. Should not be called in user-defined code.
# FIXME locate_init_file() is also called in HTML Converter for htmlxref files.
# $FILE: file name to locate. It can be a file path. Binary string.
# $DIRECTORIES: a reference on a array containing a list of directories to
# search the file in. Binary strings.
# $ALL_FILES: if true collect all the files with that name, otherwise stop
# at first match.
sub locate_init_file($$$)
{
my $file = shift;
my $directories = shift;
my $all_files = shift;
if (File::Spec->file_name_is_absolute($file)) {
return $file if (-e $file and -r $file);
} else {
my @files;
foreach my $dir (@$directories) {
next unless (-d $dir);
my $possible_file = File::Spec->catfile($dir, $file);
if ($all_files) {
push (@files, $possible_file)
if (-e $possible_file and -r $possible_file);
} else {
return $possible_file if (-e $possible_file and -r $possible_file);
}
}
return @files if ($all_files);
}
return undef;
}
# API to open, set encoding and register files.
# In general $SELF is stored as $converter->{'output_files'}
# and should be accessed through $converter->output_files_information();
# TODO next three functions not documented anywhere, probably relevant to document
# both in POD and in HTML Customization API.
sub output_files_initialize
{
return {'unclosed_files' => {}, 'opened_files' => []};
}
#
# All the opened files are registered, except for stdout,
# and the closing of files should be registered too with
# output_files_register_closed() below. This makes possible to
# unlink all the opened files and close the files not already
# closed.
#
# $FILE_PATH is the file path, it should be a binary string.
# If $USE_BINMODE is set, call binmode() to set binary mode.
# $OUTPUT_ENCODING argument overrides the output encoding.
# Returns
# - the opened filehandle, or undef if opening failed,
# - the $! error message or undef if opening succeeded.
sub output_files_open_out($$$;$$)
{
my $self = shift;
my $customization_information = shift;
my $file_path = shift;
my $use_binmode = shift;
my $output_encoding = shift;
#if (!defined($file_path)) {
# cluck('output_files_open_out: file_path undef');
#}
my $encoding;
if (defined($output_encoding)) {
$encoding = $output_encoding;
} elsif (defined($customization_information->get_conf('OUTPUT_PERL_ENCODING'))) {
$encoding = $customization_information->get_conf('OUTPUT_PERL_ENCODING');
}
if ($file_path eq '-') {
binmode(STDOUT) if $use_binmode;
binmode(STDOUT, ":encoding($encoding)") if (defined($encoding));
if ($self) {
$self->{'unclosed_files'}->{$file_path} = \*STDOUT;
}
return \*STDOUT, undef;
}
my $filehandle = do { local *FH };
if (!open ($filehandle, '>', $file_path)) {
my $error_message = $!;
return undef, $error_message;
}
# If $use_binmode is true, we run binmode to turn off outputting LF as CR LF
# under MS-Windows, so that Info tag tables will have correct offsets. This
# must be done before setting the encoding filters with binmode.
binmode($filehandle) if $use_binmode;
if ($encoding) {
binmode($filehandle, ":encoding($encoding)");
}
if ($self) {
push @{$self->{'opened_files'}}, $file_path;
$self->{'unclosed_files'}->{$file_path} = $filehandle;
}
return $filehandle, undef;
}
# see the description of $SELF in comment above output_files_open_out.
#
# $FILE_PATH is the file path, it should be a binary string.
sub output_files_register_closed($$)
{
my $self = shift;
my $file_path = shift;
if ($self->{'unclosed_files'}->{$file_path}) {
delete $self->{'unclosed_files'}->{$file_path};
} else {
cluck "$file_path not opened\n";
}
}
# The next two functions should not be called from user-defined
# code, only from the main program. They are defined here for
# consistency of the API and clarity of the code.
#
# see the description of $SELF in comment above output_files_open_out.
sub output_files_opened_files($)
{
my $self = shift;
if (defined($self->{'opened_files'})) {
return @{$self->{'opened_files'}};
} else {
return ();
}
}
# see the description of $SELF in comment above output_files_open_out.
sub output_files_unclosed_files($)
{
my $self = shift;
return $self->{'unclosed_files'};
}
# end of output_files API
# functions used in main program, Parser and/or Texinfo::Structuring.
# Not supposed to be called in user-defined code.
# Called both in NonXS and XS parsers
sub rearrange_tree_beginning($$)
{
my $self = shift;
my $before_node_section = shift;
# Put everything before @setfilename in a special type. This allows to
# ignore everything before @setfilename.
if ($self->global_commands_information()->{'setfilename'}
and $self->global_commands_information()->{'setfilename'}->{'parent'}
eq $before_node_section) {
my $before_setfilename = {'type' => 'preamble_before_setfilename',
'parent' => $before_node_section,
'contents' => []};
while (@{$before_node_section->{'contents'}}
and (!$before_node_section->{'contents'}->[0]->{'cmdname'}
or $before_node_section->{'contents'}->[0]->{'cmdname'} ne 'setfilename')) {
my $content = shift @{$before_node_section->{'contents'}};
$content->{'parent'} = $before_setfilename;
push @{$before_setfilename->{'contents'}}, $content;
}
unshift (@{$before_node_section->{'contents'}}, $before_setfilename)
if (@{$before_setfilename->{'contents'}});
delete $before_node_section->{'contents'}
if (scalar(@{$before_node_section->{'contents'}}) == 0);
}
_add_preamble_before_content($before_node_section);
}
sub _add_preamble_before_content($)
{
my $before_node_section = shift;
# add a preamble for informational commands
my $informational_preamble = {'type' => 'preamble_before_content',
'parent' => $before_node_section,
'contents' => []};
my @first_types;
if ($before_node_section->{'contents'}) {
while (@{$before_node_section->{'contents'}}) {
my $next_content = $before_node_section->{'contents'}->[0];
if ($next_content->{'type'}
and ($next_content->{'type'} eq 'preamble_before_beginning'
or $next_content->{'type'} eq 'preamble_before_setfilename')) {
push @first_types, shift @{$before_node_section->{'contents'}};
} elsif (($next_content->{'type'} and $next_content->{'type'} eq 'paragraph')
or ($next_content->{'cmdname'} and
not $Texinfo::Commands::preamble_commands{
$next_content->{'cmdname'}})) {
last;
} else {
my $content = shift @{$before_node_section->{'contents'}};
$content->{'parent'} = $informational_preamble;
push @{$informational_preamble->{'contents'}}, $content;
}
}
}
push @first_types, $informational_preamble;
unshift (@{$before_node_section->{'contents'}}, @first_types);
}
sub get_perl_encoding($$$)
{
my $commands_info = shift;
my $registrar = shift;
my $configuration_information = shift;
my $result;
if (defined($commands_info->{'documentencoding'})) {
foreach my $element (@{$commands_info->{'documentencoding'}}) {
my $perl_encoding = element_associated_processing_encoding($element);
if (!defined($perl_encoding)) {
my $encoding = $element->{'extra'}->{'input_encoding_name'}
if ($element->{'extra'});
if (defined($encoding)) {
$registrar->line_warn($configuration_information,
sprintf(__("unrecognized encoding name `%s'"), $encoding),
$element->{'source_info'});
}
} else {
$result = $perl_encoding;
}
}
}
return $result;
}
# for Parser and main program
sub warn_unknown_language($) {
my $lang = shift;
my @messages = ();
my $lang_code = $lang;
my $region_code;
if ($lang =~ /^([a-z]+)_([A-Z]+)/) {
$lang_code = $1;
$region_code = $2;
}
if (! $Texinfo::Documentlanguages::language_codes{$lang_code}) {
push @messages, sprintf(__("%s is not a valid language code"),
$lang_code);
}
if (defined($region_code)
and ! $Texinfo::Documentlanguages::region_codes{$region_code}) {
push @messages, sprintf(__("%s is not a valid region code"),
$region_code);
}
return @messages;
}
# next functions are for code used in Structuring in addition to Parser.
# also possibly used in Texinfo::Transformations.
sub _find_end_brace($$)
{
my $text = shift;
my $braces_count = shift;
my $before = '';
while ($braces_count > 0 and length($text)) {
if ($text =~ s/([^()]*)([()])//) {
$before .= $1.$2;
my $brace = $2;
if ($brace eq '(') {
$braces_count++;
} else {
$braces_count--;
if ($braces_count == 0) {
return ($before, $text, 0);
}
}
} else {
$before .= $text;
$text = '';
}
}
return ($before, undef, $braces_count);
}
# This only counts opening braces, and returns 0 once all the parentheses
# are closed
sub _count_opened_tree_braces($$);
sub _count_opened_tree_braces($$)
{
my $current = shift;
my $braces_count = shift;
if (defined($current->{'text'})) {
my ($before, $after);
($before, $after, $braces_count) = _find_end_brace($current->{'text'},
$braces_count);
}
return $braces_count;
}
# relocate $SOURCE_MARKS source marks with position between
# $BEGIN_POSITION and $BEGIN_POSITION + $ADDED_LEN to be relative to
# $BEGIN_POSITION, and move to element $E.
# return $BEGIN_POSITION + $ADDED_LEN if there were source marks
sub relocate_source_marks($$$$)
{
my $source_marks = shift;
return undef if (!$source_marks);
my $e = shift;
my $begin_position = shift;
my $added_len = shift;
my $end_position = $begin_position + $added_len;
my @indices_to_remove;
# collect source marks to remove starting from the beginning to keep
# the correct order in the $e element. Order indices to remove
# in the reverse order to start from the last in order not to change
# the array order when the entry is splice'd away.
for (my $i = 0; $i < scalar(@$source_marks); $i++) {
my $source_mark = $source_marks->[$i];
if (($begin_position == 0
and (!defined($source_marks->[$i]->{'position'})
# this should never happen
or $source_marks->[$i]->{'position'} == 0))
or ($source_marks->[$i]->{'position'} > $begin_position
and $source_marks->[$i]->{'position'} <= $end_position)) {
unshift @indices_to_remove, $i;
if ($source_mark->{'position'}) {
$source_mark->{'position'}
= $source_mark->{'position'} - $begin_position;
} elsif ($begin_position) {
warn "BUG: no $source_mark->{'position'} but $begin_position\n";
}
$e->{'source_marks'} = [] if (! defined($e->{'source_marks'}));
push @{$e->{'source_marks'}}, $source_mark;
} elsif ($source_marks->[$i]->{'position'} > $end_position) {
# only correct if positions are always monotonically increasing
# but should be the case for now
last;
}
}
foreach my $i (@indices_to_remove) {
splice (@$source_marks, $i, 1);
}
return $end_position;
}
# retrieve a leading manual name in parentheses, if there is one.
# $LABEL_CONTENTS_CONTAINER->{'contents'} is the Texinfo for the specification
# of a node. It is relevant in any situation when a label is expected,
# @node, menu entry, float, anchor... For the @node command, for instance,
# $LABEL_CONTENTS_CONTAINER is typically $node->{'args'}->[0].
#
# Returned object is a hash with two fields:
#
# manual_content - Texinfo tree for a manual name extracted from the
# node specification.
# node_content - Texinfo tree for the node name on its own
#
# A contents array where the manual_content and node_content
# elements substituted the initial contents is also returned,
# typically to replace $LABEL_CONTENTS_CONTAINER->{'contents'}
# for consistency.
#
# Could be documented, but only if there is evidence that this function
# is useful in user-defined code.
sub parse_node_manual($;$)
{
my $label_contents_container = shift;
my $modify_node = shift;
return (undef, undef)
if (!$label_contents_container->{'contents'});
my $contents = $label_contents_container->{'contents'};
my $idx = 0;
my $manual;
my $result;
my $node_content = [];
if ($contents->[0] and $contents->[0]->{'text'}
and $contents->[0]->{'text'} =~ /^\(/) {
my ($new_first, $opening_brace);
$manual = [];
my $braces_count = 1; # Number of ( seen minus number of ) seen.
# the leading ( from @$contents is not in manual.
# If the first contents element is "(" followed by more text, split
# the leading "(" into its own element.
my $first = $contents->[0];
if ($first->{'text'} ne '(') {
if ($modify_node) {
$opening_brace = {'text' => '(', 'parent' => $label_contents_container};
}
my $brace_text = $first->{'text'};
$brace_text =~ s/^\(//;
$new_first = { 'text' => $brace_text};
} else {
# first element is "(", it is not part of the manual, keep it
$idx++;
}
for (; $idx < scalar(@$contents); $idx++) {
my $content;
if ($idx == 0) {
$content = $new_first;
} else {
$content = $contents->[$idx];
}
if (!defined($content->{'text'}) or $content->{'text'} !~ /\)/) {
push @$manual, $content;
$braces_count = _count_opened_tree_braces($content, $braces_count);
# This is an error, braces were closed in a command
if ($braces_count == 0) {
last;
}
} else {
my ($before, $after);
($before, $after, $braces_count) = _find_end_brace($content->{'text'},
$braces_count);
if ($braces_count == 0) {
my @remaining_source_marks;
my $current_position = 0;
# At this point, we are sure that there is a manual part,
# so the pending removal/addition of elements at the beginning
# of the manual can proceed (if modify_node).
if ($modify_node) {
if ($opening_brace) {
# remove the original first element and prepend the
# split "(" and text elements
shift @$contents;
$new_first ->{'parent'} = $label_contents_container;
unshift @$contents, $new_first;
unshift @$contents, $opening_brace;
$idx++;
if ($first->{'source_marks'}) {
my $current_position = relocate_source_marks(
$first->{'source_marks'}, $opening_brace,
0, length($opening_brace->{'text'}));
relocate_source_marks($first->{'source_marks'}, $new_first,
$current_position, length($new_first->{'text'}));
}
}
# Remove current element $content with closing brace from the tree.
splice(@$contents, $idx, 1);
}
# remove the closing ), it is not in manual_content
$before =~ s/(\))$//;
my $end_paren = $1;
if ($before ne '') {
# text before ), part of the manual name
my $last_manual_element = { 'text' => $before };
push @$manual, $last_manual_element;
if ($modify_node) {
$last_manual_element->{'parent'} = $content->{'parent'};
splice(@$contents, $idx, 0, $last_manual_element);
$idx++;
$current_position = relocate_source_marks(
$content->{'source_marks'}, $last_manual_element,
$current_position, length($before));
}
}
if ($modify_node) {
my $closing_brace = {'text' => ')',
'parent' => $content->{'parent'}};
splice(@$contents, $idx, 0, $closing_brace);
$idx++;
$current_position = relocate_source_marks(
$content->{'source_marks'}, $closing_brace,
$current_position, length($closing_brace->{'text'}));
}
$after =~ s/^(\s*)//;
my $spaces_after = $1;
if ($spaces_after and $modify_node) {
my $spaces_element = {'text' => $spaces_after,
'parent' => $content->{'parent'}};
splice(@$contents, $idx, 0, $spaces_element);
$idx++;
$current_position = relocate_source_marks(
$content->{'source_marks'}, $spaces_element,
$current_position, length($spaces_after));
}
if ($after ne '') {
# text after ), part of the node name.
my $leading_node_content = {'text' => $after};
push @$node_content, $leading_node_content;
if ($modify_node) {
$leading_node_content->{'parent'} = $content->{'parent'};
splice(@$contents, $idx, 0, $leading_node_content);
$current_position = relocate_source_marks(
$content->{'source_marks'}, $leading_node_content,
$current_position, length($after));
}
$idx++;
}
last;
} else {
push @$manual, $content;
}
}
}
if ($braces_count != 0) {
# unclosed brace, reset
$manual = undef;
$idx = 0;
} else {
$result = {};
$result->{'manual_content'} = $manual;
}
}
if ($idx < scalar(@$contents)) {
push(@$node_content, @$contents[$idx .. scalar(@$contents)-1]);
}
if (scalar(@$node_content)) {
$result = {} if (!$result);
$result->{'node_content'} = $node_content;
}
return $result;
}
# misc functions used in diverse contexts and useful in converters
sub element_associated_processing_encoding($)
{
my $element = shift;
my $perl_encoding;
my $encoding = $element->{'extra'}->{'input_encoding_name'}
if ($element->{'extra'});
if (defined($encoding) and $encoding ne '') {
$encoding = $encoding_name_conversion_map{$encoding}
if (defined($encoding_name_conversion_map{$encoding}));
my $Encode_encoding_object = Encode::find_encoding($encoding);
if (defined($Encode_encoding_object)) {
$perl_encoding = $Encode_encoding_object->name();
$perl_encoding = undef if (defined($perl_encoding)
and $perl_encoding eq '');
}
}
return $perl_encoding;
}
# Reverse the decoding of the file name from the input encoding. When
# dealing with file names, we want Perl strings representing sequences of
# bytes, not Unicode codepoints.
# This is necessary even if the name of the included file is purely
# ASCII, as the name of the directory it is located within may contain
# non-ASCII characters.
# Otherwise, the -e operator and similar may not work correctly.
sub encode_file_name($$)
{
my $file_name = shift;
my $input_encoding = shift;
my $encoding;
return ($file_name, $encoding)
if (not defined($input_encoding));
if ($input_encoding eq 'utf-8' or $input_encoding eq 'utf-8-strict') {
$encoding = 'utf-8';
} else {
$encoding = $input_encoding;
}
$file_name = Encode::encode($encoding, $file_name);
return ($file_name, $encoding);
}
sub locate_include_file($$)
{
my $customization_information = shift;
my $input_file_path = shift;
my $ignore_include_directories = 0;
my ($volume, $directories, $filename)
= File::Spec->splitpath($input_file_path);
my @directories = File::Spec->splitdir($directories);
#print STDERR "$customization_information $input_file_path ".
# @{$customization_information->get_conf('INCLUDE_DIRECTORIES')}\n";
# If the path is absolute or begins with . or .., do not search in
# include directories. This is consistent with Kpathsea for Texinfo TeX.
if (File::Spec->file_name_is_absolute($input_file_path)) {
$ignore_include_directories = 1;
} else {
foreach my $dir (@directories) {
if ($dir eq File::Spec->updir() or $dir eq File::Spec->curdir()) {
$ignore_include_directories = 1;
last;
} elsif ($dir ne '') {
last;
}
}
}
my $found_file;
if ($ignore_include_directories) {
$found_file = $input_file_path
if (-e $input_file_path and -r $input_file_path);
} else {
my @include_directories;
if ($customization_information
and $customization_information->get_conf('INCLUDE_DIRECTORIES')) {
@include_directories
= @{$customization_information->get_conf('INCLUDE_DIRECTORIES')};
} else {
# no object with directory list and not an absolute path, never succeed
return undef;
}
foreach my $include_dir (@include_directories) {
my ($include_volume, $include_dir_path, $include_filename)
= File::Spec->splitpath($include_dir, 1);
my $possible_file = File::Spec->catpath($include_volume,
File::Spec->catdir(File::Spec->splitdir($include_dir_path),
@directories), $filename);
$found_file = $possible_file
if (-e $possible_file and -r $possible_file);
last if (defined($found_file));
}
}
return $found_file;
}
sub _informative_command_value($)
{
my $element = shift;
my $cmdname = $element->{'cmdname'};
if ($Texinfo::Commands::line_commands{$cmdname} eq 'lineraw') {
if (not $Texinfo::Commands::commands_args_number{$cmdname}) {
return 1;
} elsif ($element->{'args'}) {
return join(' ', map {$_->{'text'}} @{$element->{'args'}});
}
} elsif ($element->{'extra'}
and exists($element->{'extra'}->{'text_arg'})) {
return $element->{'extra'}->{'text_arg'};
} elsif ($element->{'extra'} and $element->{'extra'}->{'misc_args'}
and exists($element->{'extra'}->{'misc_args'}->[0])) {
return $element->{'extra'}->{'misc_args'}->[0];
} elsif ($Texinfo::Commands::line_commands{$cmdname} eq 'line'
and $element->{'args'} and scalar(@{$element->{'args'}})
and $element->{'args'}->[0]
and $element->{'args'}->[0]->{'contents'}
and scalar(@{$element->{'args'}->[0]->{'contents'}})
and exists($element->{'args'}->[0]->{'contents'}->[0]->{'text'})) {
return $element->{'args'}->[0]->{'contents'}->[0]->{'text'};
}
return undef;
}
# REMARK documentencoding handling is not reverted by resetting a value with
# set_conf, as the encodings are set using other sources of information
# (possibly based on @documentencoding) in converter.
sub set_informative_command_value($$)
{
my $self = shift;
my $element = shift;
my $cmdname = $element->{'cmdname'};
$cmdname = 'shortcontents' if ($cmdname eq 'summarycontents');
my $value = _informative_command_value($element);
if (defined($value)) {
return $self->set_conf($cmdname, $value);
}
return 0;
}
sub _in_preamble($)
{
my $element = shift;
my $current_element = $element;
while ($current_element->{'parent'}) {
if (defined($current_element->{'parent'}->{'type'})
and $current_element->{'parent'}->{'type'} eq 'preamble_before_content') {
return 1;
}
$current_element = $current_element->{'parent'};
}
return 0;
}
# $COMMAND_LOCATION is 'last', 'preamble' or 'preamble_or_first'
# 'preamble' means setting sequentially to the values in the preamble.
# 'preamble_or_first' means setting to the first value for the command
# in the document if the first command is not in the preamble, else set
# sequentially to the values in the preamble.
# 'last' means setting to the last value for the command in the document.
#
# For unique command, the last may be considered to be the same as the first.
#
# Notice that the only effect is to use set_conf (directly or through
# set_informative_command_value), no @-commands setting side effects are done
# and associated customization variables are not set/reset either.
sub set_global_document_command($$$$)
{
my $self = shift;
my $global_commands_information = shift;
my $global_command = shift;
my $command_location = shift;
if ($command_location ne 'last' and $command_location ne 'preamble_or_first'
and $command_location ne 'preamble') {
warn "BUG: set_global_document_command: unknown command_location: $command_location";
}
my $element;
if ($global_commands_information
and defined($global_commands_information->{$global_command})
and ref($global_commands_information->{$global_command}) eq 'ARRAY') {
if ($command_location eq 'last') {
$element = $global_commands_information->{$global_command}->[-1];
set_informative_command_value($self, $element);
} else {
if ($command_location eq 'preamble_or_first'
and not _in_preamble($global_commands_information->{$global_command}->[0])) {
$element =
$global_commands_information->{$global_command}->[0];
set_informative_command_value($self, $element);
} else {
foreach my $command_element (@{$global_commands_information->{$global_command}}) {
if (_in_preamble($command_element)) {
$element = $command_element;
set_informative_command_value($self, $element);
} else {
last;
}
}
}
}
} elsif ($global_commands_information
and defined($global_commands_information->{$global_command})) {
# unique command, first, preamble and last are the same
$element = $global_commands_information->{$global_command};
set_informative_command_value($self, $element);
}
return $element;
}
sub lookup_index_entry($$)
{
my $index_entry_info = shift;
my $indices_information = shift;
my ($entry_index_name, $entry_number) = @{$index_entry_info};
my $index_info;
if ($indices_information->{$entry_index_name}) {
$index_info = $indices_information->{$entry_index_name};
if ($index_info->{'index_entries'}
and $index_info->{'index_entries'}->[$entry_number-1]) {
return ($index_info->{'index_entries'}->[$entry_number-1], $index_info);
}
}
return (undef, $index_info);
}
sub set_output_encodings($$)
{
my $customization_information = shift;
my $parser_information = shift;
$customization_information->set_conf('OUTPUT_ENCODING_NAME',
$parser_information->{'input_encoding_name'})
if ($parser_information
and $parser_information->{'input_encoding_name'});
if (not defined($customization_information->get_conf('OUTPUT_PERL_ENCODING'))
and defined($customization_information->get_conf('OUTPUT_ENCODING_NAME'))) {
my $conversion_encoding
= $customization_information->get_conf('OUTPUT_ENCODING_NAME');
if (defined($encoding_name_conversion_map{$conversion_encoding})) {
$conversion_encoding
= $encoding_name_conversion_map{$conversion_encoding};
}
my $perl_encoding = Encode::resolve_alias($conversion_encoding);
if (defined($perl_encoding) and $perl_encoding ne '') {
$customization_information->set_conf('OUTPUT_PERL_ENCODING', $perl_encoding);
}
}
}
my $min_level = $command_structuring_level{'chapter'};
my $max_level = $command_structuring_level{'subsubsection'};
# Return numbered level of an element, as modified by raise/lowersections
sub section_level($)
{
my $section = shift;
my $level = $command_structuring_level{$section->{'cmdname'}};
# correct level according to raise/lowersections
if ($section->{'extra'} and $section->{'extra'}->{'sections_level'}) {
$level -= $section->{'extra'}->{'sections_level'};
if ($level < $min_level) {
if ($command_structuring_level{$section->{'cmdname'}} < $min_level) {
$level = $command_structuring_level{$section->{'cmdname'}};
} else {
$level = $min_level;
}
} elsif ($level > $max_level) {
$level = $max_level;
}
}
return $level;
}
sub trim_spaces_comment_from_content($)
{
my $contents = shift;
shift @$contents
if ($contents->[0] and $contents->[0]->{'type'}
and ($contents->[0]->{'type'} eq 'ignorable_spaces_after_command'
or $contents->[0]->{'type'} eq 'spaces_after_close_brace'));
while (@$contents
and (($contents->[-1]->{'cmdname'}
and ($contents->[-1]->{'cmdname'} eq 'c'
or $contents->[-1]->{'cmdname'} eq 'comment'))
or ($contents->[-1]->{'type'}
and $contents->[-1]->{'type'} eq 'spaces_at_end'))) {
pop @$contents;
}
}
# decompose a decimal number on a given base.
sub _decompose_integer($$)
{
my $number = shift;
my $base = shift;
my @result = ();
while ($number >= 0) {
my $factor = $number % $base;
push (@result, $factor);
$number = int(($number - $factor) / $base) - 1;
}
return @result;
}
sub enumerate_item_representation($$)
{
my $specification = shift;
my $number = shift;
if ($specification =~ /^[0-9]+$/) {
return $specification + $number -1;
}
my $result = '';
my $base_letter = ord('a');
$base_letter = ord('A') if (ucfirst($specification) eq $specification);
my @letter_ords = _decompose_integer(ord($specification) - $base_letter + $number - 1, 26);
foreach my $ord (@letter_ords) {
$result = chr($base_letter + $ord) . $result;
}
return $result;
}
sub is_content_empty($;$);
sub is_content_empty($;$)
{
my $tree = shift;
my $do_not_ignore_index_entries = shift;
if (!defined($tree) or !exists($tree->{'contents'})) {
return 1;
}
foreach my $content (@{$tree->{'contents'}}) {
if ($content->{'cmdname'}) {
if ($content->{'type'} and $content->{'type'} eq 'index_entry_command') {
if ($do_not_ignore_index_entries) {
return 0;
} else {
next;
}
}
if (exists($Texinfo::Commands::line_commands{$content->{'cmdname'}})) {
if ($Texinfo::Commands::formatted_line_commands{$content->{'cmdname'}}
or $Texinfo::Commands::formattable_line_commands{$content->{'cmdname'}}) {
return 0;
} else {
next;
}
} elsif (exists($Texinfo::Commands::nobrace_commands{$content->{'cmdname'}})) {
if ($Texinfo::Commands::formatted_nobrace_commands{$content->{'cmdname'}}) {
return 0;
} else {
next;
}
} elsif ($non_formatted_brace_commands{$content->{'cmdname'}}
or $Texinfo::Commands::non_formatted_block_commands{$content->{'cmdname'}}) {
next;
} else {
return 0;
}
}
if ($content->{'type'}) {
if ($content->{'type'} eq 'paragraph') {
return 0;
}
}
if ($content->{'text'} and $content->{'text'} =~ /\S/) {
return 0;
}
if (not is_content_empty($content, $do_not_ignore_index_entries)) {
return 0;
}
}
return 1;
}
# if in this container, we are 'inline', within a running text
my @inline_types = ('def_line', 'paragraph', 'preformatted',
'line_arg', 'block_line_arg', 'menu_entry_name', 'menu_entry_node');
my %inline_types;
foreach my $type (@inline_types) {
$inline_types{$type} = 1;
}
my %not_inline_commands;
foreach my $command (
keys(%Texinfo::Commands::root_commands),
keys(%Texinfo::Commands::block_commands),
grep {$Texinfo::Commands::brace_commands{$_} eq 'context'}
keys(%Texinfo::Commands::brace_commands)) {
$not_inline_commands{$command} = 1;
}
# Return 1 if inline in a running text, 0 if right in top-level or block
# environment, and undef otherwise.
sub _inline_or_block($)
{
my $current = shift;
if ($current->{'type'} and $inline_types{$current->{'type'}}) {
return 1;
} elsif ($current->{'cmdname'}
and exists($not_inline_commands{$current->{'cmdname'}})) {
return 0;
} else {
return undef;
}
}
# return true if in running text context.
# If $CHECK_CURRENT is set, check the element itself, too, in
# addition to the parent context.
sub element_is_inline($;$)
{
my $current = shift;
my $check_current = shift;
if ($check_current) {
my $inline_or_block = _inline_or_block($current);
return ($inline_or_block) if (defined($inline_or_block));
}
while ($current->{'parent'}) {
$current = $current->{'parent'};
my $inline_or_block = _inline_or_block($current);
return ($inline_or_block) if (defined($inline_or_block));
}
return 0;
}
sub normalize_top_node_name($)
{
my $node_name = shift;
if ($node_name =~ /^top$/i) {
return 'Top';
}
return $node_name;
}
# Used in count_bytes
my $Encode_encoding_object;
my $last_encoding;
sub count_bytes($$;$)
{
my $self = shift;
my $string = shift;
my $encoding = shift;
if (!defined($encoding) and $self and $self->get_conf('OUTPUT_PERL_ENCODING')) {
$encoding = $self->get_conf('OUTPUT_PERL_ENCODING');
}
if ($encoding and $encoding ne 'ascii') {
if (!defined($last_encoding) or $last_encoding ne $encoding) {
# Look up and save encoding object for next time. This is
# slightly faster than calling Encode::encode.
$last_encoding = $encoding;
$Encode_encoding_object = Encode::find_encoding($encoding);
if (!defined($Encode_encoding_object)) {
Carp::croak "Unknown encoding '$encoding'";
}
}
return length($Encode_encoding_object->encode($string));
} else {
return length($string);
}
}
# if $PREFER_REFERENCE_ELEMENT is set, prefer an untranslated element.
sub index_content_element($;$)
{
my $element = shift;
my $prefer_reference_element = shift;
if ($element->{'extra'} and $element->{'extra'}->{'def_command'}) {
if ($prefer_reference_element
and $element->{'extra'}->{'def_index_ref_element'}) {
return $element->{'extra'}->{'def_index_ref_element'};
} else {
return $element->{'extra'}->{'def_index_element'};
}
} else {
return $element->{'args'}->[0];
}
}
# custom heading command line is split at @|
sub split_custom_heading_command_contents($)
{
my $contents = shift;
my $result = [];
my $nr_split_contents = 0;
my @contents = @$contents;
trim_spaces_comment_from_content(\@contents);
if (scalar(@contents) == 0) {
# or undef?
return $result;
}
push @$result, [];
while (scalar(@contents)) {
my $current_content = $contents[0];
#print STDERR "$nr_split_contents ".scalar(@contents).": "
# .debug_print_element($current_content)."\n";
if (defined($current_content->{'cmdname'})
and $current_content->{'cmdname'} eq '|') {
shift @contents;
push @$result, [];
$nr_split_contents++;
if ($nr_split_contents >= 2) {
last;
}
} else {
push @{$result->[-1]}, shift @contents;
}
}
push @{$result->[-1]}, @contents;
return $result;
}
# not currently used
sub find_parent_root_command($$);
sub find_parent_root_command($$)
{
my $self = shift;
my $current = shift;
my $root_command;
while (1) {
if ($current->{'cmdname'}) {
if ($Texinfo::Commands::root_commands{$current->{'cmdname'}}) {
return $current;
} elsif ($Texinfo::Commands::block_commands{$current->{'cmdname'}}
and $Texinfo::Commands::block_commands{$current->{'cmdname'}} eq 'region') {
if ($current->{'cmdname'} eq 'copying' and $self
and $self->{'global_commands'}
and $self->{'global_commands'}->{'insertcopying'}) {
foreach my $insertcopying(@{$self->{'global_commands'}->{'insertcopying'}}) {
my $root_command
= find_parent_root_command($self, $insertcopying);
return $root_command if (defined($root_command));
}
} else {
return undef;
}
}
}
if ($current->{'parent'}) {
$current = $current->{'parent'};
} else {
return undef;
}
}
# Should never get there
return undef;
}
# In the default case, global informative commands are collected
# by the parsers. The following functions allow to collect
# any @-command.
# Used in customization init files code and should be useful in
# particular in user-defined init files.
sub collect_commands_in_tree($$)
{
my $root = shift;
my $commands_list = shift;
my $commands_hash = {};
foreach my $command_name (@$commands_list) {
$commands_hash->{$command_name} = [];
}
_collect_commands_in_tree($root, $commands_hash);
return $commands_hash;
}
sub _collect_commands_in_tree($$);
sub _collect_commands_in_tree($$)
{
my $current = shift;
my $commands_hash = shift;
if (defined($current->{'cmdname'})
and defined($commands_hash->{$current->{'cmdname'}})) {
push @{$commands_hash->{$current->{'cmdname'}}}, $current;
}
foreach my $key ('args', 'contents') {
if ($current->{$key}) {
foreach my $child (@{$current->{$key}}) {
_collect_commands_in_tree($child, $commands_hash);
}
}
}
}
sub collect_commands_list_in_tree($$)
{
my $root = shift;
my $commands_list = shift;
my $collected_commands_list = [];
my $commands_hash = {};
foreach my $command_name (@$commands_list) {
$commands_hash->{$command_name} = 1;
}
_collect_commands_list_in_tree($root, $commands_hash, $collected_commands_list);
return $collected_commands_list;
}
sub _collect_commands_list_in_tree($$$);
sub _collect_commands_list_in_tree($$$)
{
my $current = shift;
my $commands_hash = shift;
my $collected_commands_list = shift;
if (defined($current->{'cmdname'})
and defined($commands_hash->{$current->{'cmdname'}})) {
push @{$collected_commands_list}, $current;
}
foreach my $key ('args', 'contents') {
if ($current->{$key}) {
foreach my $child (@{$current->{$key}}) {
_collect_commands_list_in_tree($child, $commands_hash,
$collected_commands_list);
}
}
}
}
# functions useful for Texinfo tree transformations
# and some tree transformations functions, mostly those
# used in conversion to main output formats. In general,
# tree transformations functions are documented in the POD section.
# Some helper functions defined here are used in other
# modules but are not generally useful in converters
# and therefore not public.
sub _copy_tree($$$);
sub _copy_tree($$$)
{
my $current = shift;
my $parent = shift;
my $reference_associations = shift;
if (exists($reference_associations->{$current})) {
# happens for def_*index_element (which are not in the main tree)
# as they contain pieces of the main tree
#print STDERR "COPY: replace $current: "
# ."$reference_associations->{$current}; "
# .Texinfo::Common::debug_print_element($current)."\n";
return $reference_associations->{$current};
}
my $new = {};
$reference_associations->{$current} = $new;
$new->{'parent'} = $parent if ($parent);
foreach my $key ('type', 'cmdname', 'text') {
$new->{$key} = $current->{$key} if (exists($current->{$key}));
}
foreach my $key ('args', 'contents') {
if ($current->{$key}) {
if (ref($current->{$key}) ne 'ARRAY') {
my $command_or_type = '';
if ($new->{'cmdname'}) {
$command_or_type = '@'.$new->{'cmdname'};
} elsif ($new->{'type'}) {
$command_or_type = $new->{'type'};
}
print STDERR "BUG: Not an array [$command_or_type] $key ".
ref($current->{$key})."\n";
}
$new->{$key} = [];
foreach my $child (@{$current->{$key}}) {
push @{$new->{$key}}, _copy_tree($child, $new, $reference_associations);
}
}
}
return $new;
}
# this code works with arrays mixing scalars and reference to elements.
# In practice arrays in extra are either only scalars (index_entry,
# misc_args) or only elements (the remaining).
sub _substitute_references_in_array($$$;$);
sub _substitute_references_in_array($$$;$)
{
my $array = shift;
my $reference_associations = shift;
my $context = shift;
my $level = shift;
$level = 0 if (!defined($level));
$level++;
my $result = [];
my $index = 0;
foreach my $item (@{$array}) {
if (ref($item) eq '') {
push @{$result}, $item;
} elsif ($reference_associations->{$item}) {
push @{$result}, $reference_associations->{$item};
} elsif (ref($item) eq 'ARRAY') {
# nothing like a two level array currently, and hopefully never
push @$result,
_substitute_references_in_array($item, $reference_associations,
"$context [$index]", $level);
} else {
print STDERR "Trouble with $context [$index] (".ref($item).")\n";
push @{$result}, undef;
}
$index++;
}
return $result;
}
sub _copy_extra_info($$$;$);
sub _copy_extra_info($$$;$)
{
my $current = shift;
my $new = shift;
my $reference_associations = shift;
my $level = shift;
my $command_or_type = '';
if ($new->{'cmdname'}) {
$command_or_type = '@'.$new->{'cmdname'};
} elsif ($new->{'type'}) {
$command_or_type = $new->{'type'};
}
$level = 0 if (!defined($level));
$level++;
#print STDERR (' ' x $level)
# .Texinfo::Common::debug_print_element($current).": $current\n";
foreach my $key ('args', 'contents') {
if ($current->{$key}) {
my $index = 0;
foreach my $child (@{$current->{$key}}) {
_copy_extra_info($child, $new->{$key}->[$index],
$reference_associations, $level);
$index++;
}
}
}
foreach my $info_type ('info', 'extra') {
next if (!$current->{$info_type});
foreach my $key (keys %{$current->{$info_type}}) {
#print STDERR (' ' x $level) . "K $info_type $key\n";
my $value = $current->{$info_type}->{$key};
if (ref($value) eq '') {
$new->{$info_type}->{$key} = $value;
} elsif (ref($value) eq 'ARRAY') {
# authors index_entry manual_content menus misc_args node_content
#print STDERR "Array $command_or_type $info_type -> $key\n";
$new->{$info_type}->{$key}
= _substitute_references_in_array($value, $reference_associations,
"${info_type}[$command_or_type]{$key}", $level);
} elsif (ref($value) eq 'HASH') {
if ($reference_associations->{$value}) {
# reference to another element in the tree, for example:
# associated_node caption columnfractions def_index_element
# command_as_argument seealso subentry
$new->{$info_type}->{$key} = $reference_associations->{$value};
#print STDERR "Done $info_type [$command_or_type]: $key\n";
} elsif ($value->{'contents'} or $value->{'args'} or $value->{'cmdname'}
or $value->{'type'} or (defined($value->{'text'})
and $value->{'text'} ne '')) {
# Out of tree element.
# Note that the code works only if the out of tree elements are
# not referred to by main tree elements.
#print STDERR "Doing $info_type [$command_or_type]: $key\n";
my $new_element = _copy_tree($value, undef, $reference_associations);
_copy_extra_info($value, $new_element,
$reference_associations, $level);
$new->{$info_type}->{$key} = $new_element;
} else {
# code that could handle hash structures that would not be elements.
# Not used currently, and it would be better if it stayed that way.
#print STDERR "HASH $info_type $key\n";
$new->{$info_type}->{$key} = {};
foreach my $type_key (keys(%{$value})) {
if (ref($value->{$type_key}) eq '') {
$new->{$info_type}->{$key}->{$type_key} = $value->{$type_key};
} elsif ($reference_associations->{$value->{$type_key}}) {
$new->{$info_type}->{$key}->{$type_key}
= $reference_associations->{$value->{$type_key}};
} elsif (ref($value->{$type_key}) eq 'ARRAY') {
$new->{$info_type}->{$key}->{$type_key}
= _substitute_references_in_array($value->{$type_key},
$reference_associations,
"${info_type}[$command_or_type]{$key}{$type_key}", $level);
} else {
print STDERR "Unexpected $info_type [$command_or_type]{$key}: $type_key\n";
}
}
}
}
}
}
}
sub copy_tree($;$)
{
my $current = shift;
my $parent = shift;
my $reference_associations = {};
my $copy = _copy_tree($current, $parent, $reference_associations);
_copy_extra_info($current, $copy, $reference_associations);
return $copy;
}
sub copy_contents($)
{
my $contents = shift;
if (ref($contents) ne 'ARRAY') {
cluck "$contents not an array";
return undef;
}
my $copy = copy_tree({'contents' => $contents});
return $copy->{'contents'};
}
sub modify_tree($$;$);
sub modify_tree($$;$)
{
my $tree = shift;
my $operation = shift;
my $argument = shift;
#print STDERR "modify_tree tree: $tree\n";
if (!defined($tree) or ref($tree) ne 'HASH') {
cluck "tree ".(!defined($tree) ? 'UNDEF' : "not a hash: $tree");
return undef;
}
if ($tree->{'args'}) {
my @args = @{$tree->{'args'}};
for (my $i = 0; $i <= $#args; $i++) {
my @new_args = &$operation('arg', $args[$i], $argument);
modify_tree($args[$i], $operation, $argument);
# this puts the new args at the place of the old arg using the
# offset from the end of the array
splice (@{$tree->{'args'}}, $i - $#args -1, 1, @new_args);
}
}
if ($tree->{'contents'}) {
my @contents = @{$tree->{'contents'}};
for (my $i = 0; $i <= $#contents; $i++) {
my @new_contents = &$operation('content', $contents[$i], $argument);
modify_tree($contents[$i], $operation, $argument);
# this puts the new contents at the place of the old content using the
# offset from the end of the array
splice (@{$tree->{'contents'}}, $i - $#contents -1, 1, @new_contents);
}
}
if ($tree->{'source_marks'}) {
my @source_marks = @{$tree->{'source_marks'}};
for (my $i = 0; $i <= $#source_marks; $i++) {
if ($source_marks[$i]->{'element'}) {
my @new_element
= &$operation('source_mark', $source_marks[$i]->{'element'},
$argument);
$source_marks[$i]->{'element'} = $new_element[0];
}
}
}
return $tree;
}
sub _protect_comma($$)
{
my $type = shift;
my $current = shift;
return _protect_text($current, quotemeta(','));
}
sub protect_comma_in_tree($)
{
my $tree = shift;
return modify_tree($tree, \&_protect_comma);
}
sub _new_asis_command_with_text($$;$)
{
my $text = shift;
my $parent = shift;
my $text_type = shift;
my $new_command = {'cmdname' => 'asis', 'parent' => $parent };
push @{$new_command->{'args'}}, {'type' => 'brace_command_arg',
'parent' => $new_command};
push @{$new_command->{'args'}->[0]->{'contents'}}, {
'text' => $text,
'parent' => $new_command->{'args'}->[0]};
if (defined($text_type)) {
$new_command->{'args'}->[0]->{'contents'}->[0]->{'type'} = $text_type;
}
return $new_command;
}
sub _protect_text($$)
{
my $current = shift;
my $to_protect = shift;
#print STDERR "_protect_text: $to_protect: $current "
# .debug_print_element($current, 1)."\n";
if (defined($current->{'text'}) and $current->{'text'} =~ /$to_protect/
and !(defined($current->{'type'}) and $current->{'type'} eq 'raw')) {
my @result = ();
my $remaining_text = $current->{'text'};
while ($remaining_text) {
if ($remaining_text =~ s/^(.*?)(($to_protect)+)//) {
if ($1 ne '') {
push @result, {'text' => $1, 'parent' => $current->{'parent'}};
$result[-1]->{'type'} = $current->{'type'}
if defined($current->{'type'});
}
if ($to_protect eq quotemeta(',')) {
for (my $i = 0; $i < length($2); $i++) {
push @result, {'cmdname' => 'comma', 'parent' => $current->{'parent'},
'args' => [{'type' => 'brace_command_arg'}]};
}
} else {
push @result, _new_asis_command_with_text($2, $current->{'parent'},
$current->{'type'});
}
} else {
push @result, {'text' => $remaining_text, 'parent' => $current->{'parent'}};
$result[-1]->{'type'} = $current->{'type'}
if defined($current->{'type'});
last;
}
}
#print STDERR "_protect_text: Result: @result\n";
return @result;
} else {
#print STDERR "_protect_text: No change\n";
return ($current);
}
}
sub _protect_colon($$)
{
my $type = shift;
my $current = shift;
return _protect_text($current, quotemeta(':'));
}
sub protect_colon_in_tree($)
{
my $tree = shift;
return modify_tree($tree, \&_protect_colon);
}
sub _protect_node_after_label($$)
{
my $type = shift;
my $current = shift;
return _protect_text($current, '['. quotemeta(".\t,") .']');
}
sub protect_node_after_label_in_tree($)
{
my $tree = shift;
return modify_tree($tree, \&_protect_node_after_label);
}
sub protect_first_parenthesis($)
{
my $contents = shift;
confess("BUG: protect_first_parenthesis contents undef")
if (!defined($contents));
#print STDERR "protect_first_parenthesis: $contents\n";
my @contents = @$contents;
my $brace;
if ($contents[0] and $contents->[0]{'text'} and $contents[0]->{'text'} =~ /^\(/) {
if ($contents[0]->{'text'} !~ /^\($/) {
$brace = shift @contents;
my $brace_text = $brace->{'text'};
$brace_text =~ s/^\(//;
unshift @contents, { 'text' => $brace_text,
'type' => $brace->{'type'},
'parent' => $brace->{'parent'} }
if $brace_text ne '';
} else {
$brace = shift @contents;
}
unshift @contents, _new_asis_command_with_text('(', $brace->{'parent'},
$brace->{'type'});
}
return \@contents;
}
sub move_index_entries_after_items($)
{
# enumerate or itemize
my $current = shift;
return unless ($current->{'contents'});
my $previous;
foreach my $item (@{$current->{'contents'}}) {
#print STDERR "Before proceeding: $previous $item->{'cmdname'} (@{$previous->{'contents'}})\n" if ($previous and $previous->{'contents'});
if (defined($previous) and $item->{'cmdname'}
and $item->{'cmdname'} eq 'item'
and $previous->{'contents'} and scalar(@{$previous->{'contents'}})) {
my $previous_ending_container;
if ($previous->{'contents'}->[-1]->{'type'}
and ($previous->{'contents'}->[-1]->{'type'} eq 'paragraph'
or $previous->{'contents'}->[-1]->{'type'} eq 'preformatted')) {
$previous_ending_container = $previous->{'contents'}->[-1];
} else {
$previous_ending_container = $previous;
}
my @gathered_index_entries;
#print STDERR "Gathering for item $item in previous $previous ($previous_ending_container)\n";
while ($previous_ending_container->{'contents'}->[-1]
and (($previous_ending_container->{'contents'}->[-1]->{'type'}
and $previous_ending_container->{'contents'}->[-1]->{'type'} eq 'index_entry_command')
or ($previous_ending_container->{'contents'}->[-1]->{'cmdname'}
and ($previous_ending_container->{'contents'}->[-1]->{'cmdname'} eq 'c'
or $previous_ending_container->{'contents'}->[-1]->{'cmdname'} eq 'comment')))) {
unshift @gathered_index_entries, pop @{$previous_ending_container->{'contents'}};
}
#print STDERR "Gathered: @gathered_index_entries\n";
if (scalar(@gathered_index_entries)) {
# put back leading comments
while ($gathered_index_entries[0]
and (!$gathered_index_entries[0]->{'type'}
or $gathered_index_entries[0]->{'type'} ne 'index_entry_command')) {
#print STDERR "Putting back $gathered_index_entries[0] $gathered_index_entries[0]->{'cmdname'}\n";
push @{$previous_ending_container->{'contents'}},
shift @gathered_index_entries;
}
# We have the index entries of the previous @item or before item.
# Now put them right after the current @item command.
if (scalar(@gathered_index_entries)) {
my $item_container;
if ($item->{'contents'} and $item->{'contents'}->[0]
and $item->{'contents'}->[0]->{'type'}
and $item->{'contents'}->[0]->{'type'} eq 'preformatted') {
$item_container = $item->{'contents'}->[0];
} else {
$item_container = $item;
}
foreach my $entry(@gathered_index_entries) {
$entry->{'parent'} = $item_container;
}
if ($item_container->{'contents'}
and $item_container->{'contents'}->[0]
and $item_container->{'contents'}->[0]->{'type'}
and $item_container->{'contents'}->[0]->{'type'} eq 'ignorable_spaces_after_command') {
$item_container->{'contents'}->[0]->{'text'} .= "\n"
if ($item_container->{'contents'}->[0]->{'text'} !~ /\n$/);
unshift @gathered_index_entries, shift @{$item_container->{'contents'}};
}
unshift @{$item_container->{'contents'}}, @gathered_index_entries;
}
}
}
$previous = $item;
}
}
sub _move_index_entries_after_items($$)
{
my $type = shift;
my $current = shift;
if ($current->{'cmdname'} and ($current->{'cmdname'} eq 'enumerate'
or $current->{'cmdname'} eq 'itemize')) {
move_index_entries_after_items($current);
}
return ($current);
}
# For @itemize/@enumerate
sub move_index_entries_after_items_in_tree($)
{
my $tree = shift;
return modify_tree($tree, \&_move_index_entries_after_items);
}
sub _relate_index_entries_to_table_items_in($$)
{
my $table = shift;
my $indices_information = shift;
return unless $table->{'contents'};
foreach my $table_entry (@{$table->{'contents'}}) {
next unless $table_entry->{'contents'}
and $table_entry->{'type'} eq 'table_entry';
my $term = $table_entry->{'contents'}->[0];
my $definition;
my $item;
my @moved_index_entries;
# Move any index entries from the start of a 'table_definition' to
# the 'table_term'.
if (defined($table_entry->{'contents'}->[1])
and defined($table_entry->{'contents'}->[1]->{'type'})
and $table_entry->{'contents'}->[1]->{'type'} eq 'table_definition') {
$definition = $table_entry->{'contents'}->[1];
while (defined($definition->{'contents'}->[0])) {
my $child = $definition->{'contents'}->[0];
last if !defined($child->{'type'})
or $child->{'type'} ne 'index_entry_command';
shift @{$definition->{'contents'}};
push @moved_index_entries, $child;
$child->{'parent'} = $term;
}
unshift @{$term->{'contents'}}, @moved_index_entries;
}
if (defined($term->{'type'}) and $term->{'type'} eq 'table_term') {
# Relate the first index_entry_command in the 'table_term' to
# the term itself.
my $index_entry;
foreach my $content (@{$term->{'contents'}}) {
if ($content->{'type'}
and $content->{'type'} eq 'index_entry_command') {
my $index_info;
($index_entry, $index_info)
= Texinfo::Common::lookup_index_entry(
$content->{'extra'}->{'index_entry'},
$indices_information)
unless $index_entry;
} elsif ($content->{'cmdname'} and $content->{'cmdname'} eq 'item') {
$item = $content unless $item;
}
if ($item and $index_entry) {
# This is better than overwriting 'entry_element', which
# holds important information.
$index_entry->{'entry_associated_element'} = $item;
last;
}
}
}
}
}
# Locate all @tables in the tree, and relate index entries to
# the @item that immediately follows or precedes them.
sub _relate_index_entries_to_table_items($$$)
{
my $type = shift;
my $current = shift;
my $indices_information = shift;
return $current unless $current->{'cmdname'};
if ($current->{'cmdname'} eq 'table') {
_relate_index_entries_to_table_items_in($current, $indices_information);
}
return $current;
}
sub relate_index_entries_to_table_items_in_tree($$)
{
my $tree = shift;
my $indices_information = shift;
return modify_tree($tree, \&_relate_index_entries_to_table_items,
$indices_information);
}
# Common to different module, but not meant to be used in user-defined
# codes.
# Register a target element associated to a label that may be the target of
# a reference and must be unique in the document. Corresponds to @node,
# @anchor, and @float (float label corresponds to the second argument).
#
# $TARGET_ELEMENTS_LIST array reference for elements associated to a label
# $TARGET_ELEMENT is the tree element associated to the label.
sub register_label($$)
{
my ($target_elements_list, $target_element) = @_;
# register the element in the list.
push @{$target_elements_list}, $target_element;
}
sub get_label_element($)
{
my $current = shift;
if (($current->{'cmdname'} eq 'node' or $current->{'cmdname'} eq 'anchor')
and $current->{'args'} and scalar(@{$current->{'args'}})) {
return $current->{'args'}->[0]
} elsif ($current->{'cmdname'} eq 'float'
and $current->{'args'} and scalar(@{$current->{'args'}}) >= 2) {
return $current->{'args'}->[1];
}
return undef;
}
# functions used for debugging. May be used in other modules.
# Not documented.
sub _parent_string($)
{
my $current = shift;
my $parent_string;
if ($current->{'parent'}) {
my $parent = $current->{'parent'};
my $parent_cmd = '';
my $parent_type = '';
$parent_cmd = "\@$parent->{'cmdname'}" if (defined($parent->{'cmdname'}));
$parent_type = "($parent->{'type'})" if (defined($parent->{'type'}));
$parent_string = " <- $parent_cmd$parent_type";
}
return $parent_string
}
sub debug_command_name($)
{
my $cmdname = shift;
if ($cmdname eq "\n") {
return '\n';
} elsif ($cmdname eq "\t") {
return '\t';
} else {
return $cmdname;
}
}
# informations on a tree element, short version
sub debug_print_element($;$)
{
my $current = shift;
my $print_parent = shift;
if (!defined($current)) {
return "debug_print_element: UNDEF\n";
}
if (ref($current) ne 'HASH') {
return "debug_print_element: $current not a hash\n";
}
my $type = '';
my $cmd = '';
my $text = '';
$type = "($current->{'type'})" if (defined($current->{'type'}));
# specific of HTML
$type .= '{'.$current->{'extra'}->{'special_element_type'}.'}'
if (defined($current->{'extra'})
and defined($current->{'extra'}->{'special_element_type'}));
if (defined($current->{'cmdname'})) {
$cmd = '@' . debug_command_name($current->{'cmdname'});
}
if (defined($current->{'text'}) and $current->{'text'} ne '') {
my $text_str = $current->{'text'};
$text_str =~ s/\n/\\n/g;
$text = "[T: $text_str]";
}
my $args = '';
my $contents = '';
$args = "[A".scalar(@{$current->{'args'}}).']' if $current->{'args'};
$contents = "[C".scalar(@{$current->{'contents'}}).']'
if $current->{'contents'};
my $parent_string = '';
if ($print_parent) {
$parent_string = _parent_string($current);
$parent_string = '' if (!defined($parent_string));
}
return "$cmd$type$text$args$contents$parent_string";
}
# for debugging
sub debug_print_element_details($;$)
{
my $current = shift;
my $print_parent = shift;
my $string = debug_print_element($current, $print_parent);
foreach my $key (keys (%$current)) {
$string .= " $key: $current->{$key}\n";
}
if ($current->{'extra'}) {
$string .= " EXTRA\n";
foreach my $key (keys (%{$current->{'extra'}})) {
$string .= " $key: $current->{'extra'}->{$key}\n";
}
}
return $string;
}
# format list for debugging messages
sub debug_list
{
my $label = shift;
my (@list) = (ref $_[0] && $_[0] =~ /.*ARRAY.*/) ? @{$_[0]} : @_;
my $str = "$label: [";
my @items = ();
for my $item (@list) {
$item = "" if ! defined ($item);
$item =~ s/\n/\\n/g;
push (@items, $item);
}
$str .= join (" ", @items);
$str .= "]";
warn "$str\n";
}
# format hash for debugging messages
sub debug_hash
{
my ($label) = shift;
my (%hash) = (ref $_[0] && $_[0] =~ /.*HASH.*/) ? %{$_[0]} : @_;
my $str = "$label: {";
my @items = ();
for my $key (sort keys %hash) {
my $val = $hash{$key} || ""; # no undef
$key =~ s/\n/\\n/g;
$val =~ s/\n/\\n/g;
push (@items, "$key:$val");
}
$str .= join (",", @items);
$str .= "}";
warn "$str\n";
}
use Data::Dumper;
my @kept_keys = ('contents', 'cmdname', 'type', 'text', 'args',
'extra', 'info', 'def_role', 'spaces_before_argument',
'spaces_after_argument', 'comment_at_end', 'index_entry'
);
my %kept_keys;
foreach my $key (@kept_keys) {
$kept_keys{$key} = 1;
}
sub _filter_print_keys { [grep {$kept_keys{$_}} ( sort keys %{$_[0]} )] };
sub debug_print_tree($)
{
my $tree = shift;
local $Data::Dumper::Sortkeys = \&_filter_print_keys;
local $Data::Dumper::Purity = 1;
local $Data::Dumper::Indent = 1;
return Data::Dumper->Dump([$tree]);
}
1;
__END__
=head1 NAME
Texinfo::Common - Texinfo modules common data and miscellaneous methods
=head1 SYNOPSIS
use Texinfo::Common;
my @commands_to_collect = ('math');
my $collected_commands
= Texinfo::Common::collect_commands_in_tree($document_root,
\@commands_to_collect);
=head1 NOTES
The Texinfo Perl module main purpose is to be used in C to convert
Texinfo to other formats. There is no promise of API stability.
=head1 DESCRIPTION
Texinfo::Common holds hashes with miscellaneous information and some
hashes with information on Texinfo @-commands, as well as miscellaneous
methods.
=head1 MISC INFORMATION
Hashes are defined as C variables, and are therefore available
outside of the module.
TODO: undocumented
%null_device_file %default_parser_customization_values %document_settable_multiple_at_commands %document_settable_unique_at_commands %default_converter_command_line_options %default_main_program_customization_options %default_converter_customization @variable_string_settables %document_settable_at_commands %def_map %command_structuring_level %level_to_structuring_command %encoding_name_conversion_map
=over
=item %texinfo_output_formats
X>
Cannonical output formats that have associated conditionals. In
practice corresponds to C C<%block_commands> plus C
and C.
=back
=head1 @-COMMAND INFORMATION
Hashes are defined as C variables, and are therefore available
outside of the module.
The key of the hashes are @-command names without the @. The
following hashes are available:
=over
=item %all_commands
X>
All the @-commands.
=item %def_aliases
=item %def_no_var_arg_commands
X>
X>
C<%def_aliases> associates an aliased command to the original command, for
example C is associated to C.
C<%def_no_var_arg_commands> associates a definition command name with
a true value if the I on the definition command line can contain
non-metasyntactic variables. For instance, it is true for C
but false for C, since C<@defun> I is supposed to contain
metasyntactic variables only.
=item %nobrace_symbol_text
X>
Values are ASCII representation of single character non-alphabetical commands
without brace such as C<*> or C<:>. The value may be an empty string.
=item %non_formatted_brace_commands
Brace commands that are not immediately replaced with text, such as
C, C, C and others.
=item %small_block_associated_command
X>
Associate small command like C to the regular command
C.
=back
=head1 METHODS
Two methods are exported in the default case for Texinfo modules messages
translation in the Uniforum gettext framework, C<__> and C<__p>.
The Texinfo tree and Texinfo tree elements used in argument of some functions
are documented in L. When customization
information is needed, an object that defines C and/or C is
expected, for example a converter inheriting from
C, see L.
=over
=item $translated_string = __($msgid)
=item $translated_string = __p($msgctxt, $msgid)
Returns the I<$msgid> string translated in the Texinfo messages text domain.
C<__p> can be used instead of C<__> to pass a I<$msgctxt> context string to
provide translators with information on the string context when the string is
short or if the translation could depend on the context. C<__> corresponds to
the C function and C<__p> to the C function.
It is not advised to use those functions in user-defined code. It is not
practical either, as the translatable strings marked by C<__> or C<__p> need to
be collected and added to the Texinfo messages domain. This facility could
only be used in user-defined code with translatable strings already present in
the domain anyway. In fact, these functions are documented mainly because they
are automatically exported.
See L,
L C interface|https://www.gnu.org/software/gettext/manual/html_node/gettext.html>,
L.
For translation of strings in output, see L.
=item collect_commands_in_tree($tree, $commands_list)
X>
Returns a hash reference with keys @-commands names specified
in the I<$commands_list> array reference and values arrays of
tree elements corresponding to those @-command found in I<$tree>
by traversing the tree.
=item collect_commands_list_in_tree($tree, $commands_list)
X>
Return a list reference containing the tree elements corresponding
to the @-commands names specified in the I<$commands_list> found
in I<$tree> by traversing the tree. The order of the @-commands
should be kept.
=item $encoding_name = element_associated_processing_encoding($element)
X>
Returns the encoding name that can be used for decoding derived
from the encoding that was set where I<$element> appeared.
=item $result = element_is_inline($element, $check_current)
X>
Return true if the element passed in argument is in running text
context. If the optional I<$check_current> argument is set,
check the element itself, in addition to the parent context.
=item ($encoded_file_name, $encoding) = encode_file_name($file_name, $input_encoding)
Encode the I<$file_name> text string to a binary string I<$encoded_file_name>
based on I<$input_encoding>. Also returns the I<$encoding> name actually
used which may have undergone some normalization. This function is mostly
a wrapper around L which avoids calling the module if not
needed. Do nothing if I<$input_encoding> is C.
=item $text = enumerate_item_representation($specification, $number)
X>
This function returns the number or letter correponding to item
number I<$number> for an C<@enumerate> specification I<$specification>,
appearing on an C<@enumerate> line. For example
enumerate_item_representation('c', 3)
is C.
=item $command = find_parent_root_command($object, $tree_element)
X>
Find the parent root command (sectioning command or node) of a tree element.
The I<$object> argument is optional, its C field is used
to continue through C<@insertcopying> if in a C<@copying>.
=item $entry_content_element = index_content_element($element, $prefer_reference_element)
Return a Texinfo tree element corresponding to the content of the index
entry associated to I<$element>. If I<$prefer_reference_element> is set,
prefer an untranslated element. If the element is an index command like
C<@cindex> or an C<@ftable> C<@item>, the content element is the argument
of the command. If the element is a definition line, the index entry
element is based on the name and class.
=item $result = is_content_empty($tree, $do_not_ignore_index_entries)
X>
Return true if the I<$tree> has content that could be formatted.
I<$do_not_ignore_index_entries> is optional. If set, index entries
are considered to be formatted.
=item $file = locate_include_file($customization_information, file_path)
X>
Locate I<$file_path>. If I<$file_path> is an absolute path or has C<.>
or C<..> in the path directories it is checked that the path exists and is a
file. Otherwise, the file name in I<$file_path> is located in include
directories also used to find texinfo files included in Texinfo documents.
I<$file_path> should be a binary string. C is returned if the file was
not found, otherwise the file found is returned as a binary string.
=item ($index_entry, $index_info) = lookup_index_entry($index_entry_info, $indices_information)
Returns an I<$index_entry> hash based on the I<$index_entry_info> and
I<$indices_information>. Also returns the I<$index_info> hash with information on
the index associated to the index entry. I<$index_entry_info> should be
an array reference with an index name as first element and the index entry number
in that index (1-based) as second element. In general, the I<$index_entry_info>
is an L I|Texinfo::Parser/index_entry> associated to an element.
The I<$index_entry> hash is described in L. The
I<$index_info> hash is described in LL<< C|Texinfo::Parser/$indices_information = $parser->indices_information() >>.
=item move_index_entries_after_items_in_tree($tree)
X>
In C<@enumerate> and C<@itemize> from the tree, move index entries
appearing just before C<@item> after the C<@item>. Comment lines
between index entries are moved too.
=item relate_index_entries_to_table_items_in_tree($tree)
X>
In tables, relate index entries preceding and following an
entry with said item. Reference one of them in the entry's
C.
=item $normalized_name = normalize_top_node_name($node_string)
X>
Normalize the node name string given in argument, by normalizing
Top node case.
=item protect_colon_in_tree($tree)
=item protect_node_after_label_in_tree($tree)
X>
X>
Protect colon with C and characters that
are special in node names after a label in menu entries (tab
dot and comma) with C.
The protection is achieved by putting protected characters
in C<@asis{}>.
=item protect_comma_in_tree($tree)
X>
Protect comma characters, replacing C<,> with @comma{} in tree.
=item $contents_result = protect_first_parenthesis($contents)
X>
Return a contents array reference with first parenthesis in the
contents array reference protected. If I<$contents> is undef
a fatal error with a backtrace will be emitted.
=item $level = section_level($section)
X>
Return numbered level of the tree sectioning element I<$section>, as modified by
raise/lowersections.
=item $element = set_global_document_command($customization_information, $global_commands_information, $cmdname, $command_location)
X>
Set the Texinfo customization variable corresponding to I<$cmdname> in
I<$customization_information>. The I<$global_commands_information> should
contain information about global commands in a Texinfo document, typically obtained
from a parser L<< $parser->global_commands_information()|Texinfo::Parser/$commands = global_commands_information($parser) >>.
I<$command_location> specifies where in the document the value should be taken from,
for commands that may appear more than once. The possibilities are:
=over
=item last
Set to the last value for the command.
=item preamble
Set sequentially to the values in the Texinfo preamble.
=item preamble_or_first
Set to the first value of the command if the first command is not
in the Texinfo preamble, else set as with I,
sequentially to the values in the Texinfo preamble.
=back
The I<$element> returned is the last element that was used to set the
customization value, or C if no customization value was found.
Notice that the only effect of this function is to set a customization
variable value, no @-command side effects are run, no associated customization
variables are set.
=item $status = set_informative_command_value($customization_information, $element)
X>
Set the Texinfo customization option corresponding to the tree element
I<$element>. The command associated to the tree element should be
a command that sets some information, such as C<@documentlanguage>,
C<@contents> or C<@footnotestyle> for example. Return true if the command
argument was found and the customization variable was set.
=item set_output_encodings($customization_information, $parser_information)
X>
If not already set, set C based on input file
encoding. Also set C accordingly which is used
to output in the correct encoding. In general, C
should not be set directly by user-defined code such that it corresponds
to C.
=item $split_contents = split_custom_heading_command_contents($contents)
X>
Split the I<$contents> array reference at C<@|> in at max three parts.
Return an array reference containing the split parts. The I<$contents>
array reference is supposed to be C<< $element->{'args'}->[0]->{'contents'} >>
of C<%Texinfo::Commands::heading_spec_commands> commands such as C<@everyheading>.
=item trim_spaces_comment_from_content($contents)
X>
Remove empty spaces after commands or braces at begin and
spaces and comments at end from a content array, modifying it.
=item $status = valid_customization_option($name)
X>
Return true if the I<$name> is a known customization option.
=item $status = valid_tree_transformation($name)
X>
Return true if the I<$name> is a known tree transformation name
that may be passed with C to modify a texinfo
tree.
=back
=head1 SEE ALSO
L, L and L.
=head1 AUTHOR
Patrice Dumas, Epertusus@free.frE
=head1 COPYRIGHT AND LICENSE
Copyright 2010- Free Software Foundation, Inc. See the source file for
all copyright years.
This library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
=cut