/*
See license.txt in the root of this project.
*/
# ifndef LMT_INPUTSTACK_H
# define LMT_INPUTSTACK_H
/*tex
The state of \TEX's input mechanism appears in the input stack, whose entries are records with
six fields, called |state|, |index|, |start|, |loc|, |limit|, and |name|.
*/
/* todo: there is no need to be sparse here */
/*tex
    One level of the input stack. The same record is used in two ways: when a line of characters
    is being read the fields describe a slice of the buffer, and when a token list is being read
    (|state| equals |token_list_state|) they describe that list instead. The two anonymous unions
    give each interpretation its own field name.
*/
typedef struct in_state_record {
halfword start; /*tex Buffer position where the current line begins, or the first node of the token list. */
halfword loc; /*tex Next buffer position to read, or the next node of the token list. */
unsigned short state; /*tex One of the |state_codes|. */
union { unsigned short index; unsigned short token_type; }; /*tex Anonymous union, so no macro but a name: |index| for lines, |token_type| (see |token_types|) for token lists. */
union { halfword limit; halfword parameter_start; }; /*tex Anonymous union, so no macro but a name: |limit| for lines, |parameter_start| for macro token lists. */
halfword name; /*tex Holds one of the |io_codes|; the documentation further down also mentions the |eqtb| address of a macro being expanded. */
short cattable; /*tex The category table used by the current line (see |textoken.c|). */
unsigned short partial; /*tex Is the current line partial (see |textoken.c|)? */
int state_file; /*tex Here we stack the tag of the current file; |input_file_value| returns it unless a file is forced. */
int state_line; /*tex Not used. */
} in_state_record;
/*tex
    Additional data that is kept per entry of |in_stack| (see |input_state_info|). Judging by the
    field names this is the file related state of an input level, next to the scanner related
    state that lives in |in_state_record|.
*/
typedef struct input_stack_record {
halfword input_file_callback_id; /* lua reference */
halfword line; /* presumably the saved line number of this level; confirm in the implementation */
halfword end_of_file_seen; /* just a boolean (we could use a negative line number) */
halfword group; /* stack boundary pointer */
halfword if_ptr;
halfword at_end_of_file; /* token list to be flushed when we're done reading */
// halfword padding;
char *full_source_filename; /* ownership of this string is not visible in this header */
} input_stack_record;
// todo: better names for in_state_record and input_stack_record ... now mixed up
/*tex
    The global input state: three stacks, each paired with a |memory_data| bookkeeping record
    (that type is declared elsewhere), the current input level, and a handful of scanner related
    variables.
*/
typedef struct input_state_info {
in_state_record *input_stack; /*tex The stack of input levels. */
memory_data input_stack_data; /*tex Bookkeeping for |input_stack|. */
input_stack_record *in_stack; /*tex The stack of |input_stack_record| entries. */
memory_data in_stack_data; /*tex Bookkeeping for |in_stack|. */
halfword *parameter_stack; /*tex Macro parameters; |parameter_start| in |in_state_record| refers to this stack. */
memory_data parameter_stack_data; /*tex Bookkeeping for |parameter_stack|. */
in_state_record cur_input; /*tex The \quote {top} input state. Why not just pointing. */
int input_line; /*tex The line number that |input_line_value| falls back on when nothing else is set. */
int scanner_status; /*tex One of the |scanner_states|. */
halfword def_ref; /*tex Has to be set for error recovery etc. */
int align_state;
int base_ptr;
halfword warning_index; /*tex When |scanner_status| is not normal: what to mention in an error message. */
int open_files;
int padding;
} input_state_info;
extern input_state_info lmt_input_state; /*tex Declared here, defined in the implementation. */
/*tex
    Overrides for the file tag and line number that get reported by |input_file_value| and
    |input_line_value|. In all cases a zero value means \quote {not set}.
*/
typedef struct input_file_state_info {
int forced_file; /*tex When non-zero this wins over the |state_file| of the current input level. */
int forced_line; /*tex When non-zero this wins over every other line number. */
halfword mode; /*tex Not used in this header. */
halfword line; /*tex When non-zero (and no line is forced) this wins over |lmt_input_state.input_line|. */
} input_file_state_info;
extern input_file_state_info input_file_state; /*tex Declared here, defined in the implementation. */
/*tex
    The file tag to report for the current input: a non-zero |forced_file| takes precedence,
    otherwise we return the |state_file| of the current input level.
*/

static inline int input_file_value(void)
{
    const int forced = input_file_state.forced_file;
    if (forced) {
        return forced;
    }
    return lmt_input_state.cur_input.state_file;
}
/*tex
    The line number to report for the current input. We try, in this order and taking the first
    non-zero one: |forced_line|, the |line| in |input_file_state|, and finally the |input_line|
    in |lmt_input_state| (which is returned as-is, also when zero).
*/

static inline int input_line_value(void)
{
    if (input_file_state.forced_line) {
        return input_file_state.forced_line;
    }
    if (input_file_state.line) {
        return input_file_state.line;
    }
    return lmt_input_state.input_line;
}
/*tex
In \LUAMETATEX\ the io model was stepwise changed a bit, mostly in the \LUA\ feedback area.
Support for nodes, tokens, short and long string were improved. Around 2.06.17 specification
nodes became dynamic and that left the pseudo files as only variable node type. By removing
variable nodes we can avoid some code in node management so getting rid of pseudo files made
sense. The token scan macros used these but now use a lightweight variant of the \LUA\ scanner,
which we had anyway. The only complication is the |\everyeof| of |\scantokens|. Also, tracing
(if at all) is now different but these three scanners are seldom used and were introduced in
\ETEX\ (|scantokens|), \LUATEX\ (|\scantextokens|) and \LUAMETATEX\ (|tokenized|). The new
approach also gives more room for future extensions.
All this has been a very stepwise process, because we know that there are users who use \LMTX\
in production and small steps are easier to test. Experiments mostly happen in parts of the
code that is less critical ... after all \LUAMETATEX\ is also an experimental engine ... but
io related code changes are kind of critical.
Just to remember what we came from: the first 15 were reserved read channels but that is now
delegated to \LUA, so we had an offset of 16 in:
*/
/*tex
    The symbolic codes that can end up in the |name| field of an input level. The order matters
    because the two classification macros below test for ranges.
*/

typedef enum io_codes {
    io_initial_input_code,
    io_lua_input_code,
    io_token_input_code,
    io_token_eof_input_code,
    io_tex_macro_code,
    io_file_input_code,
} io_codes;

/*
    Now, these |io_codes| are used in the name field but that field can also be a way larger number,
    i.e.\ the string index of the file. That also assumes that the first used index is above the last
    io_code. It can be the warning index too, just for the sake of an error context message. So:
    symbolic (small) number, tex string being the filename, and macro name. But, because we also
    have that information in other places (partly as side effect of luafication) a simpler model is
    used now where we use a few dedicated codes. It also means that we no longer store the filename
    in the string pool.
*/

/*tex
    Range tests on a |name| value: |io_token_input| is true for the codes from |io_lua_input_code|
    up to and including |io_token_eof_input_code|, and |io_file_input| is true for
    |io_file_input_code| and everything above it. The argument is parenthesized so that an
    expression argument (say |a & b|) is compared as a whole. Be aware that |io_token_input|
    evaluates its argument twice, so don't pass expressions with side effects.
*/

# define io_token_input(c) ((c) >= io_lua_input_code && (c) <= io_token_eof_input_code)
# define io_file_input(c)  ((c) >= io_file_input_code)
/*tex
Let's look more closely now at the control variables (|state|, |index|, |start|, |loc|, |limit|,
|name|), assuming that \TEX\ is reading a line of characters that have been input from some file
or from the user's terminal. There is an array called |buffer| that acts as a stack of all lines
of characters that are currently being read from files, including all lines on subsidiary levels
of the input stack that are not yet completed. \TEX\ will return to the other lines when it is
finished with the present input file.
(Incidentally, on a machine with byte-oriented addressing, it might be appropriate to combine
|buffer| with the |str_pool| array, letting the buffer entries grow downward from the top of the
string pool and checking that these two tables don't bump into each other.)
The line we are currently working on begins in position |start| of the buffer; the next character
we are about to read is |buffer[loc]|; and |limit| is the location of the last character present.
If |loc > limit|, the line has been completely read. Usually |buffer[limit]| is the
|end_line_char|, denoting the end of a line, but this is not true if the current line is an
insertion that was entered on the user's terminal in response to an error message.
The |name| variable is a string number that designates the name of the current file, if we are
reading a text file. It is zero if we are reading from the terminal; it is |n+1| if we are reading
from input stream |n|, where |0 <= n <= 16|. (Input stream 16 stands for an invalid stream number;
in such cases the input is actually from the terminal, under control of the procedure |read_toks|.)
Finally |18 <= name <=20| indicates that we are reading a pseudo file created by the |\scantokens|
or |\scantextokens| command. A larger value is reserved for input coming from \LUA.
The |state| variable has one of three values, when we are scanning such files:
\startitemize
\startitem
|mid_line| is the normal state.
\stopitem
\startitem
|skip_blanks| is like |mid_line|, but blanks are ignored.
\stopitem
\startitem
|new_line| is the state at the beginning of a line.
\stopitem
\stopitemize
These state values are assigned numeric codes so that if we add the state code to the next
character's command code, we get distinct values. For example, |mid_line + spacer| stands for the
case that a blank space character occurs in the middle of a line when it is not being ignored;
after this case is processed, the next value of |state| will be |skip_blanks|.
As with other constants, we only add some prefix or suffix but keep the normal name as much as
possible, so that the original documentation still applies.
*/
/*
We could have |token_array_state| for a packed representation of really permanent macros if we
freeze permanent. If we don't freeze we need an extra bit to flag a macro as using the array
but we don't have a bit left. Packing could happen before we dump and would make the body half
the size. Fetching from an array is a middle ground between a token list and a file and could be a
bit faster and definitely make for a smaller format file. In the end it might not really pay
off and it is also a bit un-TeX.
*/
/*tex
    The values that the |state| field of an input level can take. The first one signals that we
    read from a token list, the other three are used when scanning lines of characters. The line
    states are spaced |max_category_code| (defined elsewhere) apart; the documentation above
    explains that adding a state code to a character's command code has to give distinct values.
*/
typedef enum state_codes {
/*tex when reading from a token list */
token_list_state = 0,
/*tex when scanning a line of characters */
mid_line_state = 1,
/*tex when ignoring blanks */
skip_blanks_state = 2 + max_category_code,
/*tex at the start of a line */
new_line_state = 3 + max_category_code + max_category_code,
} state_codes;
/*tex
Additional information about the current line is available via the |index| variable, which
counts how many lines of characters are present in the buffer below the current level. We
have |index = 0| when reading from the terminal and prompting the user for each line; then if
the user types, e.g., |\input paper|, we will have |index = 1| while reading the file
|paper.tex|. However, it does not follow that |index| is the same as the input stack pointer,
since many of the levels on the input stack may come from token lists. For example, the
instruction |\input paper| might occur in a token list.
The global variable |in_open| is equal to the |index| value of the highest \quote {non token
list} level. Thus, the number of partially read lines in the buffer is |in_open + 1|, and we
have |in_open = index| when we are not reading a token list.
If we are not currently reading from the terminal, or from an input stream, we are reading from
the file variable |input_file [index]|. We use the notation |terminal_input| as a convenient
abbreviation for |name = 0|, and |cur_file| as an abbreviation for |input_file [index]|.
The global variable |line| contains the line number in the topmost open file, for use in error
messages. If we are not reading from the terminal, |line_stack [index]| holds the line number
of the enclosing level, so that |line| can be restored when the current file has been read.
Line numbers should never be negative, since the negative of the current line number is used to
identify the user's output routine in the |mode_line| field of the semantic nest entries.
If more information about the input state is needed, it can be included in small arrays like
those shown here. For example, the current page or segment number in the input file might be
put into a variable |page|, maintained for enclosing levels in |page_stack: array [1 ..
max_input_open] of integer| by analogy with |line_stack|.
Users of \TEX\ sometimes forget to balance left and right braces properly, and one of the ways
\TEX\ tries to spot such errors is by considering an input file as broken into subfiles by
control sequences that are declared to be |\outer|.
A variable called |scanner_status| tells \TEX\ whether or not to complain when a subfile ends.
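This variable had six possible values in traditional \TEX\ (listed below); in \LUAMETATEX\ there
is a seventh one, |tolerant|, that is used when reading tolerant macro arguments: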
\startitemize
\startitem
|normal|, means that a subfile can safely end here without incident.
\stopitem
\startitem
|skipping|, means that a subfile can safely end here, but not a file, because we're reading
past some conditional text that was not selected.
\stopitem
\startitem
|defining|, means that a subfile shouldn't end now because a macro is being defined.
\stopitem
\startitem
|matching|, means that a subfile shouldn't end now because a macro is being used and we are
searching for the end of its arguments.
\stopitem
\startitem
|aligning|, means that a subfile shouldn't end now because we are not finished with the
preamble of an |\halign| or |\valign|.
\stopitem
\startitem
|absorbing|, means that a subfile shouldn't end now because we are reading a balanced token
list for |\message|, |\write|, etc.
\stopitem
\stopitemize
If the |scanner_status| is not |normal|, the variable |warning_index| points to the |eqtb|
location for the relevant control sequence name to print in an error message.
*/
/*tex
    The values of |scanner_status|; the documentation above describes when a subfile may end in
    each of them.
*/
typedef enum scanner_states {
scanner_is_normal, /*tex a subfile can safely end here */
scanner_is_skipping, /*tex passing conditional text */
scanner_is_defining, /*tex reading a macro definition */
scanner_is_matching, /*tex reading macro arguments */
scanner_is_tolerant, /*tex reading tolerant macro arguments */
scanner_is_aligning, /*tex reading an alignment preamble */
scanner_is_absorbing, /*tex reading a balanced text */
} scanner_states;
extern void tex_show_runaway(void); /*tex This is only used when running out of token memory. */
/*tex
However, the discussion about input state really applies only to the case that we are inputting
from a file. There is another important case, namely when we are currently getting input from a
token list. In this case |state = token_list|, and the conventions about the other state
variables are different:
\startitemize
\startitem
|loc| is a pointer to the current node in the token list, i.e., the node that will be read
next. If |loc=null|, the token list has been fully read.
\stopitem
\startitem
|start| points to the first node of the token list; this node may or may not contain a
reference count, depending on the type of token list involved.
\stopitem
\startitem
|token_type|, which takes the place of |index| in the discussion above, is a code number
that explains what kind of token list is being scanned.
\stopitem
\startitem
|name| points to the |eqtb| address of the control sequence being expanded, if the current
token list is a macro.
\stopitem
\startitem
|parameter_start|, which takes the place of |limit|, tells where the parameters of the current
macro begin in the |parameter_stack|, if the current token list is a macro.
\stopitem
\stopitemize
The |token_type| can take several values, depending on where the current token list came from:
\startitemize
\startitem
|parameter|, if a parameter is being scanned;
\stopitem
\startitem
|u_template|, if the |u_j| part of an alignment template is being scanned;
\stopitem
\startitem
|v_template|, if the |v_j| part of an alignment template is being scanned;
\stopitem
\startitem
|backed_up|, if the token list being scanned has been inserted as \quotation {to be read
again}.
\stopitem
\startitem
|inserted|, if the token list being scanned has been inserted as the text expansion of a
|\count| or similar variable;
\stopitem
\startitem
|macro|, if a user-defined control sequence is being scanned;
\stopitem
\startitem
|output_text|, if an |\output| routine is being scanned;
\stopitem
\startitem
|every_par_text|, if the text of |\everypar| is being scanned;
\stopitem
\startitem
|every_math_text|, if the text of |\everymath| is being scanned;
\stopitem
\startitem
|every_display_text|, if the text of |\everydisplay| is being scanned;
\stopitem
\startitem
|every_hbox_text|, if the text of |\everyhbox| is being scanned;
\stopitem
\startitem
|every_vbox_text|, if the text of |\everyvbox| is being scanned;
\stopitem
\startitem
|every_job_text|, if the text of |\everyjob| is being scanned;
\stopitem
\startitem
|every_cr_text|, if the text of |\everycr| is being scanned;
\stopitem
\startitem
|mark_text|, if the text of a |\mark| is being scanned;
\stopitem
\startitem
|write_text|, if the text of a |\write| is being scanned.
\stopitem
\stopitemize
The codes for |output_text|, |every_par_text|, etc., are equal to a constant plus the
corresponding codes for token list parameters |output_routine_loc|, |every_par_loc|, etc.
The token list begins with a reference count if and only if |token_type >= macro|.
Since \ETEX's additional token list parameters precede |toks_base|, the corresponding token
types must precede |write_text|. However, in \LUAMETATEX\ we delegate all the read and write
primitives to \LUA\ so that model has been simplified.
*/
/* #define token_type input_state.cur_input.token_type */ /*tex type of current token list */
/* #define param_start input_state.cur_input.param_start */ /*tex base of macro parameters in |param_stack| */
/*tex
    The kinds of token list that can be on the input stack; the value is kept in the |token_type|
    field of an input level. The order is significant: the entries are grouped by how the list is
    referenced and flushed, and the documentation above states that a list starts with a reference
    count if and only if its type is at least the macro one. So, don't reorder without checking
    the code that compares these values.
*/
typedef enum token_types {
/*tex This one is unreferenced and always flushed at the end of a macro. */
parameter_text, /*tex parameter */
/*tex These are managed by the node handlers and flushed there. */
template_pre_text, /*tex |u_j| template */
template_post_text, /*tex |v_j| template */
/*tex This one is rather special too and never flushed. */
associated_text, /*tex used in units */
/*tex These are unreferenced and always flushed at the end. */
backed_up_text, /*tex text to be reread */
inserted_text, /*tex inserted texts */
/*tex This one is referenced in the begin call and dereferenced at the end. */
macro_text, /*tex defined control sequences */
/*tex These are referenced in the begin call and dereferenced at the end. */
output_text,
every_par_text,
every_par_begin_text,
every_par_end_text,
every_math_text,
every_display_text,
every_hbox_text,
every_vbox_text,
every_math_atom_text,
every_job_text,
every_cr_text,
every_tab_text,
error_help_text,
every_before_par_text,
every_eof_text,
/*tex These could be unreferenced and always flushed (different begin call). */
end_of_group_text, /*tex |\atendofgroup| */
mark_text, /*tex |\topmark|, etc. */
token_text, /*tex */
loop_text, /*tex */
end_paragraph_text, /*tex |\everyendpar| */
end_file_text,
write_text, /*tex |\write| */
local_text, /*tex */
local_loop_text, /*tex */
/*tex This is not a token type but the number of types; it has to come last. */
n_of_token_types,
} token_types;
/*tex
    The functions that this module exports. Only the prototypes are visible here, so the grouping
    comments below go by the names; check the implementation for the exact semantics.
*/
/*tex Setup. */
extern void tex_initialize_input_state (void);
/* int tex_room_on_parameter_stack (void); */
/* int tex_room_on_in_stack (void); */
/* int tex_room_on_input_stack (void); */
/*tex Takes |n| entries from |pstack|; presumably these are pushed onto the parameter stack. */
extern void tex_copy_to_parameter_stack (halfword *pstack, int n);
/*tex Reporting and tracing helpers. */
extern void tex_show_context (void);
extern void tex_show_validity (void);
extern void tex_set_trick_count (void);
/*tex
    Starting and ending token list levels. The general one takes the |kind| explicitly; the named
    variants don't, presumably because the name implies one of the |token_types|.
*/
extern void tex_begin_token_list (halfword t, quarterword kind); /* include some tracing */
extern void tex_begin_parameter_list (halfword t);
extern void tex_begin_backed_up_list (halfword t);
extern void tex_begin_inserted_list (halfword t);
extern void tex_begin_associated_list (halfword t);
extern void tex_begin_macro_list (halfword t);
extern void tex_end_token_list (void);
extern void tex_quit_token_list (void);
extern void tex_cleanup_input_state (void);
/*tex Feeding tokens or token lists back into the input. */
extern void tex_back_input (halfword t);
extern void tex_reinsert_token (halfword t);
extern void tex_insert_input (halfword h);
extern void tex_append_input (halfword h);
/*tex File levels. */
extern void tex_begin_file_reading (void);
extern void tex_end_file_reading (void);
extern void tex_initialize_inputstack (void);
/*tex Starting input that comes from strings. */
extern void tex_lua_string_start (void);
extern void tex_tex_string_start (int iotype, int cattable);
extern void tex_any_string_start (char *s);
/*tex Miscellaneous. */
extern halfword tex_wrapped_token_list (halfword h);
extern const char *tex_current_input_file_name (void);
# endif