1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
|
/*****
* miniparse.h : required header file when compiling the parser standalone.
*
* This file Version $Revision: 1.7 $
*
* Creation date: Wed Mar 19 17:26:15 GMT+0100 1997
* Last modification: $Date: 1998/04/27 07:01:07 $
* By: $Author: newt $
* Current State: $State: Exp $
*
* Author: newt
*
* Copyright (C) 1994-1997 by Ripley Software Development
* All Rights Reserved
*
* This file is part of the XmHTML Widget Library.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
/*****
* $Source: /usr/local/rcs/Newt/XmHTML/RCS/miniparse.h,v $
*****/
/*****
* ChangeLog
* $Log: miniparse.h,v $
* Revision 1.7 1998/04/27 07:01:07 newt
* Added _FastLower macro
*
* Revision 1.6 1997/10/23 00:30:39 newt
* XmHTML Beta 1.1.0 release
*
* Revision 1.5 1997/08/30 02:04:25 newt
* _XmHTMLWarning proto changes.
*
* Revision 1.3 1997/05/28 01:56:39 newt
* Added my_strdup.
*
* Revision 1.2 1997/04/29 14:31:41 newt
* Removed unused structures.
*
* Revision 1.1 1997/03/20 08:01:55 newt
* Initial Revision
*
*****/
#ifndef _miniparse_h_
#define _miniparse_h_
/* Mark this as a standalone parser build; a MINIPARSE value that is already defined is kept. */
#if !defined(MINIPARSE)
# define MINIPARSE 1
#endif
#include <sys/time.h>
#include <unistd.h>
#ifdef __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif
#include <errno.h> /* perror */
/*****
* Basic types required by the parser when it is compiled standalone.
* NOTE(review): the names match the usual Xt/XmHTML types -- presumably
* these are stand-ins for them; confirm before mixing with toolkit headers.
*****/
typedef char           *String;        /* pointer to char */
typedef unsigned char   Byte;          /* raw unsigned char value */
typedef unsigned char   Boolean;       /* holds True or False */
typedef unsigned short  Dimension;     /* unsigned short quantity */
typedef unsigned char  *Widget;        /* opaque handle in this header */
typedef Widget          XmHTMLWidget;  /* same type as Widget here */
/* Prototype bracket macros; both expand to nothing in this header. */
#define _XFUNCPROTOBEGIN
#define _XFUNCPROTOEND
/*****
* Parser-wide switches and counters. They are only declared here; the
* storage itself is defined elsewhere.
*****/
extern Boolean parser_warnings;             /* False: no warnings are issued */
extern int     parser_errors;               /* running count of encountered errors */
extern int     parsed_object_count;         /* HTML segments in the input text */
extern int     parsed_text_object_count;    /* text segments in the input text */
extern Boolean parser_strict_checking;      /* False: parser is a bit more lenient */
extern Boolean parser_debug;                /* True: show debug output */
extern Boolean parser_verification_timings; /* True: time the parser tree verification */
/*
* Debug switch: with NDEBUG defined any DEBUG definition is removed;
* otherwise DEBUG is defined to 1 unless it is defined already.
*/
#if defined(NDEBUG)
# undef DEBUG
#elif !defined(DEBUG)
# define DEBUG 1
#endif
/*
* Boolean constants.
* Each name has its own guard: a previous definition of only one of them
* (by another header or the build) neither leaves the other one undefined
* nor causes a redefinition of the one that already exists.
*/
#ifndef True
# define True 1
#endif
#ifndef False
# define False 0
#endif
/*
* tolower macro replacement: the lookup table read by _FastLower().
* Only declared here; the table contents are defined elsewhere.
* NOTE(review): presumably one entry per possible Byte value, each holding
* the lowercase form of its index -- confirm against the definition.
*/
extern const Byte __my_translation_table[];
/*
* _FastLower(x): table lookup used in place of tolower().
* The whole argument is converted to Byte before it is used as the index
* into __my_translation_table, so an expression argument (e.g. c + 1) is
* converted as a unit instead of having the cast bind to its first operand
* only. x is expanded exactly once.
*/
#define _FastLower(x) (__my_translation_table[(Byte)(x)])
/*****
* Internal ids of the HTML elements.
* The list is kept in alphabetical order to speed up the searching
* process, so the order (and therefore every value) must stay as it is.
* DO NOT MODIFY
*****/
typedef enum {
    HT_DOCTYPE,
    /* A */
    HT_A, HT_ADDRESS, HT_APPLET, HT_AREA,
    /* B */
    HT_B, HT_BASE, HT_BASEFONT, HT_BIG, HT_BLOCKQUOTE, HT_BODY, HT_BR,
    /* C */
    HT_CAPTION, HT_CENTER, HT_CITE, HT_CODE,
    /* D */
    HT_DD, HT_DFN, HT_DIR, HT_DIV, HT_DL, HT_DT,
    /* E - F */
    HT_EM, HT_FONT, HT_FORM, HT_FRAME, HT_FRAMESET,
    /* H */
    HT_H1, HT_H2, HT_H3, HT_H4, HT_H5, HT_H6, HT_HEAD, HT_HR, HT_HTML,
    /* I - K */
    HT_I, HT_IMG, HT_INPUT, HT_ISINDEX, HT_KBD,
    /* L - M */
    HT_LI, HT_LINK, HT_MAP, HT_MENU, HT_META,
    /* N - O */
    HT_NOFRAMES, HT_OL, HT_OPTION,
    /* P */
    HT_P, HT_PAGE, HT_PARAM, HT_PRE,
    /* S */
    HT_SAMP, HT_SCRIPT, HT_SELECT, HT_SMALL, HT_STRIKE, HT_STRONG,
    HT_STYLE, HT_SUB, HT_SUP,
    /* T */
    HT_TAB, HT_TABLE, HT_TD, HT_TEXTAREA, HT_TH, HT_TITLE, HT_TR, HT_TT,
    /* U - Z */
    HT_U, HT_UL, HT_VAR, HT_ZTEXT
} htmlEnum;
/*****
* Name table corresponding to the htmlEnum ids above, defined in parse.c.
* NOTE(review): presumably indexed by htmlEnum value -- confirm in parse.c.
*****/
extern String *html_tokens;
/*
* OPTIONAL_CLOSURE(id): 1 when id is an element for which a closing
* counterpart is optional (DD, DT, LI, P, OPTION, TD, TH, TR), 0 otherwise.
* id is expanded once per comparison: pass an expression without side effects.
*/
#define OPTIONAL_CLOSURE(id) ( \
    (id) == HT_DD     || \
    (id) == HT_DT     || \
    (id) == HT_LI     || \
    (id) == HT_P      || \
    (id) == HT_OPTION || \
    (id) == HT_TD     || \
    (id) == HT_TH     || \
    (id) == HT_TR)
/*
* IS_MARKUP(id): 1 when id is a physical or logical markup element
* (TT, I, B, U, STRIKE, BIG, SMALL, SUB, SUP, EM, STRONG, DFN, CODE, SAMP,
* KBD, VAR, CITE, FONT), 0 otherwise.
* id is expanded once per comparison: pass an expression without side effects.
*/
#define IS_MARKUP(id) ( \
    (id) == HT_TT     || \
    (id) == HT_I      || \
    (id) == HT_B      || \
    (id) == HT_U      || \
    (id) == HT_STRIKE || \
    (id) == HT_BIG    || \
    (id) == HT_SMALL  || \
    (id) == HT_SUB    || \
    (id) == HT_SUP    || \
    (id) == HT_EM     || \
    (id) == HT_STRONG || \
    (id) == HT_DFN    || \
    (id) == HT_CODE   || \
    (id) == HT_SAMP   || \
    (id) == HT_KBD    || \
    (id) == HT_VAR    || \
    (id) == HT_CITE   || \
    (id) == HT_FONT)
/*
* IS_CONTAINER(id): 1 when id is a text container (BODY, DIV, CENTER,
* BLOCKQUOTE, FORM, TH, TD, DD, LI, NOFRAMES), 0 otherwise.
* id is expanded once per comparison: pass an expression without side effects.
*/
#define IS_CONTAINER(id) ( \
    (id) == HT_BODY       || \
    (id) == HT_DIV        || \
    (id) == HT_CENTER     || \
    (id) == HT_BLOCKQUOTE || \
    (id) == HT_FORM       || \
    (id) == HT_TH         || \
    (id) == HT_TD         || \
    (id) == HT_DD         || \
    (id) == HT_LI         || \
    (id) == HT_NOFRAMES)
/*
* NESTED_ELEMENT(id): 1 when id is an element that may be nested, i.e. any
* IS_MARKUP() element plus APPLET, BLOCKQUOTE, DIV, CENTER and FRAMESET;
* 0 otherwise.
* id is expanded several times: pass an expression without side effects.
*/
#define NESTED_ELEMENT(id) ( \
    IS_MARKUP(id)            || \
    (id) == HT_APPLET        || \
    (id) == HT_BLOCKQUOTE    || \
    (id) == HT_DIV           || \
    (id) == HT_CENTER        || \
    (id) == HT_FRAMESET)
/*
* IS_MISC(id): 1 when id is one of the remaining elements (P, H1-H6, PRE,
* ADDRESS, APPLET, CAPTION, A, DT), 0 otherwise.
* id is expanded once per comparison: pass an expression without side effects.
*/
#define IS_MISC(id) ( \
    (id) == HT_P       || \
    (id) == HT_H1      || \
    (id) == HT_H2      || \
    (id) == HT_H3      || \
    (id) == HT_H4      || \
    (id) == HT_H5      || \
    (id) == HT_H6      || \
    (id) == HT_PRE     || \
    (id) == HT_ADDRESS || \
    (id) == HT_APPLET  || \
    (id) == HT_CAPTION || \
    (id) == HT_A       || \
    (id) == HT_DT)
/*****
* Error codes that can be reported through XmNparserCallback.
* The codes run consecutively from 1.
*****/
typedef enum {
    HTML_UNKNOWN_ELEMENT = 1,   /* unknown HTML element */
    HTML_BAD             = 2,   /* very badly placed element */
    HTML_OPEN_BLOCK      = 3,   /* block still open while new block started */
    HTML_CLOSE_BLOCK     = 4,   /* block closed but was never opened */
    HTML_OPEN_ELEMENT    = 5,   /* unbalanced terminator */
    HTML_NESTED          = 6,   /* improperly nested element */
    HTML_VIOLATION       = 7,   /* bad content for current block/element */
    HTML_NOTIFY          = 8,   /* insertion of optional opening/closing */
    HTML_INTERNAL        = 9    /* internal parser error */
} parserError;
/*****
* Matching values for XmNenableBadHTMLWarnings; they are or'd together.
* XmHTML_NONE disables warnings and XmHTML_ALL enables all warnings.
* Each flag carries the meaning of the parserError code of the same name;
* HTML_NOTIFY and HTML_INTERNAL have no flag here.
*****/
enum {
    XmHTML_NONE            = 0,         /* no warnings */
    XmHTML_UNKNOWN_ELEMENT = 1 << 0,
    XmHTML_BAD             = 1 << 1,
    XmHTML_OPEN_BLOCK      = 1 << 2,
    XmHTML_CLOSE_BLOCK     = 1 << 3,
    XmHTML_OPEN_ELEMENT    = 1 << 4,
    XmHTML_NESTED          = 1 << 5,
    XmHTML_VIOLATION       = 1 << 6,
    /* all warnings: every flag above or'd together */
    XmHTML_ALL             = XmHTML_UNKNOWN_ELEMENT | XmHTML_BAD |
                             XmHTML_OPEN_BLOCK | XmHTML_CLOSE_BLOCK |
                             XmHTML_OPEN_ELEMENT | XmHTML_NESTED |
                             XmHTML_VIOLATION
};
/*****
* A parsed HTML element. Elements are chained to each other through the
* next and prev members.
*****/
typedef struct _XmHTMLObject XmHTMLObject;

struct _XmHTMLObject {
    htmlEnum id;            /* ID for this element */
    /*
    * Raw text. For HTML elements, freeing this member also frees
    * attributes.
    */
    String element;
    String attributes;      /* attributes for this element, if any */
    Boolean is_end;         /* true when this is a closing element */
    Boolean terminated;     /* true when element has a closing counterpart */
    Boolean ignore;         /* true if element must be ignored */
    Boolean auto_insert;    /* auto inserted element */
    Boolean violation;      /* element is in violation of HTML standard */
    int line;               /* line number for this element */
    XmHTMLObject *next;     /* following element */
    XmHTMLObject *prev;     /* preceding element */
};
/*****
* Callback invoked when the parser has finished a single pass on the input.
*
* Arguments:
*   objects  : the current list of parser objects, which may NOT be freed;
*   html32   : True if input was HTML3.2 conforming, False if not;
*   verified : True if parser verification succeeded;
*   balanced : True if parser tree was balanced;
*   pass     : current parser pass (count starts at 0);
*   length   : length of input text.
* Returns:
*   True  : make another pass on the input using the current (possibly
*           repaired) output;
*   False : don't make another pass on the input.
*****/
typedef Boolean (*ParserDocumentCallback)(
    XmHTMLObject *objects,
    Boolean html32,
    Boolean verified,
    Boolean balanced,
    int pass,
    int length);

/* The installed document callback (declared here, defined elsewhere). */
extern ParserDocumentCallback parser_document_callback;
/*****
* Callback invoked upon completion of a single pass.
*
* Arguments:
*   unbalanced : number of elements still on stack (only when document is
*                unbalanced);
*   inserted   : number of (missing) HTML tags inserted by the parser;
*   ignored    : number of HTML tags ignored by the parser.
* Returns:
*   nothing.
*****/
typedef void (*ParserAutoCorrectCallback)(
    int unbalanced,
    int inserted,
    int ignored);

/* The installed autocorrect callback (declared here, defined elsewhere). */
extern ParserAutoCorrectCallback parser_autocorrect_callback;
/*****
* One record of the parser state stack; records are chained through next.
*****/
typedef struct _stateStack stateStack;

struct _stateStack {
    htmlEnum id;        /* current state id */
    stateStack *next;   /* ptr to next record */
};
/*****
* Description of a parser tag: a set of user-provided fields followed by
* one field the parser keeps for itself.
*****/
typedef struct _XmHTMLParserTag XmHTMLParserTag;

struct _XmHTMLParserTag {
    /* user-provided data fields */
    String tag;             /* name of tag */
    Boolean terminated;     /* tag has a terminating counterpart */
    unsigned long flags;    /* defined attributes */
    void *user_data;        /* external tag data, unused internally */

    /* internal fields */
    int id;                 /* internal id, -1 == unused */
};
/*****
* A Parser: the input text, the position in it, the list of elements
* produced so far, the state stack and the flags steering the parse.
*****/
typedef struct _Parser Parser;

struct _Parser {
    /* input text and position bookkeeping */
    String source;              /* text being parsed */
    int index;                  /* last known position */
    int len;                    /* length of input text */
    int num_lines;              /* current line count */
    Dimension line_len;         /* maximum line length so far */
    Dimension cnt;              /* current line length */

    void (*store_text)();       /* text insertion function */

    /* running list of inserted elements */
    int num_elements;           /* no of tags inserted so far */
    int num_text;               /* no of text elements inserted so far */
    XmHTMLObject *head;         /* head of object list */
    XmHTMLObject *current;      /* lastly inserted element */

    /* state stack */
    stateStack state_base;      /* stack base point */
    stateStack *state_stack;    /* actual stack */

    /* extent of the element being processed */
    int cstart;                 /* current element start position */
    int cend;                   /* current element end position */

    /* flags */
    Boolean strict_checking;    /* HTML 3.2 looseness flag */
    Boolean have_body;          /* indicates presence of <body> tag */
    Boolean have_page;          /* NOTE(review): presumably the same for a <page> tag -- confirm */
    Boolean warn;               /* warn about bad html constructs */
    Boolean bad_html;           /* bad HTML document flag */
    Boolean html32;             /* HTML32 conforming document flag */
    Boolean automatic;          /* when in automatic mode */
    Boolean do_icons;           /* look for icon entities */

    Widget widget;              /* for the warning messages */
};
/*****
* Various helper functions used by the parser (and defined by the parser
* itself when it's compiled with -DMINIPARSE).
* NOTE(review): judging by the names these are a lowercase conversion, a
* case-insensitive strstr and strndup/strdup equivalents -- confirm the
* exact contracts (e.g. who frees the returned strings) in the definitions.
*****/
extern void  my_locase(char *string);
extern char *my_strcasestr(const char *s1, const char *s2);
extern char *my_strndup(const char *s1, size_t len);
extern char *my_strdup(const char *s1);
/*****
* The parser uses strcasecmp and strncasecmp. Since these do not exist on
* every system, the parser carries fallback copies; compile with
* -DNEED_STRCASECMP to have every strcasecmp/strncasecmp call routed to
* them.
*****/
#if defined(NEED_STRCASECMP)
extern int my_strcasecmp(const char *s1, const char *s2);
extern int my_strncasecmp(const char *s1, const char *s2, size_t n);

# define strcasecmp(a, b)       my_strcasecmp(a, b)
# define strncasecmp(a, b, n)   my_strncasecmp(a, b, n)
#endif
/*****
* Warning message display support.
* When parser_warnings has been set to False, no warnings will be
* generated.
*
* __WFUNC__(WIDGET_ID, FUNC) expands to the first four arguments of the
* warning function: the widget (cast to Widget), the current file name,
* the current line number and FUNC.
* Both macro arguments are parenthesized so that the cast applies to the
* complete WIDGET_ID expression, not just to its first operand.
*****/
#define __WFUNC__(WIDGET_ID, FUNC) (Widget)(WIDGET_ID), __FILE__, \
    __LINE__, (FUNC)
/*
* The warning function itself. Its first four arguments (widget, module,
* line, routine) are what __WFUNC__() supplies; fmt and the variable
* arguments follow.
* NOTE(review): fmt presumably follows printf conventions -- confirm.
* Compilers without __STDC__ only get an unprototyped declaration.
*/
#ifdef __STDC__
extern void __XmHTMLWarning(Widget w, String module, int line,
    String routine, String fmt, ...);
#else
extern void __XmHTMLWarning();
#endif

/* name under which the warning function is called */
#define _XmHTMLWarning __XmHTMLWarning
/*****
* Public Parser Functions
*****/

/*****
* Write the list of objects to a file. If notext is True, HTML text
* segments will not be included in the output file.
* NOTE(review): the file is designated by prefix -- presumably the output
* file name is built from it; confirm in the definition.
*****/
extern void ParserWriteOutputToFile(
    XmHTMLObject *objects,
    String prefix,
    Boolean notext);

/*****
* Write the list of objects to a file as a HTML file. Takes the same
* arguments as ParserWriteOutputToFile.
*****/
extern void ParserWriteHTMLOutputToFile(
    XmHTMLObject *objects,
    String prefix,
    Boolean notext);

/*****
* Compose a HTML output string from the list of objects.
* NOTE(review): ownership of the returned string is not visible here --
* confirm whether the caller must free it.
*****/
extern String _XmHTMLTextGetString(XmHTMLObject *objects);

/*****
* Free the given list of objects.
*****/
extern void _XmHTMLFreeObjects(XmHTMLObject *objects);
/*****
* The parser. Takes two widgets (html and dest), a previous list of
* objects (old_list) and the text to be parsed (input) as its input.
* Returns a list of parsed objects.
*****/
extern XmHTMLObject *_XmHTMLparseHTML(
    XmHTMLWidget html,
    XmHTMLObject *old_list,
    char *input,
    XmHTMLWidget dest);
/* Don't add anything after this endif! */
#endif /* _miniparse_h_ */
|