File: xmlparser_impl.h

package info (click to toggle)
virtuoso-opensource 6.1.6%2Bdfsg2-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 260,060 kB
  • ctags: 123,765
  • sloc: ansic: 652,532; sql: 458,419; xml: 282,834; java: 61,031; sh: 40,031; cpp: 36,890; cs: 25,240; php: 12,692; yacc: 9,523; lex: 7,018; makefile: 6,157; jsp: 4,484; awk: 1,643; perl: 1,013; ruby: 1,003; python: 326
file content (476 lines) | stat: -rw-r--r-- 18,318 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
/*
 *  xmlparser_impl.h
 *
 *  $Id$
 *
 *  This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
 *  project.
 *
 *  Copyright (C) 1998-2012 OpenLink Software
 *
 *  This project is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; only version 2 of the License, dated June 1991.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#ifndef _XML_PARSER_IMPL_H
#define _XML_PARSER_IMPL_H

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#include "Dk.h"
#include "xmlparser.h"
#include "xml_ecm.h"
#include "charclasses.h"
#include "schema.h"

/*#define UNIT_DEBUG 0*/

#ifndef UNIT_DEBUG
#include "libutil.h"
#else
#define dk_alloc(sz)		malloc (sz)
#define dk_free(ptr, sz)	free (ptr)
#endif

#include "xmlparser.h"

#include "charclasses.h"
/*#include "encodings.h"*/
#include "html_mode.h"

/* Types of errors (severity levels) */
#define XCFG_FATAL	0	/*!< Fatal error, which will abort processing immediately */
#define XCFG_ERROR	1	/*!< Error, which will abort processing after check completion */
#define XCFG_WARNING	2	/*!< Warning, which will not abort processing */
#define XCFG_DETAILS	3	/*!< Special value to mark details of previous value */
#define XCFG_OK		4	/*!< Success confirmation  */
#define XCFG_ERRLEVELS	5	/*!< Total number of error levels */

#define XCFG_NOLOGPLACE 0x10	/*!< Bit that prevents automatic calling xmlparser_log_place when reporting message */

/* If violation should be reported, error level is used as ID of action */
#define XCFG_IGNORE	5	/*!< Do processing quietly, do not report nonfatal errors */
#define XCFG_DISABLE	6	/*!< Disable some sort of processing fully */
#define XCFG_ENABLE	7	/*!< Enable some sort of processing fully */
#define XCFG_QUICK	7	/*!< Process DTD to report those serious errors which are easy to find */
#define XCFG_RIGOROUS	8	/*!< Process DTD to report all violations of validity constrains */
#define XCFG_SGML	9	/*!< Full check of DTD plus checks for SGML compatibility */

#define DTD_IN_UNSPEC	0
#define DTD_IN_ISUNREAD	1
#define DTD_IN_ISREAD	2

#define BRICK_SIZE	4076

#define PM(member) (parser->member)

#define LIST_INC 32

#define MAX_CONTEXT_LEFT 70
#define MAX_CONTEXT_ALL (MAX_CONTEXT_LEFT+8)
#define CONTEXT_BUF_LENGTH (MAX_CONTEXT_LEFT*2+MAX_CONTEXT_ALL+10)

#define RET_ERRMSG(msg) { xmlparser_logprintf (parser, XCFG_ERROR, 100, (msg)); return 0; }

typedef enum xml_tok_type {
XML_TOK_FINISH,
XML_TOK_ERROR,
XML_TOK_INVALID,
XML_TOK_WS,
XML_TOK_CHAR_DATA,
XML_TOK_START_TAG,
XML_TOK_END_TAG,
XML_TOK_EMPTY_TAG,
XML_TOK_ENTITY_REF,
XML_TOK_COMMENT,
XML_TOK_CDATA,
XML_TOK_PI,
XML_TOK_DTD
} xml_tok_type_t;

typedef int XML_PARSER_STATE;
#define XML_A_XMLDECL	1
#define XML_A_DTD	2
#define XML_A_SPACE	4
#define XML_A_COMMENT	8
#define XML_A_PI	16
#define XML_A_ELEMENT	32
#define XML_A_CHAR	64

#define XML_ST_NOT_EMPTY	128


#define XML_A_MISC	XML_A_COMMENT | XML_A_PI | XML_A_SPACE
#define XML_A_PROLOG	XML_A_XMLDECL | XML_A_DTD | XML_A_MISC

#define  XML_ST_INITIAL XML_A_PROLOG | XML_A_ELEMENT

#define CLR_STATE(x) do { parser->state &= ~(x); } while (0)
#define SET_STATE(x) do { parser->state |=  (x); } while (0)

/*! Brick is a descriptor of data source.
It can own some data under \c data_begin, and other bricks may reference to it.
To count uses of brick's data buffer, \c data_refctr is used.
Pointers \c beg and \c end delimits visible area in some buffer. */
struct brick_s {
  utf8char *		end;		/*<! Pointer past last byte of visible part of data. Should be the first field, for efficiency */
  utf8char *		beg;		/*<! Pointer to the first byte of visible part of data */
  struct brick_s *	next;		/*<! Pointer to the next brick in the chain. */
  struct brick_s *	prev;		/*<! Pointer to previous brick in the chain. */
  char *		data_begin;	/*<! Pointer to the first byte of data, owned by this brick */
  int			data_refctr;	/*<! Counter of external references to buffer under data_begin */
  struct brick_s *	data_owner;	/*<! Owner of data which are in memory from \c beg to \c end */
  xml_pos_t		beg_pos;	/*<! Position of beg[0] char in the source */
};

typedef struct brick_s brick_t;

struct buf_ptr_s {
  utf8char	* ptr;
  brick_t	* buf;
};

typedef struct buf_ptr_s buf_ptr_t;

struct buf_range_s {
  buf_ptr_t	beg;
  buf_ptr_t	end;
};

typedef struct buf_range_s buf_range_t;

struct vxml_parser_handlers_s
{
  void			* user_data;
  VXmlCharacterDataHandler		char_data_handler;
  VXmlStartElementHandler		start_element_handler;
  VXmlEndElementHandler			end_element_handler;
  VXmlIdHandler				id_handler;
  VXmlCommentHandler			comment_handler;
  VXmlProcessingInstructionHandler	pi_handler;
  VXmlEntityRefHandler			entity_ref_handler;
/*
  VXmlDtdHandler			dtd_handler;
*/
};

typedef struct vxml_parser_handlers_s vxml_parser_handlers_t;


typedef struct lenmem_list_elem_s {
  struct lenmem_list_elem_s	* next;
  lenmem_t	name;
} lenmem_list_elem_t, * lenmem_list_t;


#define XML_MAX_DOC_COST 0x3FFFffff
#define XML_BIG_DOC_COST 0x03FFffff
#define XML_XPER_DOC_WEIGHT 1000		/*!< Weight is in 16-bytes units */


struct vxml_parser_s {
  char *		static_src;		/*!< The source text to be parsed, this is the whole text if there's no feed, otherwise, it's a begginning only */
  char *		static_src_tail;	/*!< Unparsed rest of source text to be parsed */
  char *		static_src_end;		/*!< End of source text to be parsed */
  vxml_parser_config_t	cfg;
  encoding_handler_t	* src_eh;		/*!< (currenlty known) encoding of the input text */
  struct
    {
      int		byteorder;
      char		exact_mark;
      char		code_length;
      char		ucs4;
      char		utf16;
      char		utf8;
      char		ebcdic;
    } bom;
  int			src_eh_state;		/*!< State stored for stateful encodings, must be reset to 0 on any change of the encoding */

  xml_enc_flag_t	enc_flag;

  struct id_hash_s * includes;		/*!< Hash table of all texts included via URIs of entities */
  struct id_hash_s * ids;		/*!< Hash table of all ids */

  buf_ptr_t		bptr;		/*!< data begin */
  buf_ptr_t		eptr;		/*!< data end */
  buf_ptr_t		pptr;		/*!< it points past last parsed byte */

  char			* feed_buf;		/*!< buffer for getting new data */
  int			feed_buf_size;		/*!< Allocated size of \c feed_buf */
  char			* feed_tail;		/*!< untranslated tail of \c feed_buf */
  char			* feed_end;		/*!< end of data in \c feed_buf */
  unichar		* uni_buf;		/*!< buffer for data in unicode */
  unichar		* uni_tail;		/*!< tail of \c uni_tail */
  unichar		* uni_end;		/*!< end of \c uni_tail */

  xml_read_func_t	feeder;
  void			* read_cd;

  XML_PARSER_STATE	state;		/*!< state of parser */

  struct {
    buf_range_t		name;
    buf_range_t		string;
    buf_range_t		value;
    lenmem_list_t	enum_of_values;
    tag_attr_t		attr_array[XML_PARSER_MAX_ATTRS];
    ptrlong		att_type;
    char		* dtd_puburi;
    char		* dtd_sysuri;
    int			dtd_loaded_from_uri;
#if 0
    ptrlong		* tag_index;
#endif
    html_tag_descr_t	* tag_descr;
    lenmem_t		flat_name;
  }			tmp;
  vxml_parser_attrdata_t attrdata;

  xml_pos_t		curr_pos;	/*!< current position in source text of document (main or included) */
  buf_ptr_t		curr_pos_ptr;   /*!< Place in current buffer where curr_pos has measured */
  xml_pos_t		last_main_pos;	/*!< Last reached position in main source text */

  vxml_parser_handlers_t masters;	/*!< top level callbacks */
  vxml_parser_handlers_t slaves;		/*!< lower level of callbacks */

  dtd_validator_t	validator;
  schema_processor_t	processor;

  opened_tag_t		tag_stack_holder[XML_PARSER_MAX_DEPTH];
  opened_tag_t *	inner_tag;
  nsdecl_t		nsdecl_array[XML_PARSER_MAX_NSDECLS];
  ptrlong		fill_ns_2dict;
  xml_ns_2dict_t 	ns_2dict;
  dk_set_t		msglog;				/*!< Log of all messages */
  ptrlong		msglog_ctrs[XCFG_ERRLEVELS];	/*!< Numbers of messages grouped by error levels */
  /* xml logging, later it will substitute usual log throw xslt */
  /* caddr_t*		msglog_as_xml; */

#ifdef DEBUG
  int			last_err_detector;
#endif
  ptrlong		obj_serial; /*!< A growing counter for various objects. */
  ptrlong		input_cost; /*!< Total cost of input */
  ptrlong		input_weight; /*!< Total weight of input in 16-byte units */
};

extern encoding_handler_t *find_encoding (vxml_parser_t *parser, char * enc_name);

#ifdef DEBUG
extern int names_are_equal (lenmem_t * sname, char * name);
#endif
extern xml_tok_type_t get_token (vxml_parser_t * parser);
extern void advance_ptr (vxml_parser_t * parser);
extern unichar get_one_xml_char (vxml_parser_t * parser);
extern unichar get_tok_char (vxml_parser_t * parser);

extern void push_tag (vxml_parser_t * parser);
extern int pop_tag (vxml_parser_t * parser);
extern void add_attribute (vxml_parser_t * parser, buf_range_t * name, buf_range_t * value);
extern void free_attr_array (vxml_parser_t * parser);
extern void convert_attr_array (vxml_parser_t * parser);

extern void DBG_NAME(brcpy) (DBG_PARAMS lenmem_t * to, buf_range_t * from);
extern caddr_t DBG_NAME(box_brcpy) (DBG_PARAMS buf_range_t * from);

#ifdef MALLOC_DEBUG
#define brcpy(to,from) dbg_brcpy(__FILE__, __LINE__, (to), (from))
#define box_brcpy(from) dbg_box_brcpy(__FILE__, __LINE__, (from))
#endif

/* Rus/ DTD Validation Functions  */
extern void dtd_start_element_handler (struct vxml_parser_s* parser,
			   const char * name,
			   vxml_parser_attrdata_t *attrdata );

extern void dtd_end_element_handler (struct vxml_parser_s* parser,
			 const char * name);
extern void dtd_char_data_handler (struct vxml_parser_s* parser,
			   const char * s,
			   int len);
extern void dtd_pi_handler (struct vxml_parser_s* parser,
	     const char * target,
	     const char * data);
extern void dtd_comment_handler (struct vxml_parser_s* parser,  const char * text);
extern void dtd_start_cdata_section_handler (struct vxml_parser_s* parser);
extern void dtd_end_cdata_section_handler (struct vxml_parser_s* parser);
extern void dtd_unparsed_entity_decl_handler (struct vxml_parser_s* parser,
			  const char * entityName,
			  const char * base,
			  const char * systemId,
			  const char * publicId,
			  const char * notationName);
extern void dtd_notation_decl_handler (struct vxml_parser_s* parser,
		   const char * notationName,
		   const char * base,
		   const char * systemId,
		   const char * publicId);
extern void dtd_start_namespace_decl_handler (struct vxml_parser_s* parser,
			  const char * prefix,
			  const char * uri);
extern void dtd_end_namespace_decl_handler (struct vxml_parser_s* parser,
			const char * prefix);
extern void dtd_default_handler (struct vxml_parser_s* parser,
		  const char * s,
		  int len);

extern void dtd_entity_ref_handler (struct vxml_parser_s* parser,
		     const char *refname,
		     int reflen,
		     int isparam,
		     const xml_def_4_entity_t *edef);
/*
extern void dtd_dtd_handler (struct vxml_parser_s* parser,
		     struct dtd_s *doc_dtd);
*/

void
normalize_value (lenmem_t * lsp);

/* Functions that in common use for dtd.c and xmlread.c */
extern int test_ws (struct vxml_parser_s * parser);
extern int get_att_name (struct vxml_parser_s * parser);
extern int test_char_int (struct vxml_parser_s * parser, unichar ch);
#define test_char(parser,ch) \
  (((0 != parser->src_eh->eh_stable_ascii7) && \
    (parser->pptr.ptr == (parser->eptr.ptr-1)) && \
    (parser->pptr.ptr[0] != ch) && \
    (parser->pptr.buf == (parser->eptr.buf)) && \
    (parser->pptr.ptr[0] != '%') ) ? \
   0 : test_char_int (parser, ch) )

extern char* get_encoded_name (struct vxml_parser_s* parser, unichar* beg, unichar* end,
			   char** name, dtd_validator_t* validator);
extern int dtd_get_att_type (struct vxml_parser_s* parser,
			     ecm_el_t* elem, ptrlong attr_index,
			     dtd_validator_t* validator);
extern int dtd_get_def_decl (struct vxml_parser_s * parser, ecm_attr_t* attr, ecm_el_t* el);
extern int test_string (struct vxml_parser_s * parser, const char * s);
extern int test_class_str (struct vxml_parser_s * parser, const xml_char_class_t _class);
extern int test_class_str_noentity (struct vxml_parser_s * parser, const xml_char_class_t _class);
extern int get_name (struct vxml_parser_s * parser);
extern int get_value (struct vxml_parser_s * parser, int dtd_body);
extern int dtd_constraint_check (struct vxml_parser_s *parser, ecm_el_t* elem, ptrlong attr_idx);
extern int dtd_add_element_decl (dtd_validator_t* validator, struct vxml_parser_s * parser);
extern int get_content_def (struct vxml_parser_s * parser);

extern caddr_t dtd_normalize_attr_value (struct vxml_parser_s* parser, struct buf_range_s* range, ptrlong attr_type);
extern caddr_t dtd_normalize_attr_val (vxml_parser_t *parser, const char* value, int level);

/* functions from dtd.c for reading DTD declarations and filling dtd_, ecm_ structures */
extern int dtd_add_attlist_decl(dtd_validator_t* validator,  struct vxml_parser_s* parser);
extern int dtd_get_element_decl (dtd_validator_t* validator, struct vxml_parser_s * parser);
extern int dtd_add_include_section (struct vxml_parser_s* parser);
extern int dtd_add_ignore_section (struct vxml_parser_s* parser);

extern int entity_compile_repl (vxml_parser_t *parser, xml_def_4_entity_t *newdef);

extern int get_att_type (vxml_parser_t* parser);
extern int get_def_decl (vxml_parser_t* parser);

/*********************************************************************************************/

extern int replace_entity (vxml_parser_t* parser); /*replaces parameter entity */
extern void dtd_check_ids(vxml_parser_t* parser);
extern int get_refentry (vxml_parser_t* parser, char* refname);

extern ptrlong xs_get_primitive_typeidx(vxml_parser_t* parser, xs_component_t *type);



/* if entity is not found already, than leave ent with NULL */
extern int replace_entity_common (vxml_parser_t* parsery, int is_ge, struct xml_def_4_entity_s* ent, buf_ptr_t rem, int log_syntax_error);
extern void insert_buffer (vxml_parser_t* parser, buf_ptr_t* cut_begin, buf_ptr_t* cut_end, lenmem_t* ins, xml_pos_t *ins_beg_pos, xml_pos_t *end_pos);
extern int dtd_check_attribute (vxml_parser_t* parser, const char* value, struct ecm_attr_s* attr);
extern void free_refid_log (basket_t* refid_log);
extern int get_include (vxml_parser_t *parser, const char *base, const char *ref, lenmem_t *res_text, xml_pos_t *res_pos);

#ifndef MALLOC_DEBUG
#define NO_validate_parser_bricks
#endif

#ifndef NO_validate_parser_bricks
extern int validate_parser_bricks (vxml_parser_t * parser);
#else
#define validate_parser_bricks(parser)
#endif

extern int insert_external_dtd(struct vxml_parser_s* parser);

extern const char *concat_full_name (const char *ns, const char *name);

extern int check_entity_recursiveness (vxml_parser_t* parser, const char* entityname, int level, const char* currentname);

void xs_clear_tag (ptrlong tag, int is_free);

int insert_external_xmlschema_dtd (struct vxml_parser_s * parser);

#define LM_EQUAL(a,b) \
  ( \
   (((lm_equal_tmp1 = (a))->lm_length) == \
    ((lm_equal_tmp2 = (b))->lm_length) ) && \
   (0 == memcmp (lm_equal_tmp1->lm_memblock, lm_equal_tmp2->lm_memblock, lm_equal_tmp1->lm_length)))

#define LM_EQUAL_TO_STR(a,str) \
  ( \
   (((lm_equal_tmp1 = (a))->lm_length) == \
    strlen ((void *)(lm_equal_tmp2 = (void *)(str))) ) && \
   (0 == memcmp (lm_equal_tmp1->lm_memblock, (void *)lm_equal_tmp2, lm_equal_tmp1->lm_length)))

/*! Adds namespace declaration \c ns into \c xn2.
Returns 1 if really added or a duplicate detected, 0 if there's a conflict between prefixes or URIs */
extern int xml_ns_2dict_add (xml_ns_2dict_t *xn2, nsdecl_t *ns);
/*! Removes namespace declaration for prefix \c pref from \c xn2.
Returns 1 if really deleted, 0 if the prefix is not found. */
extern int xml_ns_2dict_del (xml_ns_2dict_t *xn2, caddr_t pref);
/*! Adds all namespace declaration from \src into \c dest.
Returns 1 if all declarations from \c src are really added or detected as duplicates,
0 if there's at least one conflict between prefixes or URIs */
extern int xml_ns_2dict_extend (xml_ns_2dict_t *dest, xml_ns_2dict_t *src);

/*! Adds a message of \c errlevel importance into log of \c parser.
\c msg is out of caller's ownership after the call (it's either in parser->msglog or freed).
\returns zero if message is not dumped e.g. due to limitation on number of messages.
 */
extern int xmlparser_log_box (vxml_parser_t *parser, int errlevel, caddr_t msg);

/* Adds a message of \errlevel importance into log of \c dv, allocating
at least \c buflen_eval bytes for internal buffer.
\returns zero if message is not dumped e.g. due to limitation on number of messages. */
extern int xmlparser_logprintf (vxml_parser_t *parser, ptrlong errlevel, size_t buflen_eval, const char *format, ...);

extern int xmlparser_log_place (struct vxml_parser_s *parser);

/* Adds a ECM_DETAILS message into log if grammar of element \c el is brief enough to be printed */
extern int xmlparser_logprintf_grammar (vxml_parser_t *parser, struct ecm_el_s *el);

void xmlparser_log_nconcat (vxml_parser_t *parser, vxml_parser_t *sub_parser);

/* \returns 0 if the config is void or invalid */
extern int xmlparser_configure (vxml_parser_t *parser, caddr_t dtd_config, struct vxml_parser_config_s *parser_config);
extern void xmlparser_tighten_validator (vxml_parser_t *parser);
extern void xmlparser_loose_validator (vxml_parser_t *parser);
extern int xmlparser_is_ok (vxml_parser_t *parser); /*!< returns if there were severe problems */
extern char *xmlparser_log_section_to_string (dk_set_t top, dk_set_t pastbottom, const char *title);

#define INNER_HANDLERS ((NULL != parser->cfg.dtd_config) ? &(parser->masters) : &(parser->slaves))
#define OUTER_HANDLERS ((NULL != parser->cfg.dtd_config) ? &(parser->slaves) : &(parser->masters))

char *xecm_print_fsm (xecm_el_idx_t el_idx, schema_parsed_t * schema, int use_raw);

#endif /* _XML_PARSER_IMPL_H */