File: xmlparser.h

package info (click to toggle)
virtuoso-opensource 7.2.5.1%2Bdfsg1-0.3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 285,240 kB
  • sloc: ansic: 641,220; sql: 490,413; xml: 269,570; java: 83,893; javascript: 79,900; cpp: 36,927; sh: 31,653; cs: 25,702; php: 12,690; yacc: 10,227; lex: 7,601; makefile: 7,129; jsp: 4,523; awk: 1,697; perl: 1,013; ruby: 1,003; python: 326
file content (411 lines) | stat: -rw-r--r-- 17,373 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
/*
 *  xmlparser.h
 *
 *  $Id$
 *
 *  This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
 *  project.
 *
 *  Copyright (C) 1998-2018 OpenLink Software
 *
 *  This project is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; only version 2 of the License, dated June 1991.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#ifndef _XML_PARSER_H
#define _XML_PARSER_H

#include <stddef.h>
#include "Dk.h"
#include "langfunc.h"

/* Parser handle. Only the type name is introduced here; the members of
   struct vxml_parser_s are not declared in this header. */
typedef struct vxml_parser_s vxml_parser_t;

/* Well-known namespace URIs.
   Every *_LEN macro is the length of the matching literal in characters,
   not counting the terminating NUL; keep the pairs in sync by hand. */

/* XML Schema namespace */
#define XMLSCHEMA_NS_URI		"http://www.w3.org/2001/XMLSchema"
#define XMLSCHEMA_NS_URI_LEN		32

/* RDF syntax namespace */
#define RDF_NS_URI			"http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#define RDF_NS_URI_LEN			43

/* XML namespace */
#define XML_NS_URI			"http://www.w3.org/XML/1998/namespace"
#define XML_NS_URI_LEN			36

/* Microsoft mapping-schema namespace */
#define MSSQL_NS_URI			"urn:schemas-microsoft-com:mapping-schema"
#define MSSQL_NS_URI_LEN		40

/* XML Schema instance namespace */
#define XMLSCHEMA_INSTANCE_URI		"http://www.w3.org/2001/XMLSchema-instance"
#define XMLSCHEMA_INSTANCE_URI_LEN	41


#ifndef UNICHAR_DEFINED
#define UNICHAR_DEFINED
/* 31-bit Unicode value; negative values are invalid.
   The original author left open whether this should be int32 rather than int. */
typedef int unichar;
#endif

/* Signed type for the length of a text. */
typedef int s_size_t;

#ifndef DTD_T_DECLARED
#define DTD_T_DECLARED
/* DTD object; struct dtd_s itself is not defined in this header.
   NOTE(review): dtd_addref / dtd_release presumably manage a reference
   count - confirm against their definitions. */
struct dtd_s;
typedef struct dtd_s dtd_t;

extern void dtd_addref (dtd_t *dtd, int make_global);
extern int dtd_release (dtd_t *dtd);
#endif

/* Cloned from Dk/Dkhashext.h */
#ifndef LENMEM_T_DEFINED
#define LENMEM_T_DEFINED
/* Memory block described by a start pointer and an explicit length.
   The definition is guarded by LENMEM_T_DEFINED so that it can coexist with
   the copy it was cloned from.
   Fix: the declaration used to read "typedef struct struct lenmem_s", which
   is a syntax error. */
typedef struct lenmem_s
{
  size_t		lm_length;	/*!< Length of the block */
  char *		lm_memblock;	/*!< Start of the block */
} lenmem_t;
#endif

/* A position in the parsed text together with the origin of that text. */
typedef struct xml_pos_s
{
  int line_num;				/*!< current line number */
  int col_b_num;			/*!< current column byte offset */
  int col_c_num;			/*!< current column char offset */
  const char *origin_uri;		/*!< URI where visible data of this brick comes from or NULL for main text. xml_pos_s is not owner of this string */
  struct xml_def_4_entity_s *origin_ent;	/*!< Entity where visible data of this brick comes from or NULL for main text. xml_pos_s is not owner of this def. */
} xml_pos_t;

/* xml_pos_set(dst,src): copy the xml_pos_t pointed to by src over the one
   pointed to by dst.
   In DEBUG builds the copy is preceded by a sanity check: a non-NULL
   origin_uri whose DV_TYPE_OF is neither DV_STRING nor DV_UNAME triggers GPF_T.
   The DEBUG form is a statement, the non-DEBUG form is a memcpy() expression;
   src is evaluated more than once in DEBUG builds. */
#ifndef DEBUG
#define xml_pos_set(dst,src) \
  memcpy ((dst), (src), sizeof (xml_pos_t))
#else
#define xml_pos_set(dst,src) \
  do { \
    if (NULL != (src)->origin_uri) \
      { \
	if ((DV_STRING != DV_TYPE_OF ((src)->origin_uri)) && (DV_UNAME != DV_TYPE_OF ((src)->origin_uri))) \
	  GPF_T; \
      } \
    memcpy ((dst), (src), sizeof (xml_pos_t)); \
  } while (0)
#endif

/* Identifiers attached to a NOTATION definition. */
typedef struct xml_def_4_notation_s
{
  char *xd4n_publicId;	/*!< PUBLIC value, pub-literal (i.e. restricted charset), may be NULL */
  char *xd4n_systemId;	/*!< SYSTEM value, sys-literal (i.e. any chars may occur), may be NULL */
} xml_def_4_notation_t;

/* Definition of an entity. Pointers to this type are returned by
   VXmlGetParameterEntity() and VXmlGetGenericEntity() and are passed to
   VXmlEntityRefHandler callbacks. */
typedef struct xml_def_4_entity_s
{
  caddr_t	xd4e_literalVal;	/*!< Literal value, may be NULL */
  caddr_t	xd4e_publicId;		/*!< PUBLIC value, pub-literal (i.e. restricted charset), may be NULL */
  caddr_t	xd4e_systemId;		/*!< SYSTEM value, sys-literal (i.e. any chars may occur), may be NULL */
  caddr_t	xd4e_notationName;	/*!< NDATA name, known NOTATION name expected */
  lenmem_t	xd4e_repl;		/*!< Replacement text, entity is not its owner */
  xml_pos_t	xd4e_defn_pos;		/*!< Position of the entity's definition */
  xml_pos_t	xd4e_val_pos;		/*!< Position of the entity's value */
  int		xd4e_may_be_in_mkup;	/*!< True if PE matches [ VC: Proper Declaration/PE Nesting ], i.e. \<...\> balance */
  int		xd4e_may_be_in_ecm;	/*!< True if PE matches [ VC: Proper Group/PE Nesting ], i.e. (...) balance */
  int		xd4e_may_be_in_cond;	/*!< True if PE matches [ VC: Proper Conditional Section/PE Nesting ], i.e. \<![...]]\> balance */
  int		xd4e_valid;		/*!< True if entity does not contain recursive references */
} xml_def_4_entity_t;


/* Record kept for an opened tag: its name, an HTML tag descriptor and the
   position in the source. */
struct opened_tag_s
{
  lenmem_t ot_name;
  struct html_tag_descr_s *ot_descr;
  xml_pos_t ot_pos;
};
typedef struct opened_tag_s opened_tag_t;

/* 0x800 == 2048.
   NOTE(review): presumably the limit on the nesting depth of opened tags;
   the code enforcing it is not in this header. */
#define XML_PARSER_MAX_DEPTH 0x800

/* One namespace declaration: a prefix, the URI bound to it and an
   opened_tag_t reference (presumably the tag that made the declaration -
   confirm in the parser). */
struct nsdecl_s
{
  caddr_t nsd_prefix;
  caddr_t nsd_uri;
  opened_tag_t *nsd_tag;
};
typedef struct nsdecl_s nsdecl_t;

/* 0x80 == 128.
   NOTE(review): presumably a capacity limit for namespace declarations;
   the code enforcing it is not in this header. */
#define XML_PARSER_MAX_NSDECLS 0x80

/* One attribute of a tag: raw name and value.
   A disabled member "nsdecl_t *ns" was left commented out by the original
   author at the top of this structure. */
struct tag_attr_s
{
  /* nsdecl_t *		ns; */
  lenmem_t ta_raw_name;
  caddr_t ta_value;
};
typedef struct tag_attr_s tag_attr_t;

/* 0x100 == 256.
   NOTE(review): presumably a capacity limit for attributes of one tag;
   the code enforcing it is not in this header. */
#define XML_PARSER_MAX_ATTRS 0x100

/* A key/value pair; used as the element type of the two arrays in
   xml_ns_2dict_t. */
struct xml_name_assoc_s
{
  caddr_t xna_key;
  caddr_t xna_value;
};
typedef struct xml_name_assoc_s xml_name_assoc_t;

/* Two association arrays plus a size. Judging by the member names they map
   namespace prefixes to URIs and URIs back to prefixes - confirm in the
   implementation. xml_ns_2dict_clean() treats a zero xn2_size as "nothing
   allocated". */
struct xml_ns_2dict_s
{
  xml_name_assoc_t *xn2_prefix2uri;
  xml_name_assoc_t *xn2_uri2prefix;
  ptrlong xn2_size;
};
typedef struct xml_ns_2dict_s xml_ns_2dict_t;

/* xml_ns_2dict_clean(ns_2dict): release both arrays of the xml_ns_2dict_t
   pointed to by ns_2dict and reset it to the empty state (both pointers NULL,
   xn2_size 0). Nothing is done when xn2_size is already zero.
   Sequence: xn2_prefix2uri is retagged as DV_ARRAY_OF_POINTER, both arrays
   are passed to dk_check_tree(), then xn2_prefix2uri is freed with
   dk_free_tree() and xn2_uri2prefix with dk_free_box().
   NOTE(review): presumably the retag lets dk_free_tree() release the keys and
   values exactly once while the second array only aliases them - confirm
   against the box allocator before reordering anything here.
   The argument is evaluated several times; pass a side-effect-free pointer. */
#define xml_ns_2dict_clean(ns_2dict) \
do { \
  if ((ns_2dict)->xn2_size) \
    { \
      box_tag_modify ((ns_2dict)->xn2_prefix2uri, DV_ARRAY_OF_POINTER); \
      dk_check_tree ((ns_2dict)->xn2_prefix2uri); \
      dk_check_tree ((ns_2dict)->xn2_uri2prefix); \
      dk_free_tree ((box_t) (ns_2dict)->xn2_prefix2uri); \
      dk_free_box ((box_t) (ns_2dict)->xn2_uri2prefix); \
      (ns_2dict)->xn2_prefix2uri = NULL; \
      (ns_2dict)->xn2_uri2prefix = NULL; \
      (ns_2dict)->xn2_size = 0; \
    } \
 } while (0)

/* Attribute and namespace data handed to a VXmlStartElementHandler.
   Each array pointer is paired with the number of elements in use. */
struct vxml_parser_attrdata_s
{
  tag_attr_t *local_attrs;	/*!< Attributes of the local tag */
  size_t local_attrs_count;	/*!< Number of used elements in \c local_attrs array */

  nsdecl_t *local_nsdecls;	/*!< Namespace declarations made in the current opening tag */
  size_t local_nsdecls_count;	/*!< Number of used elements in \c local_nsdecls array */

  nsdecl_t *all_nsdecls;	/*!< All namespace declarations in all ancestor-or-self tags, as a stack. It may contain duplicates so always search from end to begin */
  size_t all_nsdecls_count;	/*!< Number of used elements in \c all_nsdecls array */
};
typedef struct vxml_parser_attrdata_s vxml_parser_attrdata_t;

/* Start-of-element callback; installed together with the end-of-element
   callback by VXmlSetElementHandler(). Receives the user data pointer, the
   element name and the attribute/namespace data of the tag. */
typedef void (*VXmlStartElementHandler) (void *userData, const char *name, vxml_parser_attrdata_t *attrdata);

/* End-of-element callback; installed by VXmlSetElementHandler(). */
typedef void (*VXmlEndElementHandler) (void *userData, const char *name);

/* Callback installed by VXmlSetIdHandler(). */
typedef void (*VXmlIdHandler) (void *userData, const char *name);

/* Character data callback. s is NOT 0 terminated; len gives its length. */
typedef void (*VXmlCharacterDataHandler) (void *userData, const char *s, size_t len);

/* Processing instruction callback. target and data are 0 terminated. */
typedef void (*VXmlProcessingInstructionHandler) (void *userData, const char *target, const char *data);

/* Comment callback. text is 0 terminated. */
typedef void (*VXmlCommentHandler) (void *userData, const char *text);

/* IvAn/ParseDTD/000721
   Entity reference callback.
   userData - e.g., vxml_parser_t,
   refname, reflen - reference name (pointer to and length of text),
   isparam - the original note reads "flag if reference is global"
     (NOTE(review): the parameter name suggests a parameter-entity flag
     instead - confirm with the parser),
   edef - definition of the reference */
typedef void (*VXmlEntityRefHandler) (void *userData, const char *refname, size_t reflen, int isparam, const xml_def_4_entity_t *edef);
/* IvAn/ParseDTD/000721 **/

/* Callback that receives the DTD of a document. */
typedef void (*VXmlDtdHandler) (void *userData, dtd_t *doc_dtd);

/* Lookup of an encoding handler by encoding name; installed by
   VXmlSetFindUserEncoding() or via vxml_parser_config_t. */
typedef encoding_handler_t *(*VXmlFindUserEncoding) (const char *encname, int xml_input_is_wide);

/* URI callbacks; both receive the uri_appdata pointer of the parser
   configuration and an err_ret out-parameter. */
typedef char *(*VXmlUriResolver) (void *uri_appdata, char **err_ret, ccaddr_t base_uri, ccaddr_t rel_uri, const char *output_charset);
typedef char *(*VXmlUriReader) (void *uri_appdata, char **err_ret, char **options, ccaddr_t base_uri, ccaddr_t rel_uri, int cast_blob_to_varchar);

/* printf-style error reporting callback. DBG_PARAMS is defined elsewhere. */
typedef void (*VXmlErrorReporter) (DBG_PARAMS  const char *state, const char *format, ...);

/* Attribute value parser; xmlap_qname() and xmlap_xpath() have this signature. */
typedef void *(*VXmlAttrParser) (void *userData, const char *elname, const char *attrname, const char *attrvalue);
  
/* Attribute parsers with the VXmlAttrParser signature. */
extern void *xmlap_qname (void *userData,
    const char *elname, const char *attrname, const char *attrvalue);
extern void *xmlap_xpath (void *userData,
    const char *elname, const char *attrname, const char *attrvalue);
  

/* How strongly a caller-supplied encoding is applied. */
typedef enum xml_enc_flag
{
  XML_EF_NONE = 0,	/* try to find proper encoding without any help */
  XML_EF_DEFAULT = 1,	/* set entity encoding, then try to find it from XML declaration */
  XML_EF_SUGGEST = 2,	/* as XML_EF_DEFAULT, but use suggested encoding if the declared one is unknown */
  XML_EF_FORCE = 3	/* set encoding and ignore XML declaration */
} xml_enc_flag_t;

/* Parsing mode values and flag bits. The first four may be handed to the
   parser, the remaining three must not be. */
#define FINE_XML		0
#define FINE_HTML		1
#define DEAD_HTML		2
#define GE_XML			0x10	/* May be passed to the parser */

#define WEBIMPORT_HTML		0x40	/* Not for passing to the parser */
#define FINE_XSLT		0x80	/* Not for passing to the parser */
#define FINE_XML_SRCPOS		0x100	/* Not for passing to the parser */

/* Input callback registered with VXmlParserInput(): receives the read_cd
   pointer, a buffer and the buffer size, returns a size_t. */
typedef size_t (*xml_read_func_t) (void *read_cd, char *buf, size_t bsize);

/* Callback that receives only the read_cd pointer.
   NOTE(review): presumably invoked on abnormal end of reading - confirm. */
typedef void (*xml_read_abend_func_t) (void *read_cd);

/* Values for vxml_parser_config_t.input_source_type. */
#define XML_SOURCE_TYPE_TEXT 0		/*!< Plain default input from text representation */
#define XML_SOURCE_TYPE_XTREE_DOC 1	/*!< Special handling of attributes (they're expanded already) */

/* Configuration passed to VXmlParserCreate().
   Memory pointed to by members of this structure is owned by the caller of
   VXmlParserCreate() and should not be freed until the parser is destroyed.
   (The original note names "Xml_ParserDestroy"; the destroy function declared
   in this header is VXmlParserDestroy.) */
typedef struct vxml_parser_config_s
{
  /* Nature of the input */
  int input_is_wide;		/*!< Flags if XML input (to be parsed) is wchar_t, not plain character data */
  int input_is_ge;		/*!< Flags if input is Generic Entity, not a complete document. */
  int input_is_html;		/*!< Flags if input is HTML, not XML */
  int input_is_xslt;		/*!< Flags if input is XSLT and should be handled in a special way */
  int input_source_type;	/*!< Type of input (XML_SOURCE_TYPE_TEXT is default) */
  const char *initial_src_enc_name;
  VXmlFindUserEncoding user_encoding_handler;

  /* URI handling and error reporting */
  const char *uri;		/*!< URI of the document, and base for relative URIs */
  VXmlUriResolver uri_resolver;
  VXmlUriReader uri_reader;
  VXmlErrorReporter error_reporter;
  void *uri_appdata;		/*!< Application-specific data for \c uri_resolver and \c uri_reader callbacks */
  caddr_t *log_ret;		/*!< Application-specific data for \c error_reporter */

  /* DTD / schema validation */
  caddr_t dtd_config;
  lang_handler_t *root_lang_handler;
  int validation_mode;			/*!< XML_DTD (default) or XML_SCHEMA */
  int auto_load_xmlschema_dtd;		/*!< 0 or 1 */
  caddr_t auto_load_xmlschema_dtd_p;	/*!< xmlschema namespace prefix */
  caddr_t auto_load_xmlschema_dtd_s;	/*!< xmlschema namespace suffix */
  caddr_t auto_load_xmlschema_uri;	/*!< uri of the schema that is requested by parser caller */
  int dc_namespaces;		/*!< Enforced fixed value for parser's dc_namespaces, dtd config will not override */

  int feed_buf_size;		/*!< If nonzero then the size of the buffer for text input. */
} vxml_parser_config_t;

/* Parser creation, destruction and the parse entry point.
   The config structure's pointed-to memory stays owned by the caller. */
extern vxml_parser_t *VXmlParserCreate (vxml_parser_config_t *config);
extern void VXmlParserDestroy (vxml_parser_t *parser);
extern int VXmlParse (vxml_parser_t *parser, char *data, s_size_t size);
/* Disabled by the original author:
   void VXmlParsePosition (vxml_parser_t * parser, size_t * start_pos, size_t * end_pos); */

/* Installation of callbacks on a parser. */
extern void VXmlSetElementHandler (vxml_parser_t *parser,
    VXmlStartElementHandler sh, VXmlEndElementHandler eh);
extern void VXmlSetIdHandler (vxml_parser_t *parser, VXmlIdHandler h);
/* IvAn/ParseDTD/999721 */
extern void VXmlSetCommentHandler (vxml_parser_t *parser, VXmlCommentHandler h);
/* IvAn/ParseDTD/999721 */
extern void VXmlSetProcessingInstructionHandler (vxml_parser_t *parser,
    VXmlProcessingInstructionHandler h);
extern void VXmlSetCharacterDataHandler (vxml_parser_t *parser,
    VXmlCharacterDataHandler h);

/* Position queries. */
extern int VXmlGetCurrentLineNumber (vxml_parser_t *parser);
extern int VXmlGetOuterLineNumber (vxml_parser_t *parser);
extern int VXmlGetCurrentColumnNumber (vxml_parser_t *parser);
extern int VXmlGetCurrentByteNumber (vxml_parser_t *parser);
extern const char *VXmlGetCurrentFileName (vxml_parser_t *parser);
extern const char *VXmlGetOuterFileName (vxml_parser_t *parser);

extern void VXmlSetEntityRefHandler (vxml_parser_t *parser, VXmlEntityRefHandler h);

/* Diagnostics.
   NOTE(review): VXmlErrorContext2() takes a caller buffer without a size
   argument - check the required capacity in the implementation. */
extern caddr_t VXmlErrorContext (vxml_parser_t *parser);
extern char *VXmlErrorContext2 (char *buffer, vxml_parser_t *parser);
extern caddr_t VXmlValidationLog (vxml_parser_t *parser);
extern caddr_t VXmlFullErrorMessage (vxml_parser_t *parser);
/* Namespace lookups on a parser: URI by prefix, prefix by URI, and the
   namespace URI of a qualified name (returned through uri_ret). is_attr
   tells whether the qname is an attribute name. */
extern ccaddr_t VXmlFindNamespaceUriByPrefix (vxml_parser_t * parser, ccaddr_t prefix);
extern ccaddr_t VXmlFindNamespacePrefixByUri (vxml_parser_t * parser, ccaddr_t uri);
extern void VXmlFindNamespaceUriByQName (vxml_parser_t * parser, const char *qname, int is_attr, lenmem_t *uri_ret);
/* DBG_NAME and DBG_PARAMS are defined elsewhere.
   NOTE(review): presumably they add a dbg_ prefix and file/line parameters in
   MALLOC_DEBUG builds, which is what the wrapper macro below relies on. */
extern caddr_t DBG_NAME(VXmlFindExpandedNameByQName) (DBG_PARAMS vxml_parser_t * parser, const char *qname, int is_attr);
#ifdef MALLOC_DEBUG
/* Route plain calls to the dbg_ variant, passing the call site's file and line. */
#define VXmlFindExpandedNameByQName(p,q,a) dbg_VXmlFindExpandedNameByQName (__FILE__, __LINE__, (p), (q), (a))
#endif
/* Compares an expanded name against a qualified name. */
extern int VXmlExpandedNameEqualsQName (vxml_parser_t * parser, const char * expanded_name,
				 const char * qname, int is_attr);


/*
 * NOTE(review): the text below documents a "force" argument that takes
 * xml_enc_flag_t values, but neither of the two functions that follow has
 * such a parameter - presumably it belonged to a declaration that is no
 * longer in this header. Kept for reference:
 *
 * XML_EF_NONE    - ignore the call - try to find proper encoding without any help
 * XML_EF_DEFAULT - set entity encoding and then try to find it from XML declaration
 * XML_EF_SUGGEST - same as above, except use suggested encoding if encoding in
 *                  XML declaration is unknown
 * XML_EF_FORCE   - set encoding and ignore XML declaration
 */

/* Attaches the pointer that callbacks receive as userData. */
extern void VXmlSetUserData (vxml_parser_t *parser, void *ptr);
/* Registers a read callback and the read_cd pointer passed to it. */
extern void VXmlParserInput (vxml_parser_t *parser, xml_read_func_t f, void *read_cd);

/* Lookup of DTD definitions by reference name. The results are const:
   callers must not modify them. */
extern const xml_def_4_notation_t *VXmlGetNotation (vxml_parser_t *parser, const char *refname);
extern const xml_def_4_entity_t *VXmlGetParameterEntity (vxml_parser_t *parser, const char *refname);
extern const xml_def_4_entity_t *VXmlGetGenericEntity (vxml_parser_t *parser, const char *refname);

/* Installs the encoding lookup callback on a parser. */
extern void VXmlSetFindUserEncoding (vxml_parser_t *parser, VXmlFindUserEncoding find);


/* Values for vxml_parser_config_t.validation_mode. */
#define XML_DTD		0	/* BTW zero value means 'default' */
#define XML_SCHEMA	1

/* Returns the DTD object of a parser. */
extern dtd_t *VXmlGetDtd (vxml_parser_t *parser);

/*** BEG RUS/Schema Thu Mar 22 19:00:19 2001 ***/
extern void xml_schema_init (void);
extern void VXmlAddSchemaDeclarationCallbacks (vxml_parser_t *parser);
/*** END RUS/Schema Thu Mar 22 19:00:22 2001 ***/

/* System path list and its iterator. The iterator structure is declared
   here by name only. */
struct xml_iter_syspath_s;
extern caddr_t xml_add_system_path (caddr_t path_uri);
extern struct xml_iter_syspath_s *xml_iter_system_path (void);
extern void xml_free_iter_system_path (struct xml_iter_syspath_s *);
extern caddr_t xml_iter_syspath_hitnext (struct xml_iter_syspath_s *);
extern ptrlong xml_iter_syspath_length (struct xml_iter_syspath_s *);

extern void html_hash_init (void);

/* URI resolution and retrieval on behalf of a query instance.
   struct query_instance_s is declared here by name only. */
struct query_instance_s;

extern caddr_t xml_uri_resolve (struct query_instance_s *qi, caddr_t *err_ret,
    ccaddr_t base_uri, ccaddr_t rel_uri, const char *output_charset);
extern caddr_t xml_uri_resolve_like_get (struct query_instance_s *qi, caddr_t *err_ret,
    ccaddr_t base_uri, ccaddr_t rel_uri, const char *output_charset);

/* Values for the mode argument of xml_uri_get(). */
#define XML_URI_ANY		0
#define XML_URI_STRING		1
#define XML_URI_STRING_OR_ENT	2

extern caddr_t xml_uri_get (struct query_instance_s *qi, caddr_t *err_ret,
    caddr_t *options, ccaddr_t base_uri, ccaddr_t rel_uri, int mode);

/* These are from sqlbif2.c */

/* Offsets of the components of a URI as produced by rfc1808_parse_uri().
   Each component is a begin/end pair of positions; CHECK_RDF1808_SPLIT
   requires the pairs to be in non-decreasing order, starting at 0 and ending
   no later than the URI length.
   Note the spelling: the type says "rdf1808" while the functions say
   "rfc1808". */
struct rdf1808_split_s
{
  ptrlong schema_begin;		/*!< schema without ':' */
  ptrlong schema_end;

  ptrlong netloc_begin;		/*!< network location/login without '/' */
  ptrlong netloc_end;

  ptrlong path_begin;		/*!< path with starting '/' */
  ptrlong path_end;

  ptrlong params_begin;		/*!< parameters without starting ';' */
  ptrlong params_end;

  ptrlong query_begin;		/*!< query without starting '?' */
  ptrlong query_end;

  ptrlong fragment_begin;	/*!< fragment without starting '#' */
  ptrlong fragment_end;

  ptrlong two_slashes;		/*!< position of end of two slashes, zero if missing */
};
typedef struct rdf1808_split_s rdf1808_split_t;

/* CHECK_RDF1808_SPLIT(split,uri_len): consistency check of an rdf1808_split_t
   value (not a pointer). Unless NDEBUG is defined it calls GPF_T1 when
   schema_begin is not 0, when any begin/end offset is smaller than the one
   before it, or when fragment_end exceeds uri_len. two_slashes is not
   checked. With NDEBUG the macro expands to nothing.
   Two fixes:
   - the expansion is now a braced block, so an "else" that follows the macro
     can no longer be captured by the macro's internal "if". It still works
     with or without a trailing semicolon, as before;
   - the split argument is parenthesised, so expressions such as *ptr are
     accepted.
   Both arguments are evaluated more than once. */
#ifndef NDEBUG
#define CHECK_RDF1808_SPLIT(split,uri_len) \
  { \
    if ((0 != (split).schema_begin) || \
      ((split).schema_begin > (split).schema_end) || \
      ((split).schema_end > (split).netloc_begin) || \
      ((split).netloc_begin > (split).netloc_end) || \
      ((split).netloc_end > (split).path_begin) || \
      ((split).path_begin > (split).path_end) || \
      ((split).path_end > (split).params_begin) || \
      ((split).params_begin > (split).params_end) || \
      ((split).params_end > (split).query_begin) || \
      ((split).query_begin > (split).query_end) || \
      ((split).query_end > (split).fragment_begin) || \
      ((split).fragment_begin > (split).fragment_end) || \
      ((split).fragment_end > (uri_len)) ) \
      GPF_T1("CHECK_RDF1808_SPLIT failed"); \
  }
#else
#define CHECK_RDF1808_SPLIT(split,uri_len)
#endif

/* Split a narrow or wide IRI into components; the offsets are stored in
   *split_ret. */
extern void rfc1808_parse_uri (const char *iri, rdf1808_split_t *split_ret);
extern void rfc1808_parse_wide_uri (const wchar_t *iri, rdf1808_split_t *split_ret);

/* Expand rel_uri against base_uri.
   base_string_cs_name - encoding used for base_uri IFF it is a narrow string,
     neither DV_UNAME nor WIDE
   rel_string_cs_name - encoding used for rel_uri IFF it is a narrow string,
     neither DV_UNAME nor WIDE
   err_ret - out-parameter for an error */
extern caddr_t rfc1808_expand_uri (
  ccaddr_t base_uri,
  ccaddr_t rel_uri,
  ccaddr_t output_cs_name,
  int do_resolve_like_http_get,
  ccaddr_t base_string_cs_name,
  ccaddr_t rel_string_cs_name,
  caddr_t * err_ret );

#endif /* _XML_PARSER_H */