1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
%{
/* C prologue: copied verbatim into the generated scanner. */
#include <string.h>
#include <ctype.h>
#include "bow/archer.h"
/* Capacity, in bytes, of the buffer that holds the current input's name. */
#define FILENAME_LEN 256
/* Running position counter; advanced by the rule actions and reset by the
   tagged_lex_open* functions. */
static long pos = 0;
/* Count of newlines consumed so far (starts at zero); reset by the
   tagged_lex_open* functions. */
static long line = 0;
/* Name handed to the tagged_lex_open* functions for the current input. */
static char filename[FILENAME_LEN];
/* Forward declarations of helpers defined in the user-code section. */
void pop_label();
void push_label();
void strip_entity(char entity[]);
int parse_entity(void);
%}
/* At end of input the scanner simply stops; no yywrap() is required. */
%option noyywrap
/* Inclusive start condition: rules prefixed with <PARSE_TAGS> are active
   only in that state, while unprefixed rules stay active in every state. */
%s PARSE_TAGS
/* A name: one letter followed by one or more letters, digits or underscores.
   NOTE(review): because of the trailing '+', single-character names never
   match - confirm that this is intended. */
NAME [[:alpha:]][_[:alnum:]]+
/* The five predefined XML entity references. */
ENTITY "&"(lt|gt|amp|quot|apos)";"
%%
    /* A predefined entity advances pos by one and produces no token. */
<PARSE_TAGS>{ENTITY} pos++;
    /* <PARSE_TAGS>"&"{NAME}";" if (!parse_entity()) return 1; */
    /* Opening tag: record its label and return token code 2.
       NOTE(review): neither tag rule advances pos - confirm that tags are
       meant to be excluded from positions. */
<PARSE_TAGS>"<"{NAME}">" push_label(); return 2;
    /* Closing tag: check it against the recorded label, return token code 3. */
<PARSE_TAGS>"</"{NAME}">" pop_label(); return 3;
    /* A run of alphanumerics is a word: advance pos by its length, return 1. */
[[:alnum:]]+ pos += yyleng; return 1;
    /* Newlines advance both the position and the line counter. */
\n pos++; line++;
    /* Any other single character is skipped, advancing pos by one. */
. pos++;
%%
/*
 * Report whether ENTITY is one of the five predefined XML entity
 * references, written in full with the leading '&' and the trailing ';'.
 *
 * Returns 1 on a match and 0 otherwise.
 */
int is_entity (char entity[])
{
  /* Spelled out in full so the strings agree with the ENTITY pattern in
     the definitions section of this scanner. */
  static const char *const known[] = {
    "&lt;", "&gt;", "&amp;", "&quot;", "&apos;"
  };
  size_t i;

  for (i = 0; i < sizeof known / sizeof known[0]; ++i)
    {
      if (strcmp (entity, known[i]) == 0)
        return 1;
    }
  return 0;
}
/*
 * Remove the first and the last character of ENTITY in place, e.g.
 * turning "&name;" into "name".
 *
 * A string shorter than two characters has nothing between its
 * delimiters and is reduced to the empty string.
 */
void strip_entity (char entity[])
{
  size_t len = strlen (entity);

  if (len < 2)
    {
      /* Guard: the index len - 2 used below would wrap around here. */
      entity[0] = '\0';
      return;
    }
  /* Shift the interior to the front, then terminate right after it. */
  memmove (entity, entity + 1, len - 2);
  entity[len - 2] = '\0';
}
/*
 * Handle the entity reference currently held in yytext.
 *
 * A reference accepted by is_entity() advances pos by one and yields 1.
 * Anything else has its first and last characters removed from yytext by
 * strip_entity(), advances pos by yyleng, and yields 0.
 */
int parse_entity(void)
{
  if (!is_entity (yytext))
    {
      strip_entity (yytext);
      pos += yyleng;
      return 0;
    }

  pos += 1;
  return 1;
}
/*
 * Reduce a tag held in LABEL to its bare name, in place: "<name>" and
 * "</name>" both become "name".
 *
 * The leading '<' (or "</") and the final character are dropped. Input
 * too short to hold anything between them becomes the empty string.
 */
void strip_label (char label[])
{
  size_t len = strlen (label);
  size_t skip;

  /* Check the length first so label[1] is never read past the terminator. */
  skip = (len > 1 && label[1] == '/') ? 2 : 1;
  if (len <= skip)
    {
      label[0] = '\0';
      return;
    }
  /* Move only the characters between the prefix and the final one; a
     count of strlen (label) from offset 2 would read past the terminator. */
  memmove (label, label + skip, len - skip - 1);
  label[len - skip - 1] = '\0';
}
/*
 * Rule-action helper for an opening tag: reduce the matched text in yytext
 * to the bare tag name with strip_label(), then pass that name to
 * bow_push_label().
 */
void push_label ()
{
  char *tag = yytext;

  strip_label (tag);
  bow_push_label (tag);
}
/*
 * Rule-action helper for a closing tag: fetch a label through
 * bow_pop_label(), reduce the matched text in yytext to the bare tag name,
 * and print a diagnostic on stdout when the two names differ.
 */
void pop_label ()
{
  enum { LABEL_BUF_LEN = 100 };
  /* Starts out empty so the comparison below always reads a valid string,
     whatever bow_pop_label() does with the buffer. */
  char expected[LABEL_BUF_LEN] = "";

  bow_pop_label (expected, LABEL_BUF_LEN);
  strip_label (yytext);
  if (strcmp (expected, yytext) != 0)
    {
      /* presumably bow_pop_label() yields the most recently pushed (still
         open) tag - verify against bow/archer.h. That makes it the name
         that was expected, while yytext is the name actually found, so
         they are printed in that order. */
      printf ("ERROR: at line %ld, expected </%s>, instead got </%s>. This is invalid XML.\n",
              line, expected, yytext);
    }
}
/*
 * Point the scanner at FP with the tag and entity rules inactive (start
 * condition INITIAL), reset the position and line counters, and remember
 * NAME (truncated to fit) as the current file name.
 *
 * NOTE(review): yyin is assigned directly; confirm that callers never
 * switch inputs while an earlier one is only partly scanned.
 */
void tagged_lex_open_dont_parse_tags (FILE * fp, const char * name)
{
  yyin = fp;
  pos = 0;
  line = 0;
  /* strncpy() leaves the destination unterminated when NAME has
     FILENAME_LEN or more characters, so copy one byte less and terminate
     explicitly. */
  strncpy (filename, name, FILENAME_LEN - 1);
  filename[FILENAME_LEN - 1] = '\0';
  BEGIN (INITIAL);
}
/*
 * Point the scanner at FP with the tag and entity rules active (start
 * condition PARSE_TAGS), reset the position and line counters, and
 * remember NAME (truncated to fit) as the current file name.
 *
 * NOTE(review): yyin is assigned directly; confirm that callers never
 * switch inputs while an earlier one is only partly scanned.
 */
void tagged_lex_open (FILE * fp, const char * name)
{
  yyin = fp;
  pos = 0;
  line = 0;
  /* strncpy() leaves the destination unterminated when NAME has
     FILENAME_LEN or more characters, so copy one byte less and terminate
     explicitly. */
  strncpy (filename, name, FILENAME_LEN - 1);
  filename[FILENAME_LEN - 1] = '\0';
  BEGIN (PARSE_TAGS);
}
/*
 * Fetch the next token from the scanner.
 *
 * buf        - receives the lower-cased token text, truncated to fit and
 *              always NUL-terminated. For a word (return value 1) the text
 *              comes from yytext; for the tag tokens (2 and 3) it comes
 *              from bow_last_label(). Left untouched when NULL or when
 *              bufsz is not positive.
 * bufsz      - capacity of buf in bytes.
 * start, end - optional, either may be NULL; receive pos - yyleng and
 *              pos - 1 as they stand once the token has been scanned.
 *
 * Returns the value of yylex(). Only the values 1, 2 and 3 fill in the
 * output parameters; any other value leaves them untouched.
 */
int tagged_lex_get_word_extended(char buf[], int bufsz, long *start, long *end)
{
  int ret;

  ret = yylex();
  if (ret == 1 || ret == 2 || ret == 3)
    {
      if (buf != NULL && bufsz > 0)
        {
          const char *src = (ret == 1 ? yytext : bow_last_label());
          int i;

          strncpy(buf, src, bufsz - 1);
          buf[bufsz - 1] = 0;
          /* Stop at the terminator rather than at yyleng: the copy may
             have been truncated, and running to yyleng would then write
             past the end of buf. The cast keeps tolower() defined for
             characters with negative values. */
          for (i = 0; buf[i] != '\0'; ++i)
            buf[i] = (char) tolower((unsigned char) buf[i]);
        }
      /* Each out-parameter is checked on its own so that a NULL end is
         never dereferenced. */
      if (end != NULL)
        *end = pos - 1;
      if (start != NULL)
        *start = pos - yyleng;
    }
  return ret;
}
/*
 * Convenience form of tagged_lex_get_word_extended() for callers that do
 * not need positions: both position out-parameters are passed as NULL.
 * Returns whatever the extended call returns.
 */
int tagged_lex_get_word(char buf[], int bufsz) {
  long *no_start = NULL;
  long *no_end = NULL;
  int token = tagged_lex_get_word_extended(buf, bufsz, no_start, no_end);

  return token;
}
|