File: tagged.lex

package info (click to toggle)
bow 19991122-4
  • links: PTS
  • area: main
  • in suites: woody
  • size: 2,544 kB
  • ctags: 2,987
  • sloc: ansic: 38,660; lisp: 1,072; makefile: 594; perl: 492; yacc: 149; sh: 91
file content (137 lines) | stat: -rw-r--r-- 2,655 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
%{

#include <string.h>
#include <ctype.h>
#include "bow/archer.h"

#define FILENAME_LEN 256

/* Running offset maintained by the scanner actions: advanced by the token
   length for words and by one for entities, newlines and any other single
   character.  Reset to 0 by the tagged_lex_open* functions. */
static long pos = 0;
/* Number of newlines consumed so far (so the first line counts as 0).
   Reset to 0 by the tagged_lex_open* functions; reported by pop_label(). */
static long line = 0;
/* Copy of the name passed to the tagged_lex_open* functions.  It is stored
   but not read anywhere else in this file. */
static char filename[FILENAME_LEN]; 

void pop_label();
void push_label();
void strip_entity(char entity[]);
int parse_entity(void);
%}

%option noyywrap
%s PARSE_TAGS

    /* NAME: a letter followed by one or more letters, digits or underscores.
       NOTE(review): the trailing "+" means a name needs at least two
       characters, so one-letter tags such as <p> are not matched by the
       tag rules below -- confirm whether that is intended. */
NAME            [[:alpha:]][_[:alnum:]]+
    /* ENTITY: one of the five predefined entities &lt; &gt; &amp; &quot; &apos; */
ENTITY          "&"(lt|gt|amp|quot|apos)";"

%%

    /* Token codes returned to the caller of yylex():
         1  a word (run of alphanumerics)
         2  an opening tag  <name>   (PARSE_TAGS state only)
         3  a closing tag   </name>  (PARSE_TAGS state only)
       PARSE_TAGS is an inclusive start condition (%s), so the three
       unlabelled rules at the bottom are active in both states.
       A predefined entity advances pos by one and produces no token.
       The tag rules do not advance pos.  Newlines advance both pos and
       line; any other character advances pos only. */
<PARSE_TAGS>{ENTITY}        pos++;
              /* <PARSE_TAGS>"&"{NAME}";"    if (!parse_entity()) return 1; */
<PARSE_TAGS>"<"{NAME}">"    push_label(); return 2;
<PARSE_TAGS>"</"{NAME}">"   pop_label(); return 3;
[[:alnum:]]+                pos += yyleng; return 1;
\n                          pos++; line++;
.                           pos++;

%%

/* Return 1 when ENTITY is exactly one of the five predefined entity
   references ("&lt;", "&gt;", "&amp;", "&quot;", "&apos;"), 0 otherwise.
   The comparison is case-sensitive and covers the whole string. */
int is_entity (char entity[])
{
  static const char *const known[] =
    { "&lt;", "&gt;", "&amp;", "&quot;", "&apos;" };
  size_t i;

  for (i = 0; i < sizeof known / sizeof known[0]; ++i)
  {
    if (strcmp (entity, known[i]) == 0)
      return 1;
  }

  return 0;
}

/* Remove the first and the last character of ENTITY in place, turning
   e.g. "&name;" into "name".

   Strings shorter than two characters have nothing between the delimiters
   and become the empty string; this avoids writing before the start of the
   buffer, which the unguarded "entity[strlen (entity) - 1]" would do. */
void strip_entity (char entity[])
{
  size_t len = strlen (entity);

  if (len < 2)
  {
    entity[0] = '\0';
    return;
  }

  /* Shift the interior (everything but the two delimiters) to the front. */
  memmove (entity, entity + 1, len - 2);
  entity[len - 2] = '\0';
}

/* Classify the entity reference currently held in yytext.

   A predefined entity (see is_entity) counts as a single character: pos is
   advanced by one and 1 is returned.  Anything else has its first and last
   character stripped in place (see strip_entity), pos is advanced by the
   full matched length yyleng, and 0 is returned.

   The only rule that would call this is currently commented out. */
int parse_entity(void)
{
  const int predefined = is_entity (yytext);

  if (!predefined)
  {
    strip_entity (yytext);
    pos += yyleng;
    return 0;
  }

  pos += 1;
  return 1;
}

/* Reduce a tag to its bare name in place: "<name>" and "</name>" both
   become "name".

   The leading delimiter is two characters when the second character is
   '/', one otherwise; the final character is always dropped.  Only the
   name itself is moved, so nothing past the terminating NUL is read, and
   input too short to contain a name becomes the empty string instead of
   causing an out-of-bounds write. */
void strip_label (char label[])
{
  size_t len = strlen (label);
  size_t skip = (len > 1 && label[1] == '/') ? 2 : 1;

  if (len <= skip)
  {
    label[0] = '\0';
    return;
  }

  /* len - skip - 1 == characters between the opening delimiter and the
     closing one. */
  memmove (label, label + skip, len - skip - 1);
  label[len - skip - 1] = '\0';
}

/* Action helper for the opening-tag rule: reduce yytext ("<name>") to the
   bare tag name in place, then pass it to bow_push_label(). */
void push_label ()
{
  strip_label (yytext);
  bow_push_label (yytext);
}

void pop_label ()
{
  char buf[100];

  bow_pop_label (buf, 100);
  strip_label (yytext);
  if(strcmp (buf, yytext)) printf("ERROR: at line %ld, expected </%s>, instead got </%s>. This is invalid XML.\n", line, yytext, buf);
}

/* Point the scanner at FP and reset its counters, leaving it in the
   INITIAL start condition, where the <PARSE_TAGS> rules are inactive and
   only words, newlines and single characters are recognised.

   NAME is copied into the file-level `filename' buffer, truncated to
   FILENAME_LEN - 1 characters; a NULL NAME is stored as the empty string. */
void tagged_lex_open_dont_parse_tags (FILE * fp, const char * name)
{
  yyin = fp;
  pos = 0;
  line = 0;
  /* strncpy() does not NUL-terminate when the source fills the buffer, so
     copy one byte less and terminate explicitly. */
  strncpy (filename, name != NULL ? name : "", FILENAME_LEN - 1);
  filename[FILENAME_LEN - 1] = '\0';
  BEGIN (INITIAL);
}


/* Point the scanner at FP and reset its counters, switching it to the
   PARSE_TAGS start condition so that entities and opening/closing tags
   are recognised in addition to words.

   NAME is copied into the file-level `filename' buffer, truncated to
   FILENAME_LEN - 1 characters; a NULL NAME is stored as the empty string. */
void tagged_lex_open (FILE * fp, const char * name)
{
  yyin = fp;
  pos = 0;
  line = 0;
  /* strncpy() does not NUL-terminate when the source fills the buffer, so
     copy one byte less and terminate explicitly. */
  strncpy (filename, name != NULL ? name : "", FILENAME_LEN - 1);
  filename[FILENAME_LEN - 1] = '\0';
  BEGIN (PARSE_TAGS);
}

/* Fetch the next token from the scanner and copy its text into BUF.

   yylex() is called once.  If it returns 1 (a word) the matched text is
   copied; if it returns 2 or 3 (opening / closing tag) the string returned
   by bow_last_label() is copied instead.  The copy is truncated to
   BUFSZ - 1 characters, NUL-terminated and converted to lower case.
   Nothing is copied when BUF is NULL or BUFSZ is not positive.

   START and END are independent and optional: each non-NULL pointer
   receives an offset derived from the running `pos' counter
   (*START = pos - yyleng, *END = pos - 1).

   Returns the value yylex() returned; for any value other than 1, 2 or 3
   BUF, *START and *END are left untouched. */
int tagged_lex_get_word_extended(char buf[], int bufsz, long *start, long *end)
{ 
  int ret;

  ret = yylex();
  if (ret == 1 || ret == 2 || ret == 3)
  {
    if (buf != NULL && bufsz > 0)
    {
      const char *src = (ret == 1 ? yytext : bow_last_label());
      char *p;

      /* Whether bow_last_label() can return NULL is not visible from
         here; treat that case as an empty label rather than crash. */
      if (src == NULL)
        src = "";

      strncpy(buf, src, bufsz - 1);
      buf[bufsz - 1] = '\0';

      /* Walk the (possibly truncated) copy rather than yyleng characters,
         so the loop can never run past the end of BUF.  <ctype.h>
         functions require a value representable as unsigned char. */
      for (p = buf; *p != '\0'; ++p)
        *p = (char) tolower((unsigned char) *p);
    }

    /* Check each output pointer on its own: a caller may legitimately
       pass only one of them. */
    if (start != NULL)
      *start = pos - (long) yyleng;
    if (end != NULL)
      *end = pos - 1;
  }

  return ret;
}

/* Convenience wrapper around tagged_lex_get_word_extended() for callers
   that do not need offsets: passes NULL for both start and end and
   returns that function's result unchanged. */
int tagged_lex_get_word(char buf[], int bufsz) {
  return tagged_lex_get_word_extended(buf, bufsz, NULL, NULL);
}