File: links.g

package info (click to toggle)
antlr 2.7.7+dfsg-14
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 10,016 kB
  • sloc: java: 54,649; cs: 12,537; makefile: 8,854; cpp: 7,359; pascal: 5,273; sh: 4,333; python: 4,297; lisp: 1,969; xml: 220; lex: 192; ansic: 127
file content (107 lines) | stat: -rw-r--r-- 2,040 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
{
import java.util.Vector;
}

/** Parse an entire html file, firing events to a single listener
 *  for each image and href link encountered.  The token rules AHREF
 *  and IMG both set their type to Token.SKIP, so the lexer will
 *  continue scarfing until EOF rather than returning tokens for them.
 */
class LinkExtractor extends Lexer;
options {
	// characters are matched without regard to case
	caseSensitive=false;
	// lookahead depth of two characters
	k=2;
	// SCARF is the filter rule applied to input the token rules do not match
	filter=SCARF;
	// octal escapes: character codes 3 through 127
	// NOTE(review): characters above 127 fall outside this vocabulary - confirm how non-ASCII pages are handled
	charVocabulary='\3'..'\177';
}

{
	/** The one listener that receives link events; null until addLinkListener is called and again after removeLinkListener. */
	protected LinkListener listener;
	
	/**
	 * Register the listener that receives image and href events.
	 * Only one listener is stored, so a later call replaces the
	 * listener set by an earlier one.
	 *
	 * @param listener the receiver for subsequent link events
	 */
	public void addLinkListener(LinkListener listener) {
		this.listener = listener;
	}

	/**
	 * Unregister the current listener.
	 * The argument is not consulted: the stored listener is cleared
	 * whether or not it is the one passed in.
	 *
	 * @param listener ignored
	 */
	public void removeLinkListener(LinkListener listener) {
		this.listener = null;
	}

	/**
	 * Report an image reference to the registered listener.
	 * When no listener is registered (none was added, or it has been
	 * removed) the event is dropped instead of failing with a
	 * NullPointerException in the middle of a scan.
	 *
	 * @param target the reference text handed to LinkListener.imageReference
	 * @param line   the line number handed to LinkListener.imageReference
	 */
	public void fireImageLinkEvent(String target, int line) {
		if ( listener==null ) {
			return;	// nobody to notify
		}
		listener.imageReference(target, line);
	}

	/**
	 * Report an href reference to the registered listener.
	 * When no listener is registered (none was added, or it has been
	 * removed) the event is dropped instead of failing with a
	 * NullPointerException in the middle of a scan.
	 *
	 * @param target the reference text handed to LinkListener.hrefReference
	 * @param line   the line number handed to LinkListener.hrefReference
	 */
	public void fireHREFLinkEvent(String target, int line) {
		if ( listener==null ) {
			return;	// nobody to notify
		}
		listener.hrefReference(target, line);
	}

	/**
	 * Strip the delimiting quotes from a "..." or '...' string.
	 * The delimiter is whichever quote character occurs first in the
	 * input; it is paired with the last occurrence of that same
	 * character, so a quote of the other kind inside the string is
	 * kept as ordinary text.  Text outside the delimiters is dropped.
	 *
	 * @param src the text to strip; must not be null
	 * @return the text between the delimiters, or src unchanged when it
	 *         holds no quote or only a single unpaired one
	 */
	public static String stripQuotes(String src) {
		int firstDouble = src.indexOf('"');
		int firstSingle = src.indexOf('\'');

		// the opening delimiter is the earlier of the two kinds of quote
		int open;
		if ( firstDouble==-1 ) {
			open = firstSingle;
		}
		else if ( firstSingle==-1 ) {
			open = firstDouble;
		}
		else {
			open = Math.min(firstDouble, firstSingle);
		}
		if ( open==-1 ) {
			return src;	// no quotes at all
		}

		// the closing delimiter must be the same character as the opening one
		int close = src.lastIndexOf(src.charAt(open));
		if ( close<=open ) {
			return src;	// a lone quote has nothing to pair with
		}
		return src.substring(open+1, close);
	}
}

/** An anchor tag carrying at least one attribute: the text <a, one WS,
 *  one or more ATTR, then the closing angle bracket.  Link events are
 *  fired from inside ATTR; the tag itself is marked Token.SKIP.
 */
AHREF
	:	"<a" WS (ATTR)+ '>'		{$setType(Token.SKIP);}
	;

/** An image tag carrying at least one attribute: the text <img, one WS,
 *  one or more ATTR, then the closing angle bracket.  Link events are
 *  fired from inside ATTR; the tag itself is marked Token.SKIP.
 */
IMG	:	"<img" WS (ATTR)+ '>'	{$setType(Token.SKIP);}
	;

/** One name=value attribute inside a tag.  The value is either a quoted
 *  STRING, whose delimiters are stripped, or a bare WORD.  An attribute
 *  named href fires an href event and one named src fires an image
 *  event (names compared ignoring case); any other attribute is matched
 *  and dropped.
 */
protected
ATTR
options {
	ignore=WS;
}
	:	w:WORD '='
		(	s:STRING
		|	v:WORD
		)
		{
		// exactly one of s and v was matched; the other label is still null
		String target;
		if ( s==null ) {
			target = v.getText();
		}
		else {
			target = stripQuotes(s.getText());
		}
		String attrName = w.getText();
		if ( attrName.equalsIgnoreCase("href") ) {
			fireHREFLinkEvent(target, getLine());
		}
		else if ( attrName.equalsIgnoreCase("src") ) {
			fireImageLinkEvent(target, getLine());
		}
		}
	;

/** Match one or more characters drawn from a-z, 0-9, slash, dot, hash
 *  and underscore; can be a file, an int, etc.  Used by ATTR for both
 *  attribute names and unquoted values.  The word ends at the first
 *  character outside that set, which need not be whitespace.
 *  NOTE(review): with caseSensitive=false in the lexer options,
 *  upper-case letters presumably match as well - confirm.
 */
protected
WORD:	(
			options {
				// ambiguity warnings are switched off for this subrule
				generateAmbigWarnings=false;
			}
		:	'a'..'z' | '0'..'9' | '/' | '.' | '#' | '_'
		)+
	;

/** A quoted attribute value, either "..." or '...'.  The body is any
 *  run of characters other than the closing delimiter, so a quote of
 *  the other kind may appear inside.  The matched text still includes
 *  both delimiters; ATTR removes them with stripQuotes.
 */
protected
STRING
	:	'"' (~'"')* '"'
	|	'\'' (~'\'')* '\''
	;

/** Match exactly one whitespace item: a space, a tab, a form feed, or
 *  one line terminator.  Every line terminator calls newline() so that
 *  line numbers keep up with the input.  The token type is set to
 *  Token.SKIP.
 */
protected
WS	:	(	' '
		|	'\t'
		|	'\f'
		|	(	"\r\n"  // DOS
			|	'\r'    // Macintosh
			|	'\n'    // Unix (the right way)
			)
			{ newline(); }
		)
		{ $setType(Token.SKIP); }
	;

/** The rule named by the filter option of this lexer.  It consumes one
 *  whitespace item through WS, or otherwise any single character.
 */
protected
SCARF
	:	WS	// track line numbers while you scarf
	|	.	// anything else: consume one character
	;