File: Node.cc

package info (click to toggle)
htmlcxx 0.87-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,412 kB
  • sloc: sh: 4,380; cpp: 4,355; yacc: 526; ansic: 205; lex: 159; makefile: 47; perl: 27
file content (114 lines) | stat: -rw-r--r-- 2,515 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include <iostream>
#include <cctype>
#include <algorithm>
#include "wincstring.h"
#include "Node.h"

//#define DEBUG
#include "debug.h"

using namespace std;
using namespace htmlcxx;
using namespace HTML;

// The <cctype> functions have undefined behaviour when given a negative value
// other than EOF, and plain char may be signed. Bytes >= 0x80 (e.g. UTF-8 text
// inside an attribute value) must therefore be converted to unsigned char
// before classification. These file-local wrappers do that conversion.
static inline bool attrIsSpace(char c)
{
	return std::isspace(static_cast<unsigned char>(c)) != 0;
}

static inline bool attrIsAlpha(char c)
{
	return std::isalpha(static_cast<unsigned char>(c)) != 0;
}

static inline bool attrIsAlnum(char c)
{
	return std::isalnum(static_cast<unsigned char>(c)) != 0;
}

static inline char attrToLower(char c)
{
	return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
}

/**
 * Extracts the attributes found in this node's tag text (mText) and adds
 * them to mAttributes.
 *
 * Does nothing when the node is not a tag, when mText contains no '<', or
 * when the first non-blank character after '<' is not alphabetic.
 *
 * Parsing rules, as implemented below:
 *  - A key is a run of alphanumeric characters and '-'; it is stored
 *    lower-cased.
 *  - "key=value": the value may be wrapped in single or double quotes, in
 *    which case leading/trailing whitespace inside the quotes is trimmed.
 *    An unquoted value extends up to the next whitespace or '>'.
 *  - If an opening quote has no matching closing quote, the value ends at
 *    the first ' ' or '>' after the quote; if neither exists, parsing stops
 *    and that attribute is not recorded.
 *  - A key without '=' is recorded with an empty value (only when the key
 *    itself is non-empty).
 *  - Scanning stops at the first '>' outside a quoted value or at the end
 *    of mText.
 */
void Node::parseAttributes() 
{
	if (!(this->isTag())) return;

	const char *ptr = mText.c_str();
	if ((ptr = std::strchr(ptr, '<')) == 0) return;
	++ptr;

	// Skip initial blankspace ('\0' is not a space, so this stops at the end)
	while (attrIsSpace(*ptr)) ++ptr;

	// Skip tagname
	if (!attrIsAlpha(*ptr)) return;
	while (*ptr && !attrIsSpace(*ptr) && *ptr != '>') ++ptr;

	// Skip blankspace after tagname
	while (attrIsSpace(*ptr)) ++ptr;

	while (*ptr && *ptr != '>') 
	{
		std::string key, val;

		// Skip unrecognized characters, but never step over the closing '>':
		// doing so would make the parser continue beyond the end of the tag.
		while (*ptr && *ptr != '>' && !attrIsAlnum(*ptr) && !attrIsSpace(*ptr)) ++ptr;

		// skip blankspace
		while (attrIsSpace(*ptr)) ++ptr;

		// Read the (possibly empty) key and normalise it to lower case
		const char *keyEnd = ptr;
		while (attrIsAlnum(*keyEnd) || *keyEnd == '-') ++keyEnd;
		key.assign(ptr, keyEnd);
		std::transform(key.begin(), key.end(), key.begin(), attrToLower);
		ptr = keyEnd;

		// skip blankspace
		while (attrIsSpace(*ptr)) ++ptr;

		if (*ptr == '=') 
		{
			++ptr;
			while (attrIsSpace(*ptr)) ++ptr;
			if (*ptr == '"' || *ptr == '\'') 
			{
				const char quote = *ptr;
				const char *valEnd = std::strchr(ptr + 1, quote);
				const bool closed = (valEnd != 0);
				if (!closed)
				{
					// Unterminated quote: end the value at whichever of ' '
					// or '>' comes first. Either may be absent, so a null
					// pointer is never used in the ordering comparison.
					const char *spacePos = std::strchr(ptr + 1, ' ');
					const char *tagEnd = std::strchr(ptr + 1, '>');
					if (spacePos && (!tagEnd || spacePos < tagEnd)) valEnd = spacePos;
					else valEnd = tagEnd;
					if (valEnd == 0) return;
				}

				// Trim whitespace on both sides; [begin, trimmedEnd) is the value
				const char *begin = ptr + 1;
				while (begin < valEnd && attrIsSpace(*begin)) ++begin;
				const char *trimmedEnd = valEnd;
				while (trimmedEnd > begin && attrIsSpace(*(trimmedEnd - 1))) --trimmedEnd;
				val.assign(begin, trimmedEnd);

				// Consume the closing quote. In the fallback case leave the
				// delimiter in place so that a '>' still terminates the loop.
				ptr = closed ? valEnd + 1 : valEnd;
			}
			else 
			{
				// Unquoted value: runs until whitespace, '>' or end of text
				const char *valEnd = ptr;
				while (*valEnd && !attrIsSpace(*valEnd) && *valEnd != '>') ++valEnd;
				val.assign(ptr, valEnd);
				ptr = valEnd;
			}

			// Note: the pair is inserted even when the key is empty
			mAttributes.insert(std::make_pair(key, val));
		}
		else
		{
			// Attribute without a value
			if (!key.empty())
			{
				mAttributes.insert(std::make_pair(key, std::string()));
			}
		}
	}
}

/**
 * Two nodes compare equal only when both are tags and their tag names match
 * ignoring case. Any comparison involving a non-tag node yields false.
 */
bool Node::operator==(const Node &n) const 
{
	const bool bothAreTags = isTag() && n.isTag();
	if (!bothAreTags)
	{
		return false;
	}

	// strcasecmp returns 0 when the names are equal regardless of case
	return strcasecmp(tagName().c_str(), n.tagName().c_str()) == 0;
}

/**
 * String conversion: yields the tag name for tag nodes and the node text
 * for every other kind of node.
 */
Node::operator string() const {
	return isTag() ? this->tagName() : this->text();
}

/**
 * Writes the string conversion of this node to the given stream and returns
 * that stream. Being a member operator, the node is the left-hand operand
 * (i.e. it is invoked as `node << stream`).
 */
ostream &Node::operator<<(ostream &stream) const {
	return stream << static_cast<string>(*this);
}