File: test_utils.cc

package info (click to toggle)
ruby-nokogiri 1.13.10%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,416 kB
  • sloc: ansic: 38,198; xml: 28,086; ruby: 22,271; java: 15,517; cpp: 7,037; yacc: 244; sh: 148; makefile: 136
file content (166 lines) | stat: -rw-r--r-- 6,077 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: jdtang@google.com (Jonathan Tang)

#include "test_utils.h"

#include "error.h"
#include "util.h"

// Returns the number of children stored on `node`. Document nodes keep their
// children in v.document; every other node is read through v.element.
int GetChildCount(GumboNode* node) {
  return (node->type == GUMBO_NODE_DOCUMENT)
             ? node->v.document.children.length
             : node->v.element.children.length;
}

// Returns the tag stored in the element payload of `node`. The node type is
// not checked here; the value is read straight from v.element.
GumboTag GetTag(GumboNode* node) {
  const GumboTag tag = node->v.element.tag;
  return tag;
}

// Returns the child of `parent` stored at position `index`. Document nodes
// are read through v.document, all other nodes through v.element. No bounds
// check is performed on `index`.
GumboNode* GetChild(GumboNode* parent, int index) {
  void* raw_child = (parent->type == GUMBO_NODE_DOCUMENT)
                        ? parent->v.document.children.data[index]
                        : parent->v.element.children.data[index];
  return static_cast<GumboNode*>(raw_child);
}

// Returns how many attributes are stored on the element payload of `node`.
int GetAttributeCount(GumboNode* node) {
  const int attribute_count = node->v.element.attributes.length;
  return attribute_count;
}

// Returns the attribute at position `index` on the element payload of `node`.
// No bounds check is performed on `index`.
GumboAttribute* GetAttribute(GumboNode* node, int index) {
  void* raw_attribute = node->v.element.attributes.data[index];
  return static_cast<GumboAttribute*>(raw_attribute);
}

// Convenience function to do some basic assertions on the structure of the
// document (nodes are elements, nodes have the right tags) and then return
// the body node.
//
// root: node whose children are scanned for the single <html> element. Any
//       non-element child must be a comment.
// body: out-parameter. It is reset to NULL just before the children of <html>
//       are scanned and then set to the second element child of <html>. If a
//       fatal assertion on the children of `root` fails first, *body is never
//       written.
//
// Fatal (ASSERT_*) failures return from this function only, so callers that
// must stop on failure need to check for a fatal failure themselves.
void GetAndAssertBody(GumboNode* root, GumboNode** body) {
  // Locate the one element child of the root, tolerating comment siblings.
  GumboNode* html = NULL;
  for (int i = 0; i < GetChildCount(root); ++i) {
    GumboNode* child = GetChild(root, i);
    if (child->type != GUMBO_NODE_ELEMENT) {
      ASSERT_EQ(GUMBO_NODE_COMMENT, child->type);
      continue;
    }
    // A second element child of the root is a structural error.
    ASSERT_TRUE(html == NULL);
    html = child;
  }
  ASSERT_TRUE(html != NULL);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html));

  // There may be comment/whitespace nodes; this walks through the children of
  // <html> and assigns head/body based on them, or assert-fails if there are
  // fewer/more than 2 such nodes.
  GumboNode* head = NULL;
  *body = NULL;
  for (int i = 0; i < GetChildCount(html); ++i) {
    GumboNode* child = GetChild(html, i);
    if (child->type != GUMBO_NODE_ELEMENT) {
      continue;
    }

    if (!head) {
      head = child;
      EXPECT_EQ(GUMBO_TAG_HEAD, GetTag(head));
    } else if (!(*body)) {
      *body = child;
      EXPECT_EQ(GUMBO_TAG_BODY, GetTag(*body));
    } else {
      // Comparing a string literal against NULL is always true and could never
      // fail; force a real fatal failure and attach the explanation to it.
      ASSERT_TRUE(false) << "More than two elements found inside <html>";
    }
  }
  EXPECT_TRUE(head != NULL);
  ASSERT_TRUE(*body != NULL);
}

// Recursively checks that the source pointers, source offsets and parent/child
// links recorded on `node` and its descendants are consistent with the
// original input buffer.
//
// input:        start of the buffer the tree was parsed from.
// input_length: length of `input`.
// node:         node to check; must not be NULL. Document nodes are skipped.
// depth:        current recursion depth; nodes more than 400 levels deep are
//               not checked.
//
// NOTE(review): every node that is neither a document nor an element is read
// through v.text below - confirm that this holds for all node types the parser
// can produce.
void SanityCheckPointers(
    const char* input, size_t input_length, const GumboNode* node, int depth) {
  ASSERT_GE(input_length, static_cast<size_t>(0));
  ASSERT_TRUE(node != NULL);
  // There are some truly pathological HTML documents out there - the
  // integration tests for this include one where the DOM "tree" is actually a
  // linked list 27,500 nodes deep - and so we need a limit on the recursion
  // depth here to avoid blowing the stack. Alternatively, we could externalize
  // the stack and use an iterative algorithm, but that gets us very little for
  // the additional programming complexity.
  if (node->type == GUMBO_NODE_DOCUMENT || depth > 400) {
    // Don't sanity-check the document as well...we start with the root.
    return;
  }
  if (node->type == GUMBO_NODE_ELEMENT) {
    const GumboElement* element = &node->v.element;
    // Sanity checks on original* pointers, making sure they fall within the
    // original input.
    if (element->original_tag.data && element->original_tag.length) {
      EXPECT_GE(element->original_tag.data, input);
      EXPECT_LT(element->original_tag.data, input + input_length);
      EXPECT_LE(element->original_tag.length, input_length);
    }
    // The end tag is guarded by its own length, not the start tag's, so an
    // element with an empty start-tag slice still has its end tag verified.
    if (element->original_end_tag.data && element->original_end_tag.length) {
      EXPECT_GE(element->original_end_tag.data, input);
      EXPECT_LT(element->original_end_tag.data, input + input_length);
      EXPECT_LE(element->original_end_tag.length, input_length);
    }
    EXPECT_GE(element->start_pos.offset, 0);
    EXPECT_LE(element->start_pos.offset, input_length);
    EXPECT_GE(element->end_pos.offset, 0);
    EXPECT_LE(element->end_pos.offset, input_length);

    const GumboVector* children = &element->children;
    for (unsigned int i = 0; i < children->length; ++i) {
      const GumboNode* child = static_cast<const GumboNode*>(children->data[i]);
      // Checks on parent/child links.
      ASSERT_TRUE(child != NULL);
      EXPECT_EQ(node, child->parent);
      EXPECT_EQ(i, child->index_within_parent);
      SanityCheckPointers(input, input_length, child, depth + 1);
    }
  } else {
    // Non-element nodes: the original text must lie inside the input buffer
    // and the start offset must be strictly less than the input length.
    const GumboText* text = &node->v.text;
    EXPECT_GE(text->original_text.data, input);
    EXPECT_LT(text->original_text.data, input + input_length);
    EXPECT_LE(text->original_text.length, input_length);
    EXPECT_GE(text->start_pos.offset, 0);
    EXPECT_LT(text->start_pos.offset, input_length);
  }
}

// Sets up the fixture: starts from the default options with max_errors set to
// 100, allocates the parser's output structure, and initializes the parser's
// error storage.
GumboTest::GumboTest()
    : options_(kGumboDefaultOptions), errors_are_expected_(false), text_("") {
  options_.max_errors = 100;

  GumboOutput* fresh_output =
      static_cast<GumboOutput*>(gumbo_alloc(sizeof(GumboOutput)));
  parser_._output = fresh_output;
  parser_._options = &options_;

  gumbo_init_errors(&parser_);
}

// Tears down the fixture. Unless the test flagged errors as expected, the
// first recorded error (if there is one) is handed to
// gumbo_print_caret_diagnostic together with the test's input text. The error
// storage and the output structure are then released.
GumboTest::~GumboTest() {
  // TODO(jdtang): A googlemock matcher may be a more appropriate solution for
  // this; we only want to pretty-print errors that are not an expected
  // output of the test.
  if (!errors_are_expected_ && parser_._output->errors.length > 0) {
    // Only the first error is reported, even when several were recorded.
    GumboError* first_error =
        static_cast<GumboError*>(parser_._output->errors.data[0]);
    gumbo_print_caret_diagnostic(first_error, text_, strlen(text_));
  }
  gumbo_destroy_errors(&parser_);
  gumbo_free(parser_._output);
}