File: html5lib-testcase-support.js

package info (click to toggle)
firefox 147.0.3-1
links: PTS, VCS
area: main
in suites: sid
size: 4,683,320 kB
sloc: cpp: 7,607,359; javascript: 6,533,295; ansic: 3,775,223; python: 1,415,500; xml: 634,561; asm: 438,949; java: 186,241; sh: 62,752; makefile: 18,079; objc: 13,092; perl: 12,808; yacc: 4,583; cs: 3,846; pascal: 3,448; lex: 1,720; ruby: 1,003; php: 436; lisp: 258; awk: 247; sql: 66; sed: 54; csh: 10; exp: 6
file content (243 lines) | stat: -rw-r--r-- 9,658 bytes
parent folder | download | duplicates (12)
// This library supports HTML5lib-style test cases.
//
// The HTML5lib test case format describes an actual DOM tree. For testing, and
// in particular for testing of DOM parsers and DOM parser-related
// functionality, this has the advantage of being able to represent edge cases.
//
// Example: If `.replaceWithChildren` is called on the `<span>` element as a
// result of parsing `"<p>Hello<span>World</span></p>"`, then this results in
// a tree with two adjacent text nodes. This behaviour will affect subsequent
// DOM operations and should thus be tested. The HTML5lib format makes it easy
// to describe the expected result unambiguously.
//
// References:
// - HTML5lib: https://github.com/html5lib
// - HTML5lib testcases: https://github.com/html5lib/html5lib-tests/tree/master/tree-construction
// - test case format description:
// https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
//
// The main "API" is:
//
// - parse_html5lib_testcases(string)
//   This returns an array of dictionaries, where each dictionary contains the
//   text of the test file, keyed by the lines starting with a hashtag.
//
//   E.g. #data\nbla results in [{data: "bla"}].
//
// - html5lib_testcases_from_script()
//   Wrapper for parse_html5lib_testcases that gets the test data from a script
//   element with type "html5lib-tests". This allows specifying the test data
//   in the test file, but requires working around closing script tags.
//
// - html5lib_testcases_from_response(response_promise)
//   Wrapper for parse_html5lib_testcases that gets the data from a Response
//   Promise, as is returned from `fetch()`, and returns a Promise for the array
//   of testcases. This allows getting the test data from a text resource.
//
// - build_node_tree(node, documentstr)
//   This builds a node tree from the "#document" string from a testcase, and
//   appends it to the node argument. Returns node.
//
// - assert_subtree_equals(node1, node2)
//   Asserts that the child trees of node1 and node2 are equal. This
//   recursively descends the trees.
//
// - assert_testcase(node, testcase)
//   Wrapper for build_node_tree and assert_subtree_equals, for use with a
//   result of parse_html5lib_testcases.
//

// Parses the test cases embedded in the current document.
//
// Looks up the first <script type="html5lib-tests"> element, takes its text
// content and hands it to parse_html5lib_testcases. Returns whatever that
// parser returns. Throws if no such script element exists.
function html5lib_testcases_from_script() {
  const test_script = document.querySelector("script[type='html5lib-tests']");
  const test_source = test_script.textContent;
  return parse_html5lib_testcases(test_source);
}

// Parses test cases delivered by a Response promise (e.g. from `fetch()`).
//
// response_promise: a Promise that resolves to an object with a `text()`
//   method.
// Returns a Promise for the result of parse_html5lib_testcases applied to the
// response text.
function html5lib_testcases_from_response(response_promise) {
  const text_promise = response_promise.then(function (response) {
    return response.text();
  });
  return text_promise.then(text => parse_html5lib_testcases(text));
}

// Finalizes one test case and appends it to the list.
//
// testcases: array that collects the finished test cases.
// current: dictionary mapping section names to arrays of lines. It is
//   modified in place: each array is replaced by its lines joined with "\n".
//
// A dictionary without any sections is not appended.
function add_html5lib_testcase(testcases, current) {
  for (const [section, lines] of Object.entries(current)) {
    current[section] = lines.join("\n");
  }
  const has_sections = Object.keys(current).length > 0;
  if (has_sections) {
    testcases.push(current);
  }
}

// Parses the text of an HTML5lib-style test file.
//
// content: the full text of the test file.
// Returns an array of dictionaries, one per test case. Each dictionary maps a
// section name (the text after "#" on a section line) to the lines of that
// section, joined with "\n".
//
// Test cases are separated by empty lines. The last test case is also
// emitted when the content does not end with an empty line, so that
// "#data\nbla" yields [{data: "bla"}].
function parse_html5lib_testcases(content) {
  const testcases = [];
  let state = undefined;
  let current = {};
  for (const line of content.split("\n")) {
    if (!line) {
      // An empty line ends the current test case.
      add_html5lib_testcase(testcases, current);
      state = undefined;
      current = {};
    } else if (line[0] === "#") {
      // A new section starts; its name is the rest of the line.
      state = line.substring(1);
      current[state] = [];
    } else if (state) {
      current[state].push(line);
    } else {
      // Content before any section header is ignored.
      // Error handling is for another day.
    }
  }
  // Flush a trailing test case that was not followed by an empty line.
  // add_html5lib_testcase skips empty dictionaries, so this adds nothing when
  // the content already ended with an empty line.
  add_html5lib_testcase(testcases, current);
  return testcases;
}

// Descends `level` steps from `node` and returns the node found there.
//
// Each step moves to the last child of the current node, except for HTML
// <template> elements, where the step moves to the template's content
// fragment instead. With level 0, `node` itself is returned.
function get_child_at(node, level) {
  // The counter is declared locally; an undeclared counter would leak into
  // the global scope and throw in strict mode.
  for (let depth = 0; depth < level; depth++) {
    if (is_html_template(node)) {
      // For <template>, continue with the content fragment.
      node = node.content;
    } else {
      node = node.lastChild;
    }
  }
  return node;
}

// Appends `child` to the node that get_child_at(node, level) resolves to.
function append_child_at(node, level, child) {
  const parent = get_child_at(node, level);
  parent.appendChild(child);
}

// Tells whether `node` looks like an element, i.e. has both a tag name and a
// namespace URI.
//
// The result is meant to be used as a condition: it is the falsy `tagName`
// value if there is none, and otherwise the `namespaceURI` value.
function is_element(node) {
  const tag_name = node.tagName;
  return tag_name ? node.namespaceURI : tag_name;
}

// Tells whether `node` is a <template> element in the HTML namespace.
//
// For non-elements, the falsy result of is_element is passed through;
// otherwise the result is a boolean.
function is_html_template(node) {
  const element_check = is_element(node);
  if (!element_check) {
    return element_check;
  }
  if (node.tagName != "TEMPLATE") {
    return false;
  }
  return node.namespaceURI == "http://www.w3.org/1999/xhtml";
}

// Creates an element from a tag name and an optional namespace designator.
//
// From the test case format description: For the HTML namespace, the
// namespace designator is the empty string, i.e. there's no prefix. For the
// SVG namespace, the namespace designator is "svg ". For the MathML
// namespace, the namespace designator is "math ".
//
// name: the tag name.
// maybe_namespace: "svg ", "math ", or a falsy value for an HTML element.
// Any other designator is reported through assert_unreached.
function create_element(name, maybe_namespace) {
  if (!maybe_namespace) {
    return document.createElement(name);
  }

  const namespace_by_designator = new Map([
    ["svg ", "http://www.w3.org/2000/svg"],
    ["math ", "http://www.w3.org/1998/Math/MathML"],
  ]);
  const namespace_uri = namespace_by_designator.get(maybe_namespace);
  if (namespace_uri) {
    return document.createElementNS(namespace_uri, name);
  }

  assert_unreached(`Invalid element name: "${maybe_namespace}${name}"`);
}

// Sets an attribute on `node` from a local name and an optional namespace
// designator.
//
// From the test case format description: The attribute name string is the
// local name prefixed by a namespace designator. For no namespace, the
// namespace designator is the empty string, i.e. there's no prefix. For the
// XLink namespace, the namespace designator is "xlink ". [Likewise, "xml "
// and "xmlns ".]
//
// node: the element to modify.
// name: the attribute's local name.
// maybe_namespace: "xlink ", "xml ", "xmlns ", or a falsy value for an
//   attribute without namespace.
// value: the attribute value.
// Any other designator is reported through assert_unreached.
function set_attribute(node, name, maybe_namespace, value) {
  if (maybe_namespace === "xlink ") {
    node.setAttributeNS("http://www.w3.org/1999/xlink", name, value);
  } else if (maybe_namespace === "xml ") {
    node.setAttributeNS("http://www.w3.org/XML/1998/namespace", name, value);
  } else if (maybe_namespace === "xmlns ") {
    // setAttributeNS rejects the XMLNS namespace with a NamespaceError unless
    // the qualified name is "xmlns" or carries the "xmlns" prefix. A local
    // name like "xlink" therefore has to be passed as "xmlns:xlink".
    const qualified_name = name === "xmlns" ? name : `xmlns:${name}`;
    node.setAttributeNS("http://www.w3.org/2000/xmlns/", qualified_name, value);
  } else if (!maybe_namespace) {
    node.setAttribute(name, value);
  } else {
    assert_unreached(`Invalid attribute name: "${maybe_namespace}${name}"`);
  }
}

// Builds a node tree from the "#document" section of a test case and appends
// it to `root`.
//
// Format described here:
// https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
//
// root: the node that receives the tree.
// docstr: the "#document" text. Every line must start with "| ", followed by
//   two spaces of indentation per tree level and the node description.
// Returns root.
//
// Lines that cannot be interpreted are reported through assert_unreached.
function build_node_tree(root, docstr) {
  // Special-case empty string: Don't build anything.
  // (Happens for test docs that cause parse errors, but also for genuinely
  // empty expectation documents.)
  if (!docstr) return root;

  for (const line of docstr.split("\n")) {
    const line_match = line.match(/^\| ( *)(.*)/);
    if (!line_match) {
      // Without the "| " prefix the line is not a node description. Report it
      // as malformed test data rather than failing on a null match.
      assert_unreached(
          `Unknown line type. Maybe test data is malformed. ("${line}")`);
      continue;
    }
    const [, indent, remainder] = line_match;
    const level = indent.length / 2;
    // Declared locally; an undeclared `match` would leak into the global
    // scope and throw in strict mode.
    let match;
    if ((match = remainder.match(/^<([a-z]* )?([a-zA-Z0-9_-]*)>$/))) {
      // `Element nodes must be represented by a "<, the tag name string, ">".`
      append_child_at(root, level, create_element(match[2], match[1]));
    } else if ((match = remainder.match(/^"([^"]*)"$/))) {
      // `Text nodes must be the string, in double quotes.`
      append_child_at(root, level, document.createTextNode(match[1]));
    } else if ((match = remainder.match(/^([a-z]* )?(.*)="(.*)"$/))) {
      // `Attribute nodes must have the attribute name string, then an "=" sign,
      // then the attribute value in double quotes (").`
      set_attribute(get_child_at(root, level), match[2], match[1], match[3]);
    } else if ((match = remainder.match(/^<!--(.*)-->$/))) {
      // `Comments must be "<" then "!-- " then the data then " -->".`
      // NOTE(review): the captured data includes the spaces next to the
      // delimiters; confirm whether the test data relies on that.
      append_child_at(root, level, document.createComment(match[1]));
    } else if ((match = remainder.match(
        /^<!DOCTYPE ([^ ]*)( "([^"]*)"( "([^"]*)")?)?>$/))) {
      // `DOCTYPEs must be "<!DOCTYPE " then [... bla bla ...]`
      // The public and system identifiers are optional. Missing ones become
      // the empty string; passing undefined would be converted to the string
      // "undefined".
      append_child_at(root, level,
          document.implementation.createDocumentType(
              match[1], match[3] ?? "", match[5] ?? ""));
    } else if ((match = remainder.match(/^<\?([a-z]*)( (.*))>$/))) {
      // `Processing instructions must be "<?", then the target, then [...]`
      // The "?" is escaped so that it matches literally instead of making
      // the "<" optional.
      append_child_at(root, level, document.createProcessingInstruction(
          match[1], match[3]));
    } else if (remainder === "content") {
      // Template contents are represented by the string "content" with the
      // children below it.
      // Nothing to do here; so let's just check we're actually in a template.
      assert_true(is_html_template(get_child_at(root, level)),
          "\"content\" only expected as child of a <template>.");
    } else {
      assert_unreached(
          `Unknown line type. Maybe test data is malformed. ("${line}")`);
    }
  }
  return root;
}

// Asserts that the trees below node1 and node2 are equal.
//
// Both trees are walked in parallel with NodeIterators and compared node by
// node using isEqualNode. The two root nodes themselves are not compared, so
// that the same expectation can be used with different context node types.
// The contents of HTML <template> elements are compared recursively.
//
// Failures are reported through assert_true / assert_false.
function assert_subtree_equals(node1, node2) {
  // Iterate in parallel over both trees.
  const tree1 = document.createNodeIterator(node1);
  const tree2 = document.createNodeIterator(node2);
  // Skip the root/context node, so that we can re-use the test with different
  // context types.
  tree1.nextNode();
  tree2.nextNode();

  let current1 = tree1.nextNode();
  let current2 = tree2.nextNode();
  while (current1) {
    // Conceptually, we only want to check whether a.isEqualNode(b). But that
    // yields terrible error messages ("expected true but got false"). With
    // this being a test suite and all, let's invest a bit of effort into nice
    // error messages.
    if (!current1.isEqualNode(current2)) {
      // Collect the path from the root down to the mismatching node.
      const path = [];
      for (let current = current1; current; current = current.parentNode) {
        path.unshift(is_element(current) ? `<${current.tagName}>` : `${current}`);
      }
      const breadcrumbs = path.join(" / ");
      assert_true(false,
          `${current1}.isEqual(${current2}) fails. Path: ${breadcrumbs}.`);
    }

    // NodeIterator does not recurse into template contents. So we need to do
    // this manually.
    if (is_html_template(current1) && is_html_template(current2)) {
      assert_subtree_equals(current1.content, current2.content);
    }

    current1 = tree1.nextNode();
    current2 = tree2.nextNode();
  }

  // Ensure that both iterators have come to an end.
  assert_false(!!current2, "Additional nodes at the end of node2.\n");
}

// Asserts that the children of `node` match the expectation of a test case.
//
// node: the node whose subtree is checked.
// testcase: one entry from the result of parse_html5lib_testcases. Its
//   "document" section describes the expected tree; its optional
//   "document-fragment" section names the context element to build the
//   expectation in (default: "div").
function assert_testcase(node, testcase) {
  const context_name = testcase["document-fragment"] ?? "div";
  const expected = build_node_tree(
      document.createElement(context_name), testcase.document);
  assert_subtree_equals(node, expected);
}