1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
|
package htmlquery
import (
"compress/gzip"
"fmt"
"io/ioutil"
"net/http"
"net/http/httptest"
"os"
"strings"
"sync"
"testing"
"github.com/antchfx/xpath"
"golang.org/x/net/html"
)
// htmlSample is the HTML fixture used by the tests in this file. It holds a
// small page with a header (including a comment node), a navigation list of
// three links, an article and a footer.
const htmlSample = `<!DOCTYPE html><html lang="en-US">
<head>
<title>Hello,World!</title>
</head>
<body>
<div class="container">
<header>
<!-- Logo -->
<h1>City Gallery</h1>
</header>
<nav>
<ul>
<li><a href="/London">London</a></li>
<li><a href="/Paris">Paris</a></li>
<li><a href="/Tokyo">Tokyo</a></li>
</ul>
</nav>
<article>
<h1>London</h1>
<img src="pic_mountain.jpg" alt="Mountain View" style="width:304px;height:228px;">
<p>London is the capital city of England. It is the most populous city in the United Kingdom, with a metropolitan area of over 13 million inhabitants.</p>
<p>Standing on the River Thames, London has been a major settlement for two millennia, its history going back to its founding by the Romans, who named it Londinium.</p>
</article>
<footer>Copyright © W3Schools.com</footer>
</div>
</body>
</html>
`
// testDoc is htmlSample parsed once at package initialization through
// loadHTML, which panics if parsing fails.
var testDoc = loadHTML(htmlSample)
// BenchmarkSelectorCache times repeated getQuery calls for one fixed XPath
// expression while DisableSelectorCache is false.
func BenchmarkSelectorCache(b *testing.B) {
	// DisableSelectorCache is package-level state; put the previous value
	// back so this benchmark does not influence whatever runs after it.
	prev := DisableSelectorCache
	defer func() { DisableSelectorCache = prev }()

	DisableSelectorCache = false
	for i := 0; i < b.N; i++ {
		getQuery("/AAA/BBB/DDD/CCC/EEE/ancestor::*")
	}
}
// BenchmarkDisableSelectorCache times repeated getQuery calls for one fixed
// XPath expression while DisableSelectorCache is true.
func BenchmarkDisableSelectorCache(b *testing.B) {
	// DisableSelectorCache is package-level state; without restoring it the
	// flag would stay true for every test or benchmark that runs afterwards.
	prev := DisableSelectorCache
	defer func() { DisableSelectorCache = prev }()

	DisableSelectorCache = true
	for i := 0; i < b.N; i++ {
		getQuery("/AAA/BBB/DDD/CCC/EEE/ancestor::*")
	}
}
// TestSelectorCache sets SelectorCacheMaxEntries to 2, calls getQuery with
// three distinct expressions and then repeats the last one. It makes no
// assertions; it only checks that these calls complete without panicking.
// NOTE(review): presumably this is meant to exercise cache eviction — confirm
// against the getQuery implementation.
func TestSelectorCache(t *testing.T) {
	// SelectorCacheMaxEntries is package-level state; restore it so the
	// reduced limit does not leak into later tests.
	prev := SelectorCacheMaxEntries
	defer func() { SelectorCacheMaxEntries = prev }()

	SelectorCacheMaxEntries = 2
	for i := 1; i <= 3; i++ {
		getQuery(fmt.Sprintf("//a[position()=%d]", i))
	}
	getQuery("//a[position()=3]")
}
// TestLoadURL serves htmlSample from a local httptest server and fails if
// LoadURL returns an error for that server's URL.
func TestLoadURL(t *testing.T) {
	serveSample := func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprint(w, htmlSample)
	}
	srv := httptest.NewServer(http.HandlerFunc(serveSample))
	defer srv.Close()

	if _, err := LoadURL(srv.URL); err != nil {
		t.Fatal(err)
	}
}
// TestLoadURLWithGzipResponse serves htmlSample gzip-compressed, with a
// "Content-Encoding: gzip" header, from a local httptest server and fails if
// LoadURL returns an error for that server's URL.
func TestLoadURLWithGzipResponse(t *testing.T) {
	serveGzipped := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Add("Content-Encoding", "gzip")
		zw := gzip.NewWriter(w)
		// Closing the gzip writer flushes the compressed stream to w.
		defer zw.Close()
		fmt.Fprint(zw, htmlSample)
	}
	srv := httptest.NewServer(http.HandlerFunc(serveGzipped))
	defer srv.Close()

	if _, err := LoadURL(srv.URL); err != nil {
		t.Fatal(err)
	}
}
// TestLoadDoc writes htmlSample to a temporary file and fails if LoadDoc
// returns an error when given that file's path.
func TestLoadDoc(t *testing.T) {
	tempHTMLdoc, err := ioutil.TempFile("", "sample_*.html")
	if err != nil {
		t.Fatal(err)
	}
	tempHTMLFilename := tempHTMLdoc.Name()
	// t.Fatal exits via runtime.Goexit, so this cleanup still runs on failure.
	defer os.Remove(tempHTMLFilename)

	// A failed or short write would otherwise surface later as a confusing
	// LoadDoc result, so report it here.
	if _, err := tempHTMLdoc.WriteString(htmlSample); err != nil {
		tempHTMLdoc.Close()
		t.Fatal(err)
	}
	// Close before loading so LoadDoc reads a complete, released file.
	if err := tempHTMLdoc.Close(); err != nil {
		t.Fatal(err)
	}

	if _, err := LoadDoc(tempHTMLFilename); err != nil {
		t.Fatal(err)
	}
}
// TestNavigator builds a NodeNavigator positioned on the <html> element of
// testDoc and drives it through child, sibling, parent and attribute moves,
// checking the navigator's reported position after each group of moves.
func TestNavigator(t *testing.T) {
	top := FindOne(testDoc, "//html")
	nav := &NodeNavigator{curr: top, root: top, attr: -1}

	nav.MoveToChild() // <head>
	nav.MoveToNext()  // expected to land on the text node after <head>
	if nav.NodeType() != xpath.TextNode {
		t.Fatalf("expectd node type is TextNode,but got %v", nav.NodeType())
	}

	nav.MoveToNext() // <body>
	if nav.Value() != InnerText(FindOne(testDoc, "//body")) {
		t.Fatal("body not equal")
	}

	nav.MoveToPrevious() // back to the text node
	nav.MoveToParent()   // <html>
	if nav.curr != top {
		t.Fatal("current node is not html node")
	}

	nav.MoveToNextAttribute() // first attribute of <html>
	if nav.LocalName() != "lang" {
		t.Fatal("node not move to lang attribute")
	}

	nav.MoveToParent()
	nav.MoveToFirst() // <!DOCTYPE html>
	if nav.curr.Type != html.DoctypeNode {
		t.Fatalf("expected node type is DoctypeNode,but got %d", nav.curr.Type)
	}
}
// TestXPath checks attribute selection on <html>, that InnerText of <header>
// omits comment text while OutputHTML includes it, and that an attribute
// query ("//a[1]/@href") yields a node whose InnerText is the attribute value.
func TestXPath(t *testing.T) {
	node := FindOne(testDoc, "//html")
	if SelectAttr(node, "lang") != "en-US" {
		t.Fatal("//html[@lang] != en-Us")
	}

	node = FindOne(testDoc, "//header")
	// Contains also catches a match at offset 0, which an "Index > 0"
	// comparison would let through.
	if strings.Contains(InnerText(node), "Logo") {
		t.Fatal("InnerText() have comment node text")
	}
	if !strings.Contains(OutputHTML(node, true), "Logo") {
		t.Fatal("OutputHTML() shoud have comment node text")
	}

	link := FindOne(testDoc, "//a[1]/@href")
	if link == nil {
		t.Fatal("link is nil")
	}
	if v := InnerText(link); v != "/London" {
		t.Fatalf("expect value is /London, but got %s", v)
	}
}
// TestXPathCdUp checks that stepping to the parent ("..") of an attribute
// node returns the element that owns the attribute.
func TestXPathCdUp(t *testing.T) {
	doc := loadHTML(`<html><b attr="1"></b></html>`)
	node := FindOne(doc, "//b/@attr/..")
	t.Logf("node = %#v", node)
	if node == nil || node.Data != "b" {
		// The message names the expression that was actually evaluated.
		t.Fatal("//b/@attr/.. != <b></b>")
	}
}
// TestConcurrentQuery runs ten goroutines that each parse a small document
// and query it with FindOne, reporting an error for any goroutine whose query
// returns nil. The test waits for all goroutines before returning.
func TestConcurrentQuery(t *testing.T) {
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			s := `<html><head></head><body><div>a</div></body>`
			doc := loadHTML(s)
			if n := FindOne(doc, `//div`); n == nil {
				// Fatalf/FailNow may only be called from the goroutine running
				// the test; Errorf is safe to call from other goroutines.
				t.Errorf("should find one but got nil [%d]", i)
				return
			}
		}(i)
	}
	wg.Wait()
}
// loadHTML parses str with Parse and returns the resulting node. It panics
// with the parse error if Parse fails, so it is only suitable for fixtures
// that are expected to be well-formed.
func loadHTML(str string) *html.Node {
	doc, parseErr := Parse(strings.NewReader(str))
	if parseErr == nil {
		return doc
	}
	panic(parseErr)
}
|