// Parser smoke tests: one inline HTML document plus pages fetched from several live websites.
use html_parser::Dom;
use indoc::indoc;
/// Feeds a small hand-written HTML document to `Dom::parse` and checks that
/// parsing reports success.
///
/// The document contains a doctype, a `<head>` with meta tags, a title and
/// inline CSS, and a `<body>` with a heading, an HTML comment and an inline
/// script.
#[test]
fn it_can_parse_simple() {
    // Raw string literal: the double quotes inside the markup need no escaping.
    let document = indoc! {r#"
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
<style>
body {
background: black;
}
h1 {
color: white;
}
</style>
</head>
<body>
<h1>Hello world</h1>
<!-- There should be more text here -->
<script>
const title = document.querySelector("h1")
title.innerText = "Hello from script"
</script>
</body>
</html>
"#};

    // Only success/failure is checked; the resulting DOM is not inspected.
    let outcome = Dom::parse(document);
    assert!(outcome.is_ok());
}
/// Downloads `https://www.spotify.com/se` with a blocking HTTP GET and checks
/// that `Dom::parse` accepts the returned body.
///
/// Panics (failing the test) if the request fails or the body cannot be read
/// as text. Unlike the other live-site tests in this file, this one is not
/// marked `#[ignore]`, so it runs as part of a normal test run.
#[test]
fn it_can_parse_spotify() {
    let response = reqwest::blocking::get("https://www.spotify.com/se").unwrap();
    let body = response.text().unwrap();

    // Only success/failure is checked; the resulting DOM is not inspected.
    let outcome = Dom::parse(&body);
    assert!(outcome.is_ok());
}
/// Downloads `https://www.facebook.com/` with a blocking HTTP GET and checks
/// that `Dom::parse` accepts the returned body.
///
/// Marked `#[ignore]`, so it only runs when ignored tests are requested
/// (e.g. `cargo test -- --ignored`). Panics if the request fails or the body
/// cannot be read as text.
#[ignore]
#[test]
fn it_can_parse_facebook() {
    let response = reqwest::blocking::get("https://www.facebook.com/").unwrap();
    let body = response.text().unwrap();

    // Only success/failure is checked; the resulting DOM is not inspected.
    let outcome = Dom::parse(&body);
    assert!(outcome.is_ok());
}
/// Downloads `https://www.amazon.com/` with a blocking HTTP GET and checks
/// that `Dom::parse` accepts the returned body.
///
/// Marked `#[ignore]`, so it only runs when ignored tests are requested
/// (e.g. `cargo test -- --ignored`). Panics if the request fails or the body
/// cannot be read as text.
#[ignore]
#[test]
fn it_can_parse_amazon() {
    let response = reqwest::blocking::get("https://www.amazon.com/").unwrap();
    let body = response.text().unwrap();

    // Only success/failure is checked; the resulting DOM is not inspected.
    let outcome = Dom::parse(&body);
    assert!(outcome.is_ok());
}
/// Downloads `https://www.apple.com/` with a blocking HTTP GET and checks
/// that `Dom::parse` accepts the returned body.
///
/// Marked `#[ignore]`, so it only runs when ignored tests are requested
/// (e.g. `cargo test -- --ignored`). Panics if the request fails or the body
/// cannot be read as text.
#[ignore]
#[test]
fn it_can_parse_apple() {
    let response = reqwest::blocking::get("https://www.apple.com/").unwrap();
    let body = response.text().unwrap();

    // Only success/failure is checked; the resulting DOM is not inspected.
    let outcome = Dom::parse(&body);
    assert!(outcome.is_ok());
}
/// Downloads `https://www.nytimes.com/` with a blocking HTTP GET and checks
/// that `Dom::parse` accepts the returned body.
///
/// Marked `#[ignore]`, so it only runs when ignored tests are requested
/// (e.g. `cargo test -- --ignored`). Panics if the request fails or the body
/// cannot be read as text.
#[ignore]
#[test]
fn it_can_parse_nytimes() {
    let response = reqwest::blocking::get("https://www.nytimes.com/").unwrap();
    let body = response.text().unwrap();

    // Only success/failure is checked; the resulting DOM is not inspected.
    let outcome = Dom::parse(&body);
    assert!(outcome.is_ok());
}
/// Downloads `https://en.wikipedia.org/wiki/Main_Page` with a blocking HTTP
/// GET and checks that `Dom::parse` accepts the returned body.
///
/// Marked `#[ignore]`, so it only runs when ignored tests are requested
/// (e.g. `cargo test -- --ignored`). Panics if the request fails or the body
/// cannot be read as text.
#[ignore]
#[test]
fn it_can_parse_wikipedia() {
    let response =
        reqwest::blocking::get("https://en.wikipedia.org/wiki/Main_Page").unwrap();
    let body = response.text().unwrap();

    // Only success/failure is checked; the resulting DOM is not inspected.
    let outcome = Dom::parse(&body);
    assert!(outcome.is_ok());
}
|