File: _web_example2.py

package info (click to toggle)
nodebox-web 1.9.4.6-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 1,904 kB
  • ctags: 1,602
  • sloc: python: 7,582; ansic: 581; xml: 239; makefile: 2
file content (31 lines) | stat: -rw-r--r-- 743 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Parsing web pages.
#
# Example script: checks whether a URL points to a web page, parses the
# page with the "web" library, prints a few of its properties and draws
# the plain text of its <div id="content"> element.
# ximport(), fontsize() and text() are not defined in this script; they
# are expected to be provided by the NodeBox environment running it.

try:
    web = ximport("web")
except ImportError:
    # The library could not be imported under the name "web"; load it
    # through its __init__ module instead (presumably the case when this
    # example is run from inside the library folder -- TODO confirm).
    # Only ImportError is handled here, so unrelated failures and
    # KeyboardInterrupt are not silently swallowed.
    web = ximport("__init__")
    reload(web)

# Reload the html submodule (presumably so that edits made to it during
# development are picked up between runs -- TODO confirm).
reload(web.html)

url = "http://nodebox.net"
print(web.url.is_webpage(url))

# Retrieve the data from the web page and put it in an easy object.
html = web.page.parse(url)

# The actual URL you are redirected to.
# This will be None when the page is retrieved from cache.
print(html.redirect)

# Get the web page title.
print(html.title)

# Get all the links, including internal links in the same site.
print(html.links(external=False))

# Browse through the HTML tree, find <div id="content">,
# strip tags from it and print out the contents.
content = html.find(id="content")
fontsize(10)
text(web.html.plain(content), 20, 20, width=300)