File: 09-web.py

package info (click to toggle)
python-pattern 2.6%2Bgit20180818-2
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 93,888 kB
  • sloc: python: 28,119; xml: 15,085; makefile: 194
file content (46 lines) | stat: -rw-r--r-- 1,454 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from __future__ import print_function
from __future__ import unicode_literals

from builtins import str, bytes, dict, int
from builtins import range

import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.web import Bing, plaintext
from pattern.en import parsetree
from pattern.search import Pattern
from pattern.db import Datasheet, pprint

# "X IS MORE IMPORTANT THAN Y"
# Here is a rough example of how to build a web miner.
# It mines comparative statements from Bing and stores the results in a table,
# which can be saved as a text file for further processing later on.

# Pattern matching also works with Sentence objects from the MBSP module.
# MBSP's parser is much more robust (but also slower).
#from MBSP import Sentence, parse

# Bing search query; the double quotes are part of the query string itself.
q = '"more important than"'
# Search pattern, compiled directly from its string form.
# Constraint 0 is the leading NP and constraint 5 the trailing NP.
p = Pattern.fromstring("NP VP? more important than NP")
# Table collecting one (left, right) row per match.
d = Datasheet()

engine = Bing(license=None)
for page in range(1):  # max=10
    hits = engine.search(q, start=page + 1, count=100, cached=True)
    for hit in hits:
        # Strip markup from the result description, then parse it.
        tree = parsetree(plaintext(hit.description))
        for match in p.search(tree):
            # Last constituent matched by the left NP constraint,
            # first constituent matched by the right NP constraint.
            left = match.constituents(constraint=0)[-1]
            right = match.constituents(constraint=5)[0]
            d.append((left.string.lower(), right.string.lower()))

pprint(d)

print("")
print("%s results." % len(d))