File: 05-trends.py

package info (click to toggle)
python-pattern 2.6%2Bgit20150109-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 78,672 kB
  • sloc: python: 53,865; xml: 11,965; ansic: 2,318; makefile: 94
file content (41 lines) | stat: -rw-r--r-- 1,398 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os, sys; sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.web import Twitter
from pattern.graph import Graph

# This example demonstrates a simple Twitter miner + visualizer.
# We collect tweets containing "A is the new B", 
# mine A and B and use them as connected nodes in a graph.
# Then we export the graph as a browser visualization.

comparisons = []

for i in range(1,10):
    # Set cached=False for live results:
    for result in Twitter(language="en").search("\"is the new\"", start=i, count=100, cached=True):
        s = result.text
        s = s.replace("\n", " ")
        s = s.lower()
        s = s.replace("is the new", "NEW")
        s = s.split(" ")
        try:
            i = s.index("NEW")
            A = s[i-1].strip("?!.:;,#@\"'")
            B = s[i+1].strip("?!.:;,#@\"'")
            # Exclude common phrases such as "this is the new thing".
            if A and B and A not in ("it", "this", "here", "what", "why", "where"):
                comparisons.append((A,B))
        except:
            pass

g = Graph()
for A, B in comparisons:
    e = g.add_edge(B, A) # "A is the new B": A <= B
    e.weight += 0.1
    print B, "=>", A

# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the largest subgraph for our visualization.
g = g.split()[0]

g.export("trends", weighted=True, directed=True)