File: pagetitle.py

package info (click to toggle)
weechat-scripts 20221022-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 4,688 kB
  • sloc: python: 42,639; perl: 24,814; ruby: 2,261; lisp: 338; tcl: 244; javascript: 138; makefile: 14; sh: 9
file content (103 lines) | stat: -rw-r--r-- 2,753 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# pagetitle plugin for weechat-0.3.0
#
#  /pagetitle http://tech.slashdot.org/tech/08/11/12/199215.shtml
#  <user> http://tech.slashdot.org/tech/08/11/12/199215.shtml
#  ('Slashdot | Microsoft's "Dead Cow" Patch Was 7 Years In the Making')
#
# xororand @ irc://irc.freenode.net/#weechat
#
# 2021-06-05, Sébastien Helleu <flashcode@flashtux.org>:
#     version 0.6: make script compatible with Python 3,
#                  rename command /pt to /pagetitle, fix PEP8 errors
# 2009-05-02, Sébastien Helleu <flashcode@flashtux.org>:
#     version 0.5: sync with last API changes

import re
from html import unescape
from http.client import HTTPException
from urllib.error import URLError
from urllib.request import Request, urlopen

import weechat

# Longest title, in characters, that is inserted into a message untruncated.
MAX_TITLE_LENGTH = 100

# A URL ends at the first whitespace character of any kind (space, tab,
# line break), not only at a plain space.
regex_url = re.compile(r"https?://\S+")


def get_page_title(url, timeout=10):
    """Retrieve the HTML <title> from a webpage.

    Only the first 8192 bytes of the response are read, so a title that
    appears later in the document is not found.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on blocking network operations before
            giving up.

    Returns:
        The page title with HTML entities decoded and runs of whitespace
        (including line breaks) collapsed to single spaces, or an empty
        string if the page cannot be fetched or has no <title> element.
    """
    try:
        req = Request(
            url,
            headers={
                "User-agent": "Mozilla/5.0 (weechat/pagetitle)",
            },
        )
        # The context manager closes the connection even if read() fails.
        with urlopen(req, timeout=timeout) as response:
            raw = response.read(8192)
            charset = response.headers.get_content_charset() or "utf-8"
    except (OSError, HTTPException, ValueError):
        # OSError covers URLError/HTTPError and socket timeouts,
        # HTTPException covers malformed HTTP responses and ValueError
        # covers URLs that urllib cannot parse.
        return ""
    try:
        head = raw.decode(charset, errors="ignore")
    except LookupError:
        # The server announced a charset Python does not know.
        head = raw.decode("utf-8", errors="ignore")
    # (?s) lets the title span several lines; [^>]* tolerates attributes.
    match = re.search(r"(?is)<title\b[^>]*>(.*?)</title\s*>", head)
    if not match:
        return ""
    # Collapse whitespace so the title always fits on one message line.
    return " ".join(unescape(match.group(1)).split())


def add_page_titles(data):
    """Add page titles for all URLs of a message.

    Args:
        data: String of the form "<buffer>;<message>"; only the first ";"
            separates the two parts.

    Returns:
        The same "<buffer>;<message>" string in which every URL whose
        title was found is followed by " ('<title>')". Titles longer than
        MAX_TITLE_LENGTH characters are cut and marked with " [...]".
        URLs for which no title could be retrieved are left unchanged.
    """
    buffer, msg = data.split(";", 1)

    def url_replace(match):
        url = match.group()
        title = get_page_title(url)
        if not title:
            # Nothing useful to show: keep the URL as typed rather than
            # appending an empty "('')".
            return url
        if len(title) > MAX_TITLE_LENGTH:
            title = f"{title[:MAX_TITLE_LENGTH]} [...]"
        return f"{url} ('{title}')"

    msg = regex_url.sub(url_replace, msg)
    return f"{buffer};{msg}"


def process_cb(data, command, rc, stdout, stderr):
    """Process callback: say the annotated message in its buffer.

    Args:
        data: Callback data registered with the hook (unused here).
        command: Command that was hooked (unused here).
        rc: Return code reported for the hooked process (unused here).
        stdout: Output of the hooked function, expected to be
            "<buffer>;<message>".
        stderr: Error output of the hooked function (unused here).

    Returns:
        weechat.WEECHAT_RC_OK.
    """
    buffer, separator, msg = stdout.partition(";")
    if not separator:
        # No "<buffer>;<message>" payload (presumably the lookup timed out
        # or failed and the callback was invoked with empty output), so
        # there is nothing to send.
        return weechat.WEECHAT_RC_OK
    weechat.command(buffer, "/say %s" % msg)
    return weechat.WEECHAT_RC_OK


def cmd_pagetitle_cb(data, buffer, args):
    """Handle the /pagetitle command, e.g. "/pagetitle http://foo".

    The buffer and the message are joined as "<buffer>;<message>" and
    handed to add_page_titles through a process hook with a 30 second
    timeout; process_cb is registered as the callback. An empty message
    is rejected with WEECHAT_RC_ERROR.
    """
    if not args:
        return weechat.WEECHAT_RC_ERROR
    payload = f"{buffer};{args}"
    timeout_ms = 30 * 1000
    weechat.hook_process(
        "func:add_page_titles", timeout_ms, "process_cb", payload
    )
    return weechat.WEECHAT_RC_OK


# Register the script with WeeChat before hooking anything.
weechat.register(
    "pagetitle",  # script name
    "xororand",  # author
    "0.6",  # version
    "GPL3",  # license
    """Adds HTML titles to http:// urls in your message.""",
    "",
    "",
)
# Help text of /pagetitle; the example shows the " ('<title>')" suffix in
# the same form add_page_titles produces it.
desc = """\
Sends a message to the current buffer and adds HTML titles to http(s):// URLs.
Example: /pagetitle check this out: http://xkcd.com/364/
<you> check this out: http://xkcd.com/364/ ('xkcd - A webcomic of romance, \
sarcasm, math and language')"""
# Expose the /pagetitle command, handled by cmd_pagetitle_cb.
weechat.hook_command(
    "pagetitle",
    desc,
    "message",
    "message with URL(s)",
    "",
    "cmd_pagetitle_cb",
    "",
)

# vim:set ts=4 sw=4 expandtab nowrap foldmethod=marker: