1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
|
# api: streamtuner2
# title: Internet-Radio
# description: Broad list of webradios from all genres.
# type: channel
# category: radio
# version: 1.5
# url: http://www.internet-radio.com/
# config:
# { name: internetradio_max_pages, type: int, value: 5, category: limit, description: How many pages to fetch and read. }
# priority: standard
# png:
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABHNCSVQICAgIfAhkiAAAAaZJREFUOI2N0j1PlEEUBeBnlsVoRJcCPwINxIJGAoWVFbVK4g8w
# oUUTFRNbG3+FhVErK36BhcYCrTR8FS4mRGUXFEKCUizRwLXgnfV1Awk3mUzmnHPvPXNnUkSE40RKFYxhHKMYxFb1GIlnMLVN/etBUjuGWDm6wEHHyQbLW/Qd
# JTu8QEq9mJlnogz3sHOJV3iHz2iKzuDiH+bm+J3XD74EU5Gc7pSn/4aYUi14s8BIhkZ5puKhvdgpNFVccaA5oaP7TO66SCuYKnG9weMmjaz5yadqqfvkPE/z
# 8TLTIp4U3I01ljY5f/gQu1LPGvWS7Rel5NtLzOzRlfk+Ngd4i48Ke9PZVpNGUCvwawvsZm6db8GtoLt9s4iIFotZFEwXybU1VjO+z4egv/MVKlIaqTMMJ2nh
# eeH0wQYX4BwbiQkRTSmdktJ1KY3lGYznuw3zWsS2lLo2WMl4P49ycjCbn3k0pfuVg5m1432e4zr9UGMbLwv8avmP/OJOBQOlAsvF3hYNMititzg2Kuxn7iyr
# VbSG/tltFHt3CVvATRBRH0lpEvfwXXL3L9zE/NEe0EfHAAAAAElFTkSuQmCC
# extraction-method: regex, dom
#
# Internet-Radio.co.uk/.com is one of the largest stream directories.
# Available music genre classifications are mirrored verbatim and flatly.
#
# The new version of this plugin alternates between PyQuery and Regex
# station extraction. Both overlook some paid or incomplete entries.
# HTTP retrieval happens in one batch, determined by the number of pages
# setting, rather than the global max_streams option.
#
from channels import *
import re
from config import *
import ahttp
from pq import pq
# streams and gui
# Channel plugin for the internet-radio.com station directory.
# Genres come from the site's front page; station lists come from the
# per-genre or search result pages and are extracted from the HTML table
# rows with either a regex or PyQuery (see from_html).
class internet_radio(ChannelPlugin):

    # control data
    listformat = "pls"
    categories = []
    base_url = "https://www.internet-radio.com/"
    has_search = True

    def update_categories(self):
        """Load the genre list from the front page into ``self.categories``.

        Genre names are taken from the text of all ``/stations/<genre>/``
        links, capitalized, de-duplicated and sorted.
        """
        html = ahttp.get(self.base_url)
        rx_genre = re.compile(r'="/stations/[-+&.\w\s%]+/">([^<]+)<')
        # A set comprehension removes duplicates after capitalization,
        # which makes a separate de-duplication pass unnecessary.
        self.categories = sorted({s.capitalize() for s in rx_genre.findall(html)})

    def update_streams(self, cat, search=None):
        """Fetch the station list for a genre or for a search term.

        cat    -- genre name; must be in ``self.categories`` unless
                  `search` is given.
        search -- optional search term; takes precedence over `cat`.

        Retrieves up to ``conf.internetradio_max_pages`` result pages
        (at least one), stopping early when a page carries no link to
        its successor. Returns a list of station dicts, or an empty list
        for an unknown genre.
        """
        if not search and cat not in self.categories:
            return []

        # NOTE(review): this only recognises "/stations/.../pageN" links;
        # whether search result pages use the same link format is not
        # visible here -- verify if multi-page search results are wanted.
        rx_pages = re.compile(r'href="/stations/[-+\w%\d\s]+/page(\d+)">\d+</a>')

        # Fetch multiple pages at once
        html = []
        max_pages = max(int(conf.internetradio_max_pages), 1)
        # range() excludes its stop value, hence the +1: otherwise only
        # max_pages-1 pages were read, and none at all for a setting of 1.
        for page in range(1, max_pages + 1):

            # The first page has no page suffix in its URL
            if search:
                suffix = "&page=%s" % page if page > 1 else ""
                url = "%ssearch/?radio=%s%s" % (self.base_url, search, suffix)
            else:
                suffix = "page%s" % page if page > 1 else ""
                url = "%sstations/%s/%s" % (
                    self.base_url, cat.lower().replace(" ", "%20"), suffix
                )
            html.append(ahttp.get(url))

            # Is there a next page?
            if str(page + 1) not in rx_pages.findall(html[-1]):
                break
            self.parent.status(float(page) / float(max_pages + 1), timeout=1)

        # Alternatively try regex or pyquery parsing
        #log.HTTP(html)
        entries = self.from_html(html)

        # fin
        log.FINISHED("internet_radio.update_streams")
        return entries

    # Switch update method
    @use_rx
    def from_html(self, html, use_rx):
        """Extract stations from a list of HTML pages.

        Uses the regex extractor when `use_rx` is true, the DOM
        extractor otherwise.
        """
        if use_rx:
            return self.with_regex(html)
        return self.with_dom(html)

    def with_regex(self, html):
        """Extract station dicts from a list of HTML pages via regex."""
        log.PROC("internet-radio, regex")
        r = []
        html = "\n".join(html)

        # Break up into <tr> blocks before extracting bits
        rx_tr = re.compile(r"""<tr[^>]*>(.+?)</tr>""", re.S)
        rx_data = re.compile(r"""
            playjp',\s*'(https?://[^'">]+)
            .*? <h4.*?>([^<>]+)</
            .*? <b>([^<>]*)</b>
            (?: .*? href="(.*?)" )?
            (?: .*? Genres:((?:</?a[^>]+>|\w+|\s+)+) )?
            .*? (\d+)\s*Listeners
            .*? (\d+)\s*Kbps
        """, re.S|re.X)

        for div in rx_tr.findall(html):
            # The pagination row is a table row too, but not a station
            if 'id="pagination"' in div:
                continue
            #log.DATA(len(div))
            uu = rx_data.search(div)
            if not uu:
                log.DATA("Regex couldn't decipher entry:", div)
                continue
            (url, title, playing, homepage, genres, listeners, bitrate) = uu.groups()

            # transform data
            r.append({
                "url": url,
                "genre": strip_tags(genres or ""),
                "homepage": ahttp.fix_url(homepage or ""),
                "title": nl(title or ""),
                "playing": nl(playing or ""),
                "bitrate": int(bitrate or 0),
                "listeners": int(listeners or 0),
                # The listing carries no stream format info, so the
                # default of MP3 is assumed here.
                "format": "audio/mpeg",
            })
        return r

    def with_dom(self, html_list):
        """Extract station dicts from a list of HTML pages via PyQuery."""
        log.PROC("internet-radio, dom")
        rx_numbers = re.compile(r"(\d+)")
        # Accept https as well as http, consistent with with_regex()
        rx_url = re.compile(r"""(https?://[^'">]+)""")
        r = []
        for html in html_list:
            # the streams are arranged in table rows
            doc = pq(html)
            for tr in (pq(e) for e in doc("tr")):
                #log.HTML(tr)

                # bitrate/listeners; the " 0 0" padding guarantees that
                # at least two numbers are found
                bl = tr.find("p")
                if bl:
                    bl = rx_numbers.findall(str(bl.text()) + " 0 0")
                else:
                    bl = [0, 0]

                # stream url, taken from the onclick handler around the
                # first <i> element
                onclick = tr.find("i").eq(0).parent().attr("onclick")
                found = rx_url.search(onclick) if onclick else None
                url = found.group(0) if found else ""

                entry = {
                    "title": tr.find("h4").text(),
                    "homepage": ahttp.fix_url(tr.find("a.small").attr("href") or ""),
                    "url": url,
                    "genre": tr.find("a[href^='/stations/']").text() or "",
                    "listeners": int(bl[0]),
                    "bitrate": int(bl[1]),
                    "format": "audio/mpeg",
                    "playing": tr.find("b").text(),
                }
                #log.DATA(entry)
                r.append(entry)
        return r
|