# encoding: UTF-8
# api: streamtuner2
# title: radio.net
# description: Europe's biggest radio platform
# url: http://radio.net/
# version: 0.6
# type: channel
# category: radio
# png:
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAt0lEQVR42mNgYGD4r+Ar/F/BDwkD+SBxojBMs1mLPBArgGlFqEEENYMNQNLsukIDYkirAvGu
# ABsA1OC6XOP/5f8nwIaYAg0k2gBFsAsgTgcZkvnfDugFEeK9AFKsCPMG0CU6eZJgQ4R1eP8H7LLEivWyFJANQcQCLPBAmkGG4MJohmA6C6QA5gI5OxEUDNII
# MwSvASBFIA3ociCxkWQAKMDICkSQIpgh2LDnSmP80YhsCFEJiRIMADpmeUOpqgjRAAAAAElFTkSuQmCC
# priority: optional
# extraction-method: regex
#
# Radio.net lists around 20,000 worldwide radio stations.
# A maximum of three pages per genre is fetched here, and
# some empty categories are omitted up front.
#
# The website heavily depends on JavaScript, a Flash player,
# and some social tracking cookies. It is still feasible to
# access via a custom JSON extractor, however.
#
# The station lists may need refreshing once in a while,
# because an API key is embedded in each JSON station info URL.
import time
import json
import re
from config import *
from channels import *
import ahttp
import action

# hook special JSON format in to avoid grepping images by generic handler
action.playlist_fmt_prio.insert(5, "rnjs")
action.playlist_content_map.insert(7, ("rnjs", r'"logo175x175rounded"'))
action.extract_playlist.extr_urls["rnjs"] = dict(
    url   = r" (?x) \"streamUrl\" \s*:\s* \"(\w+:\\?/\\?/[^\"]+)\" ",
    title = r" (?x) \"(?:description|seoTitle)\" \s*:\s* \"([^\"]+)\" ",
    unesc = "json",
)
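
# For reference only: the station info JSON documents that these regexes pick
# apart appear to contain keys like the following (layout inferred from the
# patterns above, not an official API description):
#
#   { "streamUrl": "http:\/\/…", "description": "…",
#     "seoTitle": "…", "logo175x175rounded": "http:\/\/…" }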


# Radio.net
#
#  · Uses HTML block-wise regex extraction.
#    → <a href="stationname.radio.net"> <img…> <strong>…</strong> <small>…</small>
#
#  · There's an API key in each page listing, contained in a script block
#    as `apiKey: '…'`.
#
#  · That key is needed for generating the station info JSON URLs:
#    → https://api.radio.net/info/v2/search/station?apikey=…&pageindex=1&station=STNAME
#
#  · To extract these JSON info targets, a custom extraction recipe is injected
#    into the action module.
#    → "streamUrl": and "description": are scanned for.
#
class radionet (ChannelPlugin):

    # control flags
    has_search = False
    audioformat = "audio/mpeg"
    listformat = "rnjs"
    titles = dict(listeners=False, playing="Description")

    # sources
    apiPrefix = "https://api.radio.net/info/v2"
    genre_url = "http://www.radio.net/genre/{}/"
    apiKey = None
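
    # Note: the API key is not fixed; set_key() below scrapes it from any
    # fetched HTML page and stores it here for building station info URLs.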

    # Retrieve cat list and map
    def update_categories(self):
        html = ahttp.get("http://www.radio.net/")
        self.set_key(html)
        ls = re.findall(r"""<li><a class="language-info".*?>([\w\s']+)</a>""", html)
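        # per the plugin description above, the trailing (mostly empty)
        # categories in that list are dropped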
        self.categories = [i for i in ls][0:-18]

    # Fetch entries
    def update_streams(self, cat, search=None):

        # category page, get key
        html = ahttp.get(self.genre_url.format(cat))
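        # follow up to two pagination links ("?p=2", "?p=3") if they are present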
        for p in range(2, 4):
            if html.find('"?p={}">'.format(p)) >= 0:
                html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p))
        self.set_key(html)
        r = []

        # split station blocks
        for row in re.split("""<div class="stationinfo""", html)[1:]:

            # extract text fields
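            # (for illustration, a block looks roughly like
            #  <a href="//STNAME.radio.net/"> <img src="…">
            #  <strong>Title</strong> <small>Description</small>,
            #  reconstructed from the pattern below, not a verbatim page excerpt)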
            d = re.findall(r"""
                <a\s+href="(?:https?:)?(//([\w-]+)\.radio\.net/?)" .*?
                <img\s+src="([^<">]+)" .*?
                <strong[^>]*>(.*?)</strong> .*?
                <small[^>]*>\s*(.*?)\s*</small> .*?
            """, row, re.X|re.S)

            # refurbish extracted strings
            if d and len(d) and len(d[0]) == 5:
                href, name, img, title, desc = d[0]
                r.append(dict(
                    name = name,
                    genre = cat,
                    title = unhtml(title),
                    playing = unhtml(desc),
                    url = self._url(name),
                    homepage = "http:{}".format(href),
                    img = img,
                ))
        return r

    # Patch together JSON station info URL
    def _url(self, name):
        return \
            "{}/search/station?apikey={}&pageindex=1&station={}".format(
                self.apiPrefix, self.apiKey, name
            )
            # '?_={time}&' is omitted here, only relevant to jQuery/AJAX,
            # and just made bookmarks.is_in() fail due to randomized URLs

    # extract JavaScript key from any HTML blob (needed for station query)
    def set_key(self, html):
        ls = re.findall(r"""apiKey: '(\w+)'""", html)
        if ls:
            self.apiKey = ls[0]