Adjust RaiPlayPlaylist too and misc minor improvements - pkgsrc-localpatches - leot's pkgsrc LOCALPATCHES
(HTM) hg clone https://bitbucket.org/iamleot/pkgsrc-localpatches
(DIR) Log
(DIR) Files
(DIR) Refs
---
(DIR) changeset d62b4eae0a29ca9ffa5404d9c19ec80e173d4d52
(DIR) parent 5b85de0086d8ab48f8d41ab0db06eb1723dfe852
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sun, 10 Nov 2019 11:43:42
Adjust RaiPlayPlaylist too and misc minor improvements
Diffstat:
net/youtube-dl/patch-youtube__dl_extractor_rai.py | 101 +++++++++++++++++----
1 file changed, 82 insertions(+), 19 deletions(-)
---
diff -r 5b85de0086d8 -r d62b4eae0a29 net/youtube-dl/patch-youtube__dl_extractor_rai.py
--- a/net/youtube-dl/patch-youtube__dl_extractor_rai.py Sun Nov 10 10:33:24 2019 +0100
+++ b/net/youtube-dl/patch-youtube__dl_extractor_rai.py Sun Nov 10 11:43:42 2019 +0100
@@ -1,13 +1,24 @@
$NetBSD$
-Preliminary workaround for raiplay.it updates... Only RaiPlay
-and RaiPlayLive were analyzed and adjusted.
+Preliminary workaround for raiplay.it updates... RaiPlay, RaiPlayLive,
+RaiPlayPlaylist were analyzed and adjusted.
--- youtube_dl/extractor/rai.py.orig 2019-11-10 08:50:05.552567084 +0000
+++ youtube_dl/extractor/rai.py
-@@ -124,39 +124,17 @@ class RaiBaseIE(InfoExtractor):
+@@ -30,6 +30,7 @@ class RaiBaseIE(InfoExtractor):
+ _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _GEO_COUNTRIES = ['IT']
+ _GEO_BYPASS = False
++ _BASE_URL = 'https://www.raiplay.it'
+
+ def _extract_relinker_info(self, relinker_url, video_id):
+ if not re.match(r'https?://', relinker_url):
+@@ -122,41 +123,19 @@ class RaiBaseIE(InfoExtractor):
+
+
class RaiPlayIE(RaiBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
+- _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
++ _VALID_URL = r'(?P<url>(?P<base>https?://(?:www\.)?raiplay\.it/.+?-)(?P<id>%s)(?P<ext>\.(?:html|json)))' % RaiBaseIE._UUID_RE
_TESTS = [{
- 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
- 'md5': '340aa3b7afb54bfd14a8c11786450d76',
@@ -48,12 +59,16 @@
},
'params': {
'skip_download': True,
-@@ -171,13 +149,12 @@ class RaiPlayIE(RaiBaseIE):
- url, video_id = mobj.group('url', 'id')
+@@ -168,16 +147,15 @@ class RaiPlayIE(RaiBaseIE):
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+- url, video_id = mobj.group('url', 'id')
++ url, base, video_id, ext = mobj.group('url', 'base', 'id', 'ext')
media = self._download_json(
- '%s?json' % url, video_id, 'Downloading video JSON')
-+ '%s' % url.replace('.html', '.json'), video_id, 'Downloading video JSON')
++ '%s%s.json' % (base, video_id), video_id, 'Downloading video JSON')
title = media['name']
-
@@ -64,16 +79,16 @@
self._sort_formats(relinker_info['formats'])
thumbnails = []
-@@ -185,7 +162,7 @@ class RaiPlayIE(RaiBaseIE):
+@@ -185,7 +163,7 @@ class RaiPlayIE(RaiBaseIE):
for _, value in media.get('images').items():
if value:
thumbnails.append({
- 'url': value.replace('[RESOLUTION]', '600x400')
-+ 'url': 'https://www.raiplay.it' + value.replace('[RESOLUTION]', '600x400')
++ 'url': urljoin(RaiBaseIE._BASE_URL, value.replace('[RESOLUTION]', '600x400'))
})
timestamp = unified_timestamp(try_get(
-@@ -225,7 +202,7 @@ class RaiPlayLiveIE(RaiBaseIE):
+@@ -225,7 +203,7 @@ class RaiPlayLiveIE(RaiBaseIE):
'display_id': 'rainews24',
'ext': 'mp4',
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
@@ -82,20 +97,14 @@
'uploader': 'Rai News 24',
'creator': 'Rai News 24',
'is_live': True,
-@@ -238,20 +215,31 @@ class RaiPlayLiveIE(RaiBaseIE):
+@@ -238,20 +216,32 @@ class RaiPlayLiveIE(RaiBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ media = self._download_json(
-+ '%s.json' % url, display_id, 'Downloading channel JSON')
-+
-+ title = media['name']
-+ video = media['video']
-+ video_id = media['id'].replace('ContentItem-', '')
-+
-+ relinker_info = self._extract_relinker_info(video['content_url'], video_id)
-+ self._sort_formats(relinker_info['formats'])
++ '%s.json' % urljoin(RaiBaseIE._BASE_URL, 'dirette/' + display_id),
++ display_id, 'Downloading channel JSON')
- video_id = self._search_regex(
- r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
@@ -105,6 +114,13 @@
- '_type': 'url_transparent',
- 'ie_key': RaiPlayIE.ie_key(),
- 'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
++ title = media['name']
++ video = media['video']
++ video_id = media['id'].replace('ContentItem-', '')
++
++ relinker_info = self._extract_relinker_info(video['content_url'], video_id)
++ self._sort_formats(relinker_info['formats'])
++
+ info = {
'id': video_id,
'display_id': display_id,
@@ -123,3 +139,50 @@
class RaiPlayPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
+@@ -260,7 +250,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
+ 'info_dict': {
+ 'id': 'nondirloalmiocapo',
+ 'title': 'Non dirlo al mio capo',
+- 'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
++ 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
+ },
+ 'playlist_mincount': 12,
+ }]
+@@ -268,21 +258,25 @@ class RaiPlayPlaylistIE(InfoExtractor):
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+- webpage = self._download_webpage(url, playlist_id)
++ media = self._download_json(
++ '%s.json' % urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id),
++ playlist_id, 'Downloading program JSON')
++
++ title = media['name']
++ description = media['program_info']['description']
+
+- title = self._html_search_meta(
+- ('programma', 'nomeProgramma'), webpage, 'title')
+- description = unescapeHTML(self._html_search_meta(
+- ('description', 'og:description'), webpage, 'description'))
++ content_sets = [s['id'] for b in media['blocks'] for s in b['sets']]
+
+ entries = []
+- for mobj in re.finditer(
+- r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
+- webpage):
+- video_url = urljoin(url, mobj.group('path'))
+- entries.append(self.url_result(
+- video_url, ie=RaiPlayIE.ie_key(),
+- video_id=RaiPlayIE._match_id(video_url)))
++ for cs in content_sets:
++ medias = self._download_json(
++ '%s/%s.json' % (urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id), cs),
++ cs, 'Downloading content set JSON')
++ for m in medias['items']:
++ video_url = urljoin(url, m['path_id'])
++ entries.append(self.url_result(
++ video_url, ie=RaiPlayIE.ie_key(),
++ video_id=RaiPlayIE._match_id(video_url)))
+
+ return self.playlist_result(entries, playlist_id, title, description)
+