tccr.it

       Adjust RaiPlayPlaylist too and misc minor improvements - pkgsrc-localpatches - leot's pkgsrc LOCALPATCHES
 (HTM) hg clone https://bitbucket.org/iamleot/pkgsrc-localpatches
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
 (DIR) changeset d62b4eae0a29ca9ffa5404d9c19ec80e173d4d52
 (DIR) parent 5b85de0086d8ab48f8d41ab0db06eb1723dfe852
 (HTM) Author: Leonardo Taccari <iamleot@gmail.com>
       Date:   Sun, 10 Nov 2019 11:43:42 +0100
       
       Adjust RaiPlayPlaylist too and misc minor improvements
       
       Diffstat:
        net/youtube-dl/patch-youtube__dl_extractor_rai.py |  101 +++++++++++++++++----
        1 files changed, 82 insertions(+), 19 deletions(-)
       ---
       diff -r 5b85de0086d8 -r d62b4eae0a29 net/youtube-dl/patch-youtube__dl_extractor_rai.py
       --- a/net/youtube-dl/patch-youtube__dl_extractor_rai.py Sun Nov 10 10:33:24 2019 +0100
       +++ b/net/youtube-dl/patch-youtube__dl_extractor_rai.py Sun Nov 10 11:43:42 2019 +0100
       @@ -1,13 +1,24 @@
        $NetBSD$
        
       -Preliminary workaround for raiplay.it updates... Only RaiPlay
       -and RaiPlayLive were analyzed and adjusted.
       +Preliminary workaround for raiplay.it updates... RaiPlay, RaiPlayLive,
       +RaiPlayPlaylist were analyzed and adjusted.
        
        --- youtube_dl/extractor/rai.py.orig   2019-11-10 08:50:05.552567084 +0000
        +++ youtube_dl/extractor/rai.py
       -@@ -124,39 +124,17 @@ class RaiBaseIE(InfoExtractor):
       +@@ -30,6 +30,7 @@ class RaiBaseIE(InfoExtractor):
       +     _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
       +     _GEO_COUNTRIES = ['IT']
       +     _GEO_BYPASS = False
       ++    _BASE_URL = 'https://www.raiplay.it'
       + 
       +     def _extract_relinker_info(self, relinker_url, video_id):
       +         if not re.match(r'https?://', relinker_url):
       +@@ -122,41 +123,19 @@ class RaiBaseIE(InfoExtractor):
       + 
       + 
         class RaiPlayIE(RaiBaseIE):
       -     _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
       +-    _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
       ++    _VALID_URL = r'(?P<url>(?P<base>https?://(?:www\.)?raiplay\.it/.+?-)(?P<id>%s)(?P<ext>\.(?:html|json)))' % RaiBaseIE._UUID_RE
             _TESTS = [{
        -        'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
        -        'md5': '340aa3b7afb54bfd14a8c11786450d76',
       @@ -48,12 +59,16 @@
                 },
                 'params': {
                     'skip_download': True,
       -@@ -171,13 +149,12 @@ class RaiPlayIE(RaiBaseIE):
       -         url, video_id = mobj.group('url', 'id')
       +@@ -168,16 +147,15 @@ class RaiPlayIE(RaiBaseIE):
       + 
       +     def _real_extract(self, url):
       +         mobj = re.match(self._VALID_URL, url)
       +-        url, video_id = mobj.group('url', 'id')
       ++        url, base, video_id, ext = mobj.group('url', 'base', 'id', 'ext')
         
                 media = self._download_json(
        -            '%s?json' % url, video_id, 'Downloading video JSON')
       -+            '%s' % url.replace('.html', '.json'), video_id, 'Downloading video JSON')
       ++            '%s%s.json' % (base, video_id), video_id, 'Downloading video JSON')
         
                 title = media['name']
        -
       @@ -64,16 +79,16 @@
                 self._sort_formats(relinker_info['formats'])
         
                 thumbnails = []
       -@@ -185,7 +162,7 @@ class RaiPlayIE(RaiBaseIE):
       +@@ -185,7 +163,7 @@ class RaiPlayIE(RaiBaseIE):
                     for _, value in media.get('images').items():
                         if value:
                             thumbnails.append({
        -                        'url': value.replace('[RESOLUTION]', '600x400')
       -+                        'url': 'https://www.raiplay.it' + value.replace('[RESOLUTION]', '600x400')
       ++                        'url': urljoin(RaiBaseIE._BASE_URL, value.replace('[RESOLUTION]', '600x400'))
                             })
         
                 timestamp = unified_timestamp(try_get(
       -@@ -225,7 +202,7 @@ class RaiPlayLiveIE(RaiBaseIE):
       +@@ -225,7 +203,7 @@ class RaiPlayLiveIE(RaiBaseIE):
                     'display_id': 'rainews24',
                     'ext': 'mp4',
                     'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
       @@ -82,20 +97,14 @@
                     'uploader': 'Rai News 24',
                     'creator': 'Rai News 24',
                     'is_live': True,
       -@@ -238,20 +215,31 @@ class RaiPlayLiveIE(RaiBaseIE):
       +@@ -238,20 +216,32 @@ class RaiPlayLiveIE(RaiBaseIE):
             def _real_extract(self, url):
                 display_id = self._match_id(url)
         
        -        webpage = self._download_webpage(url, display_id)
        +        media = self._download_json(
       -+            '%s.json' % url, display_id, 'Downloading channel JSON')
       -+
       -+        title = media['name']
       -+        video = media['video']
       -+        video_id = media['id'].replace('ContentItem-', '')
       -+
       -+        relinker_info = self._extract_relinker_info(video['content_url'], video_id)
       -+        self._sort_formats(relinker_info['formats'])
       ++            '%s.json' % urljoin(RaiBaseIE._BASE_URL, 'dirette/' + display_id),
       ++                        display_id, 'Downloading channel JSON')
         
        -        video_id = self._search_regex(
        -            r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
       @@ -105,6 +114,13 @@
        -            '_type': 'url_transparent',
        -            'ie_key': RaiPlayIE.ie_key(),
        -            'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
       ++        title = media['name']
       ++        video = media['video']
       ++        video_id = media['id'].replace('ContentItem-', '')
       ++
       ++        relinker_info = self._extract_relinker_info(video['content_url'], video_id)
       ++        self._sort_formats(relinker_info['formats'])
       ++
        +        info = {
                     'id': video_id,
                     'display_id': display_id,
       @@ -123,3 +139,50 @@
         
         class RaiPlayPlaylistIE(InfoExtractor):
             _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
       +@@ -260,7 +250,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
       +         'info_dict': {
       +             'id': 'nondirloalmiocapo',
       +             'title': 'Non dirlo al mio capo',
       +-            'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
       ++            'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
       +         },
       +         'playlist_mincount': 12,
       +     }]
       +@@ -268,21 +258,25 @@ class RaiPlayPlaylistIE(InfoExtractor):
       +     def _real_extract(self, url):
       +         playlist_id = self._match_id(url)
       + 
       +-        webpage = self._download_webpage(url, playlist_id)
       ++        media = self._download_json(
       ++            '%s.json' % urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id),
       ++                        playlist_id, 'Downloading program JSON')
       ++
       ++        title = media['name']
       ++        description = media['program_info']['description']
       + 
       +-        title = self._html_search_meta(
       +-            ('programma', 'nomeProgramma'), webpage, 'title')
       +-        description = unescapeHTML(self._html_search_meta(
       +-            ('description', 'og:description'), webpage, 'description'))
       ++        content_sets = [s['id'] for b in media['blocks'] for s in b['sets']]
       + 
       +         entries = []
       +-        for mobj in re.finditer(
       +-                r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
       +-                webpage):
       +-            video_url = urljoin(url, mobj.group('path'))
       +-            entries.append(self.url_result(
       +-                video_url, ie=RaiPlayIE.ie_key(),
       +-                video_id=RaiPlayIE._match_id(video_url)))
       ++        for cs in content_sets:
       ++            medias = self._download_json(
       ++                '%s/%s.json' % (urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id), cs),
       ++                               cs, 'Downloading content set JSON')
       ++            for m in medias['items']:
       ++                video_url = urljoin(url, m['path_id'])
       ++                entries.append(self.url_result(
       ++                    video_url, ie=RaiPlayIE.ie_key(),
       ++                    video_id=RaiPlayIE._match_id(video_url)))
       + 
       +         return self.playlist_result(entries, playlist_id, title, description)
       +