tccr.it

       Add patch for new dumpert.nl - pkgsrc-localpatches - leot's pkgsrc LOCALPATCHES
 (HTM) hg clone https://bitbucket.org/iamleot/pkgsrc-localpatches
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
 (DIR) changeset 150fc314205bd5253edbd793875757a4828a127a
 (DIR) parent ac17afb46c920dd4c332393f37f69145afc68059
 (HTM) Author: Leonardo Taccari <iamleot@gmail.com>
       Date:   Sun, 13 Oct 2019 18:04:11 
       
       Add patch for new dumpert.nl
       
       From: <https://github.com/ytdl-org/youtube-dl/pull/22564>
       
       Diffstat:
        net/youtube-dl/patch-youtube__dl_extractor_dumpert.py |  115 ++++++++++++++++++
        1 files changed, 115 insertions(+), 0 deletions(-)
       ---
       diff -r ac17afb46c92 -r 150fc314205b net/youtube-dl/patch-youtube__dl_extractor_dumpert.py
       --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
       +++ b/net/youtube-dl/patch-youtube__dl_extractor_dumpert.py     Sun Oct 13 18:04:11 2019 +0200
       @@ -0,0 +1,115 @@
       +$NetBSD$
       +
       +[dumpert] Fix extractor for redesign (#22428)
       +
       +Note that this still supports old-style URLs, and even embed URLs,
       +which the redesigned site itself no longer seems to support.
       +
       +From: <https://github.com/ytdl-org/youtube-dl/pull/22564>
       +
       +--- youtube_dl/extractor/dumpert.py.orig
       ++++ youtube_dl/extractor/dumpert.py
       +@@ -4,20 +4,34 @@
       + import re
       + 
       + from .common import InfoExtractor
       +-from ..compat import compat_b64decode
       + from ..utils import (
       ++    ExtractorError,
       +     qualities,
       +     sanitized_Request,
       + )
       + 
       + 
       + class DumpertIE(InfoExtractor):
       +-    _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
       ++    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
       +     _TESTS = [{
       ++        # This is an old-style URL. Note that the video ID consists of two
       ++        # parts.
       +         'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
       +         'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
       +         'info_dict': {
       +-            'id': '6646981/951bc60f',
       ++            'id': '6646981_951bc60f',
       ++            'ext': 'mp4',
       ++            'title': 'Ik heb nieuws voor je',
       ++            'description': 'Niet schrikken hoor',
       ++            'thumbnail': r're:^https?://.*\.jpg$',
       ++        }
       ++    }, {
       ++        # This is a new-style URL. Note that the two parts of the video ID are
       ++        # now separated by _ instead of /.
       ++        'url': 'https://www.dumpert.nl/item/6646981_951bc60f/',
       ++        'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
       ++        'info_dict': {
       ++            'id': '6646981_951bc60f',
       +             'ext': 'mp4',
       +             'title': 'Ik heb nieuws voor je',
       +             'description': 'Niet schrikken hoor',
       +@@ -30,35 +44,47 @@ class DumpertIE(InfoExtractor):
       + 
       +     def _real_extract(self, url):
       +         mobj = re.match(self._VALID_URL, url)
       +-        video_id = mobj.group('id')
       +-        protocol = mobj.group('protocol')
       ++        video_id = mobj.group('id').replace('/', '_')
       + 
       +-        url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
       ++        url = 'https://www.dumpert.nl/item/%s' % (video_id)
       +         req = sanitized_Request(url)
       +-        req.add_header('Cookie', 'nsfw=1; cpc=10')
       ++        req.add_header('Cookie', 'filterNsfw=true; cpc=10')
       +         webpage = self._download_webpage(req, video_id)
       + 
       +-        files_base64 = self._search_regex(
       +-            r'data-files="([^"]+)"', webpage, 'data files')
       ++        state = self._parse_json(self._parse_json(self._search_regex(
       ++            r'__DUMPERT_STATE__\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
       ++            webpage, 'state'
       ++        ), video_id), video_id)
       ++
       ++        item = state.get('items', {}).get('item', {}).get('item')
       ++        if not item:
       ++            raise ExtractorError('Unable to find item on page')
       ++
       ++        video = None
       ++        for media_item in item.get('media', []):
       ++            if media_item.get('mediatype') == 'VIDEO':
       ++                video = media_item
       ++
       ++        if not video:
       ++            raise ExtractorError('Unable to find video on page')
       + 
       +-        files = self._parse_json(
       +-            compat_b64decode(files_base64).decode('utf-8'),
       +-            video_id)
       ++        variants = video.get('variants', [])
       ++        if not variants:
       ++            raise ExtractorError('Unable to find video variants on page')
       + 
       +         quality = qualities(['flv', 'mobile', 'tablet', '720p'])
       + 
       +         formats = [{
       +-            'url': video_url,
       +-            'format_id': format_id,
       +-            'quality': quality(format_id),
       +-        } for format_id, video_url in files.items() if format_id != 'still']
       ++            'url': variant.get('uri'),
       ++            'format_id': variant.get('version'),
       ++            'quality': quality(variant.get('version')),
       ++        } for variant in variants if 'uri' in variant and 'version' in variant]
       +         self._sort_formats(formats)
       + 
       +-        title = self._html_search_meta(
       +-            'title', webpage) or self._og_search_title(webpage)
       +-        description = self._html_search_meta(
       +-            'description', webpage) or self._og_search_description(webpage)
       +-        thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
       ++        title = item.get('title') or self._og_search_title(webpage)
       ++        description = item.get(
       ++            'description') or self._og_search_description(webpage)
       ++        thumbnail = item.get('still') or self._og_search_thumbnail(webpage)
       + 
       +         return {
       +             'id': video_id,