Add patch for new dumpert.nl - pkgsrc-localpatches - leot's pkgsrc LOCALPATCHES
(HTM) hg clone https://bitbucket.org/iamleot/pkgsrc-localpatches
(DIR) Log
(DIR) Files
(DIR) Refs
---
(DIR) changeset 150fc314205bd5253edbd793875757a4828a127a
(DIR) parent ac17afb46c920dd4c332393f37f69145afc68059
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sun, 13 Oct 2019 18:04:11
Add patch for new dumpert.nl
From: <https://github.com/ytdl-org/youtube-dl/pull/22564>
Diffstat:
net/youtube-dl/patch-youtube__dl_extractor_dumpert.py | 115 ++++++++++++++++++
1 files changed, 115 insertions(+), 0 deletions(-)
---
diff -r ac17afb46c92 -r 150fc314205b net/youtube-dl/patch-youtube__dl_extractor_dumpert.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/net/youtube-dl/patch-youtube__dl_extractor_dumpert.py Sun Oct 13 18:04:11 2019 +0200
@@ -0,0 +1,115 @@
+$NetBSD$
+
+[dumpert] Fix extractor for redesign (#22428)
+
+Note that this still supports old-style URLs, as well as embed URLs,
+which the site itself no longer seems to support.
+
+From: <https://github.com/ytdl-org/youtube-dl/pull/22564>
+
+--- youtube_dl/extractor/dumpert.py.orig
++++ youtube_dl/extractor/dumpert.py
+@@ -4,20 +4,34 @@
+ import re
+
+ from .common import InfoExtractor
+-from ..compat import compat_b64decode
+ from ..utils import (
++ ExtractorError,
+ qualities,
+ sanitized_Request,
+ )
+
+
+ class DumpertIE(InfoExtractor):
+- _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
++ _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
+ _TESTS = [{
++ # This is an old-style URL. Note that the video ID consists of two
++ # parts.
+ 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+ 'info_dict': {
+- 'id': '6646981/951bc60f',
++ 'id': '6646981_951bc60f',
++ 'ext': 'mp4',
++ 'title': 'Ik heb nieuws voor je',
++ 'description': 'Niet schrikken hoor',
++ 'thumbnail': r're:^https?://.*\.jpg$',
++ }
++ }, {
++ # This is a new-style URL. Note that the two parts of the video ID are
++ # now separated by _ instead of /.
++ 'url': 'https://www.dumpert.nl/item/6646981_951bc60f/',
++ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
++ 'info_dict': {
++ 'id': '6646981_951bc60f',
+ 'ext': 'mp4',
+ 'title': 'Ik heb nieuws voor je',
+ 'description': 'Niet schrikken hoor',
+@@ -30,35 +44,47 @@ class DumpertIE(InfoExtractor):
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+- video_id = mobj.group('id')
+- protocol = mobj.group('protocol')
++ video_id = mobj.group('id').replace('/', '_')
+
+- url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
++ url = 'https://www.dumpert.nl/item/%s' % (video_id)
+ req = sanitized_Request(url)
+- req.add_header('Cookie', 'nsfw=1; cpc=10')
++ req.add_header('Cookie', 'filterNsfw=true; cpc=10')
+ webpage = self._download_webpage(req, video_id)
+
+- files_base64 = self._search_regex(
+- r'data-files="([^"]+)"', webpage, 'data files')
++ state = self._parse_json(self._parse_json(self._search_regex(
++ r'__DUMPERT_STATE__\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
++ webpage, 'state'
++ ), video_id), video_id)
++
++ item = state.get('items', {}).get('item', {}).get('item')
++ if not item:
++ raise ExtractorError('Unable to find item on page')
++
++ video = None
++ for media_item in item.get('media', []):
++ if media_item.get('mediatype') == 'VIDEO':
++ video = media_item
++
++ if not video:
++ raise ExtractorError('Unable to find video on page')
+
+- files = self._parse_json(
+- compat_b64decode(files_base64).decode('utf-8'),
+- video_id)
++ variants = video.get('variants', [])
++ if not variants:
++ raise ExtractorError('Unable to find video variants on page')
+
+ quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+
+ formats = [{
+- 'url': video_url,
+- 'format_id': format_id,
+- 'quality': quality(format_id),
+- } for format_id, video_url in files.items() if format_id != 'still']
++ 'url': variant.get('uri'),
++ 'format_id': variant.get('version'),
++ 'quality': quality(variant.get('version')),
++ } for variant in variants if 'uri' in variant and 'version' in variant]
+ self._sort_formats(formats)
+
+- title = self._html_search_meta(
+- 'title', webpage) or self._og_search_title(webpage)
+- description = self._html_search_meta(
+- 'description', webpage) or self._og_search_description(webpage)
+- thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
++ title = item.get('title') or self._og_search_title(webpage)
++ description = item.get(
++ 'description') or self._og_search_description(webpage)
++ thumbnail = item.get('still') or self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,