diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index bd59e6f..5b3c11b 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -114,6 +114,7 @@ modules = [
     "vsco",
     "wallhaven",
     "warosu",
+    "webtoons",
     "weibo",
     "wikiart",
     "xhamster",
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
new file mode 100644
index 0000000..0434d5b
--- /dev/null
+++ b/gallery_dl/extractor/webtoons.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Leonardo Taccari
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://www.webtoons.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class WebtoonsEpisodeExtractor(Extractor):
+    """Extractor for an episode on webtoons.com"""
+    category = "webtoons"
+    subcategory = "episode"
+    directory_fmt = ("{category}", "{comic}")
+    filename_fmt = "{episode}-{num:>02}.{extension}"
+    # 'episode' alone does not identify a comic; include 'title_no' so
+    # archive IDs of different comics cannot collide
+    archive_fmt = "{title_no}_{episode}_{num}"
+    pattern = (r"(?:https?://)?(?:www\.)?webtoons\.com"
+               r"/(?:en)/([^/?&#]+)/([^/?&#]+)/(?:[^/?&#]+)"
+               r"/viewer\?title_no=(\d+)&episode_no=(\d+)")
+    test = (
+        (("https://www.webtoons.com/en/comedy/safely-endangered"
+          "/ep-572-earth/viewer?title_no=352&episode_no=572"), {
+            "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef",
+            "content": "4f7701a750368e377d65900e6e8f64a5f9cb9c86",
+        }),
+    )
+
+    def __init__(self, match):
+        """Store the URL components captured by 'pattern'"""
+        Extractor.__init__(self, match)
+        self.genre, self.comic, self.title_no, self.episode = match.groups()
+        # set the episode URL as 'Referer' header on the session
+        self.session.headers["Referer"] = self.url
+
+    def items(self):
+        """Yield version, directory and one URL message per image"""
+        page = self.request(self.url).text
+        data = self.get_job_metadata(page)
+        # 'archive_fmt' requires 'title_no'; add it if not already set
+        data.setdefault("title_no", self.title_no)
+        imgs = self.get_image_urls(page)
+        data["count"] = len(imgs)
+        yield Message.Version, 1
+        yield Message.Directory, data
+        # 'num' is written into the shared metadata dict on every iteration
+        for data["num"], url in enumerate(imgs, 1):
+            yield Message.Url, url, text.nameext_from_url(url, data)
+
+    def get_job_metadata(self, page):
+        """Collect metadata for extractor-job"""
+        title, pos = text.extract(
+            page, '