youtube: fix JSON extraction - frontends - front-ends for some sites (experiment)
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 416869b0c0f2efa0f43c93a59c6d9a89c01d9aec
(DIR) parent 5dbcb6f3c9ab9a48446054e954147b652fb26407
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 17 Oct 2020 18:38:35 +0200
youtube: fix JSON extraction
YouTube does not always serve the same content: the initial data may be assigned
as either window["ytInitialData"] or var ytInitialData. This was a recent YouTube
change to intentionally break scrapers again.
Diffstat:
M youtube/youtube.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
---
(DIR) diff --git a/youtube/youtube.c b/youtube/youtube.c
@@ -55,12 +55,21 @@ request_search(const char *s, const char *page, const char *order)
int
extractjson(const char *s, char **start, char **end)
{
- if (!(*start = strstr(s, "window[\"ytInitialData\"] = ")))
+ *start = strstr(s, "window[\"ytInitialData\"] = ");
+ if (*start) {
+ (*start) += sizeof("window[\"ytInitialData\"] = ") - 1;
+ } else {
+ *start = strstr(s, "var ytInitialData = ");
+ if (*start)
+ (*start) += sizeof("var ytInitialData = ") - 1;
+ }
+ if (!*start)
return -1;
- if (!(*end = strstr(*start, "};\n")))
+ *end = strstr(*start, "};\n");
+ if (!*end)
+ *end = strstr(*start, "}; \n");
+ if (!*end)
return -1;
-
- (*start) += sizeof("window[\"ytInitialData\"] = ") - 1;
(*end)++;
return 0;