From 83e8e8cb44523eea4795fac612125bad3c7c3d38 Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sat, 26 Dec 2020 23:57:34 +0800 Subject: [PATCH] Update jav321.py --- WebCrawler/jav321.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/WebCrawler/jav321.py b/WebCrawler/jav321.py index 2d0b0b0..b7fd8b4 100644 --- a/WebCrawler/jav321.py +++ b/WebCrawler/jav321.py @@ -4,15 +4,18 @@ import json from bs4 import BeautifulSoup from lxml import html from ADC_function import post_html +import re def main(number: str) -> json: result = post_html(url="https://www.jav321.com/search", query={"sn": number}) + soup = BeautifulSoup(result.text, "html.parser") lx = html.fromstring(str(soup)) if "/video/" in result.url: data = parse_info(soup) + dic = { "title": get_title(lx), "year": get_year(data), @@ -20,6 +23,8 @@ def main(number: str) -> json: "director": "", "cover": get_cover(lx), "imagecut": 1, + "trailer": get_trailer(result.text), + "extrafanart": get_extrafanart(result.text), "actor_photo": "", "website": result.url, "source": "jav321.py", @@ -30,7 +35,6 @@ def main(number: str) -> json: return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':')) - def get_title(lx: html.HtmlElement) -> str: return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()")[0].strip() @@ -79,6 +83,24 @@ def get_anchor_info(h: str) -> str: def get_text_info(h: str) -> str: return h.split(": ")[1] +def get_trailer(html) -> str: + videourl_pather = re.compile(r'
[\s\S]*?