diff --git a/ADC_function.py b/ADC_function.py index 824b6d4..bf52378 100755 --- a/ADC_function.py +++ b/ADC_function.py @@ -1,6 +1,6 @@ import requests from lxml import etree - +import re import config SUPPORT_PROXY_TYPE = ("http", "socks5", "socks5h") @@ -463,3 +463,14 @@ def translate(src:str,target_language:str="zh_cn"): translate_list = [i["trans"] for i in result.json()["sentences"]] return "".join(translate_list) + +# ========================================================================是否为无码 +def is_uncensored(number): + if re.match('^\d{4,}', number) or re.match('n\d{4}', number) or 'HEYZO' in number.upper(): + return True + configs = config.Config().get_uncensored() + prefix_list = str(configs).split(',') + for pre in prefix_list: + if pre.upper() in number.upper(): + return True + return False diff --git a/Img/LEAK.png b/Img/LEAK.png new file mode 100644 index 0000000..5f68225 Binary files /dev/null and b/Img/LEAK.png differ diff --git a/Img/SUB.png b/Img/SUB.png new file mode 100644 index 0000000..1b06f28 Binary files /dev/null and b/Img/SUB.png differ diff --git a/Img/UNCENSORED.png b/Img/UNCENSORED.png new file mode 100644 index 0000000..094e424 Binary files /dev/null and b/Img/UNCENSORED.png differ diff --git a/WebCrawler/airav.py b/WebCrawler/airav.py index 9a13cec..58263a2 100644 --- a/WebCrawler/airav.py +++ b/WebCrawler/airav.py @@ -110,56 +110,65 @@ def getTag(htmlcode): # 获取标签 tag.append(i.get_text()) return tag +def getExtrafanart(htmlcode): # 获取剧照 + html_pather = re.compile(r'
[\s\S]*?
') + html = html_pather.search(htmlcode) + if html: + html = html.group() + extrafanart_pather = re.compile(r'') + html = html_pather.search(htmlcode) + if html: + html = html.group() + extrafanart_pather = re.compile(r'') + html = html_pather.search(htmlcode) + if html: + html = html.group() + extrafanart_pather = re.compile(r' json: result = post_html(url="https://www.jav321.com/search", query={"sn": number}) + soup = BeautifulSoup(result.text, "html.parser") lx = html.fromstring(str(soup)) if "/video/" in result.url: data = parse_info(soup) + dic = { "title": get_title(lx), "year": get_year(data), @@ -20,6 +23,8 @@ def main(number: str) -> json: "director": "", "cover": get_cover(lx), "imagecut": 1, + "trailer": get_trailer(result.text), + "extrafanart": get_extrafanart(result.text), "actor_photo": "", "website": result.url, "source": "jav321.py", @@ -30,7 +35,6 @@ def main(number: str) -> json: return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':')) - def get_title(lx: html.HtmlElement) -> str: return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()")[0].strip() @@ -79,6 +83,24 @@ def get_anchor_info(h: str) -> str: def get_text_info(h: str) -> str: return h.split(": ")[1] +def get_trailer(html) -> str: + videourl_pather = re.compile(r'
[\s\S]*?