Add a jav321.com scraper and the POST helper it needs.

* ADC_function.py: new post_html(). It passes the configured timeout to
  requests.post, retries on any connection error or timeout, and returns
  None explicitly once every attempt has failed.
* core.py: import jav321 and register jav321.main under the "jav321" key.
* jav321.py: new scraper module. main() returns "{}" when the request
  fails or the search finds nothing; missing page nodes and fields yield
  "" instead of raising IndexError.

Review notes:
- The copy of this patch under review had its whitespace collapsed.
  Indentation and blank lines, including those of context lines, are
  reconstructed; check them against the target tree before applying.
- The separator passed to split() in parse_info() had been garbled into a
  raw line break; "<br/>" is assumed. Confirm against upstream.
- post_html() assumes get_network_settings() returns a timeout value that
  requests accepts (a number of seconds). Confirm.
- The "index" lines for ADC_function.py and jav321.py are omitted because
  the blob hashes of the revised content are not known.

diff --git a/ADC_function.py b/ADC_function.py
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -119,3 +119,25 @@ def get_html(url,cookies = None):#网页请求核心
     print('[-]Connect Failed! Please check your Proxy or Network!')
 
 
+def post_html(url: str, query: dict) -> "requests.Response | None":
+    """POST *query* as form data to *url*, retrying on connection failures.
+
+    Proxy, timeout and retry count come from get_network_settings().
+    Returns the response, or None (after printing a hint) once every
+    attempt has failed.
+    """
+    proxy, timeout, retry_count = get_network_settings()
+
+    if proxy:
+        proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
+    else:
+        proxies = {}
+
+    for i in range(retry_count):
+        try:
+            # Without a timeout a stalled connection would block forever.
+            return requests.post(url, data=query, proxies=proxies, timeout=timeout)
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+            print("[-]Connect retry {}/{}".format(i+1, retry_count))
+    print("[-]Connect Failed! Please check your Proxy or Network!")
+    return None
diff --git a/core.py b/core.py
index 3ad271b..94cde7e 100755
--- a/core.py
+++ b/core.py
@@ -17,6 +17,7 @@ import avsox
 import javbus
 import javdb
 import fanza
+import jav321
 import requests
 import random
 
@@ -58,6 +59,7 @@ def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元
         "javdb": javdb.main,
         "javbus": javbus.main,
         "mgstage": mgstage.main,
+        "jav321": jav321.main,
     }
 
     # default fetch order list, from the begining to the end
diff --git a/jav321.py b/jav321.py
new file mode 100644
--- /dev/null
+++ b/jav321.py
@@ -0,0 +1,138 @@
+"""Scrape title metadata from jav321.com."""
+import json
+
+from bs4 import BeautifulSoup
+from lxml import html
+
+from ADC_function import post_html
+
+
+def main(number: str) -> str:
+    """Search jav321 for *number* and return its metadata as a JSON string.
+
+    A hit is recognised by the final response URL containing "/video/".
+    Any other outcome, including post_html() giving up and returning None,
+    yields the JSON text of an empty dict.
+    """
+    result = post_html(url="https://www.jav321.com/search", query={"sn": number})
+    dic = {}
+
+    if result is not None and "/video/" in result.url:
+        soup = BeautifulSoup(result.text, "html.parser")
+        lx = html.fromstring(str(soup))
+        data = parse_info(soup)
+        dic = {
+            "title": get_title(lx),
+            "studio": "",
+            "year": get_year(data),
+            "outline": get_outline(lx),
+            "director": "",
+            "cover": get_cover(lx),
+            "imagecut": 1,
+            "actor_photo": "",
+            "website": result.url,
+            "source": "jav321.py",
+            **data,
+        }
+
+    return json.dumps(
+        dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':')
+    )
+
+
+def _first(matches: list) -> str:
+    """Return the first XPath match, or "" when nothing matched."""
+    return matches[0] if matches else ""
+
+
+def get_title(lx: html.HtmlElement) -> str:
+    """Return the stripped page heading text, or "" if the node is missing."""
+    return _first(lx.xpath("/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()")).strip()
+
+
+def parse_info(soup: BeautifulSoup) -> dict:
+    """Extract the labelled fields from the info panel.
+
+    Returns an empty dict when "div.row > div.col-md-9" is not on the page.
+    """
+    data = soup.select_one("div.row > div.col-md-9")
+    if data is None:
+        return {}
+
+    # Key each <br/>-separated HTML fragment by the bold label it contains.
+    data_dic = {get_bold_text(h=d): d for d in str(data).split("<br/>")}
+
+    return {
+        "actor": get_actor(data_dic),
+        "label": get_label(data_dic),
+        "tag": get_tag(data_dic),
+        "number": get_number(data_dic),
+        "release": get_release(data_dic),
+        "runtime": get_runtime(data_dic),
+    }
+
+
+def get_bold_text(h: str) -> str:
+    """Return the text of the first <b> in *h*, or "UNKNOWN_TAG" if none."""
+    bold = BeautifulSoup(h, "html.parser").b
+    return bold.text if bold is not None else "UNKNOWN_TAG"
+
+
+def get_anchor_info(h: str) -> str:
+    """Return the comma-joined text of every <a href=...> found in *h*."""
+    anchors = BeautifulSoup(h, "html.parser").find_all("a", href=True)
+    return ",".join(a.text for a in anchors)
+
+
+def get_text_info(h: str) -> str:
+    """Return the stripped text after the first ": " in *h*, or "" if absent."""
+    return h.partition(": ")[2].strip()
+
+
+def get_cover(lx: html.HtmlElement) -> str:
+    """Return the cover image URL, or "" if the node is missing."""
+    return _first(lx.xpath("/html/body/div[2]/div[2]/div[1]/p/a/img/@src"))
+
+
+def get_outline(lx: html.HtmlElement) -> str:
+    """Return the outline text, or "" if the node is missing."""
+    return _first(lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()"))
+
+
+def get_actor(data: dict) -> str:
+    """Return the link texts of the "女优" field, or "" if it is absent."""
+    return get_anchor_info(data["女优"]) if "女优" in data else ""
+
+
+def get_label(data: dict) -> str:
+    """Return the link texts of the "片商" field, or "" if it is absent."""
+    return get_anchor_info(data["片商"]) if "片商" in data else ""
+
+
+def get_tag(data: dict) -> str:
+    """Return the link texts of the "标签" field, or "" if it is absent."""
+    return get_anchor_info(data["标签"]) if "标签" in data else ""
+
+
+def get_number(data: dict) -> str:
+    """Return the text value of the "番号" field, or "" if it is absent."""
+    return get_text_info(data["番号"]) if "番号" in data else ""
+
+
+def get_release(data: dict) -> str:
+    """Return the text value of the "发行日期" field, or "" if it is absent."""
+    return get_text_info(data["发行日期"]) if "发行日期" in data else ""
+
+
+def get_runtime(data: dict) -> str:
+    """Return the text value of the "播放时长" field, or "" if it is absent."""
+    return get_text_info(data["播放时长"]) if "播放时长" in data else ""
+
+
+def get_year(data: dict) -> str:
+    """Return the first four characters of data["release"], or "" if unset."""
+    return data.get("release", "")[:4]
+
+
+if __name__ == "__main__":
+    print(main("wmc-002"))