Update javlib.py

解决 javlib 部分番号匹配不到的问题
This commit is contained in:
Feng4
2021-01-16 18:33:27 +08:00
committed by GitHub
parent a734725678
commit 3e849ddc4a

View File

@@ -2,6 +2,7 @@ import sys
sys.path.append('../') sys.path.append('../')
import json import json
import bs4 import bs4
import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from lxml import html from lxml import html
from http.cookies import SimpleCookie from http.cookies import SimpleCookie
@@ -33,6 +34,9 @@ def main(number: str):
soup = BeautifulSoup(result.text, "html.parser") soup = BeautifulSoup(result.text, "html.parser")
lx = html.fromstring(str(soup)) lx = html.fromstring(str(soup))
fanhao_pather = re.compile(r'<a href=".*?".*?><div class="id">(.*?)</div>')
fanhao = fanhao_pather.findall(result.text)
if "/?v=jav" in result.url: if "/?v=jav" in result.url:
dic = { dic = {
"title": get_title(lx, soup), "title": get_title(lx, soup),
@@ -53,6 +57,41 @@ def main(number: str):
"runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'), "runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
"series":'', "series":'',
} }
elif number.upper() in fanhao:
url_pather = re.compile(r'<a href="(.*?)".*?><div class="id">(.*?)</div>')
s = {}
url_list = url_pather.findall(result.text)
for url in url_list:
s[url[1]] = 'http://www.javlibrary.com/cn/' + url[0].lstrip('.')
av_url = s[number.upper()]
result = get_html(
av_url,
cookies=cookies,
ua=user_agent,
return_type="object"
)
soup = BeautifulSoup(result.text, "html.parser")
lx = html.fromstring(str(soup))
dic = {
"title": get_title(lx, soup),
"studio": get_table_el_single_anchor(soup, "video_maker"),
"year": get_table_el_td(soup, "video_date")[:4],
"outline": "",
"director": get_table_el_single_anchor(soup, "video_director"),
"cover": get_cover(lx),
"imagecut": 1,
"actor_photo": "",
"website": result.url,
"source": "javlib.py",
"actor": get_table_el_multi_anchor(soup, "video_cast"),
"label": get_table_el_td(soup, "video_label"),
"tag": get_table_el_multi_anchor(soup, "video_genres"),
"number": get_table_el_td(soup, "video_id"),
"release": get_table_el_td(soup, "video_date"),
"runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
"series": '',
}
else: else:
dic = {} dic = {}