xcity.py: get detail page by form query
@@ -11,6 +11,7 @@ from lxml import etree
 import re
 import config
 from urllib.parse import urljoin
+import mechanicalsoup


 def getXpathSingle(htmlcode, xpath):
@@ -83,6 +84,27 @@ def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
         print("[-]" + errors)


+def get_html_by_form(url, form_name: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None):
+    browser = mechanicalsoup.StatefulBrowser(user_agent=G_USER_AGENT if ua is None else ua)
+    configProxy = config.Config().proxy()
+    if configProxy.enable:
+        browser.session.proxies = configProxy.proxies()
+    result = browser.open(url)
+    form = browser.select_form() if form_name is None else browser.select_form(form_name)
+    if isinstance(fields, dict):
+        for k, v in fields.items():
+            browser[k] = v
+    response = browser.submit_selected()
+    response.encoding = "utf-8"
+
+    if return_type == "object":
+        return response
+    elif return_type == "content":
+        return response.content
+    else:
+        return response.text
+
+
 # def get_javlib_cookie() -> [dict, str]:
 #     import cloudscraper
 #     switch, proxy, timeout, retry_count, proxytype = config.Config().proxy()
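For context, a minimal self-contained sketch of the flow that the new get_html_by_form() helper wraps, using MechanicalSoup directly. The user agent string and search term here are illustrative, not from the repo; the field name 'q' matches the xcity.py change below. Note that the helper's cookies parameter is accepted but not yet applied to the session.

import mechanicalsoup

browser = mechanicalsoup.StatefulBrowser(user_agent="Mozilla/5.0")  # illustrative UA
browser.open("https://xcity.jp/about/")
browser.select_form()               # no form_name given -> first form on the page
browser["q"] = "vnds2624"           # fill the search field, as main() does below
response = browser.submit_selected()
response.encoding = "utf-8"
print(response.text[:200])          # default return_type -> response.text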
@@ -82,7 +82,7 @@ def getYear(getRelease):
 def getRelease(a):
     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
     try:
-        result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')[0]
+        result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[2]/text()')[1])
     except:
         return ''
     try:
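The trailing [1] (and the str() wrapper) matter here: xpath('.../text()') returns a list of text nodes, and on the updated detail-page layout the release date is presumably the second node under ul[1]/li[2]. A minimal lxml illustration with made-up markup, not xcity.jp's actual page:

from lxml import etree

# A <br/> splits the <li> into two text nodes.
html = etree.fromstring('<ul><li>Release date:<br/>2020/01/01</li></ul>', etree.HTMLParser())
print(html.xpath('//li/text()'))     # ['Release date:', '2020/01/01']
print(html.xpath('//li/text()')[1])  # '2020/01/01' -- the value getRelease() keeps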
@@ -171,12 +171,13 @@ def getExtrafanart(htmlcode):  # get extra fanart (stills)

 def main(number):
     try:
-        number = number.upper()
-        query_result = get_html(
-            'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q=' + number.replace('-','') + '&sg=main&num=30')
-        html = etree.fromstring(query_result, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-        urls = str(html.xpath('//table[@class="resultList"]/tr[2]/td[1]/a/@href')).strip(" ['']")
-        detail_page = get_html('https://xcity.jp' + urls)
+        query_result = get_html_by_form('https://xcity.jp/about/',
+                                        fields = {'q' : number.replace('-','').lower()})
+        html = etree.fromstring(query_result, etree.HTMLParser())
+        urls = html.xpath("//table[contains(@class, 'resultList')]/tr[2]/td[1]/a/@href")[0]
+        if not len(urls):
+            raise ValueError("xcity.py: urls not found")
+        detail_page = get_html(abs_url('https://xcity.jp', urls))
         dic = {
             'actor': getActor(detail_page),
             'title': getTitle(detail_page),
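abs_url() is not part of this diff; given the from urllib.parse import urljoin already present in the first hunk's context, a plausible reading is a thin wrapper along these lines (a sketch, not the repo's actual definition):

from urllib.parse import urljoin

def abs_url(base: str, path: str) -> str:
    # Resolve a possibly relative href against the site root, e.g.
    # abs_url('https://xcity.jp', '/avod/detail/?id=123') -> 'https://xcity.jp/avod/detail/?id=123'
    return urljoin(base, path)

Unlike the old string concatenation, urljoin handles both absolute and relative hrefs without double-prefixing the host.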
@@ -208,3 +209,4 @@ def main(number):

 if __name__ == '__main__':
     print(main('VNDS-2624'))
+    print(main('ABP-345'))
@@ -7,3 +7,4 @@ cloudscraper
 pysocks==1.7.1
 urllib3==1.24.3
 certifi==2020.12.5
+MechanicalSoup==1.1.0
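MechanicalSoup 1.1.0 is available on PyPI, so after pulling this commit the new dependency installs with the usual pip install -r requirements.txt.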