Update 3.5
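Bumps the version string from 3.4.3 to 3.5 and adds a series field to the scraped metadata: avsox.py, fanza.py, jav321.py, javbus.py, javdb.py and mgstage.py each gain a series lookup and return it in their JSON, while core.py reads json_data['series'], returns it from get_info() and writes it as an extra <tag> in the NFO output. Release dates from fanza.py, mgstage.py and xcity.py are normalized to use dashes instead of slashes, jav321.py also starts filling the studio field, and mgstage.py returns tags as a list. javdb.py switches its detail-page XPath selectors from following-sibling::span to the adjacent span, falls back to javdb4.com when the main search request fails, and several scrapers gain if __name__ == "__main__" test blocks.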
@@ -86,7 +86,7 @@ def create_data_and_move(file_path: str, c: config.Config):


 if __name__ == '__main__':
-    version = '3.4.3'
+    version = '3.5'

     # Parse command line args
     single_file_path, config_file, auto_exit = argparse_function()
avsox.py (11 changed lines)
@@ -72,6 +72,13 @@ def getTag(a): # 获取演员
     for i in a:
         d.append(i.get_text())
     return d
+def getSeries(htmlcode):
+    try:
+        html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+        result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
+        return result1
+    except:
+        return ''

 def main(number):
     a = get_html('https://avsox.host/cn/search/' + number)
@@ -108,8 +115,10 @@ def main(number):
         'actor_photo': getActorPhoto(web),
         'website': result1,
         'source': 'avsox.py',
+        'series': getSeries(info),
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
     return js

-#print(main('012717_472'))
+if __name__ == "__main__":
+    print(main('012717_472'))
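The new getSeries() is a thin XPath lookup; a minimal sketch of the same expression run against an invented fragment of a detail page (the markup below is illustrative, not copied from avsox):

from lxml import etree

# Invented stand-in for the series row on an avsox detail page.
sample = '<p><span class="header">系列:</span> <span>Sample Series</span></p>'

html = etree.fromstring(sample, etree.HTMLParser())
# Same expression as getSeries(): the span labelled "系列:" plus the second span under the same parent.
series = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
print(series)  # Sample Series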
core.py (7 changed lines)
@@ -98,6 +98,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON
     runtime = json_data['runtime']
     outline = json_data['outline']
     label = json_data['label']
+    series = json_data['series']
     year = json_data['year']
     try:
         cover_small = json_data['cover_small']
@@ -166,7 +167,8 @@ def get_info(json_data): # 返回json里的数据
     number = json_data['number']
     cover = json_data['cover']
     website = json_data['website']
-    return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website
+    series = json_data['series']
+    return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series


 def small_cover_check(path, number, cover_small, c_word, conf: config.Config, filepath, failed_folder):
@@ -263,7 +265,7 @@ def image_download(cover, number, c_word, path, conf: config.Config, filepath, f


 def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag, actor_list, liuchu):
-    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data)
+    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series = get_info(json_data)

     try:
         if not os.path.exists(path):
@@ -300,6 +302,7 @@ def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fa
     try:
         for i in tag:
             print(" <tag>" + i + "</tag>", file=code)
+        print(" <tag>" + series + "</tag>", file=code)
     except:
         aaaaa = ''
     try:
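get_info() now returns series as a twelfth value and print_files() writes it out as one more <tag> element. A rough sketch of just that output step, with made-up values standing in for the real data and a hypothetical output path:

# Made-up values standing in for what get_info(json_data) would return.
series = 'Sample Series'
tag = ['Tag A', 'Tag B']

# Mirrors the new line in print_files(): each tag, then the series, becomes a <tag> element in the NFO.
with open('movie.nfo', 'w', encoding='utf-8') as code:
    for i in tag:
        print(" <tag>" + i + "</tag>", file=code)
    print(" <tag>" + series + "</tag>", file=code)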
fanza.py (25 changed lines)
@@ -108,7 +108,7 @@ def getRelease(text):
         )[0].lstrip("\n")
     except:
         pass
-    return result
+    return result.replace('/','-')


 def getTag(text):
@@ -174,6 +174,23 @@ def getOutline(text):
     return result


+def getSeries(text):
+    try:
+        html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+        try:
+            result = html.xpath(
+                "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
+            )[0]
+        except:
+            result = html.xpath(
+                "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
+            )[0]
+        return result
+    except:
+        return ''
+
+
+
 def main(number):
     # fanza allow letter + number + underscore, normalize the input here
     # @note: I only find the usage of underscore as h_test123456789
@@ -225,6 +242,7 @@ def main(number):
             "actor_photo": "",
             "website": chosen_url,
             "source": "fanza.py",
+            "series": getSeries(htmlcode),
         }
     except:
         data = {
@@ -266,7 +284,4 @@ def main_htmlcode(number):


 if __name__ == "__main__":
-    # print(main("DV-1562"))
-    # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
-    # print(main("ipx292"))
-    pass
+    print(main("DV-1562"))
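getRelease() now returns the date with dashes instead of slashes (the same normalization is applied to the mgstage.py and xcity.py release dates further down); a one-line illustration with an invented value:

# Invented example of the raw release string scraped from the page.
result = "2020/01/17"
print(result.replace('/', '-'))  # 2020-01-17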
@@ -159,5 +159,5 @@ def main(number):
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
     return js

-
-#print(main('1252953'))
+if __name__ == '__main__':
+    print(main('1252953'))
jav321.py (22 changed lines)
@@ -13,7 +13,6 @@ def main(number: str) -> json:
     data = parse_info(soup)
     dic = {
         "title": get_title(lx),
-        "studio": "",
         "year": get_year(data),
         "outline": get_outline(lx),
         "director": "",
@@ -46,10 +45,12 @@ def parse_info(soup: BeautifulSoup) -> dict:
         return {
             "actor": get_actor(data_dic),
             "label": get_label(data_dic),
+            "studio": get_studio(data_dic),
             "tag": get_tag(data_dic),
             "number": get_number(data_dic),
             "release": get_release(data_dic),
             "runtime": get_runtime(data_dic),
+            "series": get_series(data_dic),
         }
     else:
         return {}
@@ -84,6 +85,9 @@ def get_cover(lx: html.HtmlElement) -> str:
 def get_outline(lx: html.HtmlElement) -> str:
     return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()")[0]

+def get_series2(lx: html.HtmlElement) -> str:
+    return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[1]/div[2]/a[11]/text()")[0]
+

 def get_actor(data: hash) -> str:
     if "女优" in data:
@@ -106,6 +110,13 @@ def get_tag(data: hash) -> str:
     return ""


+def get_studio(data: hash) -> str:
+    if "片商" in data:
+        return get_anchor_info(data["片商"])
+    else:
+        return ""
+
+
 def get_number(data: hash) -> str:
     if "番号" in data:
         return get_text_info(data["番号"])
@@ -134,5 +145,12 @@ def get_year(data: hash) -> str:
     return ""


+def get_series(data: hash) -> str:
+    if "系列" in data:
+        return get_anchor_info(data["系列"])
+    else:
+        return ""
+
+
 if __name__ == "__main__":
-    print(main("wmc-002"))
+    print(main("soe-259"))
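The two new jav321 lookups follow the file's existing pattern: check the parsed label/value map, then hand the value to the existing get_anchor_info() extractor. A sketch of that pattern with a stubbed-out helper and invented data (the real get_anchor_info lives in jav321.py and is not reproduced here):

def get_anchor_info(value):
    # Stand-in for jav321.py's real helper, which pulls the text out of anchor elements.
    return value

def get_series(data: dict) -> str:
    # Mirrors the added helper: return the series when the page listed one, otherwise "".
    if "系列" in data:
        return get_anchor_info(data["系列"])
    else:
        return ""

print(get_series({"系列": "Sample Series"}))  # Sample Series
print(repr(get_series({})))                   # ''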
javbus.py (14 changed lines)
@@ -75,9 +75,12 @@ def getOutline(htmlcode): #获取演员
     except:
         return ''
 def getSerise(htmlcode):
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
-    return result
+    try:
+        html = etree.fromstring(htmlcode, etree.HTMLParser())
+        result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
+        return result
+    except:
+        return ''
 def getTag(htmlcode): # 获取演员
     tag = []
     soup = BeautifulSoup(htmlcode, 'lxml')
@@ -113,6 +116,7 @@ def main_uncensored(number):
         'actor_photo': '',
         'website': 'https://www.javbus.com/' + number,
         'source': 'javbus.py',
+        'series': getSerise(htmlcode),
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
     return js
@@ -143,6 +147,7 @@ def main(number):
         'actor_photo': getActorPhoto(htmlcode),
         'website': 'https://www.javbus.com/' + number,
         'source': 'javbus.py',
+        'series': getSerise(htmlcode),
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
                     separators=(',', ':'), ) # .encode('UTF-8')
@@ -157,3 +162,6 @@ def main(number):
         data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
     )
     return js
+
+if __name__ == "__main__" :
+    print(main('ipx-292'))
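getSerise() is now wrapped in try/except, so a page that cannot be parsed yields an empty series instead of an exception; a minimal sketch of that behaviour (the XPath is the one from the diff, and the None input is just a stand-in for a failed download):

from lxml import etree

def getSerise(htmlcode):
    # Same shape as the hardened javbus helper: any parse or XPath failure now yields ''.
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
        return result
    except:
        return ''

print(repr(getSerise(None)))  # parsing None raises, so the fallback returns ''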
javdb.py (53 changed lines)
@@ -13,8 +13,8 @@ def getTitle(a):
     return result
 def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
+    result1 = str(html.xpath('//strong[contains(text(),"演員")]/../span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"演員")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
 def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
     a = actor.split(',')
@@ -25,23 +25,23 @@ def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
     return d
 def getStudio(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
+    result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 def getRuntime(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
+    result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').rstrip('mi')
 def getLabel(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
+    result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 def getNum(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
+    result1 = str(html.xpath('//strong[contains(text(),"番號")]/../span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
     return str(result2 + result1).strip('+')
 def getYear(getRelease):
     try:
@@ -51,14 +51,18 @@ def getYear(getRelease):
         return getRelease
 def getRelease(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
+    result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+')
 def getTag(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
-    return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
+    try:
+        result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
+        return result
+    except:
+        result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
+        return result
+
 def getCover_small(a, index=0):
     # same issue mentioned below,
     # javdb sometime returns multiple results
@@ -74,17 +78,26 @@ def getCover(htmlcode):
     return result
 def getDirector(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
+    result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 def getOutline(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
     return result
+def getSeries(a):
+    #/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
+    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 def main(number):
     try:
         number = number.upper()
-        query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
+        try:
+            query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
+        except:
+            query_result = get_html('https://javdb4.com/search?q=' + number + '&f=all')
         html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
         # javdb sometime returns multiple results,
         # and the first elememt maybe not the one we are looking for
@@ -111,6 +124,7 @@ def main(number):
         'actor_photo': getActorPhoto(getActor(detail_page)),
         'website': 'https://javdb.com' + correct_url,
         'source': 'javdb.py',
+        'series': getSeries(detail_page),
     }
     except Exception as e:
         # print(e)
@@ -120,4 +134,5 @@ def main(number):

 # main('DV-1562')
 # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
-#print(main('ipx-292'))
+if __name__ == "__main__":
+    print(main('ipx-292'))
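main() now retries the search against the javdb4.com mirror when the primary request throws; a self-contained sketch of that pattern, with a stub standing in for the project's get_html (the stub and the simulated failure are invented for illustration):

def get_html(url):
    # Stand-in for the project's get_html helper; here the primary domain is pretended to be unreachable.
    if url.startswith('https://javdb.com/'):
        raise ConnectionError('simulated failure')
    return '<html>search results from ' + url + '</html>'

def search(number):
    # Same shape as the new code in main(): try javdb.com first, fall back to javdb4.com.
    try:
        return get_html('https://javdb.com/search?q=' + number + '&f=all')
    except:
        return get_html('https://javdb4.com/search?q=' + number + '&f=all')

print(search('IPX-292'))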
mgstage.py (16 changed lines)
@@ -21,8 +21,8 @@ def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/t
     return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
 def getStudio(a):
     html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
-    result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result1=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
     return str(result1+result2).strip('+').replace("', '",'').replace('"','')
 def getRuntime(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
@@ -55,14 +55,14 @@ def getRelease(a):
         '\\n')
     result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
         '\\n')
-    return str(result1 + result2).strip('+')
+    return str(result1 + result2).strip('+').replace('/','-')
 def getTag(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
     result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
         '\\n')
     result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
         '\\n')
-    return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','')
+    return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
 def getCover(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
@@ -79,6 +79,13 @@ def getOutline(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//p/text()')).strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
     return result
+def getSeries(a):
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
+        '\\n')
+    result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
+        '\\n')
+    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 def main(number2):
     number=number2.upper()
     htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
@@ -103,6 +110,7 @@ def main(number2):
         'actor_photo': '',
         'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
         'source': 'mgstage.py',
+        'series': getSeries(a),
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
     return js
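getTag() now splits the cleaned genre string into a list, so the tags arrive as separate items rather than one comma-joined string; a short illustration on an invented intermediate value:

# Invented stand-in for the cleaned-up genre text after the replace() chain.
cleaned = "ハイビジョン,独占配信,単体作品"
print(cleaned.replace(',,', '').split(','))  # ['ハイビジョン', '独占配信', '単体作品']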
xcity.py (5 changed lines)
@@ -70,7 +70,7 @@ def getRelease(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
     result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')).strip(" ['']")
     try:
-        return re.findall('\d{4}/\d{2}/\d{2}', result1)[0]
+        return re.findall('\d{4}/\d{2}/\d{2}', result1)[0].replace('/','-')
     except:
         return ''

@@ -122,8 +122,7 @@ def main(number):
     try:
         number = number.upper()
        query_result = get_html(
-            'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q=' + number.replace('-',
-            '') + '&sg=main&num=30')
+            'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q=' + number.replace('-','') + '&sg=main&num=30')
         html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
         urls = html.xpath("//table[contains(@class, 'resultList')]/tr[2]/td[1]/a/@href")[0]
         detail_page = get_html('https://xcity.jp' + urls)
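The xcity release date is still extracted with re.findall; the only change is the same slash-to-dash normalization. A tiny example on an invented list-item string:

import re

# Invented stand-in for the text of the release-date <li> on an xcity detail page.
result1 = "['発売日: 2019/07/19']"
print(re.findall(r'\d{4}/\d{2}/\d{2}', result1)[0].replace('/', '-'))  # 2019-07-19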