Update 3.5
This commit is contained in:
@@ -86,7 +86,7 @@ def create_data_and_move(file_path: str, c: config.Config):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
version = '3.4.3'
|
||||
version = '3.5'
|
||||
|
||||
# Parse command line args
|
||||
single_file_path, config_file, auto_exit = argparse_function()
|
||||
|
||||
11
avsox.py
11
avsox.py
@@ -72,6 +72,13 @@ def getTag(a): # 获取演员
|
||||
for i in a:
|
||||
d.append(i.get_text())
|
||||
return d
|
||||
def getSeries(htmlcode):
    """Extract the series name from avsox page HTML.

    Finds the <span> whose text contains "系列:" and reads the text of the
    second <span> under the same parent element.

    Args:
        htmlcode: Raw HTML of the page to inspect.

    Returns:
        The series text, or '' when the HTML cannot be parsed or no
        matching element exists.
    """
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        matches = html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')
        # Stringify the result list and peel the list/quote characters off
        # both ends; an empty list therefore collapses to ''.
        return str(matches).strip(" ['']")
    except Exception:
        # Best-effort scraper: any parse/lookup failure means "no series".
        # `Exception` (not a bare except) lets Ctrl-C and SystemExit through.
        return ''
|
||||
|
||||
def main(number):
|
||||
a = get_html('https://avsox.host/cn/search/' + number)
|
||||
@@ -108,8 +115,10 @@ def main(number):
|
||||
'actor_photo': getActorPhoto(web),
|
||||
'website': result1,
|
||||
'source': 'avsox.py',
|
||||
'series': getSeries(info),
|
||||
}
|
||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
||||
return js
|
||||
|
||||
#print(main('012717_472'))
|
||||
if __name__ == "__main__":
|
||||
print(main('012717_472'))
|
||||
7
core.py
7
core.py
@@ -98,6 +98,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON
|
||||
runtime = json_data['runtime']
|
||||
outline = json_data['outline']
|
||||
label = json_data['label']
|
||||
series = json_data['series']
|
||||
year = json_data['year']
|
||||
try:
|
||||
cover_small = json_data['cover_small']
|
||||
@@ -166,7 +167,8 @@ def get_info(json_data): # 返回json里的数据
|
||||
number = json_data['number']
|
||||
cover = json_data['cover']
|
||||
website = json_data['website']
|
||||
return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website
|
||||
series = json_data['series']
|
||||
return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series
|
||||
|
||||
|
||||
def small_cover_check(path, number, cover_small, c_word, conf: config.Config, filepath, failed_folder):
|
||||
@@ -263,7 +265,7 @@ def image_download(cover, number, c_word, path, conf: config.Config, filepath, f
|
||||
|
||||
|
||||
def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag, actor_list, liuchu):
|
||||
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data)
|
||||
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series = get_info(json_data)
|
||||
|
||||
try:
|
||||
if not os.path.exists(path):
|
||||
@@ -300,6 +302,7 @@ def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fa
|
||||
try:
|
||||
for i in tag:
|
||||
print(" <tag>" + i + "</tag>", file=code)
|
||||
print(" <tag>" + series + "</tag>", file=code)
|
||||
except:
|
||||
aaaaa = ''
|
||||
try:
|
||||
|
||||
25
fanza.py
25
fanza.py
@@ -108,7 +108,7 @@ def getRelease(text):
|
||||
)[0].lstrip("\n")
|
||||
except:
|
||||
pass
|
||||
return result
|
||||
return result.replace('/','-')
|
||||
|
||||
|
||||
def getTag(text):
|
||||
@@ -174,6 +174,23 @@ def getOutline(text):
|
||||
return result
|
||||
|
||||
|
||||
def getSeries(text):
    """Extract the series name from fanza page HTML.

    Looks at the <td> following the cell labelled "シリーズ:". The text of a
    link inside that cell is preferred; if there is no link, the cell's own
    text is used instead.

    Args:
        text: Raw HTML of the page to inspect.

    Returns:
        The first matching text node, or '' when the HTML cannot be parsed
        or neither lookup finds anything.
    """
    lookups = (
        "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()",
        "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()",
    )
    try:
        html = etree.fromstring(text, etree.HTMLParser())
        for query in lookups:
            found = html.xpath(query)
            if found:
                return found[0]
        return ''
    except Exception:
        # Best-effort scraper: treat any parse failure as "no series".
        # `Exception` (not a bare except) lets Ctrl-C and SystemExit through.
        return ''
|
||||
|
||||
|
||||
|
||||
def main(number):
|
||||
# fanza allow letter + number + underscore, normalize the input here
|
||||
# @note: I only find the usage of underscore as h_test123456789
|
||||
@@ -225,6 +242,7 @@ def main(number):
|
||||
"actor_photo": "",
|
||||
"website": chosen_url,
|
||||
"source": "fanza.py",
|
||||
"series": getSeries(htmlcode),
|
||||
}
|
||||
except:
|
||||
data = {
|
||||
@@ -266,7 +284,4 @@ def main_htmlcode(number):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# print(main("DV-1562"))
|
||||
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
|
||||
# print(main("ipx292"))
|
||||
pass
|
||||
print(main("DV-1562"))
|
||||
@@ -159,5 +159,5 @@ def main(number):
|
||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
|
||||
return js
|
||||
|
||||
|
||||
#print(main('1252953'))
|
||||
if __name__ == '__main__':
|
||||
print(main('1252953'))
|
||||
22
jav321.py
22
jav321.py
@@ -13,7 +13,6 @@ def main(number: str) -> json:
|
||||
data = parse_info(soup)
|
||||
dic = {
|
||||
"title": get_title(lx),
|
||||
"studio": "",
|
||||
"year": get_year(data),
|
||||
"outline": get_outline(lx),
|
||||
"director": "",
|
||||
@@ -46,10 +45,12 @@ def parse_info(soup: BeautifulSoup) -> dict:
|
||||
return {
|
||||
"actor": get_actor(data_dic),
|
||||
"label": get_label(data_dic),
|
||||
"studio": get_studio(data_dic),
|
||||
"tag": get_tag(data_dic),
|
||||
"number": get_number(data_dic),
|
||||
"release": get_release(data_dic),
|
||||
"runtime": get_runtime(data_dic),
|
||||
"series": get_series(data_dic),
|
||||
}
|
||||
else:
|
||||
return {}
|
||||
@@ -84,6 +85,9 @@ def get_cover(lx: html.HtmlElement) -> str:
|
||||
def get_outline(lx: html.HtmlElement) -> str:
    """Return the first text node found at the fixed outline location.

    Raises:
        IndexError: if the XPath query matches nothing.
    """
    outline_nodes = lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()")
    return outline_nodes[0]
|
||||
|
||||
def get_series2(lx: html.HtmlElement) -> str:
    """Return the text of the 11th link at a fixed position in the page.

    Raises:
        IndexError: if the XPath query matches nothing.
    """
    link_texts = lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[1]/div[2]/a[11]/text()")
    return link_texts[0]
|
||||
|
||||
|
||||
def get_actor(data: hash) -> str:
|
||||
if "女优" in data:
|
||||
@@ -106,6 +110,13 @@ def get_tag(data: hash) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def get_studio(data: hash) -> str:
    """Return the studio taken from the "片商" entry of *data*.

    The entry's value is handed to get_anchor_info(); when the key is
    absent an empty string is returned.
    """
    if "片商" not in data:
        return ""
    return get_anchor_info(data["片商"])
|
||||
|
||||
|
||||
def get_number(data: hash) -> str:
|
||||
if "番号" in data:
|
||||
return get_text_info(data["番号"])
|
||||
@@ -134,5 +145,12 @@ def get_year(data: hash) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def get_series(data: hash) -> str:
    """Return the series taken from the "系列" entry of *data*.

    The entry's value is handed to get_anchor_info(); when the key is
    absent an empty string is returned.
    """
    if "系列" not in data:
        return ""
    return get_anchor_info(data["系列"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(main("wmc-002"))
|
||||
print(main("soe-259"))
|
||||
|
||||
14
javbus.py
14
javbus.py
@@ -75,9 +75,12 @@ def getOutline(htmlcode): #获取演员
|
||||
except:
|
||||
return ''
|
||||
def getSerise(htmlcode):
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
|
||||
return result
|
||||
try:
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
|
||||
return result
|
||||
except:
|
||||
return ''
|
||||
def getTag(htmlcode): # 获取演员
|
||||
tag = []
|
||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
||||
@@ -113,6 +116,7 @@ def main_uncensored(number):
|
||||
'actor_photo': '',
|
||||
'website': 'https://www.javbus.com/' + number,
|
||||
'source': 'javbus.py',
|
||||
'series': getSerise(htmlcode),
|
||||
}
|
||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
||||
return js
|
||||
@@ -143,6 +147,7 @@ def main(number):
|
||||
'actor_photo': getActorPhoto(htmlcode),
|
||||
'website': 'https://www.javbus.com/' + number,
|
||||
'source': 'javbus.py',
|
||||
'series': getSerise(htmlcode),
|
||||
}
|
||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
|
||||
separators=(',', ':'), ) # .encode('UTF-8')
|
||||
@@ -157,3 +162,6 @@ def main(number):
|
||||
data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
|
||||
)
|
||||
return js
|
||||
|
||||
if __name__ == "__main__" :
|
||||
print(main('ipx-292'))
|
||||
|
||||
53
javdb.py
53
javdb.py
@@ -13,8 +13,8 @@ def getTitle(a):
|
||||
return result
|
||||
def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||
result1 = str(html.xpath('//strong[contains(text(),"演員")]/../span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"演員")]/../span/a/text()')).strip(" ['']")
|
||||
return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
|
||||
def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
|
||||
a = actor.split(',')
|
||||
@@ -25,23 +25,23 @@ def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
|
||||
return d
|
||||
def getStudio(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||
result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
|
||||
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
||||
def getRuntime(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||
result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']")
|
||||
return str(result1 + result2).strip('+').rstrip('mi')
|
||||
def getLabel(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
|
||||
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
||||
def getNum(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser())
|
||||
result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||
result1 = str(html.xpath('//strong[contains(text(),"番號")]/../span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
|
||||
return str(result2 + result1).strip('+')
|
||||
def getYear(getRelease):
|
||||
try:
|
||||
@@ -51,14 +51,18 @@ def getYear(getRelease):
|
||||
return getRelease
|
||||
def getRelease(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||
result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']")
|
||||
return str(result1 + result2).strip('+')
|
||||
def getTag(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||
return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
|
||||
try:
|
||||
result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
|
||||
return result
|
||||
except:
|
||||
result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
|
||||
return result
|
||||
|
||||
def getCover_small(a, index=0):
|
||||
# same issue mentioned below,
|
||||
# javdb sometime returns multiple results
|
||||
@@ -74,17 +78,26 @@ def getCover(htmlcode):
|
||||
return result
|
||||
def getDirector(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
|
||||
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
|
||||
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
||||
def getOutline(htmlcode):
    """Return the text of the first <p> under the element with id "introduction".

    The XPath result list is stringified and the list/quote characters are
    stripped from both ends, so an empty result yields ''.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    paragraphs = tree.xpath('//*[@id="introduction"]/dd/p[1]/text()')
    return str(paragraphs).strip(" ['']")
|
||||
def getSeries(a):
    """Return the series text from javdb page HTML.

    Reads the <span> elements sharing a parent with the <strong> whose text
    contains "系列": first their direct text, then the text of links inside
    them. Both results are stringified, trimmed and concatenated.
    """
    # Absolute location noted by the original author:
    # /html/body/section/div/div[3]/div[2]/nav/div[7]/span/a
    tree = etree.fromstring(a, etree.HTMLParser())
    queries = (
        '//strong[contains(text(),"系列")]/../span/text()',
        '//strong[contains(text(),"系列")]/../span/a/text()',
    )
    # Direct text first, link text second, same order as the queries.
    pieces = [str(tree.xpath(query)).strip(" ['']") for query in queries]
    combined = ''.join(pieces)
    return combined.strip('+').replace("', '", '').replace('"', '')
|
||||
def main(number):
|
||||
try:
|
||||
number = number.upper()
|
||||
query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
|
||||
try:
|
||||
query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
|
||||
except:
|
||||
query_result = get_html('https://javdb4.com/search?q=' + number + '&f=all')
|
||||
html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
# javdb sometime returns multiple results,
|
||||
# and the first elememt maybe not the one we are looking for
|
||||
@@ -111,6 +124,7 @@ def main(number):
|
||||
'actor_photo': getActorPhoto(getActor(detail_page)),
|
||||
'website': 'https://javdb.com' + correct_url,
|
||||
'source': 'javdb.py',
|
||||
'series': getSeries(detail_page),
|
||||
}
|
||||
except Exception as e:
|
||||
# print(e)
|
||||
@@ -120,4 +134,5 @@ def main(number):
|
||||
|
||||
# main('DV-1562')
|
||||
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
|
||||
#print(main('ipx-292'))
|
||||
if __name__ == "__main__":
|
||||
print(main('ipx-292'))
|
||||
|
||||
16
mgstage.py
16
mgstage.py
@@ -21,8 +21,8 @@ def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/t
|
||||
return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
|
||||
def getStudio(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
|
||||
result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
result1=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
result2=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
return str(result1+result2).strip('+').replace("', '",'').replace('"','')
|
||||
def getRuntime(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
@@ -55,14 +55,14 @@ def getRelease(a):
|
||||
'\\n')
|
||||
result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
|
||||
'\\n')
|
||||
return str(result1 + result2).strip('+')
|
||||
return str(result1 + result2).strip('+').replace('/','-')
|
||||
def getTag(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
|
||||
'\\n')
|
||||
result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
|
||||
'\\n')
|
||||
return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','')
|
||||
return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
|
||||
def getCover(htmlcode):
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
|
||||
@@ -79,6 +79,13 @@ def getOutline(htmlcode):
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('//p/text()')).strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
|
||||
return result
|
||||
def getSeries(a):
    """Return the series text from mgstage page HTML.

    Reads the <td> cells in the same row as the <th> whose text contains
    "シリーズ": first the text of links inside them, then their direct text.
    Both results are stringified, trimmed and concatenated.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    queries = (
        '//th[contains(text(),"シリーズ")]/../td/a/text()',
        '//th[contains(text(),"シリーズ")]/../td/text()',
    )
    pieces = []
    for query in queries:
        raw = str(tree.xpath(query))
        # Remove list/quote characters, then the escaped newline residue
        # left over from stringifying the result list.
        pieces.append(raw.strip(" ['']").strip('\\n ').strip('\\n'))
    combined = ''.join(pieces)
    return combined.strip('+').replace("', '", '').replace('"', '')
|
||||
def main(number2):
|
||||
number=number2.upper()
|
||||
htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
|
||||
@@ -103,6 +110,7 @@ def main(number2):
|
||||
'actor_photo': '',
|
||||
'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
|
||||
'source': 'mgstage.py',
|
||||
'series': getSeries(a),
|
||||
}
|
||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
||||
return js
|
||||
|
||||
5
xcity.py
5
xcity.py
@@ -70,7 +70,7 @@ def getRelease(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')).strip(" ['']")
|
||||
try:
|
||||
return re.findall('\d{4}/\d{2}/\d{2}', result1)[0]
|
||||
return re.findall('\d{4}/\d{2}/\d{2}', result1)[0].replace('/','-')
|
||||
except:
|
||||
return ''
|
||||
|
||||
@@ -122,8 +122,7 @@ def main(number):
|
||||
try:
|
||||
number = number.upper()
|
||||
query_result = get_html(
|
||||
'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q=' + number.replace('-',
|
||||
'') + '&sg=main&num=30')
|
||||
'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q=' + number.replace('-','') + '&sg=main&num=30')
|
||||
html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
urls = html.xpath("//table[contains(@class, 'resultList')]/tr[2]/td[1]/a/@href")[0]
|
||||
detail_page = get_html('https://xcity.jp' + urls)
|
||||
|
||||
Reference in New Issue
Block a user