diff --git a/config.ini b/config.ini index 75ddbf2..6a735e8 100644 --- a/config.ini +++ b/config.ini @@ -5,7 +5,7 @@ success_output_folder=JAV_output soft_link=0 [proxy] -proxy=127.0.0.1:1081 +proxy= timeout=10 retry=3 diff --git a/core.py b/core.py index f2a158e..dc47a65 100755 --- a/core.py +++ b/core.py @@ -140,7 +140,7 @@ def getDataFromJSON(file_number): # 从JSON返回元数据 json_data = json.loads(javbus.main(file_number)) if getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取 json_data = json.loads(avsox.main(file_number)) - elif getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取 + if getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取 json_data = json.loads(javdb.main(file_number)) # ================================================网站规则添加结束================================================ @@ -183,6 +183,7 @@ def getDataFromJSON(file_number): # 从JSON返回元数据 title = title.replace('<', '') title = title.replace('>', '') title = title.replace('|', '') + release = release.replace('/', '-') tmpArr = cover_small.split(',') if len(tmpArr) > 0: cover_small = tmpArr[0].strip('\"').strip('\'') @@ -346,7 +347,7 @@ def PrintFiles(): for key, value in actor_photo.items(): print(" ", file=code) print(" " + key + "", file=code) - if not actor_photo == '': # or actor_photo == []: + if not value == '': # or actor_photo == []: print(" " + value + "", file=code) print(" ", file=code) except: @@ -393,7 +394,7 @@ def PrintFiles(): for key, value in actor_photo.items(): print(" ", file=code) print(" " + key + "", file=code) - if not actor_photo == '': # or actor_photo == []: + if not value == '': # or actor_photo == []: print(" " + value + "", file=code) print(" ", file=code) except: @@ -439,7 +440,7 @@ def PrintFiles(): for key, value in actor_photo.items(): print(" ", file=code) print(" " + key + "", file=code) - if not actor_photo == '': # or actor_photo == []: + if not value == '': # or actor_photo == []: print(" " + value + "", file=code) print(" ", file=code) except: diff --git a/javdb.py b/javdb.py index c32cfeb..a85980c 100755 --- a/javdb.py +++ b/javdb.py @@ -8,19 +8,26 @@ def getTitle(a): try: html = etree.fromstring(a, etree.HTMLParser()) result = str(html.xpath('/html/body/section/div/h2/strong/text()')).strip(" ['']") - return re.sub('.*\] ','',result.replace('/', ',').replace('\\xa0','').replace(' : ','')) + return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', '').replace(' : ', '')) except: - return re.sub('.*\] ','',result.replace('/', ',').replace('\\xa0','')) -def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() - html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() + return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', '')) +def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace(",\\xa0","").replace("'","").replace(' ','').replace(',,','').lstrip(',').replace(',',', ') + return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ') +def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img + a = actor.split(',') + d={} + for i in a: + p={i:''} + d.update(p) + return d def getStudio(a): - html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"製作")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"製作")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1+result2).strip('+').replace("', '",'').replace('"','') + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']") + return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') def getRuntime(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']") @@ -30,15 +37,15 @@ def getLabel(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace("', '",'').replace('"','') + return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') def getNum(a): html = etree.fromstring(a, etree.HTMLParser()) result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+') + return str(result2 + result1).strip('+') def getYear(getRelease): try: - result = str(re.search('\d{4}',getRelease).group()) + result = str(re.search('\d{4}', getRelease).group()) return result except: return getRelease @@ -51,33 +58,40 @@ def getTag(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace(",\\xa0","").replace("'","").replace(' ','').replace(',,','').lstrip(',') + return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',') +def getCover_small(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result = 'http:' + html.xpath( + '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\'][1]/a[' + '@class=\'box\']/div[@class=\'item-image fix-scale-cover\']/img/@src')[0] + return result def getCover(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) result = str(html.xpath('/html/body/section/div/div[2]/div[1]/a/img/@src')).strip(" ['']") if result == '': - result = str(html.xpath('/html/body/section/div/div[3]/div[1]/a/img/@src')).strip(" ['']") + result = str(html.xpath('/html/body/section/div/div[4]/div[1]/a/img/@src')).strip(" ['']") return result def getDirector(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace("', '",'').replace('"','') + return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') def getOutline(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") return result def main(number): + number = number.upper() try: a = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']") + result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0] b = get_html('https://javdb.com' + result1).replace(u'\xa0', u' ') dic = { - 'actor': getActor(a), + 'actor': getActor(b), 'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace(getNum(a), '').replace( - '无码', '').replace('有码', '').lstrip(' '), + '无码', '').replace('有码', '').lstrip(' ').replace(number,''), 'studio': getStudio(b), 'outline': getOutline(b), 'runtime': getRuntime(b), @@ -85,27 +99,32 @@ def main(number): 'release': getRelease(b), 'number': getNum(b), 'cover': getCover(b), - 'imagecut': 0, + 'cover_small': getCover_small(a), + 'imagecut': 3, 'tag': getTag(b), 'label': getLabel(b), 'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': '', + 'actor_photo': getActorPhoto(getActor(b)), 'website': 'https://javdb.com' + result1, 'source': 'javdb.py', } + if getNum(b) != number: # 与搜索到的番号不匹配 + dic['title'] = '' + dic['number'] = '' js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js except: a = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']") + result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0] + print(html.xpath('//*[@id="videos"]/div/div/a/@href')) b = get_html('https://javdb.com' + result1).replace(u'\xa0', u' ') dic = { 'actor': getActor(b), 'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace( getNum(b), '').replace( - '无码', '').replace('有码', '').lstrip(' '), + '无码', '').replace('有码', '').lstrip(' ').replace(number,''), 'studio': getStudio(b), 'outline': getOutline(b), 'runtime': getRuntime(b), @@ -113,15 +132,21 @@ def main(number): 'release': getRelease(b), 'number': getNum(b), 'cover': getCover(b), - 'imagecut': 0, + 'cover_small': getCover_small(a), + 'imagecut': 3, 'tag': getTag(b), 'label': getLabel(b), 'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': '', + 'actor_photo': getActorPhoto(getActor(b)), 'website': 'https://javdb.com' + result1, 'source': 'javdb.py', } - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8') + if getNum(b) != number: # 与搜索到的番号不匹配 + dic['title'] = '' + dic['number'] = '' + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js -#print(get_html('https://javdb1.com/v/WwZ0Q')) \ No newline at end of file +# main('DV-1562') +# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") +# print(get_html('https://javdb1.com/v/WwZ0Q'))