diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index cb70529..ea41c8f 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -29,14 +29,19 @@ def check_update(current_version): else: print("[+]Update Check disabled!") -def argparse_get_file(): +def argparse_function(switch): parser = argparse.ArgumentParser() - parser.add_argument("file", default='',nargs='?', help="Write the file path on here") + parser.add_argument("file", default='',nargs='?', help="Single Movie file path.") + parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.") + parser.add_argument("-e", "--exit", default='1', nargs='?', help="Exit Switch 1:Press enter key to exit. 2:Auto exit.") args = parser.parse_args() - if args.file == '': - return '' - else: - return args.file + if switch == 1: + if args.file == '': + return '' + elif switch == 2: + return args.config + elif switch == 3: + return args.exit def movie_lists(root, escape_folder): for folder in escape_folder: @@ -95,8 +100,8 @@ def getNumber(filepath,absolute_path = False): if __name__ == '__main__': - version = '3.0' - config_file = 'config.ini' + version = '3.1' + config_file = argparse_function(2) config = ConfigParser() config.read(config_file, encoding='UTF-8') success_folder = config['common']['success_output_folder'] @@ -114,7 +119,7 @@ if __name__ == '__main__': movie_list = movie_lists('.', escape_folder) #========== 野鸡番号拖动 ========== - number_argparse=argparse_get_file() + number_argparse = argparse_function(1) if not number_argparse == '': print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,absolute_path = True) + "]") core_main(number_argparse, getNumber(number_argparse,absolute_path = True)) @@ -140,7 +145,7 @@ if __name__ == '__main__': # print("[*]======================================================") try: print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]") - core_main(i, getNumber(i)) + core_main(i, getNumber(i), config_file=config_file) print("[*]======================================================") except Exception as e: # 番号提取异常 print('[-]' + i + ' ERRPR :') @@ -159,4 +164,6 @@ if __name__ == '__main__': CEF(success_folder) CEF(failed_folder) print("[+]All finished!!!") + if argparse_function(3) == '2': + os._exit(0) input("[+][+]Press enter key exit, you can check the error messge before you exit.") diff --git a/core.py b/core.py index e7ba866..3e4e268 100755 --- a/core.py +++ b/core.py @@ -10,6 +10,8 @@ import json from ADC_function import * from configparser import ConfigParser import argparse +import requests +import random # =========website======== import fc2fans_club import mgstage @@ -18,8 +20,6 @@ import javbus import javdb import fanza import jav321 -import requests -import random # =====================本地文件处理=========================== @@ -335,7 +335,7 @@ def cutImage(imagecut, path, number, c_word): print('[-]Cover cut failed!') elif imagecut == 0: shutil.copyfile(path + '/' + number + c_word + '-fanart.jpg',path + '/' + number + c_word + '-poster.jpg') - print('[+]Image Copyed! ' + path + '/' + number + c_word + '-poster.jpg') + print('[+]Image Copyed! ' + path + '/' + number + c_word + '-poster.jpg') def pasteFileToFolder(filepath, path, number, c_word): # 文件路径,番号,后缀,要移动至的位置 @@ -418,14 +418,13 @@ def debug_mode(json_data): aaa = '' -def core_main(file_path, number_th): +def core_main(file_path, number_th, config_file): # =======================================================================初始化所需变量 multi_part = 0 part = '' c_word = '' cn_sub = '' liuchu = '' - config_file = 'config.ini' Config = ConfigParser() Config.read(config_file, encoding='UTF-8') program_mode = Config['common']['main_mode'] # 运行模式 diff --git a/fanza.py b/fanza.py index 4caed6d..ab2bb33 100644 --- a/fanza.py +++ b/fanza.py @@ -223,6 +223,34 @@ def main(number): ) # .encode('UTF-8') return js +def main_htmlcode(number): + # fanza allow letter + number + underscore, normalize the input here + # @note: I only find the usage of underscore as h_test123456789 + fanza_search_number = number + # AV_Data_Capture.py.getNumber() over format the input, restore the h_ prefix + if fanza_search_number.startswith("h-"): + fanza_search_number = fanza_search_number.replace("h-", "h_") + + fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower() + + fanza_urls = [ + "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=", + "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=", + "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=", + "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=", + "https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=", + "https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=", + ] + chosen_url = "" + for url in fanza_urls: + chosen_url = url + fanza_search_number + htmlcode = get_html(chosen_url) + if "404 Not Found" not in htmlcode: + break + if "404 Not Found" in htmlcode: + return json.dumps({"title": "",}) + return htmlcode + if __name__ == "__main__": # print(main("DV-1562")) diff --git a/javbus.py b/javbus.py index 83d61f8..9f77a25 100755 --- a/javbus.py +++ b/javbus.py @@ -4,6 +4,7 @@ from lxml import etree#need install from bs4 import BeautifulSoup#need install import json from ADC_function import * +import fanza def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img soup = BeautifulSoup(htmlcode, 'lxml') @@ -60,10 +61,19 @@ def getDirector(htmlcode): #获取导演 html = etree.fromstring(htmlcode, etree.HTMLParser()) result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") return result -def getOutline(htmlcode): #获取演员 - doc = pq(htmlcode) - result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text()) +def getCID(htmlcode): + html = etree.fromstring(htmlcode, etree.HTMLParser()) + #print(htmlcode) + string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','') + result = re.sub('/.*?.jpg','',string) return result +def getOutline(htmlcode): #获取演员 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + try: + result = html.xpath("string(//div[contains(@class,'mg-b20 lh4')])").replace('\n','') + return result + except: + return '' def getSerise(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']") @@ -78,43 +88,14 @@ def getTag(htmlcode): # 获取演员 tag.append(i.get_text()) return tag - -def main(number): - try: - htmlcode = get_html('https://www.javbus.com/' + number) - try: - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) - except: - dww_htmlcode = '' - dic = { - 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))), - 'studio': getStudio(htmlcode), - 'year': str(re.search('\d{4}', getYear(htmlcode)).group()), - 'outline': getOutline(dww_htmlcode), - 'runtime': getRuntime(htmlcode), - 'director': getDirector(htmlcode), - 'actor': getActor(htmlcode), - 'release': getRelease(htmlcode), - 'number': getNum(htmlcode), - 'cover': getCover(htmlcode), - 'imagecut': 1, - 'tag': getTag(htmlcode), - 'label': getSerise(htmlcode), - 'actor_photo': getActorPhoto(htmlcode), - 'website': 'https://www.javbus.com/' + number, - 'source' : 'javbus.py', - } - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - return js - except: - return main_uncensored(number) - def main_uncensored(number): htmlcode = get_html('https://www.javbus.com/' + number) - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) if getTitle(htmlcode) == '': htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_')) - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) + try: + dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode)) + except: + dww_htmlcode = '' dic = { 'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''), 'studio': getStudio(htmlcode), @@ -136,3 +117,43 @@ def main_uncensored(number): js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js + +def main(number): + try: + try: + htmlcode = get_html('https://www.javbus.com/' + number) + try: + dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode)) + except: + dww_htmlcode = '' + dic = { + 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))), + 'studio': getStudio(htmlcode), + 'year': str(re.search('\d{4}', getYear(htmlcode)).group()), + 'outline': getOutline(dww_htmlcode), + 'runtime': getRuntime(htmlcode), + 'director': getDirector(htmlcode), + 'actor': getActor(htmlcode), + 'release': getRelease(htmlcode), + 'number': getNum(htmlcode), + 'cover': getCover(htmlcode), + 'imagecut': 1, + 'tag': getTag(htmlcode), + 'label': getSerise(htmlcode), + 'actor_photo': getActorPhoto(htmlcode), + 'website': 'https://www.javbus.com/' + number, + 'source': 'javbus.py', + } + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, + separators=(',', ':'), ) # .encode('UTF-8') + return js + except: + return main_uncensored(number) + except: + data = { + "title": "", + } + js = json.dumps( + data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":") + ) + return js diff --git a/update_check.json b/update_check.json index bbf041d..9cd0ba2 100644 --- a/update_check.json +++ b/update_check.json @@ -1,5 +1,5 @@ { - "version": "3.0", - "version_show": "3.0", + "version": "3.1", + "version_show": "3.1", "download": "https://github.com/yoshiko2/AV_Data_Capture/releases" }