diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index c2ab31e..3de8bba 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -1,11 +1,19 @@ import argparse -from core import * import os +import sys + from number_parser import get_number +from core import * def check_update(local_version): - data = json.loads(get_html("https://api.github.com/repos/yoshiko2/AV_Data_Capture/releases/latest")) + try: + data = json.loads(get_html("https://api.github.com/repos/yoshiko2/AV_Data_Capture/releases/latest")) + except Exception as e: + print("[-] Failed to update! Please check new version manually:") + print("[-] https://github.com/yoshiko2/AV_Data_Capture/releases") + print("[*]======================================================") + return remote = data["tag_name"] local = local_version @@ -22,18 +30,22 @@ def argparse_function(ver: str) -> [str, str, bool]: parser = argparse.ArgumentParser() parser.add_argument("file", default='', nargs='?', help="Single Movie file path.") parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.") - parser.add_argument("-n", "--number", default='', nargs='?',help="Custom file number") - parser.add_argument("--version", action="version", version=ver) + parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number") + parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true", + help="Auto exit after program complete") + parser.add_argument("-v", "--version", action="version", version=ver) args = parser.parse_args() - return args.file, args.config, args.number + return args.file, args.config, args.number, args.autoexit + def movie_lists(root, escape_folder): for folder in escape_folder: if folder in root: return [] total = [] - file_type = ['.mp4', '.avi', '.rmvb', '.wmv', '.mov', '.mkv', '.flv', '.ts', '.webm', '.MP4', '.AVI', '.RMVB', '.WMV','.MOV', '.MKV', '.FLV', '.TS', '.WEBM', '.iso','.ISO'] + file_type = ['.mp4', '.avi', '.rmvb', '.wmv', '.mov', '.mkv', '.flv', '.ts', '.webm', '.MP4', '.AVI', '.RMVB', + '.WMV', '.MOV', '.MKV', '.FLV', '.TS', '.WEBM', '.iso', '.ISO'] dirs = os.listdir(root) for entry in dirs: f = os.path.join(root, entry) @@ -50,22 +62,22 @@ def create_failed_folder(failed_folder): os.makedirs(failed_folder + '/') except: print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)") - os._exit(0) + sys.exit(0) -def CEF(path): +def rm_empty_folder(path): try: files = os.listdir(path) # 获取路径下的子文件(夹)列表 for file in files: - os.removedirs(path + '/' + file) # 删除这个空文件夹 + os.rmdir(path + '/' + file) # 删除这个空文件夹 print('[+]Deleting empty folder', path + '/' + file) except: a = '' -def create_data_and_move(file_path: str, c: config.Config,debug): +def create_data_and_move(file_path: str, c: config.Config, debug): # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4 - n_number = get_number(debug,file_path) + n_number = get_number(debug, file_path) if debug == True: print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number)) @@ -96,6 +108,7 @@ def create_data_and_move(file_path: str, c: config.Config,debug): except Exception as err: print('[!]', err) + def create_data_and_move_with_custom_number(file_path: str, c: config.Config, custom_number=None): try: print("[!]Making Data for [{}], the number is [{}]".format(file_path, custom_number)) @@ -117,10 +130,10 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu if __name__ == '__main__': - version = '4.0.3' + version = '4.2.2' # Parse command line args - single_file_path, config_file, custom_number = argparse_function(version) + single_file_path, config_file, custom_number, auto_exit = argparse_function(version) # Read config.ini conf = config.Config(path=config_file) @@ -139,21 +152,21 @@ if __name__ == '__main__': # ========== Single File ========== if not single_file_path == '': print('[+]==================== Single File =====================') - create_data_and_move_with_custom_number(single_file_path, conf,custom_number) - CEF(conf.success_folder()) - CEF(conf.failed_folder()) + create_data_and_move_with_custom_number(single_file_path, conf, custom_number) + rm_empty_folder(conf.success_folder()) + rm_empty_folder(conf.failed_folder()) print("[+]All finished!!!") input("[+][+]Press enter key exit, you can check the error messge before you exit.") - exit() + sys.exit(0) # ========== Single File ========== - movie_list = movie_lists(".", re.split("[,,]", conf.escape_folder())) + movie_list = movie_lists(os.getcwd(), re.split("[,,]", conf.escape_folder())) count = 0 count_all = str(len(movie_list)) print('[+]Find', count_all, 'movies') if conf.debug() == True: - print('[+]'+' DEBUG MODE ON '.center(54, '-')) + print('[+]' + ' DEBUG MODE ON '.center(54, '-')) if conf.soft_link(): print('[!] --- Soft link mode is ENABLE! ----') for movie_path in movie_list: # 遍历电影列表 交给core处理 @@ -162,9 +175,12 @@ if __name__ == '__main__': print('[!] - ' + percentage + ' [' + str(count) + '/' + count_all + '] -') create_data_and_move(movie_path, conf, conf.debug()) - CEF(conf.success_folder()) - CEF(conf.failed_folder()) + rm_empty_folder(conf.success_folder()) + rm_empty_folder(conf.failed_folder()) print("[+]All finished!!!") if conf.auto_exit(): - os._exit(0) - input("[+][+]Press enter key exit, you can check the error message before you exit.") + sys.exit(0) + if auto_exit: + sys.exit(0) + input("Press enter key exit, you can check the error message before you exit...") + sys.exit(0) diff --git a/WebCrawler/airav.py b/WebCrawler/airav.py new file mode 100644 index 0000000..9a13cec --- /dev/null +++ b/WebCrawler/airav.py @@ -0,0 +1,174 @@ +import sys +sys.path.append('../') +import re +from pyquery import PyQuery as pq#need install +from lxml import etree#need install +from bs4 import BeautifulSoup#need install +import json +from ADC_function import * + + + +# airav这个网站没有演员图片,所以直接使用javbus的图 +def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img + soup = BeautifulSoup(htmlcode, 'lxml') + a = soup.find_all(attrs={'class': 'star-name'}) + d={} + for i in a: + l=i.a['href'] + t=i.get_text() + html = etree.fromstring(get_html(l), etree.HTMLParser()) + p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']") + p2={t:p} + d.update(p2) + return d + +def getTitle(htmlcode): #获取标题 + doc = pq(htmlcode) + # h5:first-child定位第一个h5标签,妈的找了好久才找到这个语法 + title = str(doc('div.d-flex.videoDataBlock h5.d-none.d-md-block:nth-child(2)').text()).replace(' ', '-') + try: + title2 = re.sub('n\d+-','',title) + + return title2 + except: + return title + +def getStudio(htmlcode): #获取厂商 已修改 + html = etree.fromstring(htmlcode,etree.HTMLParser()) + # 如果记录中冇导演,厂商排在第4位 + if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") + # 如果记录中有导演,厂商排在第5位 + elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']") + else: + result = '' + return result +def getYear(htmlcode): #获取年份 + html = etree.fromstring(htmlcode,etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") + return result +def getCover(htmlcode): #获取封面链接 + doc = pq(htmlcode) + image = doc('a.bigImage') + return image.attr('href') +def getRelease(htmlcode): #获取出版日期 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") + return result +def getRuntime(htmlcode): #获取分钟 已修改 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')).strip(" ['']分鐘") + return result + +def getActor(htmlcode): #获取女优 + b=[] + soup=BeautifulSoup(htmlcode,'lxml') + a=soup.find_all(attrs={'class':'videoAvstarListItem'}) + for i in a: + b.append(i.get_text()) + return b + +def getNum(htmlcode): #获取番号 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") + return result +def getDirector(htmlcode): #获取导演 已修改 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") + else: + result = '' # 记录中有可能没有导演数据 + return result + +def getOutline(htmlcode): #获取演员 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + try: + result = html.xpath("string(//div[@class='d-flex videoDataBlock']/div[@class='synopsis']/p)").replace('\n','') + return result + except: + return '' +def getSerise(htmlcode): #获取系列 已修改 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + # 如果记录中冇导演,系列排在第6位 + if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']") + # 如果记录中有导演,系列排在第7位 + elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']") + else: + result = '' + return result +def getTag(htmlcode): # 获取标签 + tag = [] + soup = BeautifulSoup(htmlcode, 'lxml') + x = soup.find_all(attrs={'class': 'tagBtnMargin'}) + a = x[0].find_all('a') + + for i in a: + tag.append(i.get_text()) + return tag + +def main(number): + try: + try: + try: + htmlcode = get_html('https://cn.airav.wiki/video/' + number) + javbus_htmlcode = get_html('https://www.javbus.com/ja/' + number) + + + except: + print(number) + + dic = { + # 标题可使用airav + 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))), + # 制作商选择使用javbus + 'studio': getStudio(javbus_htmlcode), + # 年份也是用javbus + 'year': str(re.search('\d{4}', getYear(javbus_htmlcode)).group()), + # 简介 使用 airav + 'outline': getOutline(htmlcode), + # 使用javbus + 'runtime': getRuntime(javbus_htmlcode), + # 导演 使用javbus + 'director': getDirector(javbus_htmlcode), + # 作者 使用airav + 'actor': getActor(htmlcode), + # 发售日使用javbus + 'release': getRelease(javbus_htmlcode), + # 番号使用javbus + 'number': getNum(javbus_htmlcode), + # 封面链接 使用javbus + 'cover': getCover(javbus_htmlcode), + + 'imagecut': 1, + # 使用 airav + 'tag': getTag(htmlcode), + # 使用javbus + 'label': getSerise(javbus_htmlcode), + # 妈的,airav不提供作者图片 + 'actor_photo': getActorPhoto(javbus_htmlcode), + + 'website': 'https://www.airav.wiki/video/' + number, + 'source': 'airav.py', + # 使用javbus + 'series': getSerise(javbus_htmlcode), + } + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8') + return js + except: + return main_uncensored(number) + except: + data = { + "title": "", + } + js = json.dumps( + data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":") + ) + return js + + +if __name__ == '__main__': + print(main('sdsi-019')) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index eda8cb6..b463b3d 100644 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -26,10 +26,17 @@ def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img d.update(p) return d def getStudio(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') +# html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +# result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']") +# result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']") +# return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') + patherr = re.compile(r'片商\:[\s\S]*?(.*?)') + pianshang = patherr.findall(a) + if pianshang: + result = pianshang[0] + else: + result = "" + return result def getRuntime(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']") @@ -46,16 +53,30 @@ def getNum(a): result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']") return str(result2 + result1).strip('+') def getYear(getRelease): - try: - result = str(re.search('\d{4}', getRelease).group()) - return result - except: - return getRelease +# try: +# result = str(re.search('\d{4}', getRelease).group()) +# return result +# except: +# return getRelease + patherr = re.compile(r'日期\:\s*?.*?(.*?)\-.*?') + dates = patherr.findall(getRelease) + if dates: + result = dates[0] + else: + result = '' + return result def getRelease(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+') +# html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +# result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']") +# result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']") +# return str(result1 + result2).strip('+') + patherr = re.compile(r'日期\:\s*?.*?(.*?)') + dates = patherr.findall(a) + if dates: + result = dates[0] + else: + result = '' + return result def getTag(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() try: @@ -89,10 +110,17 @@ def getCover_small(a, index=0): result = 'https:' + result return result except: # 2020.7.17 Repair Cover Url crawl - result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index] - if not 'https' in result: - result = 'https:' + result - return result + try: + result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index] + if not 'https' in result: + result = 'https:' + result + return result + except: + result = html.xpath("//div[@class='item-image']/img/@data-src")[index] + if not 'https' in result: + result = 'https:' + result + return result + def getCover(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) try: @@ -127,14 +155,23 @@ def main(number): # and the first elememt maybe not the one we are looking for # iterate all candidates and find the match one urls = html.xpath('//*[@id="videos"]/div/div/a/@href') - ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()') - correct_url = urls[ids.index(number)] + # 记录一下欧美的ids ['Blacked','Blacked'] + if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): + correct_url = urls[0] + else: + ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()') + correct_url = urls[ids.index(number)] + detail_page = get_html('https://javdb.com' + correct_url) # no cut image by default imagecut = 3 # If gray image exists ,then replace with normal cover - cover_small = getCover_small(query_result, index=ids.index(number)) + if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): + cover_small = getCover_small(query_result) + else: + cover_small = getCover_small(query_result, index=ids.index(number)) + if 'placeholder' in cover_small: # replace wit normal cover and cut it imagecut = 1 diff --git a/core.py b/core.py index de04995..13a6673 100755 --- a/core.py +++ b/core.py @@ -1,5 +1,6 @@ import json import os.path +import pathlib import re import shutil import platform @@ -29,18 +30,17 @@ def escape_path(path, escape_literals: str): # Remove escape literals def moveFailedFolder(filepath, failed_folder): - print('[-]Move to Failed output folder') - shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/') - return - - -def CreatFailedFolder(failed_folder): - if not os.path.exists(failed_folder + '/'): # 新建failed文件夹 - try: - os.makedirs(failed_folder + '/') - except: - print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)") - return + if config.Config().failed_move(): + root_path = str(pathlib.Path(filepath).parent) + file_name = pathlib.Path(filepath).name + destination_path = root_path + '/' + failed_folder + '/' + if config.Config.soft_link(): + print('[-]Create symlink to Failed output folder') + os.symlink(filepath, destination_path + '/' + file_name) + else: + print('[-]Move to Failed output folder') + shutil.move(filepath, destination_path) + return def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON返回元数据 @@ -111,23 +111,25 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON # ================================================网站规则添加结束================================================ - title = json_data['title'] - actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表 - release = json_data['release'] - number = json_data['number'] - studio = json_data['studio'] - source = json_data['source'] - runtime = json_data['runtime'] - outline = json_data['outline'] - label = json_data['label'] - series = json_data['series'] - year = json_data['year'] - try: - cover_small = json_data['cover_small'] - except: + title = json_data.get('title') + actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表 + release = json_data.get('release') + number = json_data.get('number') + studio = json_data.get('studio') + source = json_data.get('source') + runtime = json_data.get('runtime') + outline = json_data.get('outline') + label = json_data.get('label') + series = json_data.get('series') + year = json_data.get('year') + + if json_data.get('cover_small') == None: cover_small = '' - imagecut = json_data['imagecut'] - tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ + else: + cover_small = json_data.get('cover_small') + + imagecut = json_data.get('imagecut') + tag = str(json_data.get('tag')).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') if title == '' or number == '': @@ -152,6 +154,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON tmpArr = cover_small.split(',') if len(tmpArr) > 0: cover_small = tmpArr[0].strip('\"').strip('\'') + # ====================处理异常字符 END================== #\/:*?"<>| # === 替换Studio片假名 @@ -193,15 +196,13 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON location_rule = eval(conf.location_rule()) - # Process only Windows. - if platform.system() == "Windows": - if 'actor' in conf.location_rule() and len(actor) > 100: - print(conf.location_rule()) - location_rule = eval(conf.location_rule().replace("actor","'多人作品'")) - maxlen = conf.max_title_len() - if 'title' in conf.location_rule() and len(title) > maxlen: - shorttitle = title[0:maxlen] - location_rule = location_rule.replace(title, shorttitle) + if 'actor' in conf.location_rule() and len(actor) > 100: + print(conf.location_rule()) + location_rule = eval(conf.location_rule().replace("actor","'多人作品'")) + maxlen = conf.max_title_len() + if 'title' in conf.location_rule() and len(title) > maxlen: + shorttitle = title[0:maxlen] + location_rule = location_rule.replace(title, shorttitle) # 返回处理后的json_data json_data['title'] = title @@ -219,26 +220,26 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON naming_rule="" for i in conf.naming_rule().split("+"): if i not in json_data: - naming_rule+=i.strip("'").strip('"') + naming_rule += i.strip("'").strip('"') else: - naming_rule+=json_data[i] + naming_rule += json_data.get(i) json_data['naming_rule'] = naming_rule return json_data def get_info(json_data): # 返回json里的数据 - title = json_data['title'] - studio = json_data['studio'] - year = json_data['year'] - outline = json_data['outline'] - runtime = json_data['runtime'] - director = json_data['director'] - actor_photo = json_data['actor_photo'] - release = json_data['release'] - number = json_data['number'] - cover = json_data['cover'] - website = json_data['website'] - series = json_data['series'] + title = json_data.get('title') + studio = json_data.get('studio') + year = json_data.get('year') + outline = json_data.get('outline') + runtime = json_data.get('runtime') + director = json_data.get('director') + actor_photo = json_data.get('actor_photo') + release = json_data.get('release') + number = json_data.get('number') + cover = json_data.get('cover') + website = json_data.get('website') + series = json_data.get('series') label = json_data.get('label', "") return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label @@ -260,7 +261,7 @@ def create_folder(success_folder, location_rule, json_data, conf: config.Config) try: os.makedirs(path) except: - path = success_folder + '/' + location_rule.replace('/[' + number + ']-' + title, "/number") + path = success_folder + '/' + location_rule.replace('/[' + number + ')-' + title, "/number") path = escape_path(path, conf.escape_literals()) os.makedirs(path) @@ -284,7 +285,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa for i in range(retry_count): try: - if switch == 1: + if switch == 1 or switch == '1': if not os.path.exists(path): os.makedirs(path) proxies = get_proxy(proxy, proxytype) @@ -429,22 +430,22 @@ def cutImage(imagecut, path, number, c_word): def paste_file_to_folder(filepath, path, number, c_word, conf: config.Config): # 文件路径,番号,后缀,要移动至的位置 - houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) - + houzhui = str(re.search('[.](iso|ISO|AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) + file_parent_origin_path = str(pathlib.Path(filepath).parent) try: # 如果soft_link=1 使用软链接 if conf.soft_link(): os.symlink(filepath, path + '/' + number + c_word + houzhui) else: os.rename(filepath, path + '/' + number + c_word + houzhui) - if os.path.exists(os.getcwd() + '/' + number + c_word + '.srt'): # 字幕移动 - os.rename(os.getcwd() + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt') + if os.path.exists(file_parent_origin_path + '/' + number + c_word + '.srt'): # 字幕移动 + os.rename(file_parent_origin_path + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt') print('[+]Sub moved!') - elif os.path.exists(os.getcwd() + '/' + number + c_word + '.ssa'): - os.rename(os.getcwd() + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa') + elif os.path.exists(file_parent_origin_path + '/' + number + c_word + '.ssa'): + os.rename(file_parent_origin_path + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa') print('[+]Sub moved!') - elif os.path.exists(os.getcwd() + '/' + number + c_word + '.sub'): - os.rename(os.getcwd() + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub') + elif os.path.exists(file_parent_origin_path + '/' + number + c_word + '.sub'): + os.rename(file_parent_origin_path + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub') print('[+]Sub moved!') except FileExistsError: print('[-]File Exists! Please check your movie!') @@ -458,21 +459,21 @@ def paste_file_to_folder(filepath, path, number, c_word, conf: config.Config): def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, c_word, conf): # 文件路径,番号,后缀,要移动至的位置 if multi_part == 1: number += part # 这时number会被附加上CD1后缀 - houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) - + houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm|iso|ISO)$', filepath).group()) + file_parent_origin_path = str(pathlib.Path(filepath).parent) try: if conf.soft_link(): os.symlink(filepath, path + '/' + number + part + c_word + houzhui) else: os.rename(filepath, path + '/' + number + part + c_word + houzhui) - if os.path.exists(number + '.srt'): # 字幕移动 - os.rename(number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt') + if os.path.exists(file_parent_origin_path + '/' + number + '.srt'): # 字幕移动 + os.rename(file_parent_origin_path + '/' + number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt') print('[+]Sub moved!') - elif os.path.exists(number + part + c_word + '.ass'): - os.rename(number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass') + elif os.path.exists(file_parent_origin_path + '/' + number + part + c_word + '.ass'): + os.rename(file_parent_origin_path + '/' + number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass') print('[+]Sub moved!') - elif os.path.exists(number + part + c_word + '.sub'): - os.rename(number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub') + elif os.path.exists(file_parent_origin_path + '/' + number + part + c_word + '.sub'): + os.rename(file_parent_origin_path + '/' + number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub') print('[+]Sub moved!') print('[!]Success') except FileExistsError: @@ -519,7 +520,9 @@ def core_main(file_path, number_th, conf: config.Config): cn_sub = '' liuchu = '' - filepath = file_path # 影片的路径 + + filepath = file_path # 影片的路径 绝对路径 + rootpath = str(pathlib.Path(filepath).parent) number = number_th json_data = get_data_from_json(number, filepath, conf) # 定义番号 @@ -534,8 +537,8 @@ def core_main(file_path, number_th, conf: config.Config): # but paste_file_to_folder() still use the input raw search id # so the solution is: use the normalized search id number = json_data["number"] - imagecut = json_data['imagecut'] - tag = json_data['tag'] + imagecut = json_data.get('imagecut') + tag = json_data.get('tag') # =======================================================================判断-C,-CD后缀 if '-CD' in filepath or '-cd' in filepath: multi_part = 1 @@ -546,15 +549,12 @@ def core_main(file_path, number_th, conf: config.Config): if '流出' in filepath: liuchu = '流出' - # 创建输出失败目录 - CreatFailedFolder(conf.failed_folder()) - # 调试模式检测 if conf.debug(): debug_print(json_data) # 创建文件夹 - path = create_folder(conf.success_folder(), json_data['location_rule'], json_data, conf) + path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data, conf) # main_mode # 1: 刮削模式 / Scraping mode @@ -565,16 +565,16 @@ def core_main(file_path, number_th, conf: config.Config): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: - small_cover_check(path, number, json_data['cover_small'], c_word, conf, filepath, conf.failed_folder()) + small_cover_check(path, number, json_data.get('cover_small'), c_word, conf, filepath, conf.failed_folder()) # creatFolder会返回番号路径 - image_download(json_data['cover'], number, c_word, path, conf, filepath, conf.failed_folder()) + image_download( json_data.get('cover'), number, c_word, path, conf, filepath, conf.failed_folder()) # 裁剪图 cutImage(imagecut, path, number, c_word) # 打印文件 - print_files(path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, conf.failed_folder(), tag, json_data['actor_list'], liuchu) + print_files(path, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, conf.failed_folder(), tag, json_data.get('actor_list'), liuchu) # 移动文件 paste_file_to_folder(filepath, path, number, c_word, conf) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh deleted file mode 100644 index 2afa39f..0000000 --- a/docker-entrypoint.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -git fetch --all -git reset --hard origin/master -git pull - -apt update -apt upgrade -y -pip3 install --upgrade -r requirements.txt -make -mv bin/* /avdc_bin - -cd /avdc_bin -version=$(./AV_Data_Capture --version) -zip AV_Data_Capture-CLI-$(echo $version)-Linux-$(dpkg --print-architecture).zip AV_Data_Capture config.ini diff --git a/number_parser.py b/number_parser.py index 6e7f7b4..025e2cf 100644 --- a/number_parser.py +++ b/number_parser.py @@ -41,6 +41,11 @@ def get_number(debug,filepath: str) -> str: file_number = re.search(r'\w+-\w+', filename, re.A).group() return file_number else: # 提取不含减号-的番号,FANZA CID + # 欧美番号匹配规则 + oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath) + if oumei: + return oumei.group() + try: return str( re.findall(r'(.+?)\.', @@ -61,6 +66,11 @@ def get_number(debug,filepath: str) -> str: file_number = re.search(r'\w+-\w+', filename, re.A).group() return file_number else: # 提取不含减号-的番号,FANZA CID + # 欧美番号匹配规则 + oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath) + if oumei: + return oumei.group() + try: return str( re.findall(r'(.+?)\.', @@ -72,4 +82,4 @@ def get_number(debug,filepath: str) -> str: # if __name__ == "__main__": # import doctest -# doctest.testmod(raise_on_error=True) \ No newline at end of file +# doctest.testmod(raise_on_error=True) diff --git a/py_to_exe.ps1 b/py_to_exe.ps1 index 9c919ca..34802fb 100644 --- a/py_to_exe.ps1 +++ b/py_to_exe.ps1 @@ -3,6 +3,9 @@ $CLOUDSCRAPER_PATH=$(python -c 'import cloudscraper as _; print(_.__path__[0])' | select -Last 1) +mkdir build +mkdir __pycache__ + pyinstaller --onefile AV_Data_Capture.py ` --hidden-import ADC_function.py ` --hidden-import core.py ` diff --git a/wrapper/FreeBSD.sh b/wrapper/FreeBSD.sh new file mode 100755 index 0000000..20d87e0 --- /dev/null +++ b/wrapper/FreeBSD.sh @@ -0,0 +1,4 @@ +pkg install python37 py37-requests py37-pip py37-lxml py37-pillow py37-cloudscraper py37-pysocks git zip py37-pyinstaller py37-beautifulsoup448 +pip install pyquery +pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py +cp config.ini ./dist diff --git a/wrapper/Linux.sh b/wrapper/Linux.sh new file mode 100755 index 0000000..e1c761e --- /dev/null +++ b/wrapper/Linux.sh @@ -0,0 +1,15 @@ +if ['$(dpkg --print-architecture)' != 'amd64'] || ['$(dpkg --print-architecture)' != 'i386']; then + apt install python3 python3-pip git sudo libxml2-dev libxslt-dev build-essential wget nano libcmocka-dev libcmocka0 -y + apt install zlib* libjpeg-dev -y + wget https://files.pythonhosted.org/packages/82/96/21ba3619647bac2b34b4996b2dbbea8e74a703767ce24192899d9153c058/pyinstaller-4.0.tar.gz + tar -zxvf pyinstaller-4.0.tar.gz + cd pyinstaller-4.0/bootloader + sed -i "s/ '-Werror',//" wscript + python3 ./waf distclean all + cd ../ + python3 setup.py install + cd ../ +fi +pip3 install -r requirements.txt +pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py +cp config.ini ./dist