From 6f5bd41eb0db4fa6b5ec823a6aa1161d56754544 Mon Sep 17 00:00:00 2001 From: newQian Date: Sun, 6 Dec 2020 17:31:44 +0800 Subject: [PATCH 01/32] =?UTF-8?q?=E6=94=AF=E6=8C=81iso=E6=89=A9=E5=B1=95?= =?UTF-8?q?=E5=90=8D=E7=9A=84=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 4 ++-- py_to_exe.ps1 | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/core.py b/core.py index fd55148..a43bbf7 100755 --- a/core.py +++ b/core.py @@ -427,7 +427,7 @@ def cutImage(imagecut, path, number, c_word): def paste_file_to_folder(filepath, path, number, c_word, conf: config.Config): # 文件路径,番号,后缀,要移动至的位置 - houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) + houzhui = str(re.search('[.](iso|ISO|AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) try: # 如果soft_link=1 使用软链接 @@ -456,7 +456,7 @@ def paste_file_to_folder(filepath, path, number, c_word, conf: config.Config): def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, c_word, conf): # 文件路径,番号,后缀,要移动至的位置 if multi_part == 1: number += part # 这时number会被附加上CD1后缀 - houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) + houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm|iso|ISO)$', filepath).group()) try: if conf.soft_link(): diff --git a/py_to_exe.ps1 b/py_to_exe.ps1 index 9c919ca..34802fb 100644 --- a/py_to_exe.ps1 +++ b/py_to_exe.ps1 @@ -3,6 +3,9 @@ $CLOUDSCRAPER_PATH=$(python -c 'import cloudscraper as _; print(_.__path__[0])' | select -Last 1) +mkdir build +mkdir __pycache__ + pyinstaller --onefile AV_Data_Capture.py ` --hidden-import ADC_function.py ` --hidden-import core.py ` From 9454a38ce6c61bbaaf74c9ad57a05c534b53b3a7 Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 6 Dec 2020 19:45:19 +0800 Subject: [PATCH 02/32] Create airav.py --- WebCrawler/airav.py | 174 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 WebCrawler/airav.py diff --git a/WebCrawler/airav.py b/WebCrawler/airav.py new file mode 100644 index 0000000..9a13cec --- /dev/null +++ b/WebCrawler/airav.py @@ -0,0 +1,174 @@ +import sys +sys.path.append('../') +import re +from pyquery import PyQuery as pq#need install +from lxml import etree#need install +from bs4 import BeautifulSoup#need install +import json +from ADC_function import * + + + +# airav这个网站没有演员图片,所以直接使用javbus的图 +def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img + soup = BeautifulSoup(htmlcode, 'lxml') + a = soup.find_all(attrs={'class': 'star-name'}) + d={} + for i in a: + l=i.a['href'] + t=i.get_text() + html = etree.fromstring(get_html(l), etree.HTMLParser()) + p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']") + p2={t:p} + d.update(p2) + return d + +def getTitle(htmlcode): #获取标题 + doc = pq(htmlcode) + # h5:first-child定位第一个h5标签,妈的找了好久才找到这个语法 + title = str(doc('div.d-flex.videoDataBlock h5.d-none.d-md-block:nth-child(2)').text()).replace(' ', '-') + try: + title2 = re.sub('n\d+-','',title) + + return title2 + except: + return title + +def getStudio(htmlcode): #获取厂商 已修改 + html = etree.fromstring(htmlcode,etree.HTMLParser()) + # 如果记录中冇导演,厂商排在第4位 + if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") + # 如果记录中有导演,厂商排在第5位 + elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']") + else: + result = '' + return result +def getYear(htmlcode): #获取年份 + html = etree.fromstring(htmlcode,etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") + return result +def getCover(htmlcode): #获取封面链接 + doc = pq(htmlcode) + image = doc('a.bigImage') + return image.attr('href') +def getRelease(htmlcode): #获取出版日期 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") + return result +def getRuntime(htmlcode): #获取分钟 已修改 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')).strip(" ['']分鐘") + return result + +def getActor(htmlcode): #获取女优 + b=[] + soup=BeautifulSoup(htmlcode,'lxml') + a=soup.find_all(attrs={'class':'videoAvstarListItem'}) + for i in a: + b.append(i.get_text()) + return b + +def getNum(htmlcode): #获取番号 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") + return result +def getDirector(htmlcode): #获取导演 已修改 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") + else: + result = '' # 记录中有可能没有导演数据 + return result + +def getOutline(htmlcode): #获取演员 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + try: + result = html.xpath("string(//div[@class='d-flex videoDataBlock']/div[@class='synopsis']/p)").replace('\n','') + return result + except: + return '' +def getSerise(htmlcode): #获取系列 已修改 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + # 如果记录中冇导演,系列排在第6位 + if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']") + # 如果记录中有导演,系列排在第7位 + elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"): + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']") + else: + result = '' + return result +def getTag(htmlcode): # 获取标签 + tag = [] + soup = BeautifulSoup(htmlcode, 'lxml') + x = soup.find_all(attrs={'class': 'tagBtnMargin'}) + a = x[0].find_all('a') + + for i in a: + tag.append(i.get_text()) + return tag + +def main(number): + try: + try: + try: + htmlcode = get_html('https://cn.airav.wiki/video/' + number) + javbus_htmlcode = get_html('https://www.javbus.com/ja/' + number) + + + except: + print(number) + + dic = { + # 标题可使用airav + 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))), + # 制作商选择使用javbus + 'studio': getStudio(javbus_htmlcode), + # 年份也是用javbus + 'year': str(re.search('\d{4}', getYear(javbus_htmlcode)).group()), + # 简介 使用 airav + 'outline': getOutline(htmlcode), + # 使用javbus + 'runtime': getRuntime(javbus_htmlcode), + # 导演 使用javbus + 'director': getDirector(javbus_htmlcode), + # 作者 使用airav + 'actor': getActor(htmlcode), + # 发售日使用javbus + 'release': getRelease(javbus_htmlcode), + # 番号使用javbus + 'number': getNum(javbus_htmlcode), + # 封面链接 使用javbus + 'cover': getCover(javbus_htmlcode), + + 'imagecut': 1, + # 使用 airav + 'tag': getTag(htmlcode), + # 使用javbus + 'label': getSerise(javbus_htmlcode), + # 妈的,airav不提供作者图片 + 'actor_photo': getActorPhoto(javbus_htmlcode), + + 'website': 'https://www.airav.wiki/video/' + number, + 'source': 'airav.py', + # 使用javbus + 'series': getSerise(javbus_htmlcode), + } + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8') + return js + except: + return main_uncensored(number) + except: + data = { + "title": "", + } + js = json.dumps( + data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":") + ) + return js + + +if __name__ == '__main__': + print(main('sdsi-019')) From ecd8aebda6fb30fbcb03b96831b3ea19ad8263ca Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 6 Dec 2020 19:53:53 +0800 Subject: [PATCH 03/32] Update config.ini --- config.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.ini b/config.ini index bfaf5ce..07936f2 100644 --- a/config.ini +++ b/config.ini @@ -24,7 +24,7 @@ max_title_len= 50 update_check=1 [priority] -website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,javlib,dlsite +website=airav,javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,javlib,dlsite [escape] literals=\()/ @@ -35,4 +35,4 @@ switch=0 [transalte] switch=0 -values=title,outline \ No newline at end of file +values=title,outline From 1c9e2fc8229eb4c4575aac747ed4e98e0c5d3dda Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 6 Dec 2020 19:58:14 +0800 Subject: [PATCH 04/32] Update config.py --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index 658a7ce..90b27e3 100644 --- a/config.py +++ b/config.py @@ -131,7 +131,7 @@ class Config: sec5 = "priority" conf.add_section(sec5) - conf.set(sec5, "website", "javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,xcity") + conf.set(sec5, "website", "airav,javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,xcity") sec6 = "escape" conf.add_section(sec6) From b6ee20b88c27fb11d13501942eea6fe4b6902204 Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 6 Dec 2020 19:59:29 +0800 Subject: [PATCH 05/32] Update core.py --- core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core.py b/core.py index fd55148..de04995 100755 --- a/core.py +++ b/core.py @@ -8,6 +8,7 @@ from PIL import Image from ADC_function import * # =========website======== +from WebCrawler import airav from WebCrawler import avsox from WebCrawler import fanza from WebCrawler import fc2 @@ -48,6 +49,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON """ func_mapping = { + "airav": airav.main, "avsox": avsox.main, "fc2": fc2.main, "fanza": fanza.main, From 5edba21558f2e05a4e1a6d0ffb6ca9a44e9d377d Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 6 Dec 2020 20:05:05 +0800 Subject: [PATCH 06/32] Update core.py --- core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core.py b/core.py index de04995..d90cbd9 100755 --- a/core.py +++ b/core.py @@ -284,7 +284,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa for i in range(retry_count): try: - if switch == 1: + if switch == 1 or switch == '1': if not os.path.exists(path): os.makedirs(path) proxies = get_proxy(proxy, proxytype) From 580d57cd756b9c4a0c00b33a3d439857cd0a7dd0 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 8 Dec 2020 13:56:08 +0800 Subject: [PATCH 07/32] Fix file name too long in Linux --- core.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/core.py b/core.py index fd55148..90a55c1 100755 --- a/core.py +++ b/core.py @@ -191,15 +191,13 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON location_rule = eval(conf.location_rule()) - # Process only Windows. - if platform.system() == "Windows": - if 'actor' in conf.location_rule() and len(actor) > 100: - print(conf.location_rule()) - location_rule = eval(conf.location_rule().replace("actor","'多人作品'")) - maxlen = conf.max_title_len() - if 'title' in conf.location_rule() and len(title) > maxlen: - shorttitle = title[0:maxlen] - location_rule = location_rule.replace(title, shorttitle) + if 'actor' in conf.location_rule() and len(actor) > 100: + print(conf.location_rule()) + location_rule = eval(conf.location_rule().replace("actor","'多人作品'")) + maxlen = conf.max_title_len() + if 'title' in conf.location_rule() and len(title) > maxlen: + shorttitle = title[0:maxlen] + location_rule = location_rule.replace(title, shorttitle) # 返回处理后的json_data json_data['title'] = title From 77b4431bde03f315ae02ea0fe20f4f6fd96b80b8 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Sun, 13 Dec 2020 02:42:07 +0800 Subject: [PATCH 08/32] core.py access dictionary (json_data) method is changed to get() --- core.py | 75 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/core.py b/core.py index afdce33..26ee2f0 100755 --- a/core.py +++ b/core.py @@ -111,23 +111,23 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON # ================================================网站规则添加结束================================================ - title = json_data['title'] - actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表 - release = json_data['release'] - number = json_data['number'] - studio = json_data['studio'] - source = json_data['source'] - runtime = json_data['runtime'] - outline = json_data['outline'] - label = json_data['label'] - series = json_data['series'] - year = json_data['year'] - try: - cover_small = json_data['cover_small'] - except: + title = json_data.get('title') + actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表 + release = json_data.get('release') + number = json_data.get('number') + studio = json_data.get('studio') + source = json_data.get('source') + runtime = json_data.get('runtime') + outline = json_data.get('outline') + label = json_data.get('label') + series = json_data.get('series') + year = json_data.get('year') + + if json_data.get('cover_small') == None: cover_small = '' - imagecut = json_data['imagecut'] - tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ + + imagecut = json_data.get('imagecut') + tag = str(json_data.get('tag')).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') if title == '' or number == '': @@ -152,6 +152,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON tmpArr = cover_small.split(',') if len(tmpArr) > 0: cover_small = tmpArr[0].strip('\"').strip('\'') + # ====================处理异常字符 END================== #\/:*?"<>| # === 替换Studio片假名 @@ -217,26 +218,26 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON naming_rule="" for i in conf.naming_rule().split("+"): if i not in json_data: - naming_rule+=i.strip("'").strip('"') + naming_rule += i.strip("'").strip('"') else: - naming_rule+=json_data[i] + naming_rule += json_data.get(i) json_data['naming_rule'] = naming_rule return json_data def get_info(json_data): # 返回json里的数据 - title = json_data['title'] - studio = json_data['studio'] - year = json_data['year'] - outline = json_data['outline'] - runtime = json_data['runtime'] - director = json_data['director'] - actor_photo = json_data['actor_photo'] - release = json_data['release'] - number = json_data['number'] - cover = json_data['cover'] - website = json_data['website'] - series = json_data['series'] + title = json_data.get('title') + studio = json_data.get('studio') + year = json_data.get('year') + outline = json_data.get('outline') + runtime = json_data.get('runtime') + director = json_data.get('director') + actor_photo = json_data.get('actor_photo') + release = json_data.get('release') + number = json_data.get('number') + cover = json_data.get('cover') + website = json_data.get('website') + series = json_data.get('series') label = json_data.get('label', "") return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label @@ -258,7 +259,7 @@ def create_folder(success_folder, location_rule, json_data, conf: config.Config) try: os.makedirs(path) except: - path = success_folder + '/' + location_rule.replace('/[' + number + ']-' + title, "/number") + path = success_folder + '/' + location_rule.replace('/[' + number + ')-' + title, "/number") path = escape_path(path, conf.escape_literals()) os.makedirs(path) @@ -532,8 +533,8 @@ def core_main(file_path, number_th, conf: config.Config): # but paste_file_to_folder() still use the input raw search id # so the solution is: use the normalized search id number = json_data["number"] - imagecut = json_data['imagecut'] - tag = json_data['tag'] + imagecut = json_data.get('imagecut') + tag = json_data.get('tag') # =======================================================================判断-C,-CD后缀 if '-CD' in filepath or '-cd' in filepath: multi_part = 1 @@ -552,7 +553,7 @@ def core_main(file_path, number_th, conf: config.Config): debug_print(json_data) # 创建文件夹 - path = create_folder(conf.success_folder(), json_data['location_rule'], json_data, conf) + path = create_folder(conf.success_folder(), json_data.get('location_rule'), json_data, conf) # main_mode # 1: 刮削模式 / Scraping mode @@ -563,16 +564,16 @@ def core_main(file_path, number_th, conf: config.Config): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: - small_cover_check(path, number, json_data['cover_small'], c_word, conf, filepath, conf.failed_folder()) + small_cover_check(path, number, json_data.get('cover_small'), c_word, conf, filepath, conf.failed_folder()) # creatFolder会返回番号路径 - image_download(json_data['cover'], number, c_word, path, conf, filepath, conf.failed_folder()) + image_download( json_data.get('cover'), number, c_word, path, conf, filepath, conf.failed_folder()) # 裁剪图 cutImage(imagecut, path, number, c_word) # 打印文件 - print_files(path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, conf.failed_folder(), tag, json_data['actor_list'], liuchu) + print_files(path, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, conf.failed_folder(), tag, json_data.get('actor_list'), liuchu) # 移动文件 paste_file_to_folder(filepath, path, number, c_word, conf) From d79f6ce009a3fe3cd1cbd7d8e7bdb21649405377 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Sun, 13 Dec 2020 02:44:33 +0800 Subject: [PATCH 09/32] Update to 4.1.1, add auto exit argparse function, change os._exit(0) and exit() to sys.exit() --- AV_Data_Capture.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index c2ab31e..f89f680 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -1,8 +1,9 @@ import argparse -from core import * import os -from number_parser import get_number +import sys +from number_parser import get_number +from core import * def check_update(local_version): data = json.loads(get_html("https://api.github.com/repos/yoshiko2/AV_Data_Capture/releases/latest")) @@ -23,10 +24,11 @@ def argparse_function(ver: str) -> [str, str, bool]: parser.add_argument("file", default='', nargs='?', help="Single Movie file path.") parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.") parser.add_argument("-n", "--number", default='', nargs='?',help="Custom file number") - parser.add_argument("--version", action="version", version=ver) + parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true", help="Auto exit after program complete") + parser.add_argument("-v", "--version", action="version", version=ver) args = parser.parse_args() - return args.file, args.config, args.number + return args.file, args.config, args.number, args.autoexit def movie_lists(root, escape_folder): for folder in escape_folder: @@ -50,7 +52,7 @@ def create_failed_folder(failed_folder): os.makedirs(failed_folder + '/') except: print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)") - os._exit(0) + sys.exit(0) def CEF(path): @@ -117,10 +119,10 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu if __name__ == '__main__': - version = '4.0.3' + version = '4.1.1' # Parse command line args - single_file_path, config_file, custom_number = argparse_function(version) + single_file_path, config_file, custom_number, auto_exit = argparse_function(version) # Read config.ini conf = config.Config(path=config_file) @@ -144,7 +146,7 @@ if __name__ == '__main__': CEF(conf.failed_folder()) print("[+]All finished!!!") input("[+][+]Press enter key exit, you can check the error messge before you exit.") - exit() + sys.exit(0) # ========== Single File ========== movie_list = movie_lists(".", re.split("[,,]", conf.escape_folder())) @@ -166,5 +168,8 @@ if __name__ == '__main__': CEF(conf.failed_folder()) print("[+]All finished!!!") if conf.auto_exit(): - os._exit(0) - input("[+][+]Press enter key exit, you can check the error message before you exit.") + sys.exit(0) + if auto_exit: + sys.exit(0) + input("Press enter key exit, you can check the error message before you exit...") + sys.exit(0) \ No newline at end of file From f998403e823a44d5998280f278fea1cbcc2775d2 Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Mon, 14 Dec 2020 10:31:33 +0800 Subject: [PATCH 10/32] Update docker-entrypoint.sh --- docker-entrypoint.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 2afa39f..c78bdad 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -8,8 +8,8 @@ apt update apt upgrade -y pip3 install --upgrade -r requirements.txt make -mv bin/* /avdc_bin -cd /avdc_bin +cd bin version=$(./AV_Data_Capture --version) zip AV_Data_Capture-CLI-$(echo $version)-Linux-$(dpkg --print-architecture).zip AV_Data_Capture config.ini +mv *zip /avdc_bin From 2769f0d95bb03b680681e69ceb62d6e9bf340176 Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Mon, 14 Dec 2020 11:11:16 +0800 Subject: [PATCH 11/32] Update docker-entrypoint.sh --- docker-entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index c78bdad..76e4443 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -11,5 +11,5 @@ make cd bin version=$(./AV_Data_Capture --version) -zip AV_Data_Capture-CLI-$(echo $version)-Linux-$(dpkg --print-architecture).zip AV_Data_Capture config.ini +zip AV_Data_Capture-CLI-$(echo $version)-$(uname)-$(dpkg --print-architecture).zip AV_Data_Capture config.ini mv *zip /avdc_bin From 7efc96f27d5344905558b0fb01d62093bad725b3 Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sat, 19 Dec 2020 20:40:41 +0800 Subject: [PATCH 12/32] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=8F=91=E5=B8=83?= =?UTF-8?q?=E6=97=B6=E9=97=B4=E6=8A=93=E5=8F=96=E4=B8=8D=E5=88=B0=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/javdb.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index eda8cb6..eac8d7a 100644 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -26,10 +26,17 @@ def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img d.update(p) return d def getStudio(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') +# html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +# result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']") +# result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']") +# return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') + patherr = re.compile(r'片商\:[\s\S]*?(.*?)') + pianshang = patherr.findall(a) + if pianshang: + result = pianshang[0] + else: + result = "" + return result def getRuntime(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']") @@ -46,11 +53,18 @@ def getNum(a): result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']") return str(result2 + result1).strip('+') def getYear(getRelease): - try: - result = str(re.search('\d{4}', getRelease).group()) - return result - except: - return getRelease +# try: +# result = str(re.search('\d{4}', getRelease).group()) +# return result +# except: +# return getRelease + patherr = re.compile(r'日期\:\s*?.*?(.*?)\-.*?') + dates = patherr.findall(getRelease) + if dates: + result = dates[0] + else: + result = '' + return result def getRelease(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']") From 32fb9a0d7fa92c9bf26f619615c5b959e64601bc Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sat, 19 Dec 2020 21:52:55 +0800 Subject: [PATCH 13/32] =?UTF-8?q?=E8=A7=A3=E5=86=B3cover=5Fsmall=E5=AF=BC?= =?UTF-8?q?=E8=87=B4=E7=A8=8B=E5=BA=8F=E6=AD=A3=E5=B8=B8=E5=88=AE=E5=89=8A?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core.py b/core.py index 26ee2f0..080b32e 100755 --- a/core.py +++ b/core.py @@ -125,7 +125,9 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON if json_data.get('cover_small') == None: cover_small = '' - + else: + cover_small = json_data.get('cover_small') + imagecut = json_data.get('imagecut') tag = str(json_data.get('tag')).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') From c2a9261ccb5b4aa8a52fd29bc6714b453440f53d Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Sat, 19 Dec 2020 23:27:41 +0800 Subject: [PATCH 14/32] Create encapsulation.sh --- encapsulation.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 encapsulation.sh diff --git a/encapsulation.sh b/encapsulation.sh new file mode 100644 index 0000000..c3b7a78 --- /dev/null +++ b/encapsulation.sh @@ -0,0 +1,6 @@ +pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py +cp config.ini dist/ +find . -name '*.pyc' -delete +find . -name '__pycache__' -type d | xargs rm -fr +find . -name '.pytest_cache' -type d | xargs rm -fr +rm -rf build/ From 86db4b132dca5b846f1c261cdca294419f156a30 Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Sat, 19 Dec 2020 23:31:44 +0800 Subject: [PATCH 15/32] Create FreeBSD-amd64.sh --- wrapper/FreeBSD-amd64.sh | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 wrapper/FreeBSD-amd64.sh diff --git a/wrapper/FreeBSD-amd64.sh b/wrapper/FreeBSD-amd64.sh new file mode 100644 index 0000000..a7cf92a --- /dev/null +++ b/wrapper/FreeBSD-amd64.sh @@ -0,0 +1,5 @@ +pkg install python37 py37-requests py37-pip py37-lxml py37-pillow py37-cloudscraper py37-pysocks git zip py37-pyinstaller py37-beautifulsoup448 +pip install pyquery +cd ../ +chmox +x encapsulation.sh +./encapsulation.sh From 7af0951b825eb1697f7821c8dc52cb9966ab6bc1 Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 20 Dec 2020 00:34:42 +0800 Subject: [PATCH 16/32] Update number_parser.py --- number_parser.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index 6e7f7b4..554d3d3 100644 --- a/number_parser.py +++ b/number_parser.py @@ -61,6 +61,11 @@ def get_number(debug,filepath: str) -> str: file_number = re.search(r'\w+-\w+', filename, re.A).group() return file_number else: # 提取不含减号-的番号,FANZA CID + # 欧美番号匹配规则 + oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath) + if oumei: + return oumei.group() + try: return str( re.findall(r'(.+?)\.', @@ -72,4 +77,4 @@ def get_number(debug,filepath: str) -> str: # if __name__ == "__main__": # import doctest -# doctest.testmod(raise_on_error=True) \ No newline at end of file +# doctest.testmod(raise_on_error=True) From c94fcd47facc18942358434d3b25b0a49d065034 Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 20 Dec 2020 00:37:03 +0800 Subject: [PATCH 17/32] Update number_parser.py --- number_parser.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/number_parser.py b/number_parser.py index 554d3d3..025e2cf 100644 --- a/number_parser.py +++ b/number_parser.py @@ -41,6 +41,11 @@ def get_number(debug,filepath: str) -> str: file_number = re.search(r'\w+-\w+', filename, re.A).group() return file_number else: # 提取不含减号-的番号,FANZA CID + # 欧美番号匹配规则 + oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath) + if oumei: + return oumei.group() + try: return str( re.findall(r'(.+?)\.', From fc4cc4c122785da7690f11465a5cd2bc301b0e2f Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 20 Dec 2020 00:42:58 +0800 Subject: [PATCH 18/32] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=AC=A7=E7=BE=8E?= =?UTF-8?q?=E7=9A=84=E5=88=AE=E5=89=8A=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/javdb.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index eac8d7a..fa38bae 100644 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -103,10 +103,17 @@ def getCover_small(a, index=0): result = 'https:' + result return result except: # 2020.7.17 Repair Cover Url crawl - result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index] - if not 'https' in result: - result = 'https:' + result - return result + try: + result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index] + if not 'https' in result: + result = 'https:' + result + return result + except: + result = html.xpath("//div[@class='item-image']/img/@data-src")[index] + if not 'https' in result: + result = 'https:' + result + return result + def getCover(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) try: @@ -141,14 +148,23 @@ def main(number): # and the first elememt maybe not the one we are looking for # iterate all candidates and find the match one urls = html.xpath('//*[@id="videos"]/div/div/a/@href') - ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()') - correct_url = urls[ids.index(number)] + # 记录一下欧美的ids ['Blacked','Blacked'] + if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): + correct_url = urls[0] + else: + ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()') + correct_url = urls[ids.index(number)] + detail_page = get_html('https://javdb.com' + correct_url) # no cut image by default imagecut = 3 # If gray image exists ,then replace with normal cover - cover_small = getCover_small(query_result, index=ids.index(number)) + if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): + cover_small = getCover_small(query_result) + else: + cover_small = getCover_small(query_result, index=ids.index(number)) + if 'placeholder' in cover_small: # replace wit normal cover and cut it imagecut = 1 From 23281a4a64417207d1f9c31f7bf8a3d1b06aae19 Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 20 Dec 2020 00:49:55 +0800 Subject: [PATCH 19/32] Update javdb.py --- WebCrawler/javdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index fa38bae..41a4775 100644 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -160,7 +160,7 @@ def main(number): # no cut image by default imagecut = 3 # If gray image exists ,then replace with normal cover - if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): + if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): cover_small = getCover_small(query_result) else: cover_small = getCover_small(query_result, index=ids.index(number)) From a1c7d644b128190edd466ceb6b232b2dbf5072fe Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Sun, 20 Dec 2020 20:51:17 +0800 Subject: [PATCH 20/32] change Version to 4.1.2 --- AV_Data_Capture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index f89f680..9257256 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -119,7 +119,7 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu if __name__ == '__main__': - version = '4.1.1' + version = '4.1.2' # Parse command line args single_file_path, config_file, custom_number, auto_exit = argparse_function(version) @@ -172,4 +172,4 @@ if __name__ == '__main__': if auto_exit: sys.exit(0) input("Press enter key exit, you can check the error message before you exit...") - sys.exit(0) \ No newline at end of file + sys.exit(0) From 4c8665f633a4292117b2c8c82d0b3a8b0e9eb635 Mon Sep 17 00:00:00 2001 From: Feng4 Date: Sun, 20 Dec 2020 21:10:43 +0800 Subject: [PATCH 21/32] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=8F=91=E5=B8=83?= =?UTF-8?q?=E6=97=A5=E6=9C=9F=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/javdb.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 41a4775..b463b3d 100644 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -66,10 +66,17 @@ def getYear(getRelease): result = '' return result def getRelease(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+') +# html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +# result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']") +# result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']") +# return str(result1 + result2).strip('+') + patherr = re.compile(r'日期\:\s*?.*?(.*?)') + dates = patherr.findall(a) + if dates: + result = dates[0] + else: + result = '' + return result def getTag(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() try: From 9340f57b96e5930d7e3b6665a715bfca515e91f8 Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Sun, 20 Dec 2020 21:14:09 +0800 Subject: [PATCH 22/32] change version to 4.2.1 --- AV_Data_Capture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 9257256..e99462d 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -119,7 +119,7 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu if __name__ == '__main__': - version = '4.1.2' + version = '4.2.1' # Parse command line args single_file_path, config_file, custom_number, auto_exit = argparse_function(version) From 144203ad3e33181546e4ad91b7b06f66fcace204 Mon Sep 17 00:00:00 2001 From: benjamin <24502572+ddtyjmyjm@users.noreply.github.com> Date: Mon, 21 Dec 2020 20:10:22 +0800 Subject: [PATCH 23/32] rename fun name 'CEF' to 'rm_empty_folder' --- AV_Data_Capture.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index e99462d..1209a22 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -55,7 +55,7 @@ def create_failed_folder(failed_folder): sys.exit(0) -def CEF(path): +def rm_empty_folder(path): try: files = os.listdir(path) # 获取路径下的子文件(夹)列表 for file in files: @@ -142,8 +142,8 @@ if __name__ == '__main__': if not single_file_path == '': print('[+]==================== Single File =====================') create_data_and_move_with_custom_number(single_file_path, conf,custom_number) - CEF(conf.success_folder()) - CEF(conf.failed_folder()) + rm_empty_folder(conf.success_folder()) + rm_empty_folder(conf.failed_folder()) print("[+]All finished!!!") input("[+][+]Press enter key exit, you can check the error messge before you exit.") sys.exit(0) @@ -164,8 +164,8 @@ if __name__ == '__main__': print('[!] - ' + percentage + ' [' + str(count) + '/' + count_all + '] -') create_data_and_move(movie_path, conf, conf.debug()) - CEF(conf.success_folder()) - CEF(conf.failed_folder()) + rm_empty_folder(conf.success_folder()) + rm_empty_folder(conf.failed_folder()) print("[+]All finished!!!") if conf.auto_exit(): sys.exit(0) From 63b76d02b5346b3c19477c4e8e21f8fa31090b83 Mon Sep 17 00:00:00 2001 From: benjamin <24502572+ddtyjmyjm@users.noreply.github.com> Date: Mon, 21 Dec 2020 20:15:40 +0800 Subject: [PATCH 24/32] remove redundant functions and usage: same function is in AV_Data_capture.py and used --- core.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/core.py b/core.py index 080b32e..9e47fce 100755 --- a/core.py +++ b/core.py @@ -34,15 +34,6 @@ def moveFailedFolder(filepath, failed_folder): return -def CreatFailedFolder(failed_folder): - if not os.path.exists(failed_folder + '/'): # 新建failed文件夹 - try: - os.makedirs(failed_folder + '/') - except: - print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)") - return - - def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON返回元数据 """ iterate through all services and fetch the data @@ -547,9 +538,6 @@ def core_main(file_path, number_th, conf: config.Config): if '流出' in filepath: liuchu = '流出' - # 创建输出失败目录 - CreatFailedFolder(conf.failed_folder()) - # 调试模式检测 if conf.debug(): debug_print(json_data) From d29ad47f7bd01e8f4d3a7e95cce39e745ebcd819 Mon Sep 17 00:00:00 2001 From: benjamin <24502572+ddtyjmyjm@users.noreply.github.com> Date: Mon, 21 Dec 2020 21:41:32 +0800 Subject: [PATCH 25/32] fix: fix the bug that if fail or success folder only has empty folders, it will also remove the fail or success folder. --- AV_Data_Capture.py | 2 +- core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 1209a22..145d233 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -59,7 +59,7 @@ def rm_empty_folder(path): try: files = os.listdir(path) # 获取路径下的子文件(夹)列表 for file in files: - os.removedirs(path + '/' + file) # 删除这个空文件夹 + os.rmdir(path + '/' + file) # 删除这个空文件夹 print('[+]Deleting empty folder', path + '/' + file) except: a = '' diff --git a/core.py b/core.py index 9e47fce..27a51f0 100755 --- a/core.py +++ b/core.py @@ -31,7 +31,7 @@ def escape_path(path, escape_literals: str): # Remove escape literals def moveFailedFolder(filepath, failed_folder): print('[-]Move to Failed output folder') shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/') - return + return def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON返回元数据 From 8b6c40375cd6f6547f76ed846f3397473d792765 Mon Sep 17 00:00:00 2001 From: benjamin <24502572+ddtyjmyjm@users.noreply.github.com> Date: Mon, 21 Dec 2020 23:50:17 +0800 Subject: [PATCH 26/32] fix: wrong symlink created. Using absolute path instead of relative path to reduce (potential) problems --- core.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/core.py b/core.py index 27a51f0..0a9ec16 100755 --- a/core.py +++ b/core.py @@ -1,5 +1,6 @@ import json import os.path +import pathlib import re import shutil import platform @@ -422,21 +423,21 @@ def cutImage(imagecut, path, number, c_word): def paste_file_to_folder(filepath, path, number, c_word, conf: config.Config): # 文件路径,番号,后缀,要移动至的位置 houzhui = str(re.search('[.](iso|ISO|AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) - + file_parent_origin_path = str(pathlib.Path(filepath).parent) try: # 如果soft_link=1 使用软链接 if conf.soft_link(): os.symlink(filepath, path + '/' + number + c_word + houzhui) else: os.rename(filepath, path + '/' + number + c_word + houzhui) - if os.path.exists(os.getcwd() + '/' + number + c_word + '.srt'): # 字幕移动 - os.rename(os.getcwd() + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt') + if os.path.exists(file_parent_origin_path + '/' + number + c_word + '.srt'): # 字幕移动 + os.rename(file_parent_origin_path + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt') print('[+]Sub moved!') - elif os.path.exists(os.getcwd() + '/' + number + c_word + '.ssa'): - os.rename(os.getcwd() + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa') + elif os.path.exists(file_parent_origin_path + '/' + number + c_word + '.ssa'): + os.rename(file_parent_origin_path + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa') print('[+]Sub moved!') - elif os.path.exists(os.getcwd() + '/' + number + c_word + '.sub'): - os.rename(os.getcwd() + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub') + elif os.path.exists(file_parent_origin_path + '/' + number + c_word + '.sub'): + os.rename(file_parent_origin_path + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub') print('[+]Sub moved!') except FileExistsError: print('[-]File Exists! Please check your movie!') @@ -451,20 +452,20 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, c_word, if multi_part == 1: number += part # 这时number会被附加上CD1后缀 houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm|iso|ISO)$', filepath).group()) - + file_parent_origin_path = str(pathlib.Path(filepath).parent) try: if conf.soft_link(): os.symlink(filepath, path + '/' + number + part + c_word + houzhui) else: os.rename(filepath, path + '/' + number + part + c_word + houzhui) - if os.path.exists(number + '.srt'): # 字幕移动 - os.rename(number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt') + if os.path.exists(file_parent_origin_path + '/' + number + '.srt'): # 字幕移动 + os.rename(file_parent_origin_path + '/' + number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt') print('[+]Sub moved!') - elif os.path.exists(number + part + c_word + '.ass'): - os.rename(number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass') + elif os.path.exists(file_parent_origin_path + '/' + number + part + c_word + '.ass'): + os.rename(file_parent_origin_path + '/' + number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass') print('[+]Sub moved!') - elif os.path.exists(number + part + c_word + '.sub'): - os.rename(number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub') + elif os.path.exists(file_parent_origin_path + '/' + number + part + c_word + '.sub'): + os.rename(file_parent_origin_path + '/' + number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub') print('[+]Sub moved!') print('[!]Success') except FileExistsError: @@ -511,7 +512,9 @@ def core_main(file_path, number_th, conf: config.Config): cn_sub = '' liuchu = '' - filepath = file_path # 影片的路径 + + filepath = file_path # 影片的路径 绝对路径 + rootpath = str(pathlib.Path(filepath).parent) number = number_th json_data = get_data_from_json(number, filepath, conf) # 定义番号 @@ -543,7 +546,7 @@ def core_main(file_path, number_th, conf: config.Config): debug_print(json_data) # 创建文件夹 - path = create_folder(conf.success_folder(), json_data.get('location_rule'), json_data, conf) + path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data, conf) # main_mode # 1: 刮削模式 / Scraping mode From 7183041cbe296b1a364fc72c971e66a26077d9ea Mon Sep 17 00:00:00 2001 From: benjamin <24502572+ddtyjmyjm@users.noreply.github.com> Date: Tue, 22 Dec 2020 00:45:41 +0800 Subject: [PATCH 27/32] fix: if process failed, though soft_link mode is on, it will still move the file to the failed folder. --- AV_Data_Capture.py | 2 +- core.py | 53 ++++++++++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 145d233..3d3d85b 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -149,7 +149,7 @@ if __name__ == '__main__': sys.exit(0) # ========== Single File ========== - movie_list = movie_lists(".", re.split("[,,]", conf.escape_folder())) + movie_list = movie_lists(os.getcwd(), re.split("[,,]", conf.escape_folder())) count = 0 count_all = str(len(movie_list)) diff --git a/core.py b/core.py index 0a9ec16..70389cc 100755 --- a/core.py +++ b/core.py @@ -29,9 +29,16 @@ def escape_path(path, escape_literals: str): # Remove escape literals return path -def moveFailedFolder(filepath, failed_folder): - print('[-]Move to Failed output folder') - shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/') +def moveFailedFolder(filepath, failed_folder, symlink: bool = False): + root_path = str(pathlib.Path(filepath).parent) + file_name = pathlib.Path(filepath).name + destination_path = root_path + '/' + failed_folder + '/' + if symlink: + print('[-]Create symlink to Failed output folder') + os.symlink(filepath, destination_path + '/' + file_name) + else: + print('[-]Move to Failed output folder') + shutil.move(filepath, destination_path) return @@ -98,7 +105,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON # Return if data not found in all sources if not json_data: print('[-]Movie Data not found!') - moveFailedFolder(filepath, conf.failed_folder()) + moveFailedFolder(filepath, conf.failed_folder(), conf.soft_link()) return # ================================================网站规则添加结束================================================ @@ -126,7 +133,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON if title == '' or number == '': print('[-]Movie Data not found!') - moveFailedFolder(filepath, conf.failed_folder()) + moveFailedFolder(filepath, conf.failed_folder(), conf.soft_link()) return # if imagecut == '3': @@ -315,14 +322,14 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa i += 1 print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) print('[-]Connect Failed! Please check your Proxy or Network!') - moveFailedFolder(filepath, failed_folder) + moveFailedFolder(filepath, failed_folder, conf.soft_link()) return # 封面是否下载成功,否则移动到failed def image_download(cover, number, c_word, path, conf: config.Config, filepath, failed_folder): if download_file_with_filename(cover, number + c_word + '-fanart.jpg', path, conf, filepath, failed_folder) == 'failed': - moveFailedFolder(filepath, failed_folder) + moveFailedFolder(filepath, failed_folder, conf.soft_link()) return switch, _proxy, _timeout, retry, _proxytype = conf.proxy() @@ -395,13 +402,11 @@ def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fa except IOError as e: print("[-]Write Failed!") print(e) - moveFailedFolder(filepath, failed_folder) - return + raise e except Exception as e1: print(e1) print("[-]Write Failed!") - moveFailedFolder(filepath, failed_folder) - return + raise e1 def cutImage(imagecut, path, number, c_word): @@ -476,16 +481,12 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, c_word, print('[-]Error! Please run as administrator!') return + def get_part(filepath, failed_folder): - try: - if re.search('-CD\d+', filepath): - return re.findall('-CD\d+', filepath)[0] - if re.search('-cd\d+', filepath): - return re.findall('-cd\d+', filepath)[0] - except: - print("[-]failed!Please rename the filename again!") - moveFailedFolder(filepath, failed_folder) - return + if re.search('-CD\d+', filepath): + return re.findall('-CD\d+', filepath)[0] + if re.search('-cd\d+', filepath): + return re.findall('-cd\d+', filepath)[0] def debug_print(data: json): @@ -534,7 +535,11 @@ def core_main(file_path, number_th, conf: config.Config): # =======================================================================判断-C,-CD后缀 if '-CD' in filepath or '-cd' in filepath: multi_part = 1 - part = get_part(filepath, conf.failed_folder()) + try: + part = get_part(filepath, conf.failed_folder()) + except: + print("[-]failed!Please rename the filename again!") + moveFailedFolder(filepath, conf.failed_folder(),conf.soft_link()) if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀 @@ -566,8 +571,10 @@ def core_main(file_path, number_th, conf: config.Config): cutImage(imagecut, path, number, c_word) # 打印文件 - print_files(path, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, conf.failed_folder(), tag, json_data.get('actor_list'), liuchu) - + try: + print_files(path, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, conf.failed_folder(), tag, json_data.get('actor_list'), liuchu) + except: + moveFailedFolder(filepath, conf.failed_folder(), conf.soft_link()) # 移动文件 paste_file_to_folder(filepath, path, number, c_word, conf) elif conf.main_mode() == 2: From 2a00cc5a48fe845a7d89b56a18e75ef4e78e356d Mon Sep 17 00:00:00 2001 From: benjamin <24502572+ddtyjmyjm@users.noreply.github.com> Date: Wed, 23 Dec 2020 14:44:35 +0800 Subject: [PATCH 28/32] fix: program will crash when the update checker function is failed. --- AV_Data_Capture.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 3d3d85b..9d07009 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -5,8 +5,15 @@ import sys from number_parser import get_number from core import * + def check_update(local_version): - data = json.loads(get_html("https://api.github.com/repos/yoshiko2/AV_Data_Capture/releases/latest")) + try: + data = json.loads(get_html("https://api.github.com/repos/yoshiko2/AV_Data_Capture/releases/latest")) + except Exception as e: + print("[-] Failed to update! Please check new version manually:") + print("[-] https://github.com/yoshiko2/AV_Data_Capture/releases") + print("[*]======================================================") + return remote = data["tag_name"] local = local_version @@ -23,19 +30,22 @@ def argparse_function(ver: str) -> [str, str, bool]: parser = argparse.ArgumentParser() parser.add_argument("file", default='', nargs='?', help="Single Movie file path.") parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.") - parser.add_argument("-n", "--number", default='', nargs='?',help="Custom file number") - parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true", help="Auto exit after program complete") + parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number") + parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true", + help="Auto exit after program complete") parser.add_argument("-v", "--version", action="version", version=ver) args = parser.parse_args() return args.file, args.config, args.number, args.autoexit + def movie_lists(root, escape_folder): for folder in escape_folder: if folder in root: return [] total = [] - file_type = ['.mp4', '.avi', '.rmvb', '.wmv', '.mov', '.mkv', '.flv', '.ts', '.webm', '.MP4', '.AVI', '.RMVB', '.WMV','.MOV', '.MKV', '.FLV', '.TS', '.WEBM', '.iso','.ISO'] + file_type = ['.mp4', '.avi', '.rmvb', '.wmv', '.mov', '.mkv', '.flv', '.ts', '.webm', '.MP4', '.AVI', '.RMVB', + '.WMV', '.MOV', '.MKV', '.FLV', '.TS', '.WEBM', '.iso', '.ISO'] dirs = os.listdir(root) for entry in dirs: f = os.path.join(root, entry) @@ -65,9 +75,9 @@ def rm_empty_folder(path): a = '' -def create_data_and_move(file_path: str, c: config.Config,debug): +def create_data_and_move(file_path: str, c: config.Config, debug): # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4 - n_number = get_number(debug,file_path) + n_number = get_number(debug, file_path) if debug == True: print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number)) @@ -98,6 +108,7 @@ def create_data_and_move(file_path: str, c: config.Config,debug): except Exception as err: print('[!]', err) + def create_data_and_move_with_custom_number(file_path: str, c: config.Config, custom_number=None): try: print("[!]Making Data for [{}], the number is [{}]".format(file_path, custom_number)) @@ -141,7 +152,7 @@ if __name__ == '__main__': # ========== Single File ========== if not single_file_path == '': print('[+]==================== Single File =====================') - create_data_and_move_with_custom_number(single_file_path, conf,custom_number) + create_data_and_move_with_custom_number(single_file_path, conf, custom_number) rm_empty_folder(conf.success_folder()) rm_empty_folder(conf.failed_folder()) print("[+]All finished!!!") @@ -155,7 +166,7 @@ if __name__ == '__main__': count_all = str(len(movie_list)) print('[+]Find', count_all, 'movies') if conf.debug() == True: - print('[+]'+' DEBUG MODE ON '.center(54, '-')) + print('[+]' + ' DEBUG MODE ON '.center(54, '-')) if conf.soft_link(): print('[!] --- Soft link mode is ENABLE! ----') for movie_path in movie_list: # 遍历电影列表 交给core处理 From a49de4cb12798a112ffdfba523520d14afb8f397 Mon Sep 17 00:00:00 2001 From: bigfoxtail Date: Wed, 23 Dec 2020 18:47:13 +0800 Subject: [PATCH 29/32] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dfailed=5Fmove=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E6=97=A0=E6=B3=95=E7=94=9F=E6=95=88=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core.py b/core.py index 080b32e..6356598 100755 --- a/core.py +++ b/core.py @@ -29,8 +29,9 @@ def escape_path(path, escape_literals: str): # Remove escape literals def moveFailedFolder(filepath, failed_folder): - print('[-]Move to Failed output folder') - shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/') + if config.Config().failed_move(): + print('[-]Move to Failed output folder') + shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/') return From 7e94b024ff27af4ef3a488454c6725529783caad Mon Sep 17 00:00:00 2001 From: benjamin <24502572+ddtyjmyjm@users.noreply.github.com> Date: Thu, 24 Dec 2020 12:32:06 +0800 Subject: [PATCH 30/32] fix: revert the api of moveFailedFolder --- core.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/core.py b/core.py index 70389cc..f062824 100755 --- a/core.py +++ b/core.py @@ -29,11 +29,11 @@ def escape_path(path, escape_literals: str): # Remove escape literals return path -def moveFailedFolder(filepath, failed_folder, symlink: bool = False): +def moveFailedFolder(filepath, failed_folder): root_path = str(pathlib.Path(filepath).parent) file_name = pathlib.Path(filepath).name destination_path = root_path + '/' + failed_folder + '/' - if symlink: + if config.Config.soft_link(): print('[-]Create symlink to Failed output folder') os.symlink(filepath, destination_path + '/' + file_name) else: @@ -105,7 +105,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON # Return if data not found in all sources if not json_data: print('[-]Movie Data not found!') - moveFailedFolder(filepath, conf.failed_folder(), conf.soft_link()) + moveFailedFolder(filepath, conf.failed_folder()) return # ================================================网站规则添加结束================================================ @@ -133,7 +133,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON if title == '' or number == '': print('[-]Movie Data not found!') - moveFailedFolder(filepath, conf.failed_folder(), conf.soft_link()) + moveFailedFolder(filepath, conf.failed_folder()) return # if imagecut == '3': @@ -322,14 +322,14 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa i += 1 print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) print('[-]Connect Failed! Please check your Proxy or Network!') - moveFailedFolder(filepath, failed_folder, conf.soft_link()) + moveFailedFolder(filepath, failed_folder) return # 封面是否下载成功,否则移动到failed def image_download(cover, number, c_word, path, conf: config.Config, filepath, failed_folder): if download_file_with_filename(cover, number + c_word + '-fanart.jpg', path, conf, filepath, failed_folder) == 'failed': - moveFailedFolder(filepath, failed_folder, conf.soft_link()) + moveFailedFolder(filepath, failed_folder) return switch, _proxy, _timeout, retry, _proxytype = conf.proxy() @@ -402,11 +402,13 @@ def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fa except IOError as e: print("[-]Write Failed!") print(e) - raise e + moveFailedFolder(filepath, failed_folder) + return except Exception as e1: print(e1) print("[-]Write Failed!") - raise e1 + moveFailedFolder(filepath, failed_folder) + return def cutImage(imagecut, path, number, c_word): @@ -481,12 +483,16 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, c_word, print('[-]Error! Please run as administrator!') return - def get_part(filepath, failed_folder): - if re.search('-CD\d+', filepath): - return re.findall('-CD\d+', filepath)[0] - if re.search('-cd\d+', filepath): - return re.findall('-cd\d+', filepath)[0] + try: + if re.search('-CD\d+', filepath): + return re.findall('-CD\d+', filepath)[0] + if re.search('-cd\d+', filepath): + return re.findall('-cd\d+', filepath)[0] + except: + print("[-]failed!Please rename the filename again!") + moveFailedFolder(filepath, failed_folder) + return def debug_print(data: json): @@ -535,11 +541,7 @@ def core_main(file_path, number_th, conf: config.Config): # =======================================================================判断-C,-CD后缀 if '-CD' in filepath or '-cd' in filepath: multi_part = 1 - try: - part = get_part(filepath, conf.failed_folder()) - except: - print("[-]failed!Please rename the filename again!") - moveFailedFolder(filepath, conf.failed_folder(),conf.soft_link()) + part = get_part(filepath, conf.failed_folder()) if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀 @@ -571,10 +573,8 @@ def core_main(file_path, number_th, conf: config.Config): cutImage(imagecut, path, number, c_word) # 打印文件 - try: - print_files(path, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, conf.failed_folder(), tag, json_data.get('actor_list'), liuchu) - except: - moveFailedFolder(filepath, conf.failed_folder(), conf.soft_link()) + print_files(path, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, conf.failed_folder(), tag, json_data.get('actor_list'), liuchu) + # 移动文件 paste_file_to_folder(filepath, path, number, c_word, conf) elif conf.main_mode() == 2: From 38739feee2b303cd1d9fb14e98b0437df54a2804 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Fri, 25 Dec 2020 12:11:54 +0800 Subject: [PATCH 31/32] Update to 4.2.2, remove docker-entrypoint.sh and encapsulation.sh, use Python3_Crosss_Wrapper --- AV_Data_Capture.py | 2 +- docker-entrypoint.sh | 15 --------------- encapsulation.sh | 6 ------ wrapper/FreeBSD.sh | 4 ++++ wrapper/Linux.sh | 15 +++++++++++++++ 5 files changed, 20 insertions(+), 22 deletions(-) delete mode 100644 docker-entrypoint.sh delete mode 100644 encapsulation.sh create mode 100755 wrapper/FreeBSD.sh create mode 100755 wrapper/Linux.sh diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 9d07009..3de8bba 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -130,7 +130,7 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu if __name__ == '__main__': - version = '4.2.1' + version = '4.2.2' # Parse command line args single_file_path, config_file, custom_number, auto_exit = argparse_function(version) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh deleted file mode 100644 index 76e4443..0000000 --- a/docker-entrypoint.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -git fetch --all -git reset --hard origin/master -git pull - -apt update -apt upgrade -y -pip3 install --upgrade -r requirements.txt -make - -cd bin -version=$(./AV_Data_Capture --version) -zip AV_Data_Capture-CLI-$(echo $version)-$(uname)-$(dpkg --print-architecture).zip AV_Data_Capture config.ini -mv *zip /avdc_bin diff --git a/encapsulation.sh b/encapsulation.sh deleted file mode 100644 index c3b7a78..0000000 --- a/encapsulation.sh +++ /dev/null @@ -1,6 +0,0 @@ -pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py -cp config.ini dist/ -find . -name '*.pyc' -delete -find . -name '__pycache__' -type d | xargs rm -fr -find . -name '.pytest_cache' -type d | xargs rm -fr -rm -rf build/ diff --git a/wrapper/FreeBSD.sh b/wrapper/FreeBSD.sh new file mode 100755 index 0000000..20d87e0 --- /dev/null +++ b/wrapper/FreeBSD.sh @@ -0,0 +1,4 @@ +pkg install python37 py37-requests py37-pip py37-lxml py37-pillow py37-cloudscraper py37-pysocks git zip py37-pyinstaller py37-beautifulsoup448 +pip install pyquery +pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py +cp config.ini ./dist diff --git a/wrapper/Linux.sh b/wrapper/Linux.sh new file mode 100755 index 0000000..e1c761e --- /dev/null +++ b/wrapper/Linux.sh @@ -0,0 +1,15 @@ +if ['$(dpkg --print-architecture)' != 'amd64'] || ['$(dpkg --print-architecture)' != 'i386']; then + apt install python3 python3-pip git sudo libxml2-dev libxslt-dev build-essential wget nano libcmocka-dev libcmocka0 -y + apt install zlib* libjpeg-dev -y + wget https://files.pythonhosted.org/packages/82/96/21ba3619647bac2b34b4996b2dbbea8e74a703767ce24192899d9153c058/pyinstaller-4.0.tar.gz + tar -zxvf pyinstaller-4.0.tar.gz + cd pyinstaller-4.0/bootloader + sed -i "s/ '-Werror',//" wscript + python3 ./waf distclean all + cd ../ + python3 setup.py install + cd ../ +fi +pip3 install -r requirements.txt +pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py +cp config.ini ./dist From 4c31eee978747b0fc39b0afc161c6b1cfc9cfe1e Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Fri, 25 Dec 2020 12:12:09 +0800 Subject: [PATCH 32/32] Update to 4.2.2, remove docker-entrypoint.sh and encapsulation.sh, use Python3_Crosss_Wrapper --- wrapper/FreeBSD-amd64.sh | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 wrapper/FreeBSD-amd64.sh diff --git a/wrapper/FreeBSD-amd64.sh b/wrapper/FreeBSD-amd64.sh deleted file mode 100644 index a7cf92a..0000000 --- a/wrapper/FreeBSD-amd64.sh +++ /dev/null @@ -1,5 +0,0 @@ -pkg install python37 py37-requests py37-pip py37-lxml py37-pillow py37-cloudscraper py37-pysocks git zip py37-pyinstaller py37-beautifulsoup448 -pip install pyquery -cd ../ -chmox +x encapsulation.sh -./encapsulation.sh