diff --git a/.vscode/launch.json b/.vscode/launch.json
index b98d0fb..d471cae 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -12,7 +12,7 @@
             "env": { "PYTHONIOENCODING": "utf-8" },
-            "program": "${workspaceFolder}/AV_Data_capture.py",
+            "program": "${workspaceFolder}/Movie_Data_capture.py",
             "program1": "${workspaceFolder}/WebCrawler/javbus.py",
             "program2": "${workspaceFolder}/WebCrawler/javdb.py",
             "program3": "${workspaceFolder}/WebCrawler/xcity.py",
@@ -20,12 +20,12 @@
             "program5": "${workspaceFolder}/config.py",
             "cwd0": "${fileDirname}",
             "cwd1": "${workspaceFolder}/dist",
-            "cwd2": "${env:HOME}${env:USERPROFILE}/.avdc",
+            "cwd2": "${env:HOME}${env:USERPROFILE}/.mdc",
             "args0": ["-a","-p","J:/Downloads","-o","J:/log"],
             "args1": ["-g","-m","3","-c","1","-d","0"],
-            "args2": ["-igd0", "-m3", "-p", "J:/JAV_output", "-q", "121220_001"],
-            "args3": ["-agd0","-m3", "-q", ".*","-p","J:/#JAV_output3"],
-            "args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/JAV_output"],
+            "args2": ["-igd0", "-m3", "-p", "J:/output", "-q", "121220_001"],
+            "args3": ["-agd0","-m3", "-q", ".*","-p","J:/#output"],
+            "args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/output"],
             "args5": ["-gic1", "-d0", "-m1", "-o", "avlog", "-p", "J:/Downloads"],
             "args6": ["-z", "-o", "J:/log"]
         }
diff --git a/ADC_function.py b/ADC_function.py
index 83842e6..1a64477 100644
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -349,8 +349,8 @@ def load_cookies(filename):
     path_search_order = (
         Path.cwd() / filename,
         Path.home() / filename,
-        Path.home() / f".avdc/{filename}",
-        Path.home() / f".local/share/avdc/{filename}"
+        Path.home() / f".mdc/{filename}",
+        Path.home() / f".local/share/mdc/{filename}"
     )
     cookies_filename = None
     try:
diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py
index 43225e4..3efecb6 100644
--- a/Movie_Data_Capture.py
+++ b/Movie_Data_Capture.py
@@ -46,7 +46,7 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
     parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
     parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
     # parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
-    default_logdir = str(Path.home() / '.avlogs')
+    default_logdir = str(Path.home() / '.mlogs')
     parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
         help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
         default folder for current user: '{default_logdir}'. Change default folder to an empty file,
@@ -136,8 +136,8 @@ def dupe_stdout_to_logfile(logdir: str):
         return # Tips for disabling logs by change directory to a same name empty regular file
     abslog_dir = log_dir.resolve()
     log_tmstr = datetime.now().strftime("%Y%m%dT%H%M%S")
-    logfile = abslog_dir / f'avdc_{log_tmstr}.txt'
-    errlog = abslog_dir / f'avdc_{log_tmstr}_err.txt'
+    logfile = abslog_dir / f'mdc_{log_tmstr}.txt'
+    errlog = abslog_dir / f'mdc_{log_tmstr}_err.txt'
     sys.stdout = OutLogger(logfile)
     sys.stderr = ErrLogger(errlog)
@@ -169,35 +169,35 @@ def close_logfile(logdir: str):
     # 按月合并为单个月志,去年及以前的月志,今年4月以后将之按年合并为年志
     # 测试步骤:
     """
-    LOGDIR=/tmp/avlog
+    LOGDIR=/tmp/mlog
     mkdir -p $LOGDIR
-    for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done
-    for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done
-    for f in {00..23};do;echo 20211001T$f>$LOGDIR/avdc_20211001T${f}5959.txt;done
+    for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/mdc_${f}T235959.txt;done
+    for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/mdc_2021${f}T235959.txt;done
+    for f in {00..23};do;echo 20211001T$f>$LOGDIR/mdc_20211001T${f}5959.txt;done
     echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
-    # 1932 files in /tmp/avlog
-    avdc -zgic1 -d0 -m3 -o $LOGDIR
-    # python3 ./AV_Data_Capture.py -zgic1 -o $LOGDIR
+    # 1932 files in /tmp/mlog
+    mdc -zgic1 -d0 -m3 -o $LOGDIR
+    # python3 ./Movie_Data_Capture.py -zgic1 -o $LOGDIR
     ls $LOGDIR
     # rm -rf $LOGDIR
     """
     today = datetime.today()
     # 第一步,合并到日。3天前的日志,文件名是同一天的合并为一份日志
     for i in range(1):
-        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}T\d{6}$', f.stem, re.A)]
+        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}T\d{6}$', f.stem, re.A)]
         if not txts or not len(txts):
             break
         e = [f for f in txts if '_err' in f.stem]
         txts.sort()
         tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
-        deadline_day = f'avdc_{tmstr_3_days_ago}'
+        deadline_day = f'mdc_{tmstr_3_days_ago}'
         day_merge = [f for f in txts if f.stem < deadline_day]
         if not day_merge or not len(day_merge):
             break
-        cutday = len('T235959.txt') # cut length avdc_20201201|T235959.txt
+        cutday = len('T235959.txt') # cut length mdc_20201201|T235959.txt
         for f in day_merge:
             try:
-                day_file_name = str(f)[:-cutday] + '.txt' # avdc_20201201.txt
+                day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt
                 with open(day_file_name, 'a', encoding='utf-8') as m:
                     m.write(f.read_text(encoding='utf-8'))
                 f.unlink(missing_ok=True)
@@ -205,19 +205,19 @@ def close_logfile(logdir: str):
             pass
     # 第二步,合并到月
    for i in range(1): # 利用1次循环的break跳到第二步,避免大块if缩进或者使用goto语法
-        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}$', f.stem, re.A)]
+        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}$', f.stem, re.A)]
         if not txts or not len(txts):
             break
         txts.sort()
         tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
-        deadline_month = f'avdc_{tmstr_3_month_ago}'
+        deadline_month = f'mdc_{tmstr_3_month_ago}'
         month_merge = [f for f in txts if f.stem < deadline_month]
         if not month_merge or not len(month_merge):
             break
-        tomonth = len('01.txt') # cut length avdc_202012|01.txt
+        tomonth = len('01.txt') # cut length mdc_202012|01.txt
         for f in month_merge:
             try:
-                month_file_name = str(f)[:-tomonth] + '.txt' # avdc_202012.txt
+                month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt
                 with open(month_file_name, 'a', encoding='utf-8') as m:
                     m.write(f.read_text(encoding='utf-8'))
                 f.unlink(missing_ok=True)
@@ -226,18 +226,18 @@ def close_logfile(logdir: str):
     # 第三步,月合并到年
     if today.month < 4:
         return
-    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{6}$', f.stem, re.A)]
+    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
     if not mons or not len(mons):
         return
     mons.sort()
-    deadline_year = f'avdc_{today.year-1}13'
+    deadline_year = f'mdc_{today.year-1}13'
     year_merge = [f for f in mons if f.stem < deadline_year]
     if not year_merge or not len(year_merge):
         return
-    toyear = len('12.txt') # cut length avdc_2020|12.txt
+    toyear = len('12.txt') # cut length mdc_2020|12.txt
     for f in year_merge:
         try:
-            year_file_name = str(f)[:-toyear] + '.txt' # avdc_2020.txt
+            year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
             with open(year_file_name, 'a', encoding='utf-8') as y:
                 y.write(f.read_text(encoding='utf-8'))
             f.unlink(missing_ok=True)
@@ -500,8 +500,8 @@ def main():
     # Download Mapping Table, parallel version
     def fmd(f):
-        return ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/' + f,
-                Path.home() / '.local' / 'share' / 'avdc' / f)
+        return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
+                Path.home() / '.local' / 'share' / 'mdc' / f)
     map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
     for k,v in map_tab:
         if v.exists():
diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py
index 2f33fcf..c661700 100644
--- a/WebCrawler/__init__.py
+++ b/WebCrawler/__init__.py
@@ -41,8 +41,8 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
     iterate through all services and fetch the data
     """
-    actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_actor.xml'))
-    info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_info.xml'))
+    actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_actor.xml'))
+    info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_info.xml'))
     func_mapping = {
         "airav": airav.main,
@@ -238,7 +238,7 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
                 continue
             if translate_value == "title":
                 title_dict = json.load(
-                    open(str(Path.home() / '.local' / 'share' / 'avdc' / 'c_number.json'), 'r', encoding="utf-8"))
+                    open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8"))
                 try:
                     json_data[translate_value] = title_dict[number]
                     continue
diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index a8f2e51..80088b0 100755
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -320,12 +320,12 @@ if __name__ == "__main__":
     # print(main('BANK-022'))
     # print(main('070116-197'))
     # print(main('093021_539')) # 没有剧照 片商pacopacomama
-    print(main('FC2-2278260'))
+    #print(main('FC2-2278260'))
     # print(main('FC2-735670'))
     # print(main('FC2-1174949')) # not found
-    print(main('MVSD-439'))
+    #print(main('MVSD-439'))
     # print(main('EHM0001')) # not found
     print(main('FC2-2314275'))
     # print(main('EBOD-646'))
     # print(main('LOVE-262'))
-    print(main('ABP-890'))
+    #print(main('ABP-890'))
diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py
index 14052cd..cf4c7cc 100644
--- a/WebCrawler/storyline.py
+++ b/WebCrawler/storyline.py
@@ -333,7 +333,7 @@ def getStoryline_amazon(q_title, number, debug):
                 # 删除无效cookies,无论是用户创建还是自动创建,以避免持续故障
                 cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
                 # 自动创建的cookies文件放在搜索路径表的末端,最低优先级。有amazon.co.jp帐号的用户可以从浏览器导出cookie放在靠前搜索路径
-                ama_save = Path.home() / ".local/share/avdc/amazon.json"
+                ama_save = Path.home() / ".local/share/mdc/amazon.json"
                 ama_save.parent.mkdir(parents=True, exist_ok=True)
                 ama_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
diff --git a/config.py b/config.py
index 2f41c4c..f5084df 100644
--- a/config.py
+++ b/config.py
@@ -31,10 +31,10 @@ class Config:
         path_search_order = (
             Path(path),
             Path.cwd() / "config.ini",
-            Path.home() / "avdc.ini",
-            Path.home() / ".avdc.ini",
-            Path.home() / ".avdc/config.ini",
-            Path.home() / ".config/avdc/config.ini"
+            Path.home() / "mdc.ini",
+            Path.home() / ".mdc.ini",
+            Path.home() / ".mdc/config.ini",
+            Path.home() / ".config/mdc/config.ini"
         )
         ini_path = None
         for p in path_search_order:
@@ -79,9 +79,9 @@ class Config:
         ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
         if re.search('n', ins, re.I):
             sys.exit(2)
-        # 用户目录才确定具有写权限,因此选择 ~/avdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的
+        # 用户目录才确定具有写权限,因此选择 ~/mdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的
         # 当前目录。目前版本也不再鼓励使用当前路径放置配置文件了,只是作为多配置文件的切换技巧保留。
-        write_path = path_search_order[2] # Path.home() / "avdc.ini"
+        write_path = path_search_order[2] # Path.home() / "mdc.ini"
         write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
         print("Config file '{}' created.".format(write_path.resolve()))
         input("Press Enter key exit...")
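
Note on the renamed config lookup (not part of the patch): config.py probes the search paths above top to bottom and uses the first file that exists, so an old ~/avdc.ini or ~/.avdc/config.ini is simply no longer found after this change and must be renamed or moved. The snippet below is a minimal, hypothetical sketch of that resolution order for users checking where their config should live; the helper name resolve_config does not exist in the repository.

    from pathlib import Path
    from typing import Optional

    def resolve_config(cli_path: str = "") -> Optional[Path]:
        """Return the first existing config file, mirroring config.py's new search order."""
        candidates = (
            Path(cli_path),                          # explicit path, if one was passed in
            Path.cwd() / "config.ini",               # per-directory override (still supported)
            Path.home() / "mdc.ini",                 # default location created on first run
            Path.home() / ".mdc.ini",
            Path.home() / ".mdc/config.ini",
            Path.home() / ".config/mdc/config.ini",
        )
        for p in candidates:
            if p.is_file():
                return p
        return None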
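A second note on the renamed log files: close_logfile() above writes one mdc_YYYYMMDDTHHMMSS.txt per run and periodically folds old runs into mdc_YYYYMMDD.txt, then mdc_YYYYMM.txt, then mdc_YYYY.txt by cutting a fixed-length suffix off the filename and appending the contents. The sketch below condenses that inner merge loop; the helper name merge_into_coarser is hypothetical and only restates the logic shown in the hunks above.

    from pathlib import Path

    def merge_into_coarser(files, cut_len: int) -> None:
        """Append each log to its coarser-named neighbour and delete the original.
        With cut_len = len('T235959.txt'), mdc_20201201T235959.txt folds into mdc_20201201.txt."""
        for f in files:
            target = Path(str(f)[:-cut_len] + '.txt')
            with open(target, 'a', encoding='utf-8') as out:
                out.write(Path(f).read_text(encoding='utf-8'))
            Path(f).unlink(missing_ok=True)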