Update to 6.0.1 #2

yoshiko2
2021-12-17 23:39:02 +08:00
parent d44166d9ac
commit 0b0d0fcafc
7 changed files with 44 additions and 44 deletions

.vscode/launch.json vendored

@@ -12,7 +12,7 @@
     "env": {
         "PYTHONIOENCODING": "utf-8"
     },
-    "program": "${workspaceFolder}/AV_Data_capture.py",
+    "program": "${workspaceFolder}/Movie_Data_capture.py",
     "program1": "${workspaceFolder}/WebCrawler/javbus.py",
     "program2": "${workspaceFolder}/WebCrawler/javdb.py",
     "program3": "${workspaceFolder}/WebCrawler/xcity.py",
@@ -20,12 +20,12 @@
     "program5": "${workspaceFolder}/config.py",
     "cwd0": "${fileDirname}",
     "cwd1": "${workspaceFolder}/dist",
-    "cwd2": "${env:HOME}${env:USERPROFILE}/.avdc",
+    "cwd2": "${env:HOME}${env:USERPROFILE}/.mdc",
     "args0": ["-a","-p","J:/Downloads","-o","J:/log"],
     "args1": ["-g","-m","3","-c","1","-d","0"],
-    "args2": ["-igd0", "-m3", "-p", "J:/JAV_output", "-q", "121220_001"],
-    "args3": ["-agd0","-m3", "-q", ".*","-p","J:/#JAV_output3"],
-    "args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/JAV_output"],
+    "args2": ["-igd0", "-m3", "-p", "J:/output", "-q", "121220_001"],
+    "args3": ["-agd0","-m3", "-q", ".*","-p","J:/#output"],
+    "args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/output"],
     "args5": ["-gic1", "-d0", "-m1", "-o", "avlog", "-p", "J:/Downloads"],
     "args6": ["-z", "-o", "J:/log"]
 }


@@ -349,8 +349,8 @@ def load_cookies(filename):
     path_search_order = (
         Path.cwd() / filename,
         Path.home() / filename,
-        Path.home() / f".avdc/{filename}",
-        Path.home() / f".local/share/avdc/{filename}"
+        Path.home() / f".mdc/{filename}",
+        Path.home() / f".local/share/mdc/{filename}"
     )
     cookies_filename = None
     try:
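The hunk only renames the last two candidates, but the surrounding pattern is a first-match path search. A minimal standalone sketch of that pattern under the new ".mdc" layout (find_cookies is an illustrative helper, not the project's function):

from pathlib import Path
import json

def find_cookies(filename="cookies.json"):
    # Check the candidate locations in priority order and return the first one that exists.
    candidates = (
        Path.cwd() / filename,
        Path.home() / filename,
        Path.home() / f".mdc/{filename}",
        Path.home() / f".local/share/mdc/{filename}",
    )
    for p in candidates:
        if p.is_file():
            return json.loads(p.read_text(encoding="utf-8")), str(p)
    return None, None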


@@ -46,7 +46,7 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
     parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
     parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
     # parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
-    default_logdir = str(Path.home() / '.avlogs')
+    default_logdir = str(Path.home() / '.mlogs')
     parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
         help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
         default folder for current user: '{default_logdir}'. Change default folder to an empty file,
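Because the help text interpolates default_logdir, the rename above changes both the default behaviour and the displayed help. A minimal sketch of the renamed default (standalone, not the project's full parser):

import argparse
from pathlib import Path

parser = argparse.ArgumentParser()
default_logdir = str(Path.home() / '.mlogs')  # new default folder from the hunk above
parser.add_argument("-o", "--log-dir", dest="logdir", default=default_logdir, nargs="?")
print(parser.parse_args([]).logdir)           # without -o, logs go under ~/.mlogs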
@@ -136,8 +136,8 @@ def dupe_stdout_to_logfile(logdir: str):
         return  # Tips for disabling logs by change directory to a same name empty regular file
     abslog_dir = log_dir.resolve()
     log_tmstr = datetime.now().strftime("%Y%m%dT%H%M%S")
-    logfile = abslog_dir / f'avdc_{log_tmstr}.txt'
-    errlog = abslog_dir / f'avdc_{log_tmstr}_err.txt'
+    logfile = abslog_dir / f'mdc_{log_tmstr}.txt'
+    errlog = abslog_dir / f'mdc_{log_tmstr}_err.txt'
     sys.stdout = OutLogger(logfile)
     sys.stderr = ErrLogger(errlog)
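The new file names matter downstream: close_logfile (next hunks) only merges files whose stems match the mdc_ prefix, so the two renames have to stay in sync. A quick illustrative check of that pattern:

import re
from datetime import datetime

stem = f"mdc_{datetime(2021, 12, 17, 23, 39, 2).strftime('%Y%m%dT%H%M%S')}"  # 'mdc_20211217T233902'
assert re.match(r'^mdc_\d{8}T\d{6}$', stem, re.A)  # matched by the day-merge regex below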
@@ -169,35 +169,35 @@ def close_logfile(logdir: str):
     # merged into a single monthly log per month; monthly logs from last year and earlier are merged into a yearly log after April of the current year
     # Test steps:
     """
-    LOGDIR=/tmp/avlog
+    LOGDIR=/tmp/mlog
     mkdir -p $LOGDIR
-    for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done
-    for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done
-    for f in {00..23};do;echo 20211001T$f>$LOGDIR/avdc_20211001T${f}5959.txt;done
+    for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/mdc_${f}T235959.txt;done
+    for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/mdc_2021${f}T235959.txt;done
+    for f in {00..23};do;echo 20211001T$f>$LOGDIR/mdc_20211001T${f}5959.txt;done
     echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
-    # 1932 files in /tmp/avlog
-    avdc -zgic1 -d0 -m3 -o $LOGDIR
-    # python3 ./AV_Data_Capture.py -zgic1 -o $LOGDIR
+    # 1932 files in /tmp/mlog
+    mdc -zgic1 -d0 -m3 -o $LOGDIR
+    # python3 ./Movie_Data_Capture.py -zgic1 -o $LOGDIR
     ls $LOGDIR
     # rm -rf $LOGDIR
     """
     today = datetime.today()
     # Step 1: merge to day level. For logs older than 3 days, files whose names fall on the same day are merged into one log
     for i in range(1):
-        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}T\d{6}$', f.stem, re.A)]
+        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}T\d{6}$', f.stem, re.A)]
         if not txts or not len(txts):
             break
         e = [f for f in txts if '_err' in f.stem]
         txts.sort()
         tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
-        deadline_day = f'avdc_{tmstr_3_days_ago}'
+        deadline_day = f'mdc_{tmstr_3_days_ago}'
         day_merge = [f for f in txts if f.stem < deadline_day]
         if not day_merge or not len(day_merge):
             break
-        cutday = len('T235959.txt')  # cut length avdc_20201201|T235959.txt
+        cutday = len('T235959.txt')  # cut length mdc_20201201|T235959.txt
         for f in day_merge:
             try:
-                day_file_name = str(f)[:-cutday] + '.txt'  # avdc_20201201.txt
+                day_file_name = str(f)[:-cutday] + '.txt'  # mdc_20201201.txt
                 with open(day_file_name, 'a', encoding='utf-8') as m:
                     m.write(f.read_text(encoding='utf-8'))
                 f.unlink(missing_ok=True)
@@ -205,19 +205,19 @@ def close_logfile(logdir: str):
                 pass
     # Step 2: merge to month level
     for i in range(1):  # use the break of a one-pass loop to jump to the next step, avoiding a deeply indented if block or goto-style syntax
-        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}$', f.stem, re.A)]
+        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}$', f.stem, re.A)]
         if not txts or not len(txts):
             break
         txts.sort()
         tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
-        deadline_month = f'avdc_{tmstr_3_month_ago}'
+        deadline_month = f'mdc_{tmstr_3_month_ago}'
         month_merge = [f for f in txts if f.stem < deadline_month]
         if not month_merge or not len(month_merge):
             break
-        tomonth = len('01.txt')  # cut length avdc_202012|01.txt
+        tomonth = len('01.txt')  # cut length mdc_202012|01.txt
         for f in month_merge:
             try:
-                month_file_name = str(f)[:-tomonth] + '.txt'  # avdc_202012.txt
+                month_file_name = str(f)[:-tomonth] + '.txt'  # mdc_202012.txt
                 with open(month_file_name, 'a', encoding='utf-8') as m:
                     m.write(f.read_text(encoding='utf-8'))
                 f.unlink(missing_ok=True)
@@ -226,18 +226,18 @@ def close_logfile(logdir: str):
     # Step 3: merge monthly logs into yearly logs
     if today.month < 4:
         return
-    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{6}$', f.stem, re.A)]
+    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
     if not mons or not len(mons):
         return
     mons.sort()
-    deadline_year = f'avdc_{today.year-1}13'
+    deadline_year = f'mdc_{today.year-1}13'
     year_merge = [f for f in mons if f.stem < deadline_year]
     if not year_merge or not len(year_merge):
         return
-    toyear = len('12.txt')  # cut length avdc_2020|12.txt
+    toyear = len('12.txt')  # cut length mdc_2020|12.txt
     for f in year_merge:
         try:
-            year_file_name = str(f)[:-toyear] + '.txt'  # avdc_2020.txt
+            year_file_name = str(f)[:-toyear] + '.txt'  # mdc_2020.txt
             with open(year_file_name, 'a', encoding='utf-8') as y:
                 y.write(f.read_text(encoding='utf-8'))
             f.unlink(missing_ok=True)
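All three passes above share one pattern: collect stems older than a deadline, cut the fine-grained suffix off the file name, and append the contents onto the coarser file. A condensed sketch of that shared step (merge_older_logs is an illustrative helper, not the project's code):

import re
from pathlib import Path

def merge_older_logs(log_dir: Path, stem_pattern: str, deadline_stem: str, cut: int):
    # Append every matching log whose stem sorts before the deadline onto its coarser file, then delete it.
    logs = sorted(f for f in log_dir.glob('*.txt') if re.match(stem_pattern, f.stem, re.A))
    for f in logs:
        if f.stem >= deadline_stem:
            continue
        target = str(f)[:-cut] + '.txt'  # e.g. mdc_20201201T235959.txt -> mdc_20201201.txt
        with open(target, 'a', encoding='utf-8') as out:
            out.write(f.read_text(encoding='utf-8'))
        f.unlink(missing_ok=True)

# Day-level pass with the deadline and cut length used above (values are illustrative):
# merge_older_logs(Path('/tmp/mlog'), r'^mdc_\d{8}T\d{6}$', 'mdc_20211214T99', len('T235959.txt'))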
@@ -500,8 +500,8 @@ def main():
     # Download Mapping Table, parallel version
     def fmd(f):
-        return ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/' + f,
-                Path.home() / '.local' / 'share' / 'avdc' / f)
+        return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
+                Path.home() / '.local' / 'share' / 'mdc' / f)
     map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
     for k,v in map_tab:
         if v.exists():
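The hunk stops at the v.exists() check, so only the URL base and the local cache folder change; the parallel download mentioned in the comment is not shown. A rough sketch of consuming the renamed table, assuming a simple sequential requests-based fetch (the project's actual download code may differ):

from pathlib import Path
import requests

def fmd(f):
    return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
            Path.home() / '.local' / 'share' / 'mdc' / f)

for url, dest in (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json')):
    if dest.exists():
        continue  # keep whatever is already cached locally
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_bytes(requests.get(url, timeout=30).content)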


@@ -41,8 +41,8 @@ def get_data_from_json(file_number, oCC):  # return metadata from JSON
     iterate through all services and fetch the data
     """
-    actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_actor.xml'))
-    info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_info.xml'))
+    actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_actor.xml'))
+    info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_info.xml'))
     func_mapping = {
         "airav": airav.main,
@@ -238,7 +238,7 @@ def get_data_from_json(file_number, oCC):  # return metadata from JSON
                 continue
             if translate_value == "title":
                 title_dict = json.load(
-                    open(str(Path.home() / '.local' / 'share' / 'avdc' / 'c_number.json'), 'r', encoding="utf-8"))
+                    open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8"))
                 try:
                     json_data[translate_value] = title_dict[number]
                     continue
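Both hunks read the same three files that the main() hunk earlier in this commit downloads into ~/.local/share/mdc, so the directory rename has to match on the producer and consumer side. A trivial check, for illustration only:

from pathlib import Path

share_dir = Path.home() / '.local' / 'share' / 'mdc'
for name in ('mapping_actor.xml', 'mapping_info.xml', 'c_number.json'):
    print(name, 'present' if (share_dir / name).exists() else 'missing')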


@@ -320,12 +320,12 @@ if __name__ == "__main__":
     # print(main('BANK-022'))
     # print(main('070116-197'))
     # print(main('093021_539'))  # no stills; studio pacopacomama
-    print(main('FC2-2278260'))
+    #print(main('FC2-2278260'))
     # print(main('FC2-735670'))
     # print(main('FC2-1174949'))  # not found
-    print(main('MVSD-439'))
+    #print(main('MVSD-439'))
     # print(main('EHM0001'))  # not found
     print(main('FC2-2314275'))
     # print(main('EBOD-646'))
     # print(main('LOVE-262'))
-    print(main('ABP-890'))
+    #print(main('ABP-890'))


@@ -333,7 +333,7 @@ def getStoryline_amazon(q_title, number, debug):
     # delete invalid cookies (whether user-created or auto-created) to avoid persistent failures
     cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
     # the auto-created cookies file goes at the end of the search path list (lowest priority); users with an amazon.co.jp account can export cookies from a browser and place them at an earlier search path
-    ama_save = Path.home() / ".local/share/avdc/amazon.json"
+    ama_save = Path.home() / ".local/share/mdc/amazon.json"
     ama_save.parent.mkdir(parents=True, exist_ok=True)
     ama_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
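As the comment in the hunk notes, the auto-saved amazon.json lands in the last (lowest-priority) slot of the load_cookies search order from the earlier hunk, so a cookie file the user places in a higher-priority location still wins. Paths shown for illustration only:

from pathlib import Path

auto_saved = Path.home() / ".local/share/mdc/amazon.json"  # written automatically, found last
user_supplied = Path.home() / ".mdc/amazon.json"           # e.g. exported from a browser, found earlier
print("preferred:", user_supplied if user_supplied.is_file() else auto_saved)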


@@ -31,10 +31,10 @@ class Config:
         path_search_order = (
             Path(path),
             Path.cwd() / "config.ini",
-            Path.home() / "avdc.ini",
-            Path.home() / ".avdc.ini",
-            Path.home() / ".avdc/config.ini",
-            Path.home() / ".config/avdc/config.ini"
+            Path.home() / "mdc.ini",
+            Path.home() / ".mdc.ini",
+            Path.home() / ".mdc/config.ini",
+            Path.home() / ".config/mdc/config.ini"
         )
         ini_path = None
         for p in path_search_order:
@@ -79,9 +79,9 @@ class Config:
             ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
             if re.search('n', ins, re.I):
                 sys.exit(2)
-            # only the user's home directory is certain to be writable, so ~/avdc.ini is chosen as the path for the generated config file, rather than
+            # only the user's home directory is certain to be writable, so ~/mdc.ini is chosen as the path for the generated config file, rather than
             # the current directory, which may not be writable; the current version no longer encourages keeping the config file in the current path, which is retained only as a trick for switching between multiple config files
-            write_path = path_search_order[2]  # Path.home() / "avdc.ini"
+            write_path = path_search_order[2]  # Path.home() / "mdc.ini"
             write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
             print("Config file '{}' created.".format(write_path.resolve()))
             input("Press Enter key exit...")
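Taken together, the two config.py hunks mean: the first existing candidate in the renamed search order is used, and if none exists the tool offers to generate ~/mdc.ini (path_search_order[2]), since only the home directory is known to be writable. A standalone sketch of that resolution (resolve_config is illustrative, not the class method):

from pathlib import Path
from configparser import ConfigParser

def resolve_config(path="config.ini"):
    candidates = (
        Path(path),
        Path.cwd() / "config.ini",
        Path.home() / "mdc.ini",
        Path.home() / ".mdc.ini",
        Path.home() / ".mdc/config.ini",
        Path.home() / ".config/mdc/config.ini",
    )
    for p in candidates:
        if p.is_file():  # first existing file wins
            conf = ConfigParser()
            conf.read(p, encoding="utf-8")
            return conf, p
    return None, Path.home() / "mdc.ini"  # nothing found: ~/mdc.ini is where a new one would be generated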