Update to 6.0.1 #2
This commit is contained in:
10
.vscode/launch.json
vendored
10
.vscode/launch.json
vendored
@@ -12,7 +12,7 @@
|
|||||||
"env": {
|
"env": {
|
||||||
"PYTHONIOENCODING": "utf-8"
|
"PYTHONIOENCODING": "utf-8"
|
||||||
},
|
},
|
||||||
"program": "${workspaceFolder}/AV_Data_capture.py",
|
"program": "${workspaceFolder}/Movie_Data_capture.py",
|
||||||
"program1": "${workspaceFolder}/WebCrawler/javbus.py",
|
"program1": "${workspaceFolder}/WebCrawler/javbus.py",
|
||||||
"program2": "${workspaceFolder}/WebCrawler/javdb.py",
|
"program2": "${workspaceFolder}/WebCrawler/javdb.py",
|
||||||
"program3": "${workspaceFolder}/WebCrawler/xcity.py",
|
"program3": "${workspaceFolder}/WebCrawler/xcity.py",
|
||||||
@@ -20,12 +20,12 @@
|
|||||||
"program5": "${workspaceFolder}/config.py",
|
"program5": "${workspaceFolder}/config.py",
|
||||||
"cwd0": "${fileDirname}",
|
"cwd0": "${fileDirname}",
|
||||||
"cwd1": "${workspaceFolder}/dist",
|
"cwd1": "${workspaceFolder}/dist",
|
||||||
"cwd2": "${env:HOME}${env:USERPROFILE}/.avdc",
|
"cwd2": "${env:HOME}${env:USERPROFILE}/.mdc",
|
||||||
"args0": ["-a","-p","J:/Downloads","-o","J:/log"],
|
"args0": ["-a","-p","J:/Downloads","-o","J:/log"],
|
||||||
"args1": ["-g","-m","3","-c","1","-d","0"],
|
"args1": ["-g","-m","3","-c","1","-d","0"],
|
||||||
"args2": ["-igd0", "-m3", "-p", "J:/JAV_output", "-q", "121220_001"],
|
"args2": ["-igd0", "-m3", "-p", "J:/output", "-q", "121220_001"],
|
||||||
"args3": ["-agd0","-m3", "-q", ".*","-p","J:/#JAV_output3"],
|
"args3": ["-agd0","-m3", "-q", ".*","-p","J:/#output"],
|
||||||
"args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/JAV_output"],
|
"args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/output"],
|
||||||
"args5": ["-gic1", "-d0", "-m1", "-o", "avlog", "-p", "J:/Downloads"],
|
"args5": ["-gic1", "-d0", "-m1", "-o", "avlog", "-p", "J:/Downloads"],
|
||||||
"args6": ["-z", "-o", "J:/log"]
|
"args6": ["-z", "-o", "J:/log"]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -349,8 +349,8 @@ def load_cookies(filename):
|
|||||||
path_search_order = (
|
path_search_order = (
|
||||||
Path.cwd() / filename,
|
Path.cwd() / filename,
|
||||||
Path.home() / filename,
|
Path.home() / filename,
|
||||||
Path.home() / f".avdc/{filename}",
|
Path.home() / f".mdc/{filename}",
|
||||||
Path.home() / f".local/share/avdc/{filename}"
|
Path.home() / f".local/share/mdc/{filename}"
|
||||||
)
|
)
|
||||||
cookies_filename = None
|
cookies_filename = None
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
|
|||||||
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
|
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
|
||||||
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
|
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
|
||||||
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
|
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
|
||||||
default_logdir = str(Path.home() / '.avlogs')
|
default_logdir = str(Path.home() / '.mlogs')
|
||||||
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
|
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
|
||||||
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
|
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
|
||||||
default folder for current user: '{default_logdir}'. Change default folder to an empty file,
|
default folder for current user: '{default_logdir}'. Change default folder to an empty file,
|
||||||
@@ -136,8 +136,8 @@ def dupe_stdout_to_logfile(logdir: str):
|
|||||||
return # Tip: to disable logging, replace the log directory with an empty regular file of the same name
|
return # Tip: to disable logging, replace the log directory with an empty regular file of the same name
|
||||||
abslog_dir = log_dir.resolve()
|
abslog_dir = log_dir.resolve()
|
||||||
log_tmstr = datetime.now().strftime("%Y%m%dT%H%M%S")
|
log_tmstr = datetime.now().strftime("%Y%m%dT%H%M%S")
|
||||||
logfile = abslog_dir / f'avdc_{log_tmstr}.txt'
|
logfile = abslog_dir / f'mdc_{log_tmstr}.txt'
|
||||||
errlog = abslog_dir / f'avdc_{log_tmstr}_err.txt'
|
errlog = abslog_dir / f'mdc_{log_tmstr}_err.txt'
|
||||||
|
|
||||||
sys.stdout = OutLogger(logfile)
|
sys.stdout = OutLogger(logfile)
|
||||||
sys.stderr = ErrLogger(errlog)
|
sys.stderr = ErrLogger(errlog)
|
||||||
@@ -169,35 +169,35 @@ def close_logfile(logdir: str):
|
|||||||
# 按月合并为单个月志,去年及以前的月志,今年4月以后将之按年合并为年志
|
# 按月合并为单个月志,去年及以前的月志,今年4月以后将之按年合并为年志
|
||||||
# 测试步骤:
|
# 测试步骤:
|
||||||
"""
|
"""
|
||||||
LOGDIR=/tmp/avlog
|
LOGDIR=/tmp/mlog
|
||||||
mkdir -p $LOGDIR
|
mkdir -p $LOGDIR
|
||||||
for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done
|
for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/mdc_${f}T235959.txt;done
|
||||||
for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done
|
for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/mdc_2021${f}T235959.txt;done
|
||||||
for f in {00..23};do;echo 20211001T$f>$LOGDIR/avdc_20211001T${f}5959.txt;done
|
for f in {00..23};do;echo 20211001T$f>$LOGDIR/mdc_20211001T${f}5959.txt;done
|
||||||
echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
|
echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
|
||||||
# 1932 files in /tmp/avlog
|
# 1932 files in /tmp/mlog
|
||||||
avdc -zgic1 -d0 -m3 -o $LOGDIR
|
mdc -zgic1 -d0 -m3 -o $LOGDIR
|
||||||
# python3 ./AV_Data_Capture.py -zgic1 -o $LOGDIR
|
# python3 ./Movie_Data_Capture.py -zgic1 -o $LOGDIR
|
||||||
ls $LOGDIR
|
ls $LOGDIR
|
||||||
# rm -rf $LOGDIR
|
# rm -rf $LOGDIR
|
||||||
"""
|
"""
|
||||||
today = datetime.today()
|
today = datetime.today()
|
||||||
# 第一步,合并到日。3天前的日志,文件名是同一天的合并为一份日志
|
# 第一步,合并到日。3天前的日志,文件名是同一天的合并为一份日志
|
||||||
for i in range(1):
|
for i in range(1):
|
||||||
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}T\d{6}$', f.stem, re.A)]
|
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}T\d{6}$', f.stem, re.A)]
|
||||||
if not txts or not len(txts):
|
if not txts or not len(txts):
|
||||||
break
|
break
|
||||||
e = [f for f in txts if '_err' in f.stem]
|
e = [f for f in txts if '_err' in f.stem]
|
||||||
txts.sort()
|
txts.sort()
|
||||||
tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
|
tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
|
||||||
deadline_day = f'avdc_{tmstr_3_days_ago}'
|
deadline_day = f'mdc_{tmstr_3_days_ago}'
|
||||||
day_merge = [f for f in txts if f.stem < deadline_day]
|
day_merge = [f for f in txts if f.stem < deadline_day]
|
||||||
if not day_merge or not len(day_merge):
|
if not day_merge or not len(day_merge):
|
||||||
break
|
break
|
||||||
cutday = len('T235959.txt') # cut length avdc_20201201|T235959.txt
|
cutday = len('T235959.txt') # cut length mdc_20201201|T235959.txt
|
||||||
for f in day_merge:
|
for f in day_merge:
|
||||||
try:
|
try:
|
||||||
day_file_name = str(f)[:-cutday] + '.txt' # avdc_20201201.txt
|
day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt
|
||||||
with open(day_file_name, 'a', encoding='utf-8') as m:
|
with open(day_file_name, 'a', encoding='utf-8') as m:
|
||||||
m.write(f.read_text(encoding='utf-8'))
|
m.write(f.read_text(encoding='utf-8'))
|
||||||
f.unlink(missing_ok=True)
|
f.unlink(missing_ok=True)
|
||||||
@@ -205,19 +205,19 @@ def close_logfile(logdir: str):
|
|||||||
pass
|
pass
|
||||||
# 第二步,合并到月
|
# 第二步,合并到月
|
||||||
for i in range(1): # 利用1次循环的break跳到第二步,避免大块if缩进或者使用goto语法
|
for i in range(1): # 利用1次循环的break跳到第二步,避免大块if缩进或者使用goto语法
|
||||||
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}$', f.stem, re.A)]
|
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}$', f.stem, re.A)]
|
||||||
if not txts or not len(txts):
|
if not txts or not len(txts):
|
||||||
break
|
break
|
||||||
txts.sort()
|
txts.sort()
|
||||||
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
|
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
|
||||||
deadline_month = f'avdc_{tmstr_3_month_ago}'
|
deadline_month = f'mdc_{tmstr_3_month_ago}'
|
||||||
month_merge = [f for f in txts if f.stem < deadline_month]
|
month_merge = [f for f in txts if f.stem < deadline_month]
|
||||||
if not month_merge or not len(month_merge):
|
if not month_merge or not len(month_merge):
|
||||||
break
|
break
|
||||||
tomonth = len('01.txt') # cut length avdc_202012|01.txt
|
tomonth = len('01.txt') # cut length mdc_202012|01.txt
|
||||||
for f in month_merge:
|
for f in month_merge:
|
||||||
try:
|
try:
|
||||||
month_file_name = str(f)[:-tomonth] + '.txt' # avdc_202012.txt
|
month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt
|
||||||
with open(month_file_name, 'a', encoding='utf-8') as m:
|
with open(month_file_name, 'a', encoding='utf-8') as m:
|
||||||
m.write(f.read_text(encoding='utf-8'))
|
m.write(f.read_text(encoding='utf-8'))
|
||||||
f.unlink(missing_ok=True)
|
f.unlink(missing_ok=True)
|
||||||
@@ -226,18 +226,18 @@ def close_logfile(logdir: str):
|
|||||||
# 第三步,月合并到年
|
# 第三步,月合并到年
|
||||||
if today.month < 4:
|
if today.month < 4:
|
||||||
return
|
return
|
||||||
mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{6}$', f.stem, re.A)]
|
mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
|
||||||
if not mons or not len(mons):
|
if not mons or not len(mons):
|
||||||
return
|
return
|
||||||
mons.sort()
|
mons.sort()
|
||||||
deadline_year = f'avdc_{today.year-1}13'
|
deadline_year = f'mdc_{today.year-1}13'
|
||||||
year_merge = [f for f in mons if f.stem < deadline_year]
|
year_merge = [f for f in mons if f.stem < deadline_year]
|
||||||
if not year_merge or not len(year_merge):
|
if not year_merge or not len(year_merge):
|
||||||
return
|
return
|
||||||
toyear = len('12.txt') # cut length avdc_2020|12.txt
|
toyear = len('12.txt') # cut length mdc_2020|12.txt
|
||||||
for f in year_merge:
|
for f in year_merge:
|
||||||
try:
|
try:
|
||||||
year_file_name = str(f)[:-toyear] + '.txt' # avdc_2020.txt
|
year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
|
||||||
with open(year_file_name, 'a', encoding='utf-8') as y:
|
with open(year_file_name, 'a', encoding='utf-8') as y:
|
||||||
y.write(f.read_text(encoding='utf-8'))
|
y.write(f.read_text(encoding='utf-8'))
|
||||||
f.unlink(missing_ok=True)
|
f.unlink(missing_ok=True)
|
||||||
@@ -500,8 +500,8 @@ def main():
|
|||||||
|
|
||||||
# Download Mapping Table, parallel version
|
# Download Mapping Table, parallel version
|
||||||
def fmd(f):
|
def fmd(f):
|
||||||
return ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/' + f,
|
return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
|
||||||
Path.home() / '.local' / 'share' / 'avdc' / f)
|
Path.home() / '.local' / 'share' / 'mdc' / f)
|
||||||
map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
|
map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
|
||||||
for k,v in map_tab:
|
for k,v in map_tab:
|
||||||
if v.exists():
|
if v.exists():
|
||||||
|
|||||||
@@ -41,8 +41,8 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
|||||||
iterate through all services and fetch the data
|
iterate through all services and fetch the data
|
||||||
"""
|
"""
|
||||||
|
|
||||||
actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_actor.xml'))
|
actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_actor.xml'))
|
||||||
info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_info.xml'))
|
info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_info.xml'))
|
||||||
|
|
||||||
func_mapping = {
|
func_mapping = {
|
||||||
"airav": airav.main,
|
"airav": airav.main,
|
||||||
@@ -238,7 +238,7 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
|||||||
continue
|
continue
|
||||||
if translate_value == "title":
|
if translate_value == "title":
|
||||||
title_dict = json.load(
|
title_dict = json.load(
|
||||||
open(str(Path.home() / '.local' / 'share' / 'avdc' / 'c_number.json'), 'r', encoding="utf-8"))
|
open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8"))
|
||||||
try:
|
try:
|
||||||
json_data[translate_value] = title_dict[number]
|
json_data[translate_value] = title_dict[number]
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -320,12 +320,12 @@ if __name__ == "__main__":
|
|||||||
# print(main('BANK-022'))
|
# print(main('BANK-022'))
|
||||||
# print(main('070116-197'))
|
# print(main('070116-197'))
|
||||||
# print(main('093021_539')) # 没有剧照 片商pacopacomama
|
# print(main('093021_539')) # 没有剧照 片商pacopacomama
|
||||||
print(main('FC2-2278260'))
|
#print(main('FC2-2278260'))
|
||||||
# print(main('FC2-735670'))
|
# print(main('FC2-735670'))
|
||||||
# print(main('FC2-1174949')) # not found
|
# print(main('FC2-1174949')) # not found
|
||||||
print(main('MVSD-439'))
|
#print(main('MVSD-439'))
|
||||||
# print(main('EHM0001')) # not found
|
# print(main('EHM0001')) # not found
|
||||||
print(main('FC2-2314275'))
|
print(main('FC2-2314275'))
|
||||||
# print(main('EBOD-646'))
|
# print(main('EBOD-646'))
|
||||||
# print(main('LOVE-262'))
|
# print(main('LOVE-262'))
|
||||||
print(main('ABP-890'))
|
#print(main('ABP-890'))
|
||||||
|
|||||||
@@ -333,7 +333,7 @@ def getStoryline_amazon(q_title, number, debug):
|
|||||||
# 删除无效cookies,无论是用户创建还是自动创建,以避免持续故障
|
# 删除无效cookies,无论是用户创建还是自动创建,以避免持续故障
|
||||||
cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
|
cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
|
||||||
# 自动创建的cookies文件放在搜索路径表的末端,最低优先级。有amazon.co.jp帐号的用户可以从浏览器导出cookie放在靠前搜索路径
|
# 自动创建的cookies文件放在搜索路径表的末端,最低优先级。有amazon.co.jp帐号的用户可以从浏览器导出cookie放在靠前搜索路径
|
||||||
ama_save = Path.home() / ".local/share/avdc/amazon.json"
|
ama_save = Path.home() / ".local/share/mdc/amazon.json"
|
||||||
ama_save.parent.mkdir(parents=True, exist_ok=True)
|
ama_save.parent.mkdir(parents=True, exist_ok=True)
|
||||||
ama_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
|
ama_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
|
||||||
|
|
||||||
|
|||||||
12
config.py
12
config.py
@@ -31,10 +31,10 @@ class Config:
|
|||||||
path_search_order = (
|
path_search_order = (
|
||||||
Path(path),
|
Path(path),
|
||||||
Path.cwd() / "config.ini",
|
Path.cwd() / "config.ini",
|
||||||
Path.home() / "avdc.ini",
|
Path.home() / "mdc.ini",
|
||||||
Path.home() / ".avdc.ini",
|
Path.home() / ".mdc.ini",
|
||||||
Path.home() / ".avdc/config.ini",
|
Path.home() / ".mdc/config.ini",
|
||||||
Path.home() / ".config/avdc/config.ini"
|
Path.home() / ".config/mdc/config.ini"
|
||||||
)
|
)
|
||||||
ini_path = None
|
ini_path = None
|
||||||
for p in path_search_order:
|
for p in path_search_order:
|
||||||
@@ -79,9 +79,9 @@ class Config:
|
|||||||
ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
|
ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
|
||||||
if re.search('n', ins, re.I):
|
if re.search('n', ins, re.I):
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
# 用户目录才确定具有写权限,因此选择 ~/avdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的
|
# 用户目录才确定具有写权限,因此选择 ~/mdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的
|
||||||
# 当前目录。目前版本也不再鼓励使用当前路径放置配置文件了,只是作为多配置文件的切换技巧保留。
|
# 当前目录。目前版本也不再鼓励使用当前路径放置配置文件了,只是作为多配置文件的切换技巧保留。
|
||||||
write_path = path_search_order[2] # Path.home() / "avdc.ini"
|
write_path = path_search_order[2] # Path.home() / "mdc.ini"
|
||||||
write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
|
write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
|
||||||
print("Config file '{}' created.".format(write_path.resolve()))
|
print("Config file '{}' created.".format(write_path.resolve()))
|
||||||
input("Press Enter key exit...")
|
input("Press Enter key exit...")
|
||||||
|
|||||||
Reference in New Issue
Block a user