Update to 6.0.1 #2
This commit is contained in:
10
.vscode/launch.json
vendored
10
.vscode/launch.json
vendored
@@ -12,7 +12,7 @@
|
||||
"env": {
|
||||
"PYTHONIOENCODING": "utf-8"
|
||||
},
|
||||
"program": "${workspaceFolder}/AV_Data_capture.py",
|
||||
"program": "${workspaceFolder}/Movie_Data_capture.py",
|
||||
"program1": "${workspaceFolder}/WebCrawler/javbus.py",
|
||||
"program2": "${workspaceFolder}/WebCrawler/javdb.py",
|
||||
"program3": "${workspaceFolder}/WebCrawler/xcity.py",
|
||||
@@ -20,12 +20,12 @@
|
||||
"program5": "${workspaceFolder}/config.py",
|
||||
"cwd0": "${fileDirname}",
|
||||
"cwd1": "${workspaceFolder}/dist",
|
||||
"cwd2": "${env:HOME}${env:USERPROFILE}/.avdc",
|
||||
"cwd2": "${env:HOME}${env:USERPROFILE}/.mdc",
|
||||
"args0": ["-a","-p","J:/Downloads","-o","J:/log"],
|
||||
"args1": ["-g","-m","3","-c","1","-d","0"],
|
||||
"args2": ["-igd0", "-m3", "-p", "J:/JAV_output", "-q", "121220_001"],
|
||||
"args3": ["-agd0","-m3", "-q", ".*","-p","J:/#JAV_output3"],
|
||||
"args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/JAV_output"],
|
||||
"args2": ["-igd0", "-m3", "-p", "J:/output", "-q", "121220_001"],
|
||||
"args3": ["-agd0","-m3", "-q", ".*","-p","J:/#output"],
|
||||
"args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/output"],
|
||||
"args5": ["-gic1", "-d0", "-m1", "-o", "avlog", "-p", "J:/Downloads"],
|
||||
"args6": ["-z", "-o", "J:/log"]
|
||||
}
|
||||
|
||||
@@ -349,8 +349,8 @@ def load_cookies(filename):
|
||||
path_search_order = (
|
||||
Path.cwd() / filename,
|
||||
Path.home() / filename,
|
||||
Path.home() / f".avdc/{filename}",
|
||||
Path.home() / f".local/share/avdc/{filename}"
|
||||
Path.home() / f".mdc/{filename}",
|
||||
Path.home() / f".local/share/mdc/{filename}"
|
||||
)
|
||||
cookies_filename = None
|
||||
try:
|
||||
|
||||
@@ -46,7 +46,7 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
|
||||
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
|
||||
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
|
||||
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
|
||||
default_logdir = str(Path.home() / '.avlogs')
|
||||
default_logdir = str(Path.home() / '.mlogs')
|
||||
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
|
||||
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
|
||||
default folder for current user: '{default_logdir}'. Change default folder to an empty file,
|
||||
@@ -136,8 +136,8 @@ def dupe_stdout_to_logfile(logdir: str):
|
||||
return # Tip: logging can be disabled by replacing the log directory with an empty regular file of the same name
|
||||
abslog_dir = log_dir.resolve()
|
||||
log_tmstr = datetime.now().strftime("%Y%m%dT%H%M%S")
|
||||
logfile = abslog_dir / f'avdc_{log_tmstr}.txt'
|
||||
errlog = abslog_dir / f'avdc_{log_tmstr}_err.txt'
|
||||
logfile = abslog_dir / f'mdc_{log_tmstr}.txt'
|
||||
errlog = abslog_dir / f'mdc_{log_tmstr}_err.txt'
|
||||
|
||||
sys.stdout = OutLogger(logfile)
|
||||
sys.stderr = ErrLogger(errlog)
|
||||
@@ -169,35 +169,35 @@ def close_logfile(logdir: str):
|
||||
# 按月合并为单个月志,去年及以前的月志,今年4月以后将之按年合并为年志
|
||||
# 测试步骤:
|
||||
"""
|
||||
LOGDIR=/tmp/avlog
|
||||
LOGDIR=/tmp/mlog
|
||||
mkdir -p $LOGDIR
|
||||
for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done
|
||||
for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done
|
||||
for f in {00..23};do;echo 20211001T$f>$LOGDIR/avdc_20211001T${f}5959.txt;done
|
||||
for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/mdc_${f}T235959.txt;done
|
||||
for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/mdc_2021${f}T235959.txt;done
|
||||
for f in {00..23};do;echo 20211001T$f>$LOGDIR/mdc_20211001T${f}5959.txt;done
|
||||
echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
|
||||
# 1932 files in /tmp/avlog
|
||||
avdc -zgic1 -d0 -m3 -o $LOGDIR
|
||||
# python3 ./AV_Data_Capture.py -zgic1 -o $LOGDIR
|
||||
# 1932 files in /tmp/mlog
|
||||
mdc -zgic1 -d0 -m3 -o $LOGDIR
|
||||
# python3 ./Movie_Data_Capture.py -zgic1 -o $LOGDIR
|
||||
ls $LOGDIR
|
||||
# rm -rf $LOGDIR
|
||||
"""
|
||||
today = datetime.today()
|
||||
# 第一步,合并到日。3天前的日志,文件名是同一天的合并为一份日志
|
||||
for i in range(1):
|
||||
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}T\d{6}$', f.stem, re.A)]
|
||||
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}T\d{6}$', f.stem, re.A)]
|
||||
if not txts or not len(txts):
|
||||
break
|
||||
e = [f for f in txts if '_err' in f.stem]
|
||||
txts.sort()
|
||||
tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
|
||||
deadline_day = f'avdc_{tmstr_3_days_ago}'
|
||||
deadline_day = f'mdc_{tmstr_3_days_ago}'
|
||||
day_merge = [f for f in txts if f.stem < deadline_day]
|
||||
if not day_merge or not len(day_merge):
|
||||
break
|
||||
cutday = len('T235959.txt') # cut length avdc_20201201|T235959.txt
|
||||
cutday = len('T235959.txt') # cut length mdc_20201201|T235959.txt
|
||||
for f in day_merge:
|
||||
try:
|
||||
day_file_name = str(f)[:-cutday] + '.txt' # avdc_20201201.txt
|
||||
day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt
|
||||
with open(day_file_name, 'a', encoding='utf-8') as m:
|
||||
m.write(f.read_text(encoding='utf-8'))
|
||||
f.unlink(missing_ok=True)
|
||||
@@ -205,19 +205,19 @@ def close_logfile(logdir: str):
|
||||
pass
|
||||
# 第二步,合并到月
|
||||
for i in range(1): # 利用1次循环的break跳到第二步,避免大块if缩进或者使用goto语法
|
||||
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}$', f.stem, re.A)]
|
||||
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}$', f.stem, re.A)]
|
||||
if not txts or not len(txts):
|
||||
break
|
||||
txts.sort()
|
||||
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
|
||||
deadline_month = f'avdc_{tmstr_3_month_ago}'
|
||||
deadline_month = f'mdc_{tmstr_3_month_ago}'
|
||||
month_merge = [f for f in txts if f.stem < deadline_month]
|
||||
if not month_merge or not len(month_merge):
|
||||
break
|
||||
tomonth = len('01.txt') # cut length avdc_202012|01.txt
|
||||
tomonth = len('01.txt') # cut length mdc_202012|01.txt
|
||||
for f in month_merge:
|
||||
try:
|
||||
month_file_name = str(f)[:-tomonth] + '.txt' # avdc_202012.txt
|
||||
month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt
|
||||
with open(month_file_name, 'a', encoding='utf-8') as m:
|
||||
m.write(f.read_text(encoding='utf-8'))
|
||||
f.unlink(missing_ok=True)
|
||||
@@ -226,18 +226,18 @@ def close_logfile(logdir: str):
|
||||
# 第三步,月合并到年
|
||||
if today.month < 4:
|
||||
return
|
||||
mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{6}$', f.stem, re.A)]
|
||||
mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
|
||||
if not mons or not len(mons):
|
||||
return
|
||||
mons.sort()
|
||||
deadline_year = f'avdc_{today.year-1}13'
|
||||
deadline_year = f'mdc_{today.year-1}13'
|
||||
year_merge = [f for f in mons if f.stem < deadline_year]
|
||||
if not year_merge or not len(year_merge):
|
||||
return
|
||||
toyear = len('12.txt') # cut length avdc_2020|12.txt
|
||||
toyear = len('12.txt') # cut length mdc_2020|12.txt
|
||||
for f in year_merge:
|
||||
try:
|
||||
year_file_name = str(f)[:-toyear] + '.txt' # avdc_2020.txt
|
||||
year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
|
||||
with open(year_file_name, 'a', encoding='utf-8') as y:
|
||||
y.write(f.read_text(encoding='utf-8'))
|
||||
f.unlink(missing_ok=True)
|
||||
@@ -500,8 +500,8 @@ def main():
|
||||
|
||||
# Download Mapping Table, parallel version
|
||||
def fmd(f):
|
||||
return ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/' + f,
|
||||
Path.home() / '.local' / 'share' / 'avdc' / f)
|
||||
return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
|
||||
Path.home() / '.local' / 'share' / 'mdc' / f)
|
||||
map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
|
||||
for k,v in map_tab:
|
||||
if v.exists():
|
||||
|
||||
@@ -41,8 +41,8 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
||||
iterate through all services and fetch the data
|
||||
"""
|
||||
|
||||
actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_actor.xml'))
|
||||
info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_info.xml'))
|
||||
actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_actor.xml'))
|
||||
info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_info.xml'))
|
||||
|
||||
func_mapping = {
|
||||
"airav": airav.main,
|
||||
@@ -238,7 +238,7 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
||||
continue
|
||||
if translate_value == "title":
|
||||
title_dict = json.load(
|
||||
open(str(Path.home() / '.local' / 'share' / 'avdc' / 'c_number.json'), 'r', encoding="utf-8"))
|
||||
open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8"))
|
||||
try:
|
||||
json_data[translate_value] = title_dict[number]
|
||||
continue
|
||||
|
||||
@@ -320,12 +320,12 @@ if __name__ == "__main__":
|
||||
# print(main('BANK-022'))
|
||||
# print(main('070116-197'))
|
||||
# print(main('093021_539')) # 没有剧照 片商pacopacomama
|
||||
print(main('FC2-2278260'))
|
||||
#print(main('FC2-2278260'))
|
||||
# print(main('FC2-735670'))
|
||||
# print(main('FC2-1174949')) # not found
|
||||
print(main('MVSD-439'))
|
||||
#print(main('MVSD-439'))
|
||||
# print(main('EHM0001')) # not found
|
||||
print(main('FC2-2314275'))
|
||||
# print(main('EBOD-646'))
|
||||
# print(main('LOVE-262'))
|
||||
print(main('ABP-890'))
|
||||
#print(main('ABP-890'))
|
||||
|
||||
@@ -333,7 +333,7 @@ def getStoryline_amazon(q_title, number, debug):
|
||||
# 删除无效cookies,无论是用户创建还是自动创建,以避免持续故障
|
||||
cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
|
||||
# 自动创建的cookies文件放在搜索路径表的末端,最低优先级。有amazon.co.jp帐号的用户可以从浏览器导出cookie放在靠前搜索路径
|
||||
ama_save = Path.home() / ".local/share/avdc/amazon.json"
|
||||
ama_save = Path.home() / ".local/share/mdc/amazon.json"
|
||||
ama_save.parent.mkdir(parents=True, exist_ok=True)
|
||||
ama_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
|
||||
|
||||
|
||||
12
config.py
12
config.py
@@ -31,10 +31,10 @@ class Config:
|
||||
path_search_order = (
|
||||
Path(path),
|
||||
Path.cwd() / "config.ini",
|
||||
Path.home() / "avdc.ini",
|
||||
Path.home() / ".avdc.ini",
|
||||
Path.home() / ".avdc/config.ini",
|
||||
Path.home() / ".config/avdc/config.ini"
|
||||
Path.home() / "mdc.ini",
|
||||
Path.home() / ".mdc.ini",
|
||||
Path.home() / ".mdc/config.ini",
|
||||
Path.home() / ".config/mdc/config.ini"
|
||||
)
|
||||
ini_path = None
|
||||
for p in path_search_order:
|
||||
@@ -79,9 +79,9 @@ class Config:
|
||||
ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
|
||||
if re.search('n', ins, re.I):
|
||||
sys.exit(2)
|
||||
# 用户目录才确定具有写权限,因此选择 ~/avdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的
|
||||
# 用户目录才确定具有写权限,因此选择 ~/mdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的
|
||||
# 当前目录。目前版本也不再鼓励使用当前路径放置配置文件了,只是作为多配置文件的切换技巧保留。
|
||||
write_path = path_search_order[2] # Path.home() / "avdc.ini"
|
||||
write_path = path_search_order[2] # Path.home() / "mdc.ini"
|
||||
write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
|
||||
print("Config file '{}' created.".format(write_path.resolve()))
|
||||
input("Press Enter key exit...")
|
||||
|
||||
Reference in New Issue
Block a user