Update to 6.0.1 #2

2021-12-17 23:39:02 +08:00
parent d44166d9ac
commit 0b0d0fcafc
7 changed files with 44 additions and 44 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -12,7 +12,7 @@
            "env": {
                "PYTHONIOENCODING": "utf-8"
            },
-            "program": "${workspaceFolder}/AV_Data_capture.py",
+            "program": "${workspaceFolder}/Movie_Data_Capture.py",
            "program1": "${workspaceFolder}/WebCrawler/javbus.py",
            "program2": "${workspaceFolder}/WebCrawler/javdb.py",
            "program3": "${workspaceFolder}/WebCrawler/xcity.py",
@@ -20,12 +20,12 @@
            "program5": "${workspaceFolder}/config.py",
            "cwd0": "${fileDirname}",
            "cwd1": "${workspaceFolder}/dist",
-            "cwd2": "${env:HOME}${env:USERPROFILE}/.avdc",
+            "cwd2": "${env:HOME}${env:USERPROFILE}/.mdc",
            "args0": ["-a","-p","J:/Downloads","-o","J:/log"],
            "args1": ["-g","-m","3","-c","1","-d","0"],
-            "args2": ["-igd0", "-m3", "-p", "J:/JAV_output", "-q", "121220_001"],
+            "args2": ["-igd0", "-m3", "-p", "J:/output", "-q", "121220_001"],
-            "args3": ["-agd0","-m3", "-q", ".*","-p","J:/#JAV_output3"],
+            "args3": ["-agd0","-m3", "-q", ".*","-p","J:/#output"],
-            "args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/JAV_output"],
+            "args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/output"],
            "args5": ["-gic1", "-d0", "-m1", "-o", "avlog", "-p", "J:/Downloads"],
            "args6": ["-z", "-o", "J:/log"]
        }
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -349,8 +349,8 @@ def load_cookies(filename):
    path_search_order = (
        Path.cwd() / filename,
        Path.home() / filename,
-        Path.home() / f".avdc/{filename}",
+        Path.home() / f".mdc/{filename}",
-        Path.home() / f".local/share/avdc/{filename}"
+        Path.home() / f".local/share/mdc/{filename}"
    )
    cookies_filename = None
    try:
--- a/Movie_Data_Capture.py
+++ b/Movie_Data_Capture.py
@@ -46,7 +46,7 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
    parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
    parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
    # parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
-    default_logdir = str(Path.home() / '.avlogs')
+    default_logdir = str(Path.home() / '.mlogs')
    parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
        help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
        default folder for current user: '{default_logdir}'. Change default folder to an empty file,
@@ -136,8 +136,8 @@ def dupe_stdout_to_logfile(logdir: str):
        return  # Tips for disabling logs by change directory to a same name empty regular file
    abslog_dir = log_dir.resolve()
    log_tmstr = datetime.now().strftime("%Y%m%dT%H%M%S")
-    logfile = abslog_dir / f'avdc_{log_tmstr}.txt'
+    logfile = abslog_dir / f'mdc_{log_tmstr}.txt'
-    errlog = abslog_dir / f'avdc_{log_tmstr}_err.txt'
+    errlog = abslog_dir / f'mdc_{log_tmstr}_err.txt'
    sys.stdout = OutLogger(logfile)
    sys.stderr = ErrLogger(errlog)
@@ -169,35 +169,35 @@ def close_logfile(logdir: str):
    # 按月合并为单个月志，去年及以前的月志，今年4月以后将之按年合并为年志
    # 测试步骤：
    """
-    LOGDIR=/tmp/avlog
+    LOGDIR=/tmp/mlog
    mkdir -p $LOGDIR
-    for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done
+    for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/mdc_${f}T235959.txt;done
-    for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done
+    for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/mdc_2021${f}T235959.txt;done
-    for f in {00..23};do;echo 20211001T$f>$LOGDIR/avdc_20211001T${f}5959.txt;done
+    for f in {00..23};do;echo 20211001T$f>$LOGDIR/mdc_20211001T${f}5959.txt;done
    echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
-    # 1932 files in /tmp/avlog
+    # 1932 files in /tmp/mlog
-    avdc -zgic1 -d0 -m3 -o $LOGDIR
+    mdc -zgic1 -d0 -m3 -o $LOGDIR
-    # python3 ./AV_Data_Capture.py -zgic1 -o $LOGDIR
+    # python3 ./Movie_Data_Capture.py -zgic1 -o $LOGDIR
    ls $LOGDIR
    # rm -rf $LOGDIR
    """
    today = datetime.today()
    # 第一步，合并到日。3天前的日志，文件名是同一天的合并为一份日志
    for i in range(1):
-        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}T\d{6}$', f.stem, re.A)]
+        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}T\d{6}$', f.stem, re.A)]
        if not txts or not len(txts):
            break
        e = [f for f in txts if '_err' in f.stem]
        txts.sort()
        tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
-        deadline_day = f'avdc_{tmstr_3_days_ago}'
+        deadline_day = f'mdc_{tmstr_3_days_ago}'
        day_merge = [f for f in txts if f.stem < deadline_day]
        if not day_merge or not len(day_merge):
            break
-        cutday = len('T235959.txt')  # cut length avdc_20201201|T235959.txt
+        cutday = len('T235959.txt')  # cut length mdc_20201201|T235959.txt
        for f in day_merge:
            try:
-                day_file_name = str(f)[:-cutday] + '.txt' # avdc_20201201.txt
+                day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt
                with open(day_file_name, 'a', encoding='utf-8') as m:
                    m.write(f.read_text(encoding='utf-8'))
                f.unlink(missing_ok=True)
@@ -205,19 +205,19 @@ def close_logfile(logdir: str):
                pass
    # 第二步，合并到月
    for i in range(1):  # 利用1次循环的break跳到第二步，避免大块if缩进或者使用goto语法
-        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}$', f.stem, re.A)]
+        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{8}$', f.stem, re.A)]
        if not txts or not len(txts):
            break
        txts.sort()
        tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
-        deadline_month = f'avdc_{tmstr_3_month_ago}'
+        deadline_month = f'mdc_{tmstr_3_month_ago}'
        month_merge = [f for f in txts if f.stem < deadline_month]
        if not month_merge or not len(month_merge):
            break
-        tomonth = len('01.txt')  # cut length avdc_202012|01.txt
+        tomonth = len('01.txt')  # cut length mdc_202012|01.txt
        for f in month_merge:
            try:
-                month_file_name = str(f)[:-tomonth] + '.txt' # avdc_202012.txt
+                month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt
                with open(month_file_name, 'a', encoding='utf-8') as m:
                    m.write(f.read_text(encoding='utf-8'))
                f.unlink(missing_ok=True)
@@ -226,18 +226,18 @@ def close_logfile(logdir: str):
    # 第三步，月合并到年
    if today.month < 4:
        return
-    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{6}$', f.stem, re.A)]
+    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
    if not mons or not len(mons):
        return
    mons.sort()
-    deadline_year = f'avdc_{today.year-1}13'
+    deadline_year = f'mdc_{today.year-1}13'
    year_merge = [f for f in mons if f.stem < deadline_year]
    if not year_merge or not len(year_merge):
        return
-    toyear = len('12.txt')   # cut length avdc_2020|12.txt
+    toyear = len('12.txt')   # cut length mdc_2020|12.txt
    for f in year_merge:
        try:
-            year_file_name = str(f)[:-toyear] + '.txt' # avdc_2020.txt
+            year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
            with open(year_file_name, 'a', encoding='utf-8') as y:
                y.write(f.read_text(encoding='utf-8'))
            f.unlink(missing_ok=True)
@@ -500,8 +500,8 @@ def main():
    # Download Mapping Table, parallel version
    def fmd(f):
-        return ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/' + f,
+        return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
-                Path.home() / '.local' / 'share' / 'avdc' / f)
+                Path.home() / '.local' / 'share' / 'mdc' / f)
    map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
    for k,v in map_tab:
        if v.exists():
--- a/WebCrawler/init.py
+++ b/WebCrawler/init.py
@@ -41,8 +41,8 @@ def get_data_from_json(file_number, oCC):  # 从JSON返回元数据
    iterate through all services and fetch the data
    """
-    actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_actor.xml'))
+    actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_actor.xml'))
-    info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_info.xml'))
+    info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_info.xml'))
    func_mapping = {
        "airav": airav.main,
@@ -238,7 +238,7 @@ def get_data_from_json(file_number, oCC):  # 从JSON返回元数据
                continue
            if translate_value == "title":
                title_dict = json.load(
-                    open(str(Path.home() / '.local' / 'share' / 'avdc' / 'c_number.json'), 'r', encoding="utf-8"))
+                    open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8"))
                try:
                    json_data[translate_value] = title_dict[number]
                    continue
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -320,12 +320,12 @@ if __name__ == "__main__":
    # print(main('BANK-022'))
    # print(main('070116-197'))
    # print(main('093021_539'))  # 没有剧照 片商pacopacomama
-    print(main('FC2-2278260'))
+    #print(main('FC2-2278260'))
    # print(main('FC2-735670'))
    # print(main('FC2-1174949')) # not found
-    print(main('MVSD-439'))
+    #print(main('MVSD-439'))
    # print(main('EHM0001')) # not found
    print(main('FC2-2314275'))
    # print(main('EBOD-646'))
    # print(main('LOVE-262'))
-    print(main('ABP-890'))
+    #print(main('ABP-890'))
--- a/WebCrawler/storyline.py
+++ b/WebCrawler/storyline.py
@@ -333,7 +333,7 @@ def getStoryline_amazon(q_title, number, debug):
            # 删除无效cookies，无论是用户创建还是自动创建，以避免持续故障
            cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
            # 自动创建的cookies文件放在搜索路径表的末端，最低优先级。有amazon.co.jp帐号的用户可以从浏览器导出cookie放在靠前搜索路径
-            ama_save = Path.home() / ".local/share/avdc/amazon.json"
+            ama_save = Path.home() / ".local/share/mdc/amazon.json"
            ama_save.parent.mkdir(parents=True, exist_ok=True)
            ama_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
--- a/config.py
+++ b/config.py
@@ -31,10 +31,10 @@ class Config:
        path_search_order = (
            Path(path),
            Path.cwd() / "config.ini",
-            Path.home() / "avdc.ini",
+            Path.home() / "mdc.ini",
-            Path.home() / ".avdc.ini",
+            Path.home() / ".mdc.ini",
-            Path.home() / ".avdc/config.ini",
+            Path.home() / ".mdc/config.ini",
-            Path.home() / ".config/avdc/config.ini"
+            Path.home() / ".config/mdc/config.ini"
        )
        ini_path = None
        for p in path_search_order:
@@ -79,9 +79,9 @@ class Config:
            ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
            if re.search('n', ins, re.I):
                sys.exit(2)
-            # 用户目录才确定具有写权限，因此选择 ~/avdc.ini 作为配置文件生成路径，而不是有可能并没有写权限的
+            # 用户目录才确定具有写权限，因此选择 ~/mdc.ini 作为配置文件生成路径，而不是有可能并没有写权限的
            # 当前目录。目前版本也不再鼓励使用当前路径放置配置文件了，只是作为多配置文件的切换技巧保留。
-            write_path = path_search_order[2]   # Path.home() / "avdc.ini"
+            write_path = path_search_order[2]   # Path.home() / "mdc.ini"
            write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
            print("Config file '{}' created.".format(write_path.resolve()))
            input("Press Enter key exit...")