Update 3.2

2020-04-15 15:15:24 +08:00
parent 92e631ff66
commit 1f4b7e6633
8 changed files with 191 additions and 72 deletions
@@ -13,58 +13,16 @@ import io
 # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 # sys.setdefaultencoding('utf-8')

-config_file='config.ini'
+config_file = 'config.ini'
 config = ConfigParser()

 if os.path.exists(config_file):
    try:
        config.read(config_file, encoding='UTF-8')
-    except:
+    except Exception as e:
+        print('[-]' + str(e))  # str() needed: concatenating str with an exception object raises TypeError
        print('[-]Config.ini read failed! Please use the offical file!')
-else:
-    print('[+]config.ini: not found, creating...',end='')
-    with open("config.ini", "wt", encoding='UTF-8') as code:
-        file_text = """[common]
-main_mode=1
-failed_output_folder=failed
-success_output_folder=JAV_output
-soft_link=0

-[proxy]
-proxy=192.168.2.2:1080
-timeout=10
-retry=3
-
-[Name_Rule]
-location_rule=actor+'/'+number
-naming_rule=number+'-'+title
-
-[update]
-update_check=1
-
-[media]
-media_warehouse=emby
-#emby or plex or kodi ,emby=jellyfin
-
-[escape]
-literals=\()/
-folders=failed,JAV_output
-
-[debug_mode]
-switch=0
-
-"""
-        print(file_text, file=code)
-    time.sleep(2)
-    print('.')
-    print('[+]config.ini: created!')
-    print('[+]Please restart the program!')
-    time.sleep(4)
-    os._exit(0)
-    try:
-        config.read(config_file, encoding='UTF-8')
-    except:
-        print('[-]Config.ini read failed! Please use the offical file!')

 def get_network_settings():
    try:
@@ -33,7 +33,7 @@ def argparse_function(switch):
    parser = argparse.ArgumentParser()
    parser.add_argument("file", default='',nargs='?', help="Single Movie file path.")
    parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.")
-    parser.add_argument("-e", "--exit", default='1', nargs='?', help="Exit Switch 1:Press enter key to exit.  2:Auto exit.")
+    parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true", help="Auto exit after program complete")
    args = parser.parse_args()
    if switch == 1:
        if args.file == '':
@@ -41,7 +41,7 @@ def argparse_function(switch):
    elif switch == 2:
        return args.config
    elif switch == 3:
-        return args.exit
+        return args.autoexit

 def movie_lists(root, escape_folder):
    for folder in escape_folder:
@@ -100,10 +100,10 @@ def getNumber(filepath,absolute_path = False):


 if __name__ == '__main__':
-    version = '3.1.2'
+    version = '3.2'
    config_file = argparse_function(2)
    config = ConfigParser()
-    config.read(config_file, encoding='UTF-8')
+    config.read(argparse_function(2), encoding='UTF-8')
    success_folder = config['common']['success_output_folder']
    failed_folder = config['common']['failed_output_folder']  # 失败输出目录
    escape_folder = config['escape']['folders']  # 多级目录刮削需要排除的目录
@@ -148,7 +148,7 @@ if __name__ == '__main__':
            core_main(i, getNumber(i), config_file=config_file)
            print("[*]======================================================")
        except Exception as e:  # 番号提取异常
-            print('[-]' + i + ' ERRPR :')
+            print('[-]' + i + ' ERROR :')
            print('[-]',e)
            if config['common']['soft_link'] == '1':
                print('[-]Link', i, 'to failed folder')
@@ -164,6 +164,6 @@ if __name__ == '__main__':
    CEF(success_folder)
    CEF(failed_folder)
    print("[+]All finished!!!")
-    if argparse_function(3) == '2':
+    if argparse_function(3) == True:
        os._exit(0)
-    input("[+][+]Press enter key exit, you can check the error messge before you exit.")
+    input("[+][+]Press enter key exit, you can check the error messge before you exit.")
@@ -173,14 +173,12 @@ update_check=1
 0为关闭，1为开启，不建议关闭

 ---
-### 媒体库选择 
+### 刮削网站优先级
 ```
-[media]
-media_warehouse=emby
-#emby plex kodi
+[priority]
+website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321
 ```
-可选择emby, plex, kodi
-如果是PLEX，请安装插件：```XBMCnfoMoviesImporter```
+用```,```英文逗号分开网站，刮削顺序从左往右

 ---
 ### 排除指定字符和目录
@@ -267,12 +265,12 @@ AV_Data_Capture xxx-xxx-xxx.mp4
 ```
 AV_Data_Capture -c config_other.ini
 ```
-### 程序退出选择参数
-默认值为```1```
+### 程序自动退出
+
 ```
-AV_Data_Capture -e 1
+AV_Data_Capture -a
 ```
-1为默认值，刮削结束后要按下回车键程序才会结束，如果是2，程序刮削完毕后会自动结束程序
+输入参数即可在刮削结束后自动结束程序

 ## 多集影片处理
 **建议使用视频合并合并为一个视频文件**
@@ -16,9 +16,12 @@ naming_rule=number+'-'+title
 [update]
 update_check=1

+[priority]
+website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321
+
 [escape]
 literals=\()/
 folders=failed,JAV_output

 [debug_mode]
-switch=0
+switch=0
@@ -20,7 +20,7 @@ import javbus
 import javdb
 import fanza
 import jav321
-
+import xcity

 # =====================本地文件处理===========================

@@ -47,7 +47,7 @@ def CreatFailedFolder(failed_folder):
            return 


-def getDataFromJSON(file_number, filepath, failed_folder):  # 从JSON返回元数据
+def getDataFromJSON(file_number, filepath, failed_folder, sources):  # 从JSON返回元数据
    """
    iterate through all services and fetch the data
    """
@@ -60,10 +60,11 @@ def getDataFromJSON(file_number, filepath, failed_folder):  # 从JSON返回元
        "javbus": javbus.main,
        "mgstage": mgstage.main,
        "jav321": jav321.main,
+        "xcity" : xcity.main,
    }

    # default fetch order list, from the begining to the end
-    sources = ["javbus", "javdb", "fanza", "mgstage", "fc2",  "avsox", "jav321"]
+    sources = sources.split(',')

    # if the input file name matches centain rules,
    # move some web service to the begining of the list
@@ -308,7 +309,7 @@ def PrintFiles(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fai
            print("  <cover>" + cover + "</cover>", file=code)
            print("  <website>" + website + "</website>", file=code)
            print("</movie>", file=code)
-            print("[+]Writeed!          " + path + "/" + number + c_word + ".nfo")
+            print("[+]Wrote!            " + path + "/" + number + c_word + ".nfo")
    except IOError as e:
        print("[-]Write Failed!")
        print(e)
@@ -430,9 +431,10 @@ def core_main(file_path, number_th, config_file):
    program_mode = Config['common']['main_mode']  # 运行模式
    failed_folder = Config['common']['failed_output_folder']  # 失败输出目录
    success_folder = Config['common']['success_output_folder']  # 成功输出目录
+    sources = Config['priority']['website'] # 网站优先级
    filepath = file_path  # 影片的路径
    number = number_th
-    json_data = getDataFromJSON(number, filepath, failed_folder)  # 定义番号
+    json_data = getDataFromJSON(number, filepath, failed_folder, sources)  # 定义番号
    if json_data["number"] != number:
        # fix issue #119
        # the root cause is we normalize the search id
@@ -77,7 +77,7 @@ def getDirector(a):
    return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
 def getOutline(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('//p/text()')).strip(" ['']")
+    result = str(html.xpath('//p/text()')).strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
    return result
 def main(number2):
    number=number2.upper()
@@ -108,4 +108,5 @@ def main(number2):
    return js
    #print(htmlcode)

-#print(main('SIRO-3607'))
+if __name__ == '__main__':
+    print(main('SIRO-4149'))
@@ -1,5 +1,5 @@
 {
-	"version": "3.1.2",
-	"version_show": "3.1.2",
+	"version": "3.2",
+	"version_show": "3.2",
 	"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
 }
@@ -0,0 +1,157 @@
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+
+
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(a):
+    """Return the first text node of the ``program_detail_title`` element.
+
+    ``a`` is the HTML of a detail page. An IndexError propagates when the
+    XPath matches nothing.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    titles = tree.xpath('//*[@id="program_detail_title"]/text()')
+    return titles[0]
+
+
+def getActor(a):
+    """Return the first text matched by the actor XPath in the HTML ``a``.
+
+    An IndexError propagates when the XPath matches nothing.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    names = tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[3]/a/text()')
+    return names[0]
+
+
+def getActorPhoto(actor):
+    """Map every comma-separated name in ``actor`` to an empty string.
+
+    No photo lookup is performed here; each name simply gets ``''`` as value.
+    """
+    return {name: '' for name in actor.split(',')}
+
+
+def getStudio(a):
+    """Return the studio text extracted from the detail-page HTML ``a``.
+
+    Two XPath queries are evaluated; the stringified result of each is trimmed
+    of list brackets and quotes, the two pieces are concatenated, and leftover
+    ``+``, list separators and double quotes are removed.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    queries = (
+        '//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()',
+        '//strong[contains(text(),"片商")]/../following-sibling::span/a/text()',
+    )
+    combined = ''.join(str(tree.xpath(query)).strip(" ['']") for query in queries)
+    combined = combined.strip('+')
+    return combined.replace("', '", '').replace('"', '')
+
+
+def getRuntime(a):
+    """Return the first run of digits found in the runtime field of ``a``.
+
+    ``a`` is the HTML of a detail page. Returns ``''`` when the field is
+    missing or contains no digits.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    field = str(tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')).strip(" ['']")
+    # Raw string: '\d' in a plain literal is an invalid escape sequence.
+    digits = re.search(r'\d+', field)
+    return digits.group() if digits else ''
+
+
+def getLabel(a):
+    """Return the label text extracted from the detail-page HTML ``a``.
+
+    The XPath result list is stringified and its brackets, quotes and spaces
+    are trimmed from both ends, so an empty match yields ``''``.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    matches = tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')
+    return str(matches).strip(" ['']")
+
+
+def getNum(a):
+    """Return the text of the ``hinban`` element in the HTML ``a``.
+
+    The XPath result list is stringified and its brackets, quotes and spaces
+    are trimmed from both ends, so an empty match yields ``''``.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    matches = tree.xpath('//*[@id="hinban"]/text()')
+    return str(matches).strip(" ['']")
+
+
+def getYear(getRelease):
+    """Return the first four-digit run found in ``getRelease``.
+
+    The argument is returned unchanged when it contains no four-digit run or
+    is not a string.
+    """
+    try:
+        # Raw string: '\d' in a plain literal is an invalid escape sequence.
+        return re.search(r'\d{4}', getRelease).group()
+    except (AttributeError, TypeError):
+        # AttributeError: no match (search returned None).
+        # TypeError: the argument is not a string.
+        return getRelease
+
+
+def getRelease(a):
+    """Return the first ``YYYY/MM/DD`` date found in the release field of ``a``.
+
+    ``a`` is the HTML of a detail page. Returns ``''`` when the field is
+    missing or holds no date in that format.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    field = str(tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')).strip(" ['']")
+    # Raw string: '\d' in a plain literal is an invalid escape sequence.
+    date = re.search(r'\d{4}/\d{2}/\d{2}', field)
+    return date.group() if date else ''
+
+
+def getTag(a):
+    """Return the list of tag texts from the detail-page HTML ``a``.
+
+    Newline and tab characters are removed from every tag; an empty match
+    yields an empty list.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    raw_tags = tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[6]/a/text()')
+    return [tag.replace(u'\n', '').replace(u'\t', '') for tag in raw_tags]
+
+
+def getCover_small(a, index=0):
+    """Return the ``src`` of the ``index``-th cover thumbnail in the HTML ``a``.
+
+    Several images can match, so the caller selects one by position instead of
+    always taking the first. ``https:`` is prepended when the value does not
+    already contain ``https``. An IndexError propagates when ``index`` is out
+    of range.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    src = tree.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
+    return src if 'https' in src else 'https:' + src
+
+
+def getCover(htmlcode):
+    """Return the cover image URL from the detail-page HTML ``htmlcode``.
+
+    Returns ``''`` when the page has no cover link, rather than the
+    meaningless ``'https:'``. A link that already carries a scheme is returned
+    as is; otherwise ``https:`` is prepended.
+    """
+    tree = etree.fromstring(htmlcode, etree.HTMLParser())
+    hrefs = tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')
+    if not hrefs:
+        return ''
+    href = str(hrefs[0])
+    # Avoid producing 'https:https://...' for links that are already absolute.
+    if href.startswith(('http://', 'https://')):
+        return href
+    return 'https:' + href
+
+
+def getDirector(a):
+    """Return the text of the ``program_detail_director`` element in ``a``.
+
+    The XPath result list is stringified, trimmed of brackets, quotes and
+    spaces, and the escaped newline and tab sequences that the list's string
+    form contains are removed.
+    """
+    tree = etree.fromstring(a, etree.HTMLParser())
+    matches = tree.xpath('//*[@id="program_detail_director"]/text()')
+    director = str(matches).strip(" ['']")
+    director = director.replace(u'\\n', '')
+    return director.replace(u'\\t', '')
+
+
+def getOutline(htmlcode):
+    """Return the outline text from the detail-page HTML ``htmlcode``.
+
+    The XPath result list is stringified and trimmed of brackets, quotes and
+    spaces. Every backslash followed by word characters ending in digits is
+    then removed from that string.
+    """
+    tree = etree.fromstring(htmlcode, etree.HTMLParser())
+    outline = str(tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')).strip(" ['']")
+    # Raw string with the same pattern as before: a literal backslash, then \w*\d+.
+    # re.sub on a str with a constant pattern cannot fail, so no try/except is needed.
+    return re.sub(r'\\\w*\d+', '', outline)
+
+
+def main(number):
+    """Look up ``number`` on xcity.jp and return the scraped metadata as JSON.
+
+    The number is upper-cased and sent without dashes to the site's search
+    page. The first result's detail page is then fetched and its fields are
+    collected into a dict. Any exception raised while fetching or parsing
+    yields ``{"title": ""}`` instead.
+
+    Returns a JSON string with non-ASCII characters kept, keys sorted and a
+    four-space indent.
+    """
+    try:
+        number = number.upper()
+        query_result = get_html(
+            'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q='
+            + number.replace('-', '') + '&sg=main&num=30')
+        html = etree.fromstring(query_result, etree.HTMLParser())
+        # Relative link of the first search hit; an IndexError here means no hit.
+        urls = html.xpath("//table[contains(@class, 'resultList')]/tr[2]/td[1]/a/@href")[0]
+        detail_url = 'https://xcity.jp' + urls
+        detail_page = get_html(detail_url)
+        # Parse once and reuse: both values feed two entries below.
+        actor = getActor(detail_page)
+        release = getRelease(detail_page)
+        dic = {
+            'actor': actor,
+            'title': getTitle(detail_page),
+            'studio': getStudio(detail_page),
+            'outline': getOutline(detail_page),
+            'runtime': getRuntime(detail_page),
+            'director': getDirector(detail_page),
+            'release': release,
+            'number': getNum(detail_page),
+            'cover': getCover(detail_page),
+            'cover_small': '',
+            'imagecut': 1,
+            'tag': getTag(detail_page),
+            'label': getLabel(detail_page),
+            'year': getYear(release),
+            'actor_photo': getActorPhoto(actor),
+            # The page lives on xcity.jp; the javdb.com prefix produced a dead link.
+            'website': detail_url,
+            'source': 'xcity.py',
+        }
+    except Exception:
+        # Best effort: any failure is reported as a result with an empty title.
+        dic = {"title": ""}
+
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
+    return js
+
+if __name__ == '__main__':
+    # Manual check when run as a script: scrape one sample number and print the JSON.
+    print(main('VNDS-2624'))