Merge pull request #168 from jnozsc/normalize_EOL

normalize working tree line endings in Git
This commit is contained in:
Yoshiko
2020-03-27 01:58:18 +08:00
committed by GitHub
8 changed files with 1689 additions and 1689 deletions

View File

@@ -1,121 +1,121 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import requests import requests
from configparser import ConfigParser from configparser import ConfigParser
import os import os
import re import re
import time import time
import sys import sys
from lxml import etree from lxml import etree
import sys import sys
import io import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
# sys.setdefaultencoding('utf-8') # sys.setdefaultencoding('utf-8')
config_file = 'config.ini'
config = ConfigParser()

# Default configuration written on first run; sections mirror what the
# rest of the program reads ([common], [proxy], [Name_Rule], [update],
# [media], [escape], [movie_location]).
_DEFAULT_CONFIG = """[common]
main_mode = 1
failed_output_folder = failed
success_output_folder = JAV_output

[proxy]
proxy=127.0.0.1:1081
timeout=10
retry=3

[Name_Rule]
location_rule=actor+'/'+number
naming_rule=number+'-'+title

[update]
update_check=1

[media]
media_warehouse=emby
#emby plex kodi

[escape]
literals=\\

[movie_location]
path=

"""

if os.path.exists(config_file):
    try:
        config.read(config_file, encoding='UTF-8')
    except Exception:  # any parse/decoding error: tell the user, keep running
        print('[-]Config.ini read failed! Please use the offical file!')
else:
    # First run: write a default config.ini and ask the user to restart.
    print('[+]config.ini: not found, creating...', end='')
    with open("config.ini", "wt", encoding='UTF-8') as code:
        code.write(_DEFAULT_CONFIG)
    print('.', end='')
    time.sleep(2)
    print('.')
    print('[+]config.ini: created!')
    print('[+]Please restart the program!')
    time.sleep(4)
    os._exit(0)

# Re-read so `config` is populated for the import-time consumers below.
try:
    config.read(config_file, encoding='UTF-8')
except Exception:
    print('[-]Config.ini read failed! Please use the offical file!')
def get_network_settings():
    """Read the [proxy] section of config.ini.

    Returns:
        (proxy, timeout, retry_count) — proxy is a host:port string
        (may be '' to disable), timeout and retry_count are positive ints.

    Raises:
        ValueError: when the section is missing or values are invalid.
    """
    try:
        proxy = config["proxy"]["proxy"]
        timeout = int(config["proxy"]["timeout"])
        retry_count = int(config["proxy"]["retry"])
        # explicit check instead of `assert` (asserts vanish under -O)
        if timeout <= 0 or retry_count <= 0:
            raise ValueError
    except (KeyError, ValueError):
        raise ValueError("[-]Proxy config error! Please check the config.")
    return proxy, timeout, retry_count
def getDataState(json_data):  # detect whether metadata scraping failed
    """Return 0 when the scraped title is empty or a placeholder, else 1."""
    title = json_data['title']
    if title in ('', 'None', 'null'):
        return 0
    return 1
def ReadMediaWarehouse():
    """Return the configured media-server flavour (emby / plex / kodi)."""
    warehouse = config['media']['media_warehouse']
    return warehouse
def UpdateCheckSwitch():
    """Return '1' when update checking is enabled, '0' when disabled.

    NOTE(review): any other configured value falls through and yields
    None, matching the original behaviour.
    """
    check = str(config['update']['update_check'])
    if check == '1':
        return '1'
    if check in ('0', ''):
        return '0'
def getXpathSingle(htmlcode, xpath):
    """Evaluate *xpath* against *htmlcode* and return the hit as plain text."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath(xpath)).strip(" ['']")
def get_html(url, cookies=None):  # core web-request helper
    """GET *url* and return the response body decoded as UTF-8.

    Routes through the configured proxy when one is set and retries up to
    the configured count. Returns None after all retries fail (the printed
    diagnostics are the only failure signal, as before).
    """
    proxy, timeout, retry_count = get_network_settings()
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/68.0.3440.106 Safari/537.36'}
    # Build the proxy map once instead of duplicating the request branch.
    proxies = None
    if proxy != '':
        proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
    for attempt in range(1, retry_count + 1):
        try:
            getweb = requests.get(str(url), headers=headers, timeout=timeout,
                                  proxies=proxies, cookies=cookies)
            getweb.encoding = 'utf-8'
            return getweb.text
        except requests.RequestException:  # narrow: network/timeout errors only
            print('[-]Connect retry ' + str(attempt) + '/' + str(retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')

228
avsox.py
View File

@@ -1,115 +1,115 @@
import re import re
from lxml import etree from lxml import etree
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ADC_function import * from ADC_function import *
# import sys # import sys
# import io # import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getActorPhoto(htmlcode):  # //*[@id="star_qdt"]/li/a/img
    """Map each actor name to the URL of their avatar image."""
    soup = BeautifulSoup(htmlcode, 'lxml')
    photos = {}
    for box in soup.find_all(attrs={'class': 'avatar-box'}):
        photos[box.span.get_text()] = box.img['src']
    return photos
def getTitle(a):
    """Return the page title with '/' removed; '' on any parse failure."""
    try:
        tree = etree.fromstring(a, etree.HTMLParser())
        raw = str(tree.xpath('/html/body/div[2]/h3/text()')).strip(" ['']")  # [0]
        return raw.replace('/', '')
    except:
        return ''
def getActor(a):  # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    """Return the list of actor names shown on the page."""
    soup = BeautifulSoup(a, 'lxml')
    boxes = soup.find_all(attrs={'class': 'avatar-box'})
    return [box.span.get_text() for box in boxes]
def getStudio(a):
    """Return the studio (制作商) field as space-joined text."""
    tree = etree.fromstring(a, etree.HTMLParser())
    hits = tree.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')
    return str(hits).strip(" ['']").replace("', '", ' ')
def getRuntime(a):
    """Return the runtime (长度) in minutes, as a string."""
    tree = etree.fromstring(a, etree.HTMLParser())
    hits = tree.xpath('//span[contains(text(),"长度:")]/../text()')
    return str(hits).strip(" ['分钟']")
def getLabel(a):
    """Return the series (系列) field."""
    tree = etree.fromstring(a, etree.HTMLParser())
    hits = tree.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')
    return str(hits).strip(" ['']")
def getNum(a):
    """Return the id (识别码) field."""
    tree = etree.fromstring(a, etree.HTMLParser())
    hits = tree.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')
    return str(hits).strip(" ['']")
def getYear(release):
    """Extract a 4-digit year from *release*.

    Falls back to returning *release* unchanged when no 4-digit run is
    found (or the input is not a string), matching the old behaviour.
    """
    try:
        # raw string for the regex; narrow except instead of a bare one
        return str(re.search(r'\d{4}', release).group())
    except (AttributeError, TypeError):
        return release
def getRelease(a):
    """Return the release date (发行时间) field."""
    tree = etree.fromstring(a, etree.HTMLParser())
    hits = tree.xpath('//span[contains(text(),"发行时间:")]/../text()')
    return str(hits).strip(" ['']")
def getCover(htmlcode):
    """Return the full-size cover image URL."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
def getCover_small(htmlcode):
    """Return the search-result thumbnail URL."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
def getTag(a):  # genre tags for the title
    """Return the list of genre tags on the page."""
    soup = BeautifulSoup(a, 'lxml')
    return [node.get_text() for node in soup.find_all(attrs={'class': 'genre'})]
def main(number):
    """Scrape avsox for *number* and return the metadata as a JSON string.

    avsox indexes some ids with '-' replaced by '_' or with '_' removed;
    each variant is tried in turn until the search page yields a link.
    """
    a = get_html('https://avsox.host/cn/search/' + number)
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    if result1 in ('', 'null', 'None'):
        a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
        print(a)
        html = etree.fromstring(a, etree.HTMLParser())
        result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    if result1 in ('', 'null', 'None'):
        a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
        print(a)
        html = etree.fromstring(a, etree.HTMLParser())
        result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    web = get_html(result1)
    soup = BeautifulSoup(web, 'lxml')
    info = str(soup.find(attrs={'class': 'row movie'}))
    dic = {
        'actor': getActor(web),
        'title': getTitle(web).strip(getNum(web)),
        'studio': getStudio(info),
        'outline': '',#
        'runtime': getRuntime(info),
        'director': '', #
        'release': getRelease(info),
        'number': getNum(info),
        'cover': getCover(web),
        'cover_small': getCover_small(a),
        'imagecut': 3,
        'tag': getTag(web),
        'label': getLabel(info),
        'year': getYear(getRelease(info)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': getActorPhoto(web),
        'website': result1,
        'source': 'avsox.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
#print(main('012717_472'))

1382
core.py

File diff suppressed because it is too large Load Diff

458
fanza.py
View File

@@ -1,229 +1,229 @@
#!/usr/bin/python3 #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json import json
import re import re
from lxml import etree from lxml import etree
from ADC_function import * from ADC_function import *
# import sys # import sys
# import io # import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(text):
    """Return the raw page title."""
    tree = etree.fromstring(text, etree.HTMLParser())
    return tree.xpath('//*[@id="title"]/text()')[0]
def getActor(text):
    """Return the performers (出演者) as a comma-joined string."""
    # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    tree = etree.fromstring(text, etree.HTMLParser())
    names = tree.xpath(
        "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
    )
    return str(names).strip(" ['']").replace("', '", ",")
def getStudio(text):
    """Return the maker (メーカー) field; link text preferred, plain text fallback."""
    tree = etree.fromstring(text, etree.HTMLParser())
    hits = tree.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/a/text()")
    if hits:
        return hits[0]
    return tree.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/text()")[0]
def getRuntime(text):
    """Return the runtime (収録時間) in minutes as a digit string."""
    tree = etree.fromstring(text, etree.HTMLParser())
    cell = tree.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
    return re.search(r"\d+", str(cell)).group()
def getLabel(text):
    """Return the series (シリーズ) field; link text preferred, plain text fallback."""
    tree = etree.fromstring(text, etree.HTMLParser())
    hits = tree.xpath("//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()")
    if hits:
        return hits[0]
    return tree.xpath("//td[contains(text(),'シリーズ:')]/following-sibling::td/text()")[0]
def getNum(text):
    """Return the product id (品番); link text preferred, plain text fallback."""
    tree = etree.fromstring(text, etree.HTMLParser())
    hits = tree.xpath("//td[contains(text(),'品番:')]/following-sibling::td/a/text()")
    if hits:
        return hits[0]
    return tree.xpath("//td[contains(text(),'品番:')]/following-sibling::td/text()")[0]
def getYear(getRelease):
    """Extract a 4-digit year from the release string.

    Falls back to returning the input unchanged when no year is found.
    NOTE(review): the parameter shadows the module-level getRelease()
    function; the name is kept for interface compatibility.
    """
    try:
        return str(re.search(r"\d{4}", getRelease).group())
    except (AttributeError, TypeError):  # narrow except instead of a bare one
        return getRelease
def getRelease(text):
    """Return the release date (発売日) with leading newlines stripped."""
    tree = etree.fromstring(text, etree.HTMLParser())
    hits = tree.xpath("//td[contains(text(),'発売日:')]/following-sibling::td/a/text()")
    if hits:
        return hits[0].lstrip("\n")
    return tree.xpath("//td[contains(text(),'発売日:')]/following-sibling::td/text()")[0].lstrip("\n")
def getTag(text):
    """Return the genre list (ジャンル), possibly empty.

    The original `except` branch was unreachable: xpath() without
    indexing never raises, so the linked-genre query was always
    returned even when empty. Use an explicit empty-result fallback
    to the plain-text query instead.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    tags = tree.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()")
    if not tags:
        tags = tree.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/text()")
    return tags
def getCover(text, number):
    """Return the cover-image link whose element id matches *number*.

    Raises ValueError when no image is found. Fixes a latent
    UnboundLocalError in the original: when the first lookup failed and
    *number* contained no underscore, `result` was returned unbound.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    hits = tree.xpath('//*[@id="' + number + '"]/@href')
    if hits:
        return hits[0]
    # sometimes fanza writes _ as \u005f in the image id
    if "_" in number:
        alt = number.replace("_", r"\u005f")
        hits = tree.xpath('//*[@id="' + alt + '"]/@href')
        if hits:
            return hits[0]
    # (TODO) handle more edge cases; raising keeps the old contract —
    # fetching the picture is the users' main requirement
    raise ValueError("can not find image")
def getDirector(text):
    """Return the director (監督); link text preferred, plain text fallback."""
    tree = etree.fromstring(text, etree.HTMLParser())
    hits = tree.xpath("//td[contains(text(),'監督:')]/following-sibling::td/a/text()")
    if hits:
        return hits[0]
    return tree.xpath("//td[contains(text(),'監督:')]/following-sibling::td/text()")[0]
def getOutline(text):
    """Return the description paragraph, '' when absent."""
    tree = etree.fromstring(text, etree.HTMLParser())
    try:
        summary = str(tree.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace("\n", "")
        if summary == "":
            # some layouts wrap the description in <p> elements
            summary = str(tree.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace("\n", "")
    except:
        # (TODO) handle more edge cases
        return ""
    return summary
def main(number):
    """Look *number* up on fanza/DMM and return its metadata as JSON.

    fanza cids allow letters + digits + underscore; the input is
    normalized here. @note: the only known underscore usage is
    h_test123456789 — AV_Data_Capture.py.getNumber() over-formats the
    input, so the h_ prefix is restored first.
    """
    search_number = number
    if search_number.startswith("h-"):
        search_number = search_number.replace("h-", "h_")
    search_number = re.sub(r"[^0-9a-zA-Z_]", "", search_number).lower()
    fanza_urls = [
        "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
    ]
    chosen_url = ""
    for base in fanza_urls:
        chosen_url = base + search_number
        htmlcode = get_html(chosen_url)
        if "404 Not Found" not in htmlcode:
            break
    if "404 Not Found" in htmlcode:
        return json.dumps({"title": "",})
    try:
        # Old pages may pad the hinban (cid=test012 vs test00012 on the
        # page), so read the hinban first and pass it downstream.
        fanza_hinban = getNum(htmlcode)
        data = {
            "title": getTitle(htmlcode).strip(getActor(htmlcode)),
            "studio": getStudio(htmlcode),
            "outline": getOutline(htmlcode),
            "runtime": getRuntime(htmlcode),
            "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
            "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
            "release": getRelease(htmlcode),
            "number": fanza_hinban,
            "cover": getCover(htmlcode, fanza_hinban),
            "imagecut": 1,
            "tag": getTag(htmlcode),
            "label": getLabel(htmlcode),
            "year": getYear(
                getRelease(htmlcode)
            ),  # str(re.search('\d{4}',getRelease(a)).group()),
            "actor_photo": "",
            "website": chosen_url,
            "source": "fanza.py",
        }
    except:
        data = {
            "title": "",
        }
    return json.dumps(
        data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
    )  # .encode('UTF-8')
if __name__ == "__main__":
    # Manual smoke tests, kept for reference:
    # print(main("DV-1562"))
    # input("[+][+]Press enter key exit, you can check the error messge before you exit.")
    # print(main("ipx292"))
    pass

View File

@@ -1,162 +1,162 @@
import re import re
from lxml import etree#need install from lxml import etree#need install
import json import json
import ADC_function import ADC_function
# import sys # import sys
# import io # import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(htmlcode):  # page title with the leading id pattern stripped
    """Return the fc2club page title minus the 'XX2-nnn' id prefix."""
    #print(htmlcode)
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    raw = str(tree.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
    cleaned = str(re.sub('\D{2}2-\d+', '', raw)).replace(' ', '', 1)
    #print(cleaned)
    return cleaned
def getActor(htmlcode):
    """Return the actor name, '' on any parse failure."""
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        return str(tree.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
    except:
        return ''
def getStudio(htmlcode):  # studio / maker
    """Return the studio field."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
def getNum(htmlcode):  # product id
    """Return the product id field."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    hit = str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
    #print(hit)
    return hit
def getRelease(htmlcode2):
    """Return the release date from the fc2 article page."""
    #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    return str(tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
def getCover(htmlcode, number, htmlcode2):
    """Return the cover URL, preferring the fc2 article page; fall back to fc2club."""
    #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    primary = str(tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
    if primary != '':
        return 'http:' + primary
    fallback_tree = etree.fromstring(htmlcode, etree.HTMLParser())
    fallback = str(fallback_tree.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
    return 'https://fc2club.com' + fallback
def getOutline(htmlcode2):
    """Return the synopsis text from the FC2 detail page, with escape
    artifacts and stray punctuation cleaned up."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    raw = str(tree.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()'))
    cleaned = raw.strip(" ['']").replace("\\n", '', 10000).replace("'", '', 10000)
    return cleaned.replace(', ,', '').strip(' ').replace('。,', ',')
def getTag(htmlcode):
    """Return the tag string scraped from the fc2club page (list repr
    flattened and spaces/quotes stripped)."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    raw = str(tree.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
    return raw.strip(" ['']").replace("'", '').replace(' ', '')
def getYear(release):
    """Return the first 4-digit year found in *release*, or '' if none.

    Fixes: raw-string regex (avoids the invalid-escape warning) and a
    narrowed except clause instead of the original bare ``except:``
    (which also swallowed KeyboardInterrupt/SystemExit).
    """
    try:
        return re.search(r'\d{4}', release).group()
    except (AttributeError, TypeError):
        # AttributeError: no match (search returned None);
        # TypeError: *release* is not a string.
        return ''
def getTitle_fc2com(htmlcode):
    """Return the title text from an adult.contents.fc2.com page.

    Raises IndexError when the node is absent (caller handles failures).
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
def getActor_fc2com(htmlcode):
    """Return the seller/actor name from the FC2 page, or '' on failure.

    Fix: the original bare ``except:`` is narrowed to ``except
    Exception`` so KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        return tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
    except Exception:
        # Node missing (IndexError) or unparsable input.
        return ''
def getStudio_fc2com(htmlcode):
    """Return the studio/seller string from the FC2 page, or '' on failure.

    Fix: the original bare ``except:`` is narrowed to ``except
    Exception`` so KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        return str(tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
    except Exception:
        return ''
def getNum_fc2com(htmlcode):
    """Return the id/number text from the FC2 page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
def getRelease_fc2com(htmlcode2):
    """Return the release-date text from the FC2 detail page."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    return str(tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
def getCover_fc2com(htmlcode2):
    """Return the cover image URL (protocol-relative src prefixed with http:)."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    src = str(tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
    return 'http:' + src
def getOutline_fc2com(htmlcode2):
    """Return the synopsis text from the FC2 page, cleaned of escape
    artifacts and stray punctuation."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    raw = str(tree.xpath('/html/body/div/text()'))
    cleaned = raw.strip(" ['']").replace("\\n", '', 10000).replace("'", '', 10000)
    return cleaned.replace(', ,', '').strip(' ').replace('。,', ',')
def getTag_fc2com(number):
    """Return the list of tag strings for *number* via the FC2 tag API.

    The API response is unicode-escaped JSON-ish text; tags are pulled
    out with a regex rather than a JSON parse.
    """
    api_url = 'http://adult.contents.fc2.com/api/v4/article/' + number + '/tag?'
    decoded = bytes(ADC_function.get_html(api_url), 'utf-8').decode('unicode-escape')
    return re.findall('"tag":"(.*?)"', decoded)
def getYear_fc2com(release):
    """Return the first 4-digit year in *release*, or '' when absent.

    Fixes: raw-string regex and a narrowed except clause instead of the
    original bare ``except:``.
    """
    try:
        return re.search(r'\d{4}', release).group()
    except (AttributeError, TypeError):
        # No match, or *release* is not a string.
        return ''
def main(number):
    """Scrape metadata for FC2 id *number* (digits only, no 'FC2-' prefix).

    Primary source is fc2club.com; when no title is found there, the
    official adult.contents.fc2.com page is parsed instead (the wei6H
    cookie bypasses its age gate).  Returns a pretty-printed JSON
    string; on any error a stub {"title": ""} is returned so the caller
    can detect the miss.
    """
    try:
        htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/')
        htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
        actor = getActor(htmlcode)
        if actor == '':
            actor = 'FC2系列'
        dic = {
            'title': getTitle(htmlcode),
            'studio': getStudio(htmlcode),
            'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
            'outline': '',  # getOutline(htmlcode2),
            # NOTE(review): 'runtime' is filled with a year-like value —
            # looks like a long-standing field mix-up; kept for compatibility.
            'runtime': getYear(getRelease(htmlcode)),
            'director': getStudio(htmlcode),  # site exposes no director; studio reused
            'actor': actor,
            # BUG FIX: was getRelease(number) — that parsed the bare id
            # string as HTML and therefore always produced ''.
            'release': getRelease(htmlcode2),
            'number': 'FC2-' + number,
            'label': '',
            'cover': getCover(htmlcode, number, htmlcode2),
            'imagecut': 0,
            'tag': getTag(htmlcode),
            'actor_photo': '',
            'website': 'https://fc2club.com//html/FC2-' + number + '.html',
            'source': 'https://fc2club.com//html/FC2-' + number + '.html',
        }
        if dic['title'] == '':
            # fc2club miss: retry against the official FC2 page.
            htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/', cookies={'wei6H': '1'})
            actor = getActor(htmlcode)
            if actor == '':
                actor = 'FC2系列'
            dic = {
                'title': getTitle_fc2com(htmlcode2),
                'studio': getStudio_fc2com(htmlcode2),
                'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
                'outline': getOutline_fc2com(htmlcode2),
                'runtime': getYear_fc2com(getRelease(htmlcode2)),
                'director': getStudio_fc2com(htmlcode2),
                'actor': actor,
                # BUG FIX: was getRelease_fc2com(number) — same id-as-HTML bug.
                'release': getRelease_fc2com(htmlcode2),
                'number': 'FC2-' + number,
                'cover': getCover_fc2com(htmlcode2),
                'imagecut': 0,
                'tag': getTag_fc2com(number),
                'label': '',
                'actor_photo': '',
                'website': 'http://adult.contents.fc2.com/article/' + number + '/',
                'source': 'http://adult.contents.fc2.com/article/' + number + '/',
            }
    except Exception:
        # (TODO) surface the error instead of silently returning a stub.
        dic = {"title": ""}
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
#print(main('1252953')) #print(main('1252953'))

276
javbus.py
View File

@@ -1,138 +1,138 @@
import re import re
from pyquery import PyQuery as pq#need install from pyquery import PyQuery as pq#need install
from lxml import etree#need install from lxml import etree#need install
from bs4 import BeautifulSoup#need install from bs4 import BeautifulSoup#need install
import json import json
from ADC_function import * from ADC_function import *
def getActorPhoto(htmlcode):
    """Map each actress name on a javbus detail page to her portrait URL.

    Follows every 'star-name' link (one extra HTTP request per actress)
    and reads the portrait from the star's own page.
    """
    photos = {}
    soup = BeautifulSoup(htmlcode, 'lxml')
    for star in soup.find_all(attrs={'class': 'star-name'}):
        href = star.a['href']
        name = star.get_text()
        star_page = etree.fromstring(get_html(href), etree.HTMLParser())
        photos[name] = str(star_page.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
    return photos
def getTitle(htmlcode):
    """Return the page title with spaces turned into '-' and any
    'n<digits>-' fragment stripped."""
    raw = str(pq(htmlcode)('div.container h3').text()).replace(' ', '-')
    try:
        return re.sub('n\d+-', '', raw)
    except:
        # Defensive: fall back to the unstripped title.
        return raw
def getStudio(htmlcode):
    """Return the studio/maker name from a javbus detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
def getYear(htmlcode):
    """Return the release-date text (despite the name, not just the
    year — callers extract the 4-digit year themselves)."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
def getCover(htmlcode):
    """Return the full-size cover URL (href of the 'bigImage' anchor)."""
    return pq(htmlcode)('a.bigImage').attr('href')
def getRelease(htmlcode):
    """Return the release-date text (same node as getYear — the two
    functions are duplicates of each other)."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
def getRuntime(htmlcode):
    """Return the runtime text node (the one containing '分鐘'), or None
    when the page has no such node."""
    return BeautifulSoup(htmlcode, 'lxml').find(text=re.compile('分鐘'))
def getActor(htmlcode):
    """Return the list of actress names found on the detail page."""
    soup = BeautifulSoup(htmlcode, 'lxml')
    return [node.get_text() for node in soup.find_all(attrs={'class': 'star-name'})]
def getNum(htmlcode):
    """Return the release id/number shown on the detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
def getDirector(htmlcode):
    """Return the director's name from the detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
def getOutline(htmlcode):
    """Return the synopsis paragraph from a DMM mono page."""
    return str(pq(htmlcode)('tr td div.mg-b20.lh4 p.mg-b20').text())
def getSerise(htmlcode):
    """Return the series name (sic: 'Serise' kept for caller compatibility)."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
def getTag(htmlcode):
    """Return the genre tags, skipping interactive widgets (the ones
    carrying an 'onmouseout' handler)."""
    soup = BeautifulSoup(htmlcode, 'lxml')
    return [node.get_text()
            for node in soup.find_all(attrs={'class': 'genre'})
            if 'onmouseout' not in str(node)]
def main(number):
    """Scrape javbus (censored layout) for *number* and return a JSON string.

    The DMM mono page is fetched best-effort for the synopsis.  Any
    parse failure falls through to the uncensored layout via
    main_uncensored().

    Fixes: bare ``except:`` clauses narrowed to ``except Exception`` so
    KeyboardInterrupt/SystemExit propagate; raw-string regex patterns.
    """
    try:
        htmlcode = get_html('https://www.javbus.com/' + number)
        try:
            # Synopsis comes from DMM; it is optional, so swallow failures.
            dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
        except Exception:
            dww_htmlcode = ''
        dic = {
            'title': str(re.sub(r'\w+-\d+-', '', getTitle(htmlcode))),
            'studio': getStudio(htmlcode),
            'year': str(re.search(r'\d{4}', getYear(htmlcode)).group()),
            'outline': getOutline(dww_htmlcode),
            'runtime': getRuntime(htmlcode),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': getRelease(htmlcode),
            'number': getNum(htmlcode),
            'cover': getCover(htmlcode),
            'imagecut': 1,
            'tag': getTag(htmlcode),
            'label': getSerise(htmlcode),
            'actor_photo': getActorPhoto(htmlcode),
            'website': 'https://www.javbus.com/' + number,
            'source': 'javbus.py',
        }
        return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
    except Exception:
        # Censored-layout parse failed (e.g. no year on the page);
        # try the uncensored page layout instead.
        return main_uncensored(number)
def main_uncensored(number):
    """Scrape javbus' uncensored layout for *number* and return a JSON string."""
    htmlcode = get_html('https://www.javbus.com/' + number)
    dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
    if getTitle(htmlcode) == '':
        # Some uncensored ids separate parts with '_' instead of '-'.
        htmlcode = get_html('https://www.javbus.com/' + number.replace('-', '_'))
        dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
    # Title: strip the 'XXX-123-' prefix and the number itself.
    title = str(re.sub('\w+-\d+-', '', getTitle(htmlcode))).replace(getNum(htmlcode) + '-', '')
    dic = {
        'number': getNum(htmlcode),
        'title': title,
        'studio': getStudio(htmlcode),
        'year': getYear(htmlcode),
        'release': getRelease(htmlcode),
        'runtime': getRuntime(htmlcode),
        'director': getDirector(htmlcode),
        'actor': getActor(htmlcode),
        'outline': getOutline(dww_htmlcode),
        'cover': getCover(htmlcode),
        'tag': getTag(htmlcode),
        'label': getSerise(htmlcode),
        'imagecut': 0,
        'actor_photo': '',
        'website': 'https://www.javbus.com/' + number,
        'source': 'javbus.py',
    }
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))

246
javdb.py
View File

@@ -1,123 +1,123 @@
import re import re
from lxml import etree from lxml import etree
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ADC_function import * from ADC_function import *
# import sys # import sys
# import io # import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
    """Return the title from a javdb detail page.

    Raises IndexError when the node is absent (main() catches it).
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    return tree.xpath("/html/body/section/div/h2/strong/text()")[0]
def getActor(a):
    """Return the performer names as a single ', '-separated string."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
def getActorPhoto(actor):
    """Build a name -> photo-URL dict from a comma-separated actor string.

    javdb exposes no portraits, so every value is the empty string.
    """
    return {name: '' for name in actor.split(',')}
def getStudio(a):
    """Return the studio ('片商') name from a javdb detail page."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
    """Return the runtime ('時長') text.

    NOTE(review): .rstrip('mi') strips trailing 'm'/'i' characters, not
    the suffix string — presumably meant to drop 'mi(n)'; confirm.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').rstrip('mi')
def getLabel(a):
    """Return the series ('系列') name from a javdb detail page."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
    """Return the release id ('番號'); linked part first, then plain text."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(linked + plain).strip('+')
def getYear(getRelease):
    """Return the 4-digit year inside the release string, or the input
    unchanged when no year can be found.

    NOTE: the parameter name shadows the module-level getRelease()
    function; kept for interface compatibility.  Fixes: raw-string
    regex and narrowed except instead of the original bare ``except:``.
    """
    try:
        return str(re.search(r'\d{4}', getRelease).group())
    except (AttributeError, TypeError):
        # No match or non-string input: fall back to the raw value.
        return getRelease
def getRelease(a):
    """Return the release-date ('時間') text from a javdb detail page."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+')
def getTag(a):
    """Return the genre tags ('类别') as a comma-joined string."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
def getCover_small(a, index=0):
    """Return the thumbnail URL of the *index*-th search result.

    javdb may list several candidates for one query; the caller passes
    the index of the entry whose id matched, so we must not blindly
    take the first.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    url = tree.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
    if 'https' not in url:
        url = 'https:' + url
    return url
def getCover(htmlcode):
    """Return the full-size cover URL from the detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']")
def getDirector(a):
    """Return the director ('導演') name from a javdb detail page."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode):
    """Return the first synopsis paragraph from the detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
def main(number):
    """Search javdb for *number* and return its metadata as a JSON string.

    The search page may list several candidates; the one whose uid text
    exactly equals the (upper-cased) number is selected, and its detail
    page is scraped.  On any failure a stub {"title": ""} is returned.
    """
    try:
        number = number.upper()
        query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
        html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
        # javdb sometime returns multiple results,
        # and the first elememt maybe not the one we are looking for
        # iterate all candidates and find the match one
        urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
        ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
        # ids.index raises ValueError when no candidate matches;
        # the broad except below turns that into the empty-title stub.
        correct_url = urls[ids.index(number)]
        detail_page = get_html('https://javdb.com' + correct_url)
        dic = {
            'actor': getActor(detail_page),
            'title': getTitle(detail_page),
            'studio': getStudio(detail_page),
            'outline': getOutline(detail_page),
            'runtime': getRuntime(detail_page),
            'director': getDirector(detail_page),
            'release': getRelease(detail_page),
            'number': getNum(detail_page),
            'cover': getCover(detail_page),
            # Thumbnail must come from the SAME search-result index as the url.
            'cover_small': getCover_small(query_result, index=ids.index(number)),
            'imagecut': 3,
            'tag': getTag(detail_page),
            'label': getLabel(detail_page),
            'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
            'actor_photo': getActorPhoto(getActor(detail_page)),
            'website': 'https://javdb.com' + correct_url,
            'source': 'javdb.py',
        }
    except Exception as e:
        # print(e)
        dic = {"title": ""}
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
    return js
# main('DV-1562') # main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
#print(main('ipx-292')) #print(main('ipx-292'))

View File

@@ -1,111 +1,111 @@
import re import re
from lxml import etree from lxml import etree
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ADC_function import * from ADC_function import *
# import sys # import sys
# import io # import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
    """Return the page title with '/' replaced by ',', or '' on any failure."""
    try:
        tree = etree.fromstring(a, etree.HTMLParser())
        raw = str(tree.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
        return raw.replace('/', ',')
    except:
        return ''
def getActor(a):
    """Return the performers ('出演:') as a comma-separated string
    ('/' separators on the page are converted to ',')."""
    tree = etree.fromstring(a, etree.HTMLParser())
    linked = str(tree.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    plain = str(tree.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(linked + plain).strip('+').replace("', '", '').replace('"', '').replace('/', ',')
def getStudio(a):
    """Extract the 'studio' field from the detail table.

    NOTE(review): the xpath targets the シリーズ: ("series") row, which
    looks copy-pasted from getLabel — confirm this is the intended field.
    """
    tree = etree.fromstring(a, etree.HTMLParser())

    def cell(path):
        # List-to-string plus punctuation stripping, the module-wide idiom.
        return str(tree.xpath(path)).strip(" ['']").strip('\\n ').strip('\\n')

    linked = cell('//th[contains(text(),"シリーズ:")]/../td/a/text()')
    plain = cell('//th[contains(text(),"シリーズ:")]/../td/text()')
    return (linked + plain).strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
    """Extract the runtime from the 収録時間: ("recording time") row."""
    tree = etree.fromstring(a, etree.HTMLParser())

    def cell(path):
        # Stringify the xpath result list and strip list punctuation.
        return str(tree.xpath(path)).strip(" ['']").strip('\\n ').strip('\\n')

    linked = cell('//th[contains(text(),"収録時間:")]/../td/a/text()')
    plain = cell('//th[contains(text(),"収録時間:")]/../td/text()')
    # NOTE(review): rstrip('mi') removes trailing 'm'/'i' characters only;
    # if the site renders e.g. "90min" the trailing 'n' survives — verify
    # against a live page before "fixing".
    return (linked + plain).strip('+').rstrip('mi')
def getLabel(a):
    """Extract the label/series from the シリーズ: row of the detail table."""
    tree = etree.fromstring(a, etree.HTMLParser())

    def cell(path):
        # List-to-string plus punctuation/escaped-newline stripping.
        return str(tree.xpath(path)).strip(" ['']").strip('\\n ').strip('\\n')

    linked = cell('//th[contains(text(),"シリーズ:")]/../td/a/text()')
    plain = cell('//th[contains(text(),"シリーズ:")]/../td/text()')
    return (linked + plain).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
    """Extract the product number from the 品番: ("product no.") row."""
    tree = etree.fromstring(a, etree.HTMLParser())

    def cell(path):
        # Stringify the xpath result and strip list punctuation.
        return str(tree.xpath(path)).strip(" ['']").strip('\\n ').strip('\\n')

    linked = cell('//th[contains(text(),"品番:")]/../td/a/text()')
    plain = cell('//th[contains(text(),"品番:")]/../td/text()')
    return (linked + plain).strip('+')
def getYear(getRelease):
    """Return the first 4-digit year found in a release-date string.

    Falls back to the unmodified input when no 4-digit run exists (or the
    input is not a string), preserving the original best-effort contract.

    NOTE: the parameter name shadows the sibling getRelease() function;
    kept unchanged because callers may pass it by keyword.
    """
    try:
        # Raw string: '\d' in a plain literal is an invalid escape sequence
        # (DeprecationWarning, eventually an error) on modern Python.
        return str(re.search(r'\d{4}', getRelease).group())
    except (AttributeError, TypeError):
        # AttributeError: re.search found no match and returned None;
        # TypeError: input was not str/bytes. Narrowed from the original
        # bare except so genuine bugs are no longer swallowed.
        return getRelease
def getRelease(a):
    """Extract the release date from the 配信開始日: ("streaming start") row."""
    tree = etree.fromstring(a, etree.HTMLParser())

    def cell(path):
        # Stringify the xpath result list and strip list punctuation.
        return str(tree.xpath(path)).strip(" ['']").strip('\\n ').strip('\\n')

    linked = cell('//th[contains(text(),"配信開始日:")]/../td/a/text()')
    plain = cell('//th[contains(text(),"配信開始日:")]/../td/text()')
    return (linked + plain).strip('+')
def getTag(a):
    """Extract genre tags from the ジャンル: ("genre") row as one string,
    with escaped-newline separators turned into commas."""
    tree = etree.fromstring(a, etree.HTMLParser())

    def cell(path):
        # List-to-string plus punctuation/escaped-newline stripping.
        return str(tree.xpath(path)).strip(" ['']").strip('\\n ').strip('\\n')

    linked = cell('//th[contains(text(),"ジャンル:")]/../td/a/text()')
    plain = cell('//th[contains(text(),"ジャンル:")]/../td/text()')
    merged = (linked + plain).strip('+')
    return merged.replace("', '\\n", ",").replace("', '", "").replace('"', '')
def getCover(htmlcode):
    """Return the cover-image URL taken from the page's main <h2><img>."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    src = tree.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')
    # alternate xpath seen working:
    # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
    return str(src).strip(" ['']")
def getDirector(a):
    """Extract the 'director' field from the detail table.

    NOTE(review): the xpath targets the シリーズ ("series") row, not a
    director row (監督) — likely a copy-paste bug; verify against the site
    before changing, as downstream NFOs may rely on the current output.
    """
    tree = etree.fromstring(a, etree.HTMLParser())

    def cell(path):
        # Stringify the xpath result and strip list punctuation.
        return str(tree.xpath(path)).strip(" ['']").strip('\\n ').strip('\\n')

    linked = cell('//th[contains(text(),"シリーズ")]/../td/a/text()')
    plain = cell('//th[contains(text(),"シリーズ")]/../td/text()')
    return (linked + plain).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode):
    """Return the synopsis: the stringified text of every <p> element
    in the (pre-trimmed) introduction fragment."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    paragraphs = tree.xpath('//p/text()')
    return str(paragraphs).strip(" ['']")
def main(number2):
    """Scrape the mgstage product page for *number2* and return the
    collected metadata serialized as a pretty-printed JSON string.

    The product number is upper-cased before building the URL; the adc=1
    cookie bypasses the site's age-confirmation interstitial.
    """
    number = number2.upper()
    url = 'https://www.mgstage.com/product/product_detail/' + str(number) + '/'
    htmlcode = str(get_html(url, cookies={'adc': '1'}))
    soup = BeautifulSoup(htmlcode, 'lxml')

    def flatten(node):
        # Collapse pretty-printing whitespace so the per-field extractors
        # see one continuous string.
        # NOTE(review): the exact run-lengths of spaces in these literals
        # may have been collapsed by text extraction — verify against the
        # repository original before relying on them.
        return str(node).replace('\n ', '').replace(' ', '').replace('\n ', '').replace('\n ', '')

    a = flatten(soup.find(attrs={'class': 'detail_data'}))   # spec table
    b = flatten(soup.find(attrs={'id': 'introduction'}))     # synopsis block
    print(b)  # debug output retained from the original implementation
    dic = {
        'title': getTitle(htmlcode).replace("\\n", '').replace(' ', ''),
        'studio': getStudio(a),
        'outline': getOutline(b),
        'runtime': getRuntime(a),
        'director': getDirector(a),
        'actor': getActor(a),
        'release': getRelease(a),
        'number': getNum(a),
        'cover': getCover(htmlcode),
        'imagecut': 0,
        'tag': getTag(a),
        'label': getLabel(a),
        'year': getYear(getRelease(a)),
        'actor_photo': '',
        'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
        'source': 'mgstage.py',
    }
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
#print(htmlcode)
if __name__ == '__main__':
    # Manual smoke test. Guarded so that importing this scraper module no
    # longer fires a live HTTP request as an import-time side effect.
    print(main('SIRO-3607'))