Update 3.7-2

root
2020-08-12 18:27:58 +08:00
parent 72a9790858
commit f6da5db276
9 changed files with 0 additions and 1485 deletions

124
avsox.py

@@ -1,124 +0,0 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'avatar-box'})
d = {}
for i in a:
l = i.img['src']
t = i.span.get_text()
p2 = {t: l}
d.update(p2)
return d
def getTitle(a):
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
return result.replace('/', '')
except:
return ''
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
soup = BeautifulSoup(a, 'lxml')
a = soup.find_all(attrs={'class': 'avatar-box'})
d = []
for i in a:
d.append(i.span.get_text())
return d
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
return result1
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
return result1
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
return result1
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
return result1
def getYear(release):
try:
result = str(re.search('\d{4}',release).group())
return result
except:
return release
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
return result1
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
return result
def getCover_small(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
return result
def getTag(a):  # get genre tags
soup = BeautifulSoup(a, 'lxml')
a = soup.find_all(attrs={'class': 'genre'})
d = []
for i in a:
d.append(i.get_text())
return d
def getSeries(htmlcode):
try:
html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
return result1
except:
return ''
def main(number):
a = get_html('https://avsox.host/cn/search/' + number)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
print(a)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
print(a)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
web = get_html(result1)
soup = BeautifulSoup(web, 'lxml')
info = str(soup.find(attrs={'class': 'row movie'}))
dic = {
'actor': getActor(web),
'title': getTitle(web).strip(getNum(web)),
'studio': getStudio(info),
'outline': '',#
'runtime': getRuntime(info),
'director': '', #
'release': getRelease(info),
'number': getNum(info),
'cover': getCover(web),
'cover_small': getCover_small(a),
'imagecut': 3,
'tag': getTag(web),
'label': getLabel(info),
'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(web),
'website': result1,
'source': 'avsox.py',
'series': getSeries(info),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
if __name__ == "__main__":
print(main('012717_472'))
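
A minimal sketch of how the JSON string returned by main() could be consumed, assuming this file is importable as avsox and the site is reachable:

import json
import avsox

meta = json.loads(avsox.main('012717_472'))   # main() returns a JSON string, not a dict
print(meta.get('number'), meta.get('title'))  # empty fields mean the lookup failed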

297
fanza.py

@@ -1,297 +0,0 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import json
import re
from urllib.parse import urlencode
from lxml import etree
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(text):
html = etree.fromstring(text, etree.HTMLParser())
result = html.xpath('//*[starts-with(@id, "title")]/text()')[0]
return result
def getActor(text):
# //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(text, etree.HTMLParser())
result = (
str(
html.xpath(
"//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
)
)
.strip(" ['']")
.replace("', '", ",")
)
return result
def getStudio(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'メーカー')]/following-sibling::td/text()"
)[0]
return result
def getRuntime(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
return re.search(r"\d+", str(result)).group()
def getLabel(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'レーベル:')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'レーベル:')]/following-sibling::td/text()"
)[0]
return result
def getNum(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'品番:')]/following-sibling::td/text()"
)[0]
return result
def getYear(getRelease):
try:
result = str(re.search(r"\d{4}", getRelease).group())
return result
except:
return getRelease
def getRelease(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
)[0].lstrip("\n")
except:
try:
result = html.xpath(
"//td[contains(text(),'発売日:')]/following-sibling::td/text()"
)[0].lstrip("\n")
except:
result = "----"
if result == "----":
try:
result = html.xpath(
"//td[contains(text(),'配信開始日:')]/following-sibling::td/a/text()"
)[0].lstrip("\n")
except:
try:
result = html.xpath(
"//td[contains(text(),'配信開始日:')]/following-sibling::td/text()"
)[0].lstrip("\n")
except:
pass
return result.replace("/", "-")
def getTag(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
)
except:
result = html.xpath(
"//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
)
return result
def getCover(text, number):
html = etree.fromstring(text, etree.HTMLParser())
cover_number = number
try:
result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
except:
# sometimes fanza replaces _ with \u005f in the image id
if "_" in cover_number:
cover_number = cover_number.replace("_", r"\u005f")
try:
result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
except:
# (TODO) handle more edge case
# print(html)
# raise exception here, same behavior as before
# people's major requirement is fetching the picture
raise ValueError("can not find image")
return result
def getDirector(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'監督:')]/following-sibling::td/text()"
)[0]
return result
def getOutline(text):
html = etree.fromstring(text, etree.HTMLParser())
try:
result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
"\n", ""
)
if result == "":
result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
"\n", ""
)
except:
# (TODO) handle more edge case
# print(html)
return ""
return result
def getSeries(text):
try:
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
)[0]
return result
except:
return ""
def main(number):
# fanza allows letters, numbers and underscores; normalize the input here
# @note: the only underscore usage found so far is h_test123456789
fanza_search_number = number
# AV_Data_Capture.py's getNumber() over-formats the input, so restore the h_ prefix
if fanza_search_number.startswith("h-"):
fanza_search_number = fanza_search_number.replace("h-", "h_")
fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()
fanza_urls = [
"https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
"https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
"https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=",
"https://www.dmm.co.jp/rental/-/detail/=/cid=",
]
chosen_url = ""
for url in fanza_urls:
chosen_url = url + fanza_search_number
htmlcode = get_html(
"https://www.dmm.co.jp/age_check/=/declared=yes/?{}".format(
urlencode({"rurl": chosen_url})
)
)
if "404 Not Found" not in htmlcode:
break
if "404 Not Found" in htmlcode:
return json.dumps({"title": "",})
try:
# for some old pages, the input number does not match the page:
# for example, the url will be cid=test012
# but the hinban on the page is test00012,
# so get the hinban first, then pass it to the following functions
fanza_hinban = getNum(htmlcode)
data = {
"title": getTitle(htmlcode).strip(),
"studio": getStudio(htmlcode),
"outline": getOutline(htmlcode),
"runtime": getRuntime(htmlcode),
"director": getDirector(htmlcode) if "anime" not in chosen_url else "",
"actor": getActor(htmlcode) if "anime" not in chosen_url else "",
"release": getRelease(htmlcode),
"number": fanza_hinban,
"cover": getCover(htmlcode, fanza_hinban),
"imagecut": 1,
"tag": getTag(htmlcode),
"label": getLabel(htmlcode),
"year": getYear(
getRelease(htmlcode)
), # str(re.search('\d{4}',getRelease(a)).group()),
"actor_photo": "",
"website": chosen_url,
"source": "fanza.py",
"series": getSeries(htmlcode),
}
except:
data = {
"title": "",
}
js = json.dumps(
data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
) # .encode('UTF-8')
return js
def main_htmlcode(number):
# fanza allows letters, numbers and underscores; normalize the input here
# @note: the only underscore usage found so far is h_test123456789
fanza_search_number = number
# AV_Data_Capture.py's getNumber() over-formats the input, so restore the h_ prefix
if fanza_search_number.startswith("h-"):
fanza_search_number = fanza_search_number.replace("h-", "h_")
fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()
fanza_urls = [
"https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
"https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
"https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=",
]
chosen_url = ""
for url in fanza_urls:
chosen_url = url + fanza_search_number
htmlcode = get_html(chosen_url)
if "404 Not Found" not in htmlcode:
break
if "404 Not Found" in htmlcode:
return json.dumps({"title": "",})
return htmlcode
if __name__ == "__main__":
print(main("DV-1562"))
print(main("96fad1217"))

View File

@@ -1,165 +0,0 @@
import re
from lxml import etree#need install
import json
import ADC_function
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(htmlcode):  # get title
#print(htmlcode)
html = etree.fromstring(htmlcode,etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1)
#print(result2)
return result2
def getActor(htmlcode):
try:
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
return result
except:
return ''
def getStudio(htmlcode):  # get studio
html = etree.fromstring(htmlcode,etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
return result
def getNum(htmlcode):  # get product number
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
#print(result)
return result
def getRelease(htmlcode2): #
#a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
html=etree.fromstring(htmlcode2,etree.HTMLParser())
result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
return result
def getCover(htmlcode, number, htmlcode2):  # get cover
#a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
html = etree.fromstring(htmlcode2, etree.HTMLParser())
result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
if result == '':
html = etree.fromstring(htmlcode, etree.HTMLParser())
result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
return 'https://fc2club.com' + result2
return 'http:' + result
def getOutline(htmlcode2):  # get outline
html = etree.fromstring(htmlcode2, etree.HTMLParser())
result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
return result
def getTag(htmlcode):  # get tags
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
return result.strip(" ['']").replace("'",'').replace(' ','')
def getYear(release):
try:
result = re.search('\d{4}',release).group()
return result
except:
return ''
def getTitle_fc2com(htmlcode):  # get title
html = etree.fromstring(htmlcode,etree.HTMLParser())
result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
return result
def getActor_fc2com(htmlcode):
try:
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
return result
except:
return ''
def getStudio_fc2com(htmlcode):  # get studio
try:
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
return result
except:
return ''
def getNum_fc2com(htmlcode):  # get product number
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
return result
def getRelease_fc2com(htmlcode2): #
html=etree.fromstring(htmlcode2,etree.HTMLParser())
result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
return result
def getCover_fc2com(htmlcode2):  # get cover
html = etree.fromstring(htmlcode2, etree.HTMLParser())
result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
return 'http:' + result
def getOutline_fc2com(htmlcode2):  # get outline
html = etree.fromstring(htmlcode2, etree.HTMLParser())
result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
return result
def getTag_fc2com(number):  # get tags
htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape'))
result = re.findall('"tag":"(.*?)"', htmlcode)
return result
def getYear_fc2com(release):
try:
result = re.search('\d{4}',release).group()
return result
except:
return ''
def main(number):
try:
number = number.replace('FC2-', '').replace('fc2-', '')
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
actor = getActor(htmlcode)
if getActor(htmlcode) == '':
actor = 'FC2系列'
dic = {
'title': getTitle(htmlcode),
'studio': getStudio(htmlcode),
'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
'outline': '',#getOutline(htmlcode2),
'runtime': getYear(getRelease(htmlcode)),
'director': getStudio(htmlcode),
'actor': actor,
'release': getRelease(htmlcode2),
'number': 'FC2-'+number,
'label': '',
'cover': getCover(htmlcode,number,htmlcode2),
'imagecut': 0,
'tag': getTag(htmlcode),
'actor_photo':'',
'website': 'https://fc2club.com//html/FC2-' + number + '.html',
'source':'https://fc2club.com//html/FC2-' + number + '.html',
'series': '',
}
if dic['title'] == '':
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'})
actor = getActor(htmlcode)
if getActor(htmlcode) == '':
actor = 'FC2系列'
dic = {
'title': getTitle_fc2com(htmlcode2),
'studio': getStudio_fc2com(htmlcode2),
'year': '', # str(re.search('\d{4}',getRelease(number)).group()),
'outline': getOutline_fc2com(htmlcode2),
'runtime': getYear_fc2com(getRelease(htmlcode2)),
'director': getStudio_fc2com(htmlcode2),
'actor': actor,
'release': getRelease_fc2com(htmlcode2),
'number': 'FC2-' + number,
'cover': getCover_fc2com(htmlcode2),
'imagecut': 0,
'tag': getTag_fc2com(number),
'label': '',
'actor_photo': '',
'website': 'http://adult.contents.fc2.com/article/' + number + '/',
'source': 'http://adult.contents.fc2.com/article/' + number + '/',
'series': '',
}
except Exception as e:
# (TODO) better handle this
# print(e)
dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
return js
if __name__ == '__main__':
print(main('1252953'))
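
getTag_fc2com() depends on decoding \uXXXX escapes before running the regex; a minimal sketch of that step, using an invented API payload:

import re

payload = '{"tags":[{"tag":"\\u7d20\\u4eba"},{"tag":"sample"}]}'   # invented payload
decoded = str(bytes(payload, 'utf-8').decode('unicode-escape'))
print(re.findall('"tag":"(.*?)"', decoded))   # -> ['素人', 'sample']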

156
jav321.py

@@ -1,156 +0,0 @@
import json
from bs4 import BeautifulSoup
from lxml import html
from ADC_function import post_html
def main(number: str) -> str:
result = post_html(url="https://www.jav321.com/search", query={"sn": number})
soup = BeautifulSoup(result.text, "html.parser")
lx = html.fromstring(str(soup))
if "/video/" in result.url:
data = parse_info(soup)
dic = {
"title": get_title(lx),
"year": get_year(data),
"outline": get_outline(lx),
"director": "",
"cover": get_cover(lx),
"imagecut": 1,
"actor_photo": "",
"website": result.url,
"source": "jav321.py",
**data,
}
else:
dic = {}
return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
def get_title(lx: html.HtmlElement) -> str:
return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()")[0].strip()
def parse_info(soup: BeautifulSoup) -> dict:
data = soup.select_one("div.row > div.col-md-9")
if data:
dd = str(data).split("<br/>")
data_dic = {}
for d in dd:
data_dic[get_bold_text(h=d)] = d
return {
"actor": get_actor(data_dic),
"label": get_label(data_dic),
"studio": get_studio(data_dic),
"tag": get_tag(data_dic),
"number": get_number(data_dic),
"release": get_release(data_dic),
"runtime": get_runtime(data_dic),
"series": get_series(data_dic),
}
else:
return {}
def get_bold_text(h: str) -> str:
soup = BeautifulSoup(h, "html.parser")
if soup.b:
return soup.b.text
else:
return "UNKNOWN_TAG"
def get_anchor_info(h: str) -> str:
result = []
data = BeautifulSoup(h, "html.parser").find_all("a", href=True)
for d in data:
result.append(d.text)
return ",".join(result)
def get_text_info(h: str) -> str:
return h.split(": ")[1]
def get_cover(lx: html.HtmlElement) -> str:
return lx.xpath("/html/body/div[2]/div[2]/div[1]/p/a/img/@src")[0]
def get_outline(lx: html.HtmlElement) -> str:
return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()")[0]
def get_series2(lx: html.HtmlElement) -> str:
return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[1]/div[2]/a[11]/text()")[0]
def get_actor(data: dict) -> str:
if "女优" in data:
return get_anchor_info(data["女优"])
else:
return ""
def get_label(data: dict) -> str:
if "片商" in data:
return get_anchor_info(data["片商"])
else:
return ""
def get_tag(data: dict) -> str:
if "标签" in data:
return get_anchor_info(data["标签"])
else:
return ""
def get_studio(data: dict) -> str:
if "片商" in data:
return get_anchor_info(data["片商"])
else:
return ""
def get_number(data: dict) -> str:
if "番号" in data:
return get_text_info(data["番号"])
else:
return ""
def get_release(data: dict) -> str:
if "发行日期" in data:
return get_text_info(data["发行日期"])
else:
return ""
def get_runtime(data: dict) -> str:
if "播放时长" in data:
return get_text_info(data["播放时长"])
else:
return ""
def get_year(data: dict) -> str:
if "release" in data:
return data["release"][:4]
else:
return ""
def get_series(data: dict) -> str:
if "系列" in data:
return get_anchor_info(data["系列"])
else:
return ""
if __name__ == "__main__":
print(main("soe-259"))

167
javbus.py

@@ -1,167 +0,0 @@
import re
from pyquery import PyQuery as pq#need install
from lxml import etree#need install
from bs4 import BeautifulSoup#need install
import json
from ADC_function import *
import fanza
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'star-name'})
d={}
for i in a:
l=i.a['href']
t=i.get_text()
html = etree.fromstring(get_html(l), etree.HTMLParser())
p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
p2={t:p}
d.update(p2)
return d
def getTitle(htmlcode):  # get title
doc = pq(htmlcode)
title=str(doc('div.container h3').text()).replace(' ','-')
try:
title2 = re.sub('n\d+-','',title)
return title2
except:
return title
def getStudio(htmlcode):  # get studio
html = etree.fromstring(htmlcode,etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
return result
def getYear(htmlcode):  # get year
html = etree.fromstring(htmlcode,etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
return result
def getCover(htmlcode):  # get cover image URL
doc = pq(htmlcode)
image = doc('a.bigImage')
return image.attr('href')
def getRelease(htmlcode):  # get release date
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
return result
def getRuntime(htmlcode):  # get runtime in minutes
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find(text=re.compile('分鐘'))
return a
def getActor(htmlcode):  # get actors
b=[]
soup=BeautifulSoup(htmlcode,'lxml')
a=soup.find_all(attrs={'class':'star-name'})
for i in a:
b.append(i.get_text())
return b
def getNum(htmlcode):  # get product number
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
return result
def getDirector(htmlcode):  # get director
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
return result
def getCID(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
#print(htmlcode)
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
result = re.sub('/.*?.jpg','',string)
return result
def getOutline(htmlcode):  # get outline
html = etree.fromstring(htmlcode, etree.HTMLParser())
try:
result = html.xpath("string(//div[contains(@class,'mg-b20 lh4')])").replace('\n','')
return result
except:
return ''
def getSerise(htmlcode):
try:
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
return result
except:
return ''
def getTag(htmlcode):  # get genre tags
tag = []
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'genre'})
for i in a:
if 'onmouseout' in str(i):
continue
tag.append(i.get_text())
return tag
def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/' + number)
if getTitle(htmlcode) == '':
htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
try:
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
except:
dww_htmlcode = ''
dic = {
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
'studio': getStudio(htmlcode),
'year': getYear(htmlcode),
'outline': getOutline(dww_htmlcode),
'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode),
'actor': getActor(htmlcode),
'release': getRelease(htmlcode),
'number': getNum(htmlcode),
'cover': getCover(htmlcode),
'tag': getTag(htmlcode),
'label': getSerise(htmlcode),
'imagecut': 0,
'actor_photo': '',
'website': 'https://www.javbus.com/' + number,
'source': 'javbus.py',
'series': getSerise(htmlcode),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
def main(number):
try:
try:
htmlcode = get_html('https://www.javbus.com/' + number)
try:
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
except:
dww_htmlcode = ''
dic = {
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
'studio': getStudio(htmlcode),
'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
'outline': getOutline(dww_htmlcode),
'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode),
'actor': getActor(htmlcode),
'release': getRelease(htmlcode),
'number': getNum(htmlcode),
'cover': getCover(htmlcode),
'imagecut': 1,
'tag': getTag(htmlcode),
'label': getSerise(htmlcode),
'actor_photo': getActorPhoto(htmlcode),
'website': 'https://www.javbus.com/' + number,
'source': 'javbus.py',
'series': getSerise(htmlcode),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
separators=(',', ':'), ) # .encode('UTF-8')
return js
except:
return main_uncensored(number)
except:
data = {
"title": "",
}
js = json.dumps(
data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
)
return js
if __name__ == "__main__" :
print(main('ipx-292'))
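
getCID() derives the DMM cid from the first sample-image link with two plain string operations; a minimal sketch, where the URL is only a made-up example of the expected shape:

import re

href = 'https://pics.dmm.co.jp/digital/video/ipx00292/ipx00292jp-1.jpg'   # hypothetical sample-image URL
string = href.replace('https://pics.dmm.co.jp/digital/video/', '')
print(re.sub('/.*?.jpg', '', string))   # -> ipx00292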

154
javdb.py

@@ -1,154 +0,0 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
html = etree.fromstring(a, etree.HTMLParser())
result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
return result
def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"演員")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"演員")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
a = actor.split(',')
d={}
for i in a:
p={i:''}
d.update(p)
return d
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//strong[contains(text(),"番號")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
return str(result2 + result1).strip('+')
def getYear(getRelease):
try:
result = str(re.search('\d{4}', getRelease).group())
return result
except:
return getRelease
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+')
def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
return result
except:
result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
return result
def getCover_small(a, index=0):
# same issue as mentioned in main() below:
# javdb sometimes returns multiple results,
# DO NOT just take the first one, take the one at the correct index
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
if not 'https' in result:
result = 'https:' + result
return result
except: # 2020.7.17 Repair Cover Url crawl
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
if not 'https' in result:
result = 'https:' + result
return result
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
try:
result = html.xpath("//div[contains(@class, 'column-video-cover')]/a/img/@src")[0]
except: # 2020.7.17 Repair Cover Url crawl
result = html.xpath("//div[contains(@class, 'column-video-cover')]/img/@src")[0]
return result
def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
return result
def getSeries(a):
#/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def main(number):
try:
number = number.upper()
try:
query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
except:
query_result = get_html('https://javdb4.com/search?q=' + number + '&f=all')
html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
# javdb sometimes returns multiple results,
# and the first element may not be the one we are looking for;
# iterate over all candidates and find the matching one
urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
correct_url = urls[ids.index(number)]
detail_page = get_html('https://javdb.com' + correct_url)
# If a placeholder (gray) image was returned, replace it with the normal cover
cover_small = getCover_small(query_result, index=ids.index(number))
if 'placeholder' in cover_small:
cover_small = getCover(detail_page)
dic = {
'actor': getActor(detail_page),
'title': getTitle(detail_page),
'studio': getStudio(detail_page),
'outline': getOutline(detail_page),
'runtime': getRuntime(detail_page),
'director': getDirector(detail_page),
'release': getRelease(detail_page),
'number': getNum(detail_page),
'cover': getCover(detail_page),
'cover_small': cover_small,
'imagecut': 3,
'tag': getTag(detail_page),
'label': getLabel(detail_page),
'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(getActor(detail_page)),
'website': 'https://javdb.com' + correct_url,
'source': 'javdb.py',
'series': getSeries(detail_page),
}
except Exception as e:
# print(e)
dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
# main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
if __name__ == "__main__":
print(main('snyz-007'))
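
The matching step in main() is worth calling out: it lines up the result hrefs with the uid labels and picks by index instead of blindly taking the first hit. A minimal sketch with invented search results:

urls = ['/v/abc123', '/v/def456']   # invented hrefs
ids = ['SNYZ-006', 'SNYZ-007']      # invented uid labels
number = 'SNYZ-007'
correct_url = urls[ids.index(number)]   # raises ValueError if the number is not among the results
print(correct_url)                      # -> /v/def456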

110
javlib.py

@@ -1,110 +0,0 @@
import json
import bs4
from bs4 import BeautifulSoup
from lxml import html
from http.cookies import SimpleCookie
from ADC_function import get_javlib_cookie, get_html
def main(number: str):
raw_cookies, user_agent = get_javlib_cookie()
# Blank cookies mean the javlib site returned an error
if not raw_cookies:
return json.dumps({}, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
# Manually construct a dictionary
s_cookie = SimpleCookie()
s_cookie.load(raw_cookies)
cookies = {}
for key, morsel in s_cookie.items():
cookies[key] = morsel.value
# Scraping
result = get_html(
"http://www.javlibrary.com/cn/vl_searchbyid.php?keyword={}".format(number),
cookies=cookies,
ua=user_agent,
return_type="object"
)
soup = BeautifulSoup(result.text, "html.parser")
lx = html.fromstring(str(soup))
if "/?v=jav" in result.url:
dic = {
"title": get_title(lx, soup),
"studio": get_table_el_single_anchor(soup, "video_maker"),
"year": get_table_el_td(soup, "video_date")[:4],
"outline": "",
"director": get_table_el_single_anchor(soup, "video_director"),
"cover": get_cover(lx),
"imagecut": 1,
"actor_photo": "",
"website": result.url,
"source": "javlib.py",
"actor": get_table_el_multi_anchor(soup, "video_cast"),
"label": get_table_el_td(soup, "video_label"),
"tag": get_table_el_multi_anchor(soup, "video_genres"),
"number": get_table_el_td(soup, "video_id"),
"release": get_table_el_td(soup, "video_date"),
"runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
"series":'',
}
else:
dic = {}
return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
def get_from_xpath(lx: html.HtmlElement, xpath: str) -> str:
return lx.xpath(xpath)[0].strip()
def get_table_el_single_anchor(soup: BeautifulSoup, tag_id: str) -> str:
tag = soup.find(id=tag_id).find("a")
if tag is not None:
return tag.string.strip()
else:
return ""
def get_table_el_multi_anchor(soup: BeautifulSoup, tag_id: str) -> str:
tags = soup.find(id=tag_id).find_all("a")
return process(tags)
def get_table_el_td(soup: BeautifulSoup, tag_id: str) -> str:
tags = soup.find(id=tag_id).find_all("td", class_="text")
return process(tags)
def process(tags: bs4.element.ResultSet) -> str:
values = []
for tag in tags:
value = tag.string
if value is not None and value != "----":
values.append(value)
return ",".join(x for x in values if x)
def get_title(lx: html.HtmlElement, soup: BeautifulSoup) -> str:
title = get_from_xpath(lx, '//*[@id="video_title"]/h3/a/text()')
number = get_table_el_td(soup, "video_id")
return title.replace(number, "").strip()
def get_cover(lx: html.HtmlElement) -> str:
return "http:{}".format(get_from_xpath(lx, '//*[@id="video_jacket_img"]/@src'))
if __name__ == "__main__":
lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"]
#lists = ["DVMC-003"]
for num in lists:
print(main(num))
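
The cookie handling at the top of main() just flattens a SimpleCookie into a plain dict before passing it to get_html(); the same idea in isolation, with a placeholder cookie string:

from http.cookies import SimpleCookie

s_cookie = SimpleCookie()
s_cookie.load("cf_clearance=abc123; __cfduid=def456")   # placeholder raw cookie header
cookies = {key: morsel.value for key, morsel in s_cookie.items()}
print(cookies)   # -> {'cf_clearance': 'abc123', '__cfduid': 'def456'}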

120
mgstage.py

@@ -1,120 +0,0 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
return result.replace('/', ',')
except:
return ''
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
result1=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result2=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1+result2).strip('+').replace("', '",'').replace('"','')
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+')
def getYear(getRelease):
try:
result = str(re.search('\d{4}',getRelease).group())
return result
except:
return getRelease
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace('/','-')
def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
# /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
return result
def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
def getOutline(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//p/text()')).strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
return result
def getSeries(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def main(number2):
number=number2.upper()
htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
soup = BeautifulSoup(htmlcode, 'lxml')
a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
b = str(soup.find(attrs={'id': 'introduction'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
#print(b)
dic = {
'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''),
'studio': getStudio(a),
'outline': getOutline(b),
'runtime': getRuntime(a),
'director': getDirector(a),
'actor': getActor(a),
'release': getRelease(a),
'number': getNum(a),
'cover': getCover(htmlcode),
'imagecut': 0,
'tag': getTag(a),
'label':getLabel(a),
'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': '',
'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
'source': 'mgstage.py',
'series': getSeries(a),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
#print(htmlcode)
if __name__ == '__main__':
print(main('SIRO-4149'))
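
main() slices the page before calling most getters: the detail_data table and the introduction div are stringified separately and fed to the etree-based functions. A minimal sketch of that slicing on an invented page:

from bs4 import BeautifulSoup

page = '<html><body><div class="detail_data"><table><tr><th>品番:</th><td>SIRO-4149</td></tr></table></div></body></html>'   # invented page
soup = BeautifulSoup(page, 'lxml')
fragment = str(soup.find(attrs={'class': 'detail_data'}))
print(fragment.startswith('<div class="detail_data">'))   # -> True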

192
xcity.py

@@ -1,192 +0,0 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
html = etree.fromstring(a, etree.HTMLParser())
result = html.xpath('//*[@id="program_detail_title"]/text()')[0]
return result
def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[3]/a/text()')[0]
return result1
def getActorPhoto(actor): # //*[@id="star_qdt"]/li/a/img
a = actor.split(',')
d = {}
for i in a:
p = {i: ''}
d.update(p)
return d
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']")
except:
result = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
return result.strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')[0]
except:
return ''
try:
return re.findall('\d+',result1)[0]
except:
return ''
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')[0]
return result
except:
return ''
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser())
try:
result = html.xpath('//*[@id="hinban"]/text()')[0]
return result
except:
return ''
def getYear(getRelease):
try:
result = str(re.search('\d{4}', getRelease).group())
return result
except:
return getRelease
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')[0]
except:
return ''
try:
return re.findall('\d{4}/\d{2}/\d{2}', result)[0].replace('/','-')
except:
return ''
def getTag(a):
result2=[]
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[6]/a/text()')
for i in result1:
i=i.replace(u'\n','')
i=i.replace(u'\t','')
result2.append(i)
return result2
def getCover_small(a, index=0):
# same issue as in javdb: the search sometimes returns multiple results,
# DO NOT just take the first one, take the one at the correct index
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
if not 'https' in result:
result = 'https:' + result
return result
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')[0]
return 'https:' + result
except:
return ''
def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath('//*[@id="program_detail_director"]/text()')[0].replace(u'\n','').replace(u'\t', '')
return result
except:
return ''
def getOutline(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')[0]
except:
return ''
try:
return re.sub('\\\\\w*\d+','',result)
except:
return result
def getSeries(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
try:
try:
result = html.xpath("//span[contains(text(),'シリーズ')]/../a/span/text()")[0]
return result
except:
result = html.xpath("//span[contains(text(),'シリーズ')]/../span/text()")[0]
return result
except:
return ''
def main(number):
try:
number = number.upper()
query_result = get_html(
'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q=' + number.replace('-','') + '&sg=main&num=30')
html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
urls = html.xpath("//table[contains(@class, 'resultList')]/tr[2]/td[1]/a/@href")[0]
detail_page = get_html('https://xcity.jp' + urls)
dic = {
'actor': getActor(detail_page),
'title': getTitle(detail_page),
'studio': getStudio(detail_page),
'outline': getOutline(detail_page),
'runtime': getRuntime(detail_page),
'director': getDirector(detail_page),
'release': getRelease(detail_page),
'number': getNum(detail_page),
'cover': getCover(detail_page),
'cover_small': '',
'imagecut': 1,
'tag': getTag(detail_page),
'label': getLabel(detail_page),
'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(getActor(detail_page)),
'website': 'https://xcity.jp' + urls,
'source': 'xcity.py',
'series': getSeries(detail_page),
}
except Exception as e:
# print(e)
dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
if __name__ == '__main__':
print(main('VNDS-2624'))
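
getActorPhoto() here (as in javdb.py) does not fetch any photos; it only expands the comma-separated actor string into the name-to-empty-string map used for the actor_photo field. A minimal sketch with invented names:

actor = 'Actor A,Actor B'   # invented actor string
d = {}
for i in actor.split(','):
    d.update({i: ''})
print(d)   # -> {'Actor A': '', 'Actor B': ''}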