diff --git a/ADC_function.py b/ADC_function.py
index 2b48e0b..746384c 100755
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -1,121 +1,121 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import requests
-from configparser import ConfigParser
-import os
-import re
-import time
-import sys
-from lxml import etree
-import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-# sys.setdefaultencoding('utf-8')
-
-config_file='config.ini'
-config = ConfigParser()
-
-if os.path.exists(config_file):
- try:
- config.read(config_file, encoding='UTF-8')
- except:
-        print('[-]config.ini read failed! Please use the official file!')
-else:
- print('[+]config.ini: not found, creating...',end='')
- with open("config.ini", "wt", encoding='UTF-8') as code:
- print("[common]", file=code)
- print("main_mode = 1", file=code)
- print("failed_output_folder = failed", file=code)
- print("success_output_folder = JAV_output", file=code)
- print("", file=code)
- print("[proxy]",file=code)
- print("proxy=127.0.0.1:1081",file=code)
- print("timeout=10", file=code)
- print("retry=3", file=code)
- print("", file=code)
- print("[Name_Rule]", file=code)
- print("location_rule=actor+'/'+number",file=code)
- print("naming_rule=number+'-'+title",file=code)
- print("", file=code)
- print("[update]",file=code)
- print("update_check=1",file=code)
- print("", file=code)
- print("[media]", file=code)
- print("media_warehouse=emby", file=code)
- print("#emby plex kodi", file=code)
- print("", file=code)
- print("[escape]", file=code)
- print("literals=\\", file=code)
- print("", file=code)
- print("[movie_location]", file=code)
- print("path=", file=code)
- print("", file=code)
- print('.',end='')
- time.sleep(2)
- print('.')
- print('[+]config.ini: created!')
- print('[+]Please restart the program!')
- time.sleep(4)
- os._exit(0)
- try:
- config.read(config_file, encoding='UTF-8')
- except:
-        print('[-]config.ini read failed! Please use the official file!')
-
-def get_network_settings():
- try:
- proxy = config["proxy"]["proxy"]
- timeout = int(config["proxy"]["timeout"])
- retry_count = int(config["proxy"]["retry"])
- assert timeout > 0
- assert retry_count > 0
- except:
- raise ValueError("[-]Proxy config error! Please check the config.")
- return proxy, timeout, retry_count
-
-def getDataState(json_data):  # detect a failed metadata fetch
- if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
- return 0
- else:
- return 1
-
-def ReadMediaWarehouse():
- return config['media']['media_warehouse']
-
-def UpdateCheckSwitch():
- check=str(config['update']['update_check'])
- if check == '1':
- return '1'
- elif check == '0':
- return '0'
-    else:
- return '0'
-
-def getXpathSingle(htmlcode,xpath):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result1 = str(html.xpath(xpath)).strip(" ['']")
- return result1
-
-def get_html(url, cookies=None):  # core web-request helper
- proxy, timeout, retry_count = get_network_settings()
- i = 0
- while i < retry_count:
- try:
- if not proxy == '':
- proxies = {"http": "http://" + proxy,"https": "https://" + proxy}
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
- getweb = requests.get(str(url), headers=headers, timeout=timeout,proxies=proxies, cookies=cookies)
- getweb.encoding = 'utf-8'
- return getweb.text
- else:
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
- getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
- getweb.encoding = 'utf-8'
- return getweb.text
- except:
- i += 1
- print('[-]Connect retry '+str(i)+'/'+str(retry_count))
- print('[-]Connect Failed! Please check your Proxy or Network!')
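-    return None  # assumption: callers treat None as "fetch failed" once all retries are exhausted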
-
-
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import requests
+from configparser import ConfigParser
+import os
+import re
+import time
+import sys
+from lxml import etree
+import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+# sys.setdefaultencoding('utf-8')
+
+config_file='config.ini'
+config = ConfigParser()
+
+if os.path.exists(config_file):
+ try:
+ config.read(config_file, encoding='UTF-8')
+ except:
+        print('[-]config.ini read failed! Please use the official file!')
+else:
+ print('[+]config.ini: not found, creating...',end='')
+ with open("config.ini", "wt", encoding='UTF-8') as code:
+ print("[common]", file=code)
+ print("main_mode = 1", file=code)
+ print("failed_output_folder = failed", file=code)
+ print("success_output_folder = JAV_output", file=code)
+ print("", file=code)
+ print("[proxy]",file=code)
+ print("proxy=127.0.0.1:1081",file=code)
+ print("timeout=10", file=code)
+ print("retry=3", file=code)
+ print("", file=code)
+ print("[Name_Rule]", file=code)
+ print("location_rule=actor+'/'+number",file=code)
+ print("naming_rule=number+'-'+title",file=code)
+ print("", file=code)
+ print("[update]",file=code)
+ print("update_check=1",file=code)
+ print("", file=code)
+ print("[media]", file=code)
+ print("media_warehouse=emby", file=code)
+ print("#emby plex kodi", file=code)
+ print("", file=code)
+ print("[escape]", file=code)
+ print("literals=\\", file=code)
+ print("", file=code)
+ print("[movie_location]", file=code)
+ print("path=", file=code)
+ print("", file=code)
+ print('.',end='')
+ time.sleep(2)
+ print('.')
+ print('[+]config.ini: created!')
+ print('[+]Please restart the program!')
+ time.sleep(4)
+ os._exit(0)
+ try:
+ config.read(config_file, encoding='UTF-8')
+ except:
+        print('[-]config.ini read failed! Please use the official file!')
+
+def get_network_settings():
+ try:
+ proxy = config["proxy"]["proxy"]
+ timeout = int(config["proxy"]["timeout"])
+ retry_count = int(config["proxy"]["retry"])
+ assert timeout > 0
+ assert retry_count > 0
+ except:
+ raise ValueError("[-]Proxy config error! Please check the config.")
+ return proxy, timeout, retry_count
+
+def getDataState(json_data):  # detect a failed metadata fetch
+ if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
+ return 0
+ else:
+ return 1
+
+def ReadMediaWarehouse():
+ return config['media']['media_warehouse']
+
+def UpdateCheckSwitch():
+ check=str(config['update']['update_check'])
+ if check == '1':
+ return '1'
+ elif check == '0':
+ return '0'
+    else:
+ return '0'
+
+def getXpathSingle(htmlcode,xpath):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result1 = str(html.xpath(xpath)).strip(" ['']")
+ return result1
+
+def get_html(url, cookies=None):  # core web-request helper
+ proxy, timeout, retry_count = get_network_settings()
+ i = 0
+ while i < retry_count:
+ try:
+ if not proxy == '':
+ proxies = {"http": "http://" + proxy,"https": "https://" + proxy}
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
+ getweb = requests.get(str(url), headers=headers, timeout=timeout,proxies=proxies, cookies=cookies)
+ getweb.encoding = 'utf-8'
+ return getweb.text
+ else:
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+ getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
+ getweb.encoding = 'utf-8'
+ return getweb.text
+ except:
+ i += 1
+ print('[-]Connect retry '+str(i)+'/'+str(retry_count))
+ print('[-]Connect Failed! Please check your Proxy or Network!')
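+    return None  # assumption: callers treat None as "fetch failed" once all retries are exhausted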
+
+
diff --git a/avsox.py b/avsox.py
index 67ee9bf..e54d8d1 100644
--- a/avsox.py
+++ b/avsox.py
@@ -1,115 +1,115 @@
-import re
-from lxml import etree
-import json
-from bs4 import BeautifulSoup
-from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = soup.find_all(attrs={'class': 'avatar-box'})
- d = {}
- for i in a:
- l = i.img['src']
- t = i.span.get_text()
- p2 = {t: l}
- d.update(p2)
- return d
-def getTitle(a):
- try:
- html = etree.fromstring(a, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
- return result.replace('/', '')
- except:
- return ''
-def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
- soup = BeautifulSoup(a, 'lxml')
- a = soup.find_all(attrs={'class': 'avatar-box'})
- d = []
- for i in a:
- d.append(i.span.get_text())
- return d
-def getStudio(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
- return result1
-def getRuntime(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
- return result1
-def getLabel(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
- return result1
-def getNum(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
- return result1
-def getYear(release):
- try:
-        result = str(re.search(r'\d{4}', release).group())
- return result
- except:
- return release
-def getRelease(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
- return result1
-def getCover(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
- return result
-def getCover_small(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
- return result
-def getTag(a):  # get genre tags
- soup = BeautifulSoup(a, 'lxml')
- a = soup.find_all(attrs={'class': 'genre'})
- d = []
- for i in a:
- d.append(i.get_text())
- return d
-
-def main(number):
- a = get_html('https://avsox.host/cn/search/' + number)
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
- if result1 == '' or result1 == 'null' or result1 == 'None':
- a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
- print(a)
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
- if result1 == '' or result1 == 'null' or result1 == 'None':
- a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
- print(a)
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
- web = get_html(result1)
- soup = BeautifulSoup(web, 'lxml')
- info = str(soup.find(attrs={'class': 'row movie'}))
- dic = {
- 'actor': getActor(web),
- 'title': getTitle(web).strip(getNum(web)),
- 'studio': getStudio(info),
- 'outline': '',#
- 'runtime': getRuntime(info),
- 'director': '', #
- 'release': getRelease(info),
- 'number': getNum(info),
- 'cover': getCover(web),
- 'cover_small': getCover_small(a),
- 'imagecut': 3,
- 'tag': getTag(web),
- 'label': getLabel(info),
- 'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
- 'actor_photo': getActorPhoto(web),
- 'website': result1,
- 'source': 'avsox.py',
- }
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
-
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = soup.find_all(attrs={'class': 'avatar-box'})
+ d = {}
+ for i in a:
+ l = i.img['src']
+ t = i.span.get_text()
+ p2 = {t: l}
+ d.update(p2)
+ return d
+def getTitle(a):
+ try:
+ html = etree.fromstring(a, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
+ return result.replace('/', '')
+ except:
+ return ''
+def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+ soup = BeautifulSoup(a, 'lxml')
+ a = soup.find_all(attrs={'class': 'avatar-box'})
+ d = []
+ for i in a:
+ d.append(i.span.get_text())
+ return d
+def getStudio(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
+ return result1
+def getRuntime(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
+ return result1
+def getLabel(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
+ return result1
+def getNum(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
+ return result1
+def getYear(release):
+ try:
+        result = str(re.search(r'\d{4}', release).group())
+ return result
+ except:
+ return release
+def getRelease(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
+ return result1
+def getCover(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
+ return result
+def getCover_small(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
+ return result
+def getTag(a):  # get genre tags
+ soup = BeautifulSoup(a, 'lxml')
+ a = soup.find_all(attrs={'class': 'genre'})
+ d = []
+ for i in a:
+ d.append(i.get_text())
+ return d
+
+def main(number):
+ a = get_html('https://avsox.host/cn/search/' + number)
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+ if result1 == '' or result1 == 'null' or result1 == 'None':
+ a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
+ print(a)
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+ if result1 == '' or result1 == 'null' or result1 == 'None':
+ a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
+ print(a)
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+ web = get_html(result1)
+ soup = BeautifulSoup(web, 'lxml')
+ info = str(soup.find(attrs={'class': 'row movie'}))
+ dic = {
+ 'actor': getActor(web),
+ 'title': getTitle(web).strip(getNum(web)),
+ 'studio': getStudio(info),
+ 'outline': '',#
+ 'runtime': getRuntime(info),
+ 'director': '', #
+ 'release': getRelease(info),
+ 'number': getNum(info),
+ 'cover': getCover(web),
+ 'cover_small': getCover_small(a),
+ 'imagecut': 3,
+ 'tag': getTag(web),
+ 'label': getLabel(info),
+ 'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
+ 'actor_photo': getActorPhoto(web),
+ 'website': result1,
+ 'source': 'avsox.py',
+ }
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js
+
#print(main('012717_472'))
\ No newline at end of file
diff --git a/core.py b/core.py
index 3ad271b..8860911 100755
--- a/core.py
+++ b/core.py
@@ -1,691 +1,691 @@
-# -*- coding: utf-8 -*-
-
-import re
-import os
-import os.path
-import shutil
-from PIL import Image
-import time
-import json
-from ADC_function import *
-from configparser import ConfigParser
-import argparse
-# =========website========
-import fc2fans_club
-import mgstage
-import avsox
-import javbus
-import javdb
-import fanza
-import requests
-import random
-
-
-# ===================== local file handling ===========================
-
-def escapePath(path, Config): # Remove escape literals
- escapeLiterals = Config['escape']['literals']
- backslash = '\\'
- for literal in escapeLiterals:
- path = path.replace(backslash + literal, '')
- return path
-
-
-def moveFailedFolder(filepath, failed_folder):
- print('[-]Move to Failed output folder')
- shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/')
- return
-
-
-def CreatFailedFolder(failed_folder):
-    if not os.path.exists(failed_folder + '/'):  # create the failed folder
- try:
- os.makedirs(failed_folder + '/')
- except:
- print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)")
- return
-
-
-def getDataFromJSON(file_number, filepath, failed_folder):  # return metadata as JSON
- """
- iterate through all services and fetch the data
- """
-
- func_mapping = {
- "avsox": avsox.main,
- "fc2": fc2fans_club.main,
- "fanza": fanza.main,
- "javdb": javdb.main,
- "javbus": javbus.main,
- "mgstage": mgstage.main,
- }
-
-    # default fetch order list, from beginning to end
- sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox"]
-
-    # if the input file name matches certain rules,
-    # move some web services to the beginning of the list
- if re.match(r"^\d{5,}", file_number) or (
- "HEYZO" in file_number or "heyzo" in file_number or "Heyzo" in file_number
- ):
- sources.insert(0, sources.pop(sources.index("avsox")))
- elif re.match(r"\d+\D+", file_number) or (
- "siro" in file_number or "SIRO" in file_number or "Siro" in file_number
- ):
- sources.insert(0, sources.pop(sources.index("fanza")))
- elif "fc2" in file_number or "FC2" in file_number:
- sources.insert(0, sources.pop(sources.index("fc2")))
-
- for source in sources:
- json_data = json.loads(func_mapping[source](file_number))
-        # if any service returns valid data, stop
- if getDataState(json_data) != 0:
- break
-
- # ================================================网站规则添加结束================================================
-
- title = json_data['title']
-    actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')  # string to list
- release = json_data['release']
- number = json_data['number']
- studio = json_data['studio']
- source = json_data['source']
- runtime = json_data['runtime']
-    outline = json_data['outline']
- label = json_data['label']
- year = json_data['year']
- try:
- cover_small = json_data['cover_small']
- except:
- cover_small = ''
- imagecut = json_data['imagecut']
-    tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # string to list
- actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
-
-
- if title == '' or number == '':
- print('[-]Movie Data not found!')
- moveFailedFolder(filepath, failed_folder)
- return
-
- # if imagecut == '3':
- # DownloadFileWithFilename()
-
-    # ==================== sanitize illegal characters ======================  #\/:*?"<>|
- title = title.replace('\\', '')
- title = title.replace('/', '')
- title = title.replace(':', '')
- title = title.replace('*', '')
- title = title.replace('?', '')
- title = title.replace('"', '')
- title = title.replace('<', '')
- title = title.replace('>', '')
- title = title.replace('|', '')
- release = release.replace('/', '-')
- tmpArr = cover_small.split(',')
- if len(tmpArr) > 0:
- cover_small = tmpArr[0].strip('\"').strip('\'')
-    # ==================== sanitize illegal characters END ==================  #\/:*?"<>|
-
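-    # note: naming_rule and location_rule in config.ini are Python expressions; eval() runs them against the variables prepared above (number, title, actor, ...)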
- naming_rule = eval(config['Name_Rule']['naming_rule'])
- location_rule = eval(config['Name_Rule']['location_rule'])
- if 'actor' in config['Name_Rule']['location_rule'] and len(actor) > 100:
- print(config['Name_Rule']['location_rule'])
- location_rule = eval(config['Name_Rule']['location_rule'].replace("actor","'多人作品'"))
- if 'title' in config['Name_Rule']['location_rule'] and len(title) > 100:
- location_rule = eval(config['Name_Rule']['location_rule'].replace("title",'number'))
-
-    # write the processed values back into json_data
- json_data['title'] = title
- json_data['actor'] = actor
- json_data['release'] = release
- json_data['cover_small'] = cover_small
- json_data['tag'] = tag
- json_data['naming_rule'] = naming_rule
- json_data['location_rule'] = location_rule
- json_data['year'] = year
- json_data['actor_list'] = actor_list
- return json_data
-
-
-def get_info(json_data):  # pull the fields out of the json
- title = json_data['title']
- studio = json_data['studio']
- year = json_data['year']
- outline = json_data['outline']
- runtime = json_data['runtime']
- director = json_data['director']
- actor_photo = json_data['actor_photo']
- release = json_data['release']
- number = json_data['number']
- cover = json_data['cover']
- website = json_data['website']
- return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website
-
-
-def smallCoverCheck(path, number, imagecut, cover_small, c_word, option, Config, filepath, failed_folder):
- if imagecut == 3:
- if option == 'emby':
- DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
- try:
- img = Image.open(path + '/1.jpg')
- except Exception:
- img = Image.open('1.jpg')
- w = img.width
- h = img.height
- img.save(path + '/' + number + c_word + '.png')
- time.sleep(1)
- os.remove(path + '/1.jpg')
- if option == 'kodi':
- DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
- try:
- img = Image.open(path + '/1.jpg')
- except Exception:
- img = Image.open('1.jpg')
- w = img.width
- h = img.height
- img.save(path + '/' + number + c_word + '-poster.jpg')
- time.sleep(1)
- os.remove(path + '/1.jpg')
- if option == 'plex':
- DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
- try:
- img = Image.open(path + '/1.jpg')
- except Exception:
- img = Image.open('1.jpg')
- w = img.width
- h = img.height
- img.save(path + '/poster.jpg')
- os.remove(path + '/1.jpg')
-
-
-def creatFolder(success_folder, location_rule, json_data, Config):  # create the output folder
- title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website= get_info(json_data)
-    if len(location_rule) > 240:  # create the success output folder
-        path = success_folder + '/' + location_rule.replace(
-            "'actor'", "'manypeople'", 3).replace(
-            "actor", "'manypeople'", 3)  # path is the directory for the movie and its metadata
- else:
- path = success_folder + '/' + location_rule
- # print(path)
- if not os.path.exists(path):
- path = escapePath(path, Config)
- try:
- os.makedirs(path)
- except:
- path = success_folder + '/' + location_rule.replace('/[' + number + ']-' + title, "/number")
- path = escapePath(path, Config)
-
- os.makedirs(path)
- return path
-
-
-# =====================资源下载部分===========================
-def DownloadFileWithFilename(url, filename, path, Config, filepath, failed_folder):  # path example: a photo or video folder inside the project folder
- proxy, timeout, retry_count = get_network_settings()
- i = 0
-
- while i < retry_count:
- try:
- if not proxy == '':
- if not os.path.exists(path):
- os.makedirs(path)
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
- r = requests.get(url, headers=headers, timeout=timeout,
- proxies={"http": "http://" + str(proxy), "https": "https://" + str(proxy)})
- if r == '':
- print('[-]Movie Data not found!')
- return
- with open(str(path) + "/" + filename, "wb") as code:
- code.write(r.content)
- return
- else:
- if not os.path.exists(path):
- os.makedirs(path)
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
- r = requests.get(url, timeout=timeout, headers=headers)
- if r == '':
- print('[-]Movie Data not found!')
- return
- with open(str(path) + "/" + filename, "wb") as code:
- code.write(r.content)
- return
- except requests.exceptions.RequestException:
- i += 1
- print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
- except requests.exceptions.ConnectionError:
- i += 1
- print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
- except requests.exceptions.ProxyError:
- i += 1
- print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
- except requests.exceptions.ConnectTimeout:
- i += 1
- print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
-    print('[-]Connect Failed! Please check your Proxy or Network!')
-    # hand the failure back to the caller (imageDownload moves the file to failed)
-    return 'failed'
-
-
-def imageDownload(option, cover, number, c_word, path, multi_part, Config, filepath, failed_folder):  # move to failed if the cover download fails
- if option == 'emby':
- if DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) == 'failed':
- moveFailedFolder(filepath, failed_folder)
- return
- DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder)
- if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
- print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
- return
- i = 1
- while i <= int(config['proxy']['retry']):
- if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
-                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + config['proxy']['retry'] + ']')
- DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder)
- i = i + 1
- continue
- else:
- break
- if multi_part == 1:
- old_name = os.path.join(path, number + c_word + '.jpg')
- new_name = os.path.join(path, number + c_word + '.jpg')
- os.rename(old_name, new_name)
- print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
- else:
- print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
- elif option == 'plex':
- if DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) == 'failed':
- moveFailedFolder(filepath, failed_folder)
- return
- DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
- if not os.path.getsize(path + '/fanart.jpg') == 0:
- print('[+]Image Downloaded!', path + '/fanart.jpg')
- return
- i = 1
- while i <= int(config['proxy']['retry']):
- if os.path.getsize(path + '/fanart.jpg') == 0:
-                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + config['proxy']['retry'] + ']')
- DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
- i = i + 1
- continue
- else:
- break
-        if os.path.getsize(path + '/fanart.jpg') == 0:
-            print('[!]Image Download Failed! Trying again.')
-            DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
- print('[+]Image Downloaded!', path + '/fanart.jpg')
- elif option == 'kodi':
- if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) == 'failed':
- moveFailedFolder(filepath, failed_folder)
- return
- DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder)
- if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
- print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
- return
- i = 1
- while i <= int(config['proxy']['retry']):
- if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
-                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + config['proxy']['retry'] + ']')
- DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder)
- i = i + 1
- continue
- else:
- break
- print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
-
-
-def PrintFiles(option, path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag, actor_list):
- title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data)
- try:
- if not os.path.exists(path):
- os.makedirs(path)
- if option == 'plex':
-            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
-                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
-                print("<movie>", file=code)
-                print("  <title>" + naming_rule + part + "</title>", file=code)
-                print("  <set>", file=code)
-                print("  </set>", file=code)
-                print("  <studio>" + studio + "</studio>", file=code)
-                print("  <year>" + year + "</year>", file=code)
-                print("  <outline>" + outline + "</outline>", file=code)
-                print("  <plot>" + outline + "</plot>", file=code)
-                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
-                print("  <director>" + director + "</director>", file=code)
-                print("  <poster>poster.jpg</poster>", file=code)
-                print("  <thumb>thumb.png</thumb>", file=code)
-                print("  <fanart>fanart.jpg</fanart>", file=code)
-                try:
-                    for key in actor_list:
-                        print("  <actor>", file=code)
-                        print("    <name>" + key + "</name>", file=code)
-                        print("  </actor>", file=code)
-                except:
-                    pass
-                print("  <maker>" + studio + "</maker>", file=code)
-                print("  <label></label>", file=code)
-                if cn_sub == '1':
-                    print("  <tag>中文字幕</tag>", file=code)
-                try:
-                    for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','):
-                        print("  <tag>" + i + "</tag>", file=code)
-                except:
-                    pass
-                try:
-                    for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','):
-                        print("  <genre>" + i + "</genre>", file=code)
-                except:
-                    pass
-                if cn_sub == '1':
-                    print("  <genre>中文字幕</genre>", file=code)
-                print("  <num>" + number + "</num>", file=code)
-                print("  <release>" + release + "</release>", file=code)
-                print("  <cover>" + cover + "</cover>", file=code)
-                print("  <website>" + website + "</website>", file=code)
-                print("</movie>", file=code)
-            print("[+]Written! " + path + "/" + number + c_word + ".nfo")
- elif option == 'emby':
-            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
-                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
-                print("<movie>", file=code)
-                print("  <title>" + naming_rule + part + "</title>", file=code)
-                print("  <set>", file=code)
-                print("  </set>", file=code)
-                print("  <studio>" + studio + "</studio>", file=code)
-                print("  <year>" + year + "</year>", file=code)
-                print("  <outline>" + outline + "</outline>", file=code)
-                print("  <plot>" + outline + "</plot>", file=code)
-                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
-                print("  <director>" + director + "</director>", file=code)
-                print("  <poster>" + number + c_word + ".png</poster>", file=code)
-                print("  <thumb>" + number + c_word + ".png</thumb>", file=code)
-                print("  <fanart>" + number + c_word + '.jpg' + "</fanart>", file=code)
-                try:
-                    for key in actor_list:
-                        print("  <actor>", file=code)
-                        print("    <name>" + key + "</name>", file=code)
-                        print("  </actor>", file=code)
-                except:
-                    pass
-                print("  <maker>" + studio + "</maker>", file=code)
-                print("  <label></label>", file=code)
-                if cn_sub == '1':
-                    print("  <tag>中文字幕</tag>", file=code)
-                try:
-                    for i in tag:
-                        print("  <tag>" + i + "</tag>", file=code)
-                except:
-                    pass
-                try:
-                    for i in tag:
-                        print("  <genre>" + i + "</genre>", file=code)
-                except:
-                    pass
-                if cn_sub == '1':
-                    print("  <genre>中文字幕</genre>", file=code)
-                print("  <num>" + number + "</num>", file=code)
-                print("  <release>" + release + "</release>", file=code)
-                print("  <cover>" + cover + "</cover>", file=code)
-                print("  <website>" + website + "</website>", file=code)
-                print("</movie>", file=code)
-            print("[+]Written! " + path + "/" + number + c_word + ".nfo")
- elif option == 'kodi':
-            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
-                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
-                print("<movie>", file=code)
-                print("  <title>" + naming_rule + part + "</title>", file=code)
-                print("  <set>", file=code)
-                print("  </set>", file=code)
-                print("  <studio>" + studio + "</studio>", file=code)
-                print("  <year>" + year + "</year>", file=code)
-                print("  <outline>" + outline + "</outline>", file=code)
-                print("  <plot>" + outline + "</plot>", file=code)
-                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
-                print("  <director>" + director + "</director>", file=code)
-                print("  <poster>" + number + c_word + "-poster.jpg</poster>", file=code)
-                print("  <fanart>" + number + c_word + '-fanart.jpg' + "</fanart>", file=code)
-                try:
-                    for key in actor_list:
-                        print("  <actor>", file=code)
-                        print("    <name>" + key + "</name>", file=code)
-                        print("  </actor>", file=code)
-                except:
-                    pass
-                print("  <maker>" + studio + "</maker>", file=code)
-                print("  <label></label>", file=code)
-                if cn_sub == '1':
-                    print("  <tag>中文字幕</tag>", file=code)
-                try:
-                    for i in tag:
-                        print("  <tag>" + i + "</tag>", file=code)
-                except:
-                    pass
-                try:
-                    for i in tag:
-                        print("  <genre>" + i + "</genre>", file=code)
-                except:
-                    pass
-                if cn_sub == '1':
-                    print("  <genre>中文字幕</genre>", file=code)
-                print("  <num>" + number + "</num>", file=code)
-                print("  <release>" + release + "</release>", file=code)
-                print("  <cover>" + cover + "</cover>", file=code)
-                print("  <website>" + website + "</website>", file=code)
-                print("</movie>", file=code)
-            print("[+]Written! " + path + "/" + number + c_word + ".nfo")
- except IOError as e:
- print("[-]Write Failed!")
- print(e)
- moveFailedFolder(filepath, failed_folder)
- return
- except Exception as e1:
- print(e1)
- print("[-]Write Failed!")
- moveFailedFolder(filepath, failed_folder)
- return
-
-
-def cutImage(option, imagecut, path, number, c_word):
- if option == 'plex':
- if imagecut == 1:
- try:
- img = Image.open(path + '/fanart.jpg')
- imgSize = img.size
- w = img.width
- h = img.height
- img2 = img.crop((w / 1.9, 0, w, h))
- img2.save(path + '/poster.jpg')
- except:
- print('[-]Cover cut failed!')
- elif imagecut == 0:
- img = Image.open(path + '/fanart.jpg')
- w = img.width
- h = img.height
- img.save(path + '/poster.jpg')
- elif option == 'emby':
- if imagecut == 1:
- try:
- img = Image.open(path + '/' + number + c_word + '.jpg')
- imgSize = img.size
- w = img.width
- h = img.height
- img2 = img.crop((w / 1.9, 0, w, h))
- img2.save(path + '/' + number + c_word + '.png')
- except:
- print('[-]Cover cut failed!')
- elif imagecut == 0:
- img = Image.open(path + '/' + number + c_word + '.jpg')
- w = img.width
- h = img.height
- img.save(path + '/' + number + c_word + '.png')
- elif option == 'kodi':
- if imagecut == 1:
- try:
- img = Image.open(path + '/' + number + c_word + '-fanart.jpg')
- imgSize = img.size
- w = img.width
- h = img.height
- img2 = img.crop((w / 1.9, 0, w, h))
- img2.save(path + '/' + number + c_word + '-poster.jpg')
- except:
- print('[-]Cover cut failed!')
- elif imagecut == 0:
- img = Image.open(path + '/' + number + c_word + '-fanart.jpg')
- w = img.width
- h = img.height
-            try:
-                img = img.convert('RGB')
-                img.save(path + '/' + number + c_word + '-poster.jpg')
-            except:
-                # fall back to saving without the RGB conversion
-                img.save(path + '/' + number + c_word + '-poster.jpg')
-
-
-def pasteFileToFolder(filepath, path, number, c_word):  # file path, ID number, extension, destination
- houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group())
- try:
-        if config['common']['soft_link'] == '1':  # soft_link=1 means use a symlink
- os.symlink(filepath, path + '/' + number + c_word + houzhui)
- else:
- os.rename(filepath, path + '/' + number + c_word + houzhui)
-        if os.path.exists(os.getcwd() + '/' + number + c_word + '.srt'):  # move subtitles
- os.rename(os.getcwd() + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt')
- print('[+]Sub moved!')
- elif os.path.exists(os.getcwd() + '/' + number + c_word + '.ssa'):
- os.rename(os.getcwd() + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa')
- print('[+]Sub moved!')
- elif os.path.exists(os.getcwd() + '/' + number + c_word + '.sub'):
- os.rename(os.getcwd() + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub')
- print('[+]Sub moved!')
- except FileExistsError:
- print('[-]File Exists! Please check your movie!')
- print('[-]move to the root folder of the program.')
- return
- except PermissionError:
- print('[-]Error! Please run as administrator!')
- return
-
-
-def pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word):  # file path, ID number, extension, destination
- if multi_part == 1:
-        number += part  # number gets the CD1-style suffix appended here
- houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group())
- try:
- if config['common']['soft_link'] == '1':
- os.symlink(filepath, path + '/' + number + part + c_word + houzhui)
- else:
- os.rename(filepath, path + '/' + number + part + c_word + houzhui)
-        if os.path.exists(number + part + c_word + '.srt'):  # move subtitles
- os.rename(number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt')
- print('[+]Sub moved!')
- elif os.path.exists(number + part + c_word + '.ass'):
- os.rename(number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass')
- print('[+]Sub moved!')
- elif os.path.exists(number + part + c_word + '.sub'):
- os.rename(number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub')
- print('[+]Sub moved!')
- print('[!]Success')
- except FileExistsError:
- print('[-]File Exists! Please check your movie!')
- print('[-]move to the root folder of the program.')
- return
- except PermissionError:
- print('[-]Error! Please run as administrator!')
- return
-
-
-def copyRenameJpgToBackdrop(option, path, number, c_word):
- if option == 'plex':
- shutil.copy(path + '/fanart.jpg', path + '/Backdrop.jpg')
- shutil.copy(path + '/poster.jpg', path + '/thumb.png')
- if option == 'emby':
- shutil.copy(path + '/' + number + c_word + '.jpg', path + '/Backdrop.jpg')
- if option == 'kodi':
- shutil.copy(path + '/' + number + c_word + '-fanart.jpg', path + '/Backdrop.jpg')
-
-
-def get_part(filepath, failed_folder):
- try:
- if re.search('-CD\d+', filepath):
- return re.findall('-CD\d+', filepath)[0]
- if re.search('-cd\d+', filepath):
- return re.findall('-cd\d+', filepath)[0]
- except:
- print("[-]failed!Please rename the filename again!")
- moveFailedFolder(filepath, failed_folder)
- return
-
-
-def debug_mode(json_data):
- try:
- if config['debug_mode']['switch'] == '1':
- print('[+] ---Debug info---')
- for i, v in json_data.items():
- if i == 'outline':
- print('[+] -', i, ' :', len(v), 'characters')
- continue
- if i == 'actor_photo' or i == 'year':
- continue
- print('[+] -', "%-11s" % i, ':', v)
- print('[+] ---Debug info---')
- except:
-        pass
-
-
-def core_main(file_path, number_th):
-    # ======================================================================= initialize the variables we need
- multi_part = 0
- part = ''
- c_word = ''
- option = ''
- cn_sub = ''
- config_file = 'config.ini'
- Config = ConfigParser()
- Config.read(config_file, encoding='UTF-8')
- try:
- option = ReadMediaWarehouse()
- except:
- print('[-]Config media_warehouse read failed!')
-    program_mode = Config['common']['main_mode']  # run mode
-    failed_folder = Config['common']['failed_output_folder']  # failed output folder
-    success_folder = Config['common']['success_output_folder']  # success output folder
-    filepath = file_path  # path of the movie file
-    number = number_th
-    json_data = getDataFromJSON(number, filepath, failed_folder)  # define the ID number
- if json_data["number"] != number:
- # fix issue #119
- # the root cause is we normalize the search id
- # PrintFiles() will use the normalized id from website,
- # but pasteFileToFolder() still use the input raw search id
- # so the solution is: use the normalized search id
- number = json_data["number"]
- imagecut = json_data['imagecut']
- tag = json_data['tag']
-    # ======================================================================= detect the -C / -CD suffixes
- if '-CD' in filepath or '-cd' in filepath:
- multi_part = 1
- part = get_part(filepath, failed_folder)
- if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
- cn_sub = '1'
-        c_word = '-C'  # suffix for movies with Chinese subtitles
-
-    CreatFailedFolder(failed_folder)  # create the failed output folder
-    debug_mode(json_data)  # debug-mode check
-    path = creatFolder(success_folder, json_data['location_rule'], json_data, Config)  # create the output folder
-    # ======================================================================= scraping mode
- if program_mode == '1':
- if multi_part == 1:
-            number += part  # number gets the CD1-style suffix appended here
-        smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, Config, filepath, failed_folder)  # check the small cover
-        imageDownload(option, json_data['cover'], number, c_word, path, multi_part, Config, filepath, failed_folder)  # creatFolder returns the numbered path
-        cutImage(option, imagecut, path, number, c_word)  # crop the cover
-        copyRenameJpgToBackdrop(option, path, number, c_word)
-        PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, failed_folder, tag, json_data['actor_list'])  # write the NFO files
-        pasteFileToFolder(filepath, path, number, c_word)  # move the movie file
-    # ======================================================================= organizing mode
- elif program_mode == '2':
-        pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word)  # move the movie file
+# -*- coding: utf-8 -*-
+
+import re
+import os
+import os.path
+import shutil
+from PIL import Image
+import time
+import json
+from ADC_function import *
+from configparser import ConfigParser
+import argparse
+# =========website========
+import fc2fans_club
+import mgstage
+import avsox
+import javbus
+import javdb
+import fanza
+import requests
+import random
+
+
+# ===================== local file handling ===========================
+
+def escapePath(path, Config): # Remove escape literals
+ escapeLiterals = Config['escape']['literals']
+ backslash = '\\'
+ for literal in escapeLiterals:
+ path = path.replace(backslash + literal, '')
+ return path
+
+
+def moveFailedFolder(filepath, failed_folder):
+ print('[-]Move to Failed output folder')
+ shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/')
+ return
+
+
+def CreatFailedFolder(failed_folder):
+    if not os.path.exists(failed_folder + '/'):  # create the failed folder
+ try:
+ os.makedirs(failed_folder + '/')
+ except:
+ print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)")
+ return
+
+
+def getDataFromJSON(file_number, filepath, failed_folder):  # return metadata as JSON
+ """
+ iterate through all services and fetch the data
+ """
+
+ func_mapping = {
+ "avsox": avsox.main,
+ "fc2": fc2fans_club.main,
+ "fanza": fanza.main,
+ "javdb": javdb.main,
+ "javbus": javbus.main,
+ "mgstage": mgstage.main,
+ }
+
+    # default fetch order list, from beginning to end
+ sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox"]
+
+    # if the input file name matches certain rules,
+    # move some web services to the beginning of the list
+ if re.match(r"^\d{5,}", file_number) or (
+ "HEYZO" in file_number or "heyzo" in file_number or "Heyzo" in file_number
+ ):
+ sources.insert(0, sources.pop(sources.index("avsox")))
+ elif re.match(r"\d+\D+", file_number) or (
+ "siro" in file_number or "SIRO" in file_number or "Siro" in file_number
+ ):
+ sources.insert(0, sources.pop(sources.index("fanza")))
+ elif "fc2" in file_number or "FC2" in file_number:
+ sources.insert(0, sources.pop(sources.index("fc2")))
+
+ for source in sources:
+ json_data = json.loads(func_mapping[source](file_number))
+        # if any service returns valid data, stop
+ if getDataState(json_data) != 0:
+ break
+
+ # ================================================网站规则添加结束================================================
+
+ title = json_data['title']
+    actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')  # string to list
+ release = json_data['release']
+ number = json_data['number']
+ studio = json_data['studio']
+ source = json_data['source']
+ runtime = json_data['runtime']
+    outline = json_data['outline']
+ label = json_data['label']
+ year = json_data['year']
+ try:
+ cover_small = json_data['cover_small']
+ except:
+ cover_small = ''
+ imagecut = json_data['imagecut']
+    tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # string to list
+ actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
+
+
+ if title == '' or number == '':
+ print('[-]Movie Data not found!')
+ moveFailedFolder(filepath, failed_folder)
+ return
+
+ # if imagecut == '3':
+ # DownloadFileWithFilename()
+
+    # ==================== sanitize illegal characters ======================  #\/:*?"<>|
+ title = title.replace('\\', '')
+ title = title.replace('/', '')
+ title = title.replace(':', '')
+ title = title.replace('*', '')
+ title = title.replace('?', '')
+ title = title.replace('"', '')
+ title = title.replace('<', '')
+ title = title.replace('>', '')
+ title = title.replace('|', '')
+ release = release.replace('/', '-')
+ tmpArr = cover_small.split(',')
+ if len(tmpArr) > 0:
+ cover_small = tmpArr[0].strip('\"').strip('\'')
+    # ==================== sanitize illegal characters END ==================  #\/:*?"<>|
+
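+    # note: naming_rule and location_rule in config.ini are Python expressions; eval() runs them against the variables prepared above (number, title, actor, ...)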
+ naming_rule = eval(config['Name_Rule']['naming_rule'])
+ location_rule = eval(config['Name_Rule']['location_rule'])
+ if 'actor' in config['Name_Rule']['location_rule'] and len(actor) > 100:
+ print(config['Name_Rule']['location_rule'])
+ location_rule = eval(config['Name_Rule']['location_rule'].replace("actor","'多人作品'"))
+ if 'title' in config['Name_Rule']['location_rule'] and len(title) > 100:
+ location_rule = eval(config['Name_Rule']['location_rule'].replace("title",'number'))
+
+    # write the processed values back into json_data
+ json_data['title'] = title
+ json_data['actor'] = actor
+ json_data['release'] = release
+ json_data['cover_small'] = cover_small
+ json_data['tag'] = tag
+ json_data['naming_rule'] = naming_rule
+ json_data['location_rule'] = location_rule
+ json_data['year'] = year
+ json_data['actor_list'] = actor_list
+ return json_data
+
+
+def get_info(json_data):  # pull the fields out of the json
+ title = json_data['title']
+ studio = json_data['studio']
+ year = json_data['year']
+ outline = json_data['outline']
+ runtime = json_data['runtime']
+ director = json_data['director']
+ actor_photo = json_data['actor_photo']
+ release = json_data['release']
+ number = json_data['number']
+ cover = json_data['cover']
+ website = json_data['website']
+ return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website
+
+
+def smallCoverCheck(path, number, imagecut, cover_small, c_word, option, Config, filepath, failed_folder):
+ if imagecut == 3:
+ if option == 'emby':
+ DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
+ try:
+ img = Image.open(path + '/1.jpg')
+ except Exception:
+ img = Image.open('1.jpg')
+ w = img.width
+ h = img.height
+ img.save(path + '/' + number + c_word + '.png')
+ time.sleep(1)
+ os.remove(path + '/1.jpg')
+ if option == 'kodi':
+ DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
+ try:
+ img = Image.open(path + '/1.jpg')
+ except Exception:
+ img = Image.open('1.jpg')
+ w = img.width
+ h = img.height
+ img.save(path + '/' + number + c_word + '-poster.jpg')
+ time.sleep(1)
+ os.remove(path + '/1.jpg')
+ if option == 'plex':
+ DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
+ try:
+ img = Image.open(path + '/1.jpg')
+ except Exception:
+ img = Image.open('1.jpg')
+ w = img.width
+ h = img.height
+ img.save(path + '/poster.jpg')
+ os.remove(path + '/1.jpg')
+
+
+def creatFolder(success_folder, location_rule, json_data, Config):  # create the output folder
+ title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website= get_info(json_data)
+    if len(location_rule) > 240:  # create the success output folder
+        path = success_folder + '/' + location_rule.replace(
+            "'actor'", "'manypeople'", 3).replace(
+            "actor", "'manypeople'", 3)  # path is the directory for the movie and its metadata
+ else:
+ path = success_folder + '/' + location_rule
+ # print(path)
+ if not os.path.exists(path):
+ path = escapePath(path, Config)
+ try:
+ os.makedirs(path)
+ except:
+ path = success_folder + '/' + location_rule.replace('/[' + number + ']-' + title, "/number")
+ path = escapePath(path, Config)
+
+ os.makedirs(path)
+ return path
+
+
+# =====================资源下载部分===========================
+def DownloadFileWithFilename(url, filename, path, Config, filepath, failed_folder):  # path example: a photo or video folder inside the project folder
+ proxy, timeout, retry_count = get_network_settings()
+ i = 0
+
+ while i < retry_count:
+ try:
+ if not proxy == '':
+ if not os.path.exists(path):
+ os.makedirs(path)
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+ r = requests.get(url, headers=headers, timeout=timeout,
+ proxies={"http": "http://" + str(proxy), "https": "https://" + str(proxy)})
+ if r == '':
+ print('[-]Movie Data not found!')
+ return
+ with open(str(path) + "/" + filename, "wb") as code:
+ code.write(r.content)
+ return
+ else:
+ if not os.path.exists(path):
+ os.makedirs(path)
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+ r = requests.get(url, timeout=timeout, headers=headers)
+ if r == '':
+ print('[-]Movie Data not found!')
+ return
+ with open(str(path) + "/" + filename, "wb") as code:
+ code.write(r.content)
+ return
+ except requests.exceptions.RequestException:
+ i += 1
+ print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
+ except requests.exceptions.ConnectionError:
+ i += 1
+ print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
+ except requests.exceptions.ProxyError:
+ i += 1
+ print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
+ except requests.exceptions.ConnectTimeout:
+ i += 1
+ print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
+    print('[-]Connect Failed! Please check your Proxy or Network!')
+    # hand the failure back to the caller (imageDownload moves the file to failed)
+    return 'failed'
+
+
+def imageDownload(option, cover, number, c_word, path, multi_part, Config, filepath, failed_folder):  # move to failed if the cover download fails
+ if option == 'emby':
+ if DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) == 'failed':
+ moveFailedFolder(filepath, failed_folder)
+ return
+ DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder)
+ if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+ print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
+ return
+ i = 1
+ while i <= int(config['proxy']['retry']):
+ if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + config['proxy']['retry'] + ']')
+ DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder)
+ i = i + 1
+ continue
+ else:
+ break
+ if multi_part == 1:
+ old_name = os.path.join(path, number + c_word + '.jpg')
+ new_name = os.path.join(path, number + c_word + '.jpg')
+ os.rename(old_name, new_name)
+ print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
+ else:
+ print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
+ elif option == 'plex':
+ if DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) == 'failed':
+ moveFailedFolder(filepath, failed_folder)
+ return
+ DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
+ if not os.path.getsize(path + '/fanart.jpg') == 0:
+ print('[+]Image Downloaded!', path + '/fanart.jpg')
+ return
+ i = 1
+ while i <= int(config['proxy']['retry']):
+ if os.path.getsize(path + '/fanart.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + config['proxy']['retry'] + ']')
+ DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
+ i = i + 1
+ continue
+ else:
+ break
+        if os.path.getsize(path + '/fanart.jpg') == 0:
+            print('[!]Image Download Failed! Trying again.')
+            DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
+ print('[+]Image Downloaded!', path + '/fanart.jpg')
+ elif option == 'kodi':
+ if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) == 'failed':
+ moveFailedFolder(filepath, failed_folder)
+ return
+ DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder)
+ if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
+ print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
+ return
+ i = 1
+ while i <= int(config['proxy']['retry']):
+ if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + config['proxy']['retry'] + ']')
+ DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder)
+ i = i + 1
+ continue
+ else:
+ break
+ print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
+
+
+def PrintFiles(option, path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag, actor_list):
+ title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data)
+ try:
+ if not os.path.exists(path):
+ os.makedirs(path)
+ if option == 'plex':
+            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
+                print("<movie>", file=code)
+                print("  <title>" + naming_rule + part + "</title>", file=code)
+                print("  <set>", file=code)
+                print("  </set>", file=code)
+                print("  <studio>" + studio + "</studio>", file=code)
+                print("  <year>" + year + "</year>", file=code)
+                print("  <outline>" + outline + "</outline>", file=code)
+                print("  <plot>" + outline + "</plot>", file=code)
+                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("  <director>" + director + "</director>", file=code)
+                print("  <poster>poster.jpg</poster>", file=code)
+                print("  <thumb>thumb.png</thumb>", file=code)
+                print("  <fanart>fanart.jpg</fanart>", file=code)
+                try:
+                    for key in actor_list:
+                        print("  <actor>", file=code)
+                        print("    <name>" + key + "</name>", file=code)
+                        print("  </actor>", file=code)
+                except:
+                    pass
+                print("  <maker>" + studio + "</maker>", file=code)
+                print("  <label></label>", file=code)
+                if cn_sub == '1':
+                    print("  <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','):
+                        print("  <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','):
+                        print("  <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("  <genre>中文字幕</genre>", file=code)
+                print("  <num>" + number + "</num>", file=code)
+                print("  <release>" + release + "</release>", file=code)
+                print("  <cover>" + cover + "</cover>", file=code)
+                print("  <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+            print("[+]Written! " + path + "/" + number + c_word + ".nfo")
+ elif option == 'emby':
+            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
+                print("<movie>", file=code)
+                print("  <title>" + naming_rule + part + "</title>", file=code)
+                print("  <set>", file=code)
+                print("  </set>", file=code)
+                print("  <studio>" + studio + "</studio>", file=code)
+                print("  <year>" + year + "</year>", file=code)
+                print("  <outline>" + outline + "</outline>", file=code)
+                print("  <plot>" + outline + "</plot>", file=code)
+                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("  <director>" + director + "</director>", file=code)
+                print("  <poster>" + number + c_word + ".png</poster>", file=code)
+                print("  <thumb>" + number + c_word + ".png</thumb>", file=code)
+                print("  <fanart>" + number + c_word + '.jpg' + "</fanart>", file=code)
+                try:
+                    for key in actor_list:
+                        print("  <actor>", file=code)
+                        print("    <name>" + key + "</name>", file=code)
+                        print("  </actor>", file=code)
+                except:
+                    pass
+                print("  <maker>" + studio + "</maker>", file=code)
+                print("  <label></label>", file=code)
+                if cn_sub == '1':
+                    print("  <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in tag:
+                        print("  <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in tag:
+                        print("  <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("  <genre>中文字幕</genre>", file=code)
+                print("  <num>" + number + "</num>", file=code)
+                print("  <release>" + release + "</release>", file=code)
+                print("  <cover>" + cover + "</cover>", file=code)
+                print("  <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+            print("[+]Written! " + path + "/" + number + c_word + ".nfo")
+ elif option == 'kodi':
+            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
+                print("<movie>", file=code)
+                print("  <title>" + naming_rule + part + "</title>", file=code)
+                print("  <set>", file=code)
+                print("  </set>", file=code)
+                print("  <studio>" + studio + "</studio>", file=code)
+                print("  <year>" + year + "</year>", file=code)
+                print("  <outline>" + outline + "</outline>", file=code)
+                print("  <plot>" + outline + "</plot>", file=code)
+                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("  <director>" + director + "</director>", file=code)
+                print("  <poster>" + number + c_word + "-poster.jpg</poster>", file=code)
+                print("  <fanart>" + number + c_word + '-fanart.jpg' + "</fanart>", file=code)
+                try:
+                    for key in actor_list:
+                        print("  <actor>", file=code)
+                        print("    <name>" + key + "</name>", file=code)
+                        print("  </actor>", file=code)
+                except:
+                    pass
+                print("  <maker>" + studio + "</maker>", file=code)
+                print("  <label></label>", file=code)
+                if cn_sub == '1':
+                    print("  <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in tag:
+                        print("  <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in tag:
+                        print("  <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("  <genre>中文字幕</genre>", file=code)
+                print("  <num>" + number + "</num>", file=code)
+                print("  <release>" + release + "</release>", file=code)
+                print("  <cover>" + cover + "</cover>", file=code)
+                print("  <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+            print("[+]Written! " + path + "/" + number + c_word + ".nfo")
+ except IOError as e:
+ print("[-]Write Failed!")
+ print(e)
+ moveFailedFolder(filepath, failed_folder)
+ return
+ except Exception as e1:
+ print(e1)
+ print("[-]Write Failed!")
+ moveFailedFolder(filepath, failed_folder)
+ return
+
+
+def cutImage(option, imagecut, path, number, c_word):
+ if option == 'plex':
+ if imagecut == 1:
+ try:
+ img = Image.open(path + '/fanart.jpg')
+ imgSize = img.size
+ w = img.width
+ h = img.height
+ img2 = img.crop((w / 1.9, 0, w, h))
+ img2.save(path + '/poster.jpg')
+ except:
+ print('[-]Cover cut failed!')
+ elif imagecut == 0:
+ img = Image.open(path + '/fanart.jpg')
+ w = img.width
+ h = img.height
+ img.save(path + '/poster.jpg')
+ elif option == 'emby':
+ if imagecut == 1:
+ try:
+ img = Image.open(path + '/' + number + c_word + '.jpg')
+ imgSize = img.size
+ w = img.width
+ h = img.height
+ img2 = img.crop((w / 1.9, 0, w, h))
+ img2.save(path + '/' + number + c_word + '.png')
+ except:
+ print('[-]Cover cut failed!')
+ elif imagecut == 0:
+ img = Image.open(path + '/' + number + c_word + '.jpg')
+ w = img.width
+ h = img.height
+ img.save(path + '/' + number + c_word + '.png')
+ elif option == 'kodi':
+ if imagecut == 1:
+ try:
+ img = Image.open(path + '/' + number + c_word + '-fanart.jpg')
+ imgSize = img.size
+ w = img.width
+ h = img.height
+ img2 = img.crop((w / 1.9, 0, w, h))
+ img2.save(path + '/' + number + c_word + '-poster.jpg')
+ except:
+ print('[-]Cover cut failed!')
+ elif imagecut == 0:
+ img = Image.open(path + '/' + number + c_word + '-fanart.jpg')
+ w = img.width
+ h = img.height
+ try:
+ img = img.convert('RGB')
+ img.save(path + '/' + number + c_word + '-poster.jpg')
+ except:
+ img = img.convert('RGB')
+ img.save(path + '/' + number + c_word + '-poster.jpg')
+
+
+def pasteFileToFolder(filepath, path, number, c_word): # 文件路径,番号,后缀,要移动至的位置
+ houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group())
+ try:
+ if config['common']['soft_link'] == '1': # 如果soft_link=1 使用软链接
+ os.symlink(filepath, path + '/' + number + c_word + houzhui)
+ else:
+ os.rename(filepath, path + '/' + number + c_word + houzhui)
+ if os.path.exists(os.getcwd() + '/' + number + c_word + '.srt'): # 字幕移动
+ os.rename(os.getcwd() + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt')
+ print('[+]Sub moved!')
+ elif os.path.exists(os.getcwd() + '/' + number + c_word + '.ssa'):
+ os.rename(os.getcwd() + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa')
+ print('[+]Sub moved!')
+ elif os.path.exists(os.getcwd() + '/' + number + c_word + '.sub'):
+ os.rename(os.getcwd() + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub')
+ print('[+]Sub moved!')
+ except FileExistsError:
+ print('[-]File Exists! Please check your movie!')
+ print('[-]move to the root folder of the program.')
+ return
+ except PermissionError:
+ print('[-]Error! Please run as administrator!')
+ return
+
+
+def pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word): # 文件路径,番号,后缀,要移动至的位置
+ if multi_part == 1:
+ number += part # 这时number会被附加上CD1后缀
+ houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group())
+ try:
+ if config['common']['soft_link'] == '1':
+ os.symlink(filepath, path + '/' + number + part + c_word + houzhui)
+ else:
+ os.rename(filepath, path + '/' + number + part + c_word + houzhui)
+ if os.path.exists(number + '.srt'): # 字幕移动
+ os.rename(number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt')
+ print('[+]Sub moved!')
+ elif os.path.exists(number + part + c_word + '.ass'):
+ os.rename(number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass')
+ print('[+]Sub moved!')
+ elif os.path.exists(number + part + c_word + '.sub'):
+ os.rename(number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub')
+ print('[+]Sub moved!')
+ print('[!]Success')
+ except FileExistsError:
+ print('[-]File Exists! Please check your movie!')
+ print('[-]move to the root folder of the program.')
+ return
+ except PermissionError:
+ print('[-]Error! Please run as administrator!')
+ return
+
+
+def copyRenameJpgToBackdrop(option, path, number, c_word):
+ if option == 'plex':
+ shutil.copy(path + '/fanart.jpg', path + '/Backdrop.jpg')
+ shutil.copy(path + '/poster.jpg', path + '/thumb.png')
+ if option == 'emby':
+ shutil.copy(path + '/' + number + c_word + '.jpg', path + '/Backdrop.jpg')
+ if option == 'kodi':
+ shutil.copy(path + '/' + number + c_word + '-fanart.jpg', path + '/Backdrop.jpg')
+
+
+def get_part(filepath, failed_folder):
+ try:
+ if re.search('-CD\d+', filepath):
+ return re.findall('-CD\d+', filepath)[0]
+ if re.search('-cd\d+', filepath):
+ return re.findall('-cd\d+', filepath)[0]
+ except:
+ print("[-]failed!Please rename the filename again!")
+ moveFailedFolder(filepath, failed_folder)
+ return
+
+
+def debug_mode(json_data):
+ try:
+ if config['debug_mode']['switch'] == '1':
+ print('[+] ---Debug info---')
+ for i, v in json_data.items():
+ if i == 'outline':
+ print('[+] -', i, ' :', len(v), 'characters')
+ continue
+ if i == 'actor_photo' or i == 'year':
+ continue
+ print('[+] -', "%-11s" % i, ':', v)
+ print('[+] ---Debug info---')
+ except:
+ aaa = ''
+
+
+def core_main(file_path, number_th):
+ # =======================================================================初始化所需变量
+ multi_part = 0
+ part = ''
+ c_word = ''
+ option = ''
+ cn_sub = ''
+ config_file = 'config.ini'
+ Config = ConfigParser()
+ Config.read(config_file, encoding='UTF-8')
+ try:
+ option = ReadMediaWarehouse()
+ except:
+ print('[-]Config media_warehouse read failed!')
+ program_mode = Config['common']['main_mode'] # 运行模式
+ failed_folder = Config['common']['failed_output_folder'] # 失败输出目录
+ success_folder = Config['common']['success_output_folder'] # 成功输出目录
+ filepath = file_path # 影片的路径
+ number = number_th
+ json_data = getDataFromJSON(number, filepath, failed_folder) # 定义番号
+ if json_data["number"] != number:
+ # fix issue #119
+ # the root cause is we normalize the search id
+ # PrintFiles() will use the normalized id from website,
+ # but pasteFileToFolder() still use the input raw search id
+ # so the solution is: use the normalized search id
+ number = json_data["number"]
+ imagecut = json_data['imagecut']
+ tag = json_data['tag']
+ # =======================================================================判断-C,-CD后缀
+ if '-CD' in filepath or '-cd' in filepath:
+ multi_part = 1
+ part = get_part(filepath, failed_folder)
+ if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
+ cn_sub = '1'
+ c_word = '-C' # 中文字幕影片后缀
+
+ CreatFailedFolder(failed_folder) # 创建输出失败目录
+ debug_mode(json_data) # 调试模式检测
+ path = creatFolder(success_folder, json_data['location_rule'], json_data, Config) # 创建文件夹
+ # =======================================================================刮削模式
+ if program_mode == '1':
+ if multi_part == 1:
+ number += part # 这时number会被附加上CD1后缀
+ smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, Config, filepath, failed_folder) # 检查小封面
+ imageDownload(option, json_data['cover'], number, c_word, path, multi_part, Config, filepath, failed_folder) # creatFoder会返回番号路径
+ cutImage(option, imagecut, path, number, c_word) # 裁剪图
+ copyRenameJpgToBackdrop(option, path, number, c_word)
+ PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, failed_folder, tag, json_data['actor_list']) # 打印文件
+ pasteFileToFolder(filepath, path, number, c_word) # 移动文件
+ # =======================================================================整理模式
+ elif program_mode == '2':
+ pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word) # 移动文件
diff --git a/fanza.py b/fanza.py
index 87c8be0..72632dc 100644
--- a/fanza.py
+++ b/fanza.py
@@ -1,229 +1,229 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-import json
-import re
-
-from lxml import etree
-
-from ADC_function import *
-
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-
-def getTitle(text):
- html = etree.fromstring(text, etree.HTMLParser())
- result = html.xpath('//*[@id="title"]/text()')[0]
- return result
-
-
-def getActor(text):
- # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
- html = etree.fromstring(text, etree.HTMLParser())
- result = (
- str(
- html.xpath(
- "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
- )
- )
- .strip(" ['']")
- .replace("', '", ",")
- )
- return result
-
-
-def getStudio(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
- )[0]
- except:
- result = html.xpath(
- "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
- )[0]
- return result
-
-
-def getRuntime(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
- return re.search(r"\d+", str(result)).group()
-
-
-def getLabel(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
- )[0]
- except:
- result = html.xpath(
- "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
- )[0]
- return result
-
-
-def getNum(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
- )[0]
- except:
- result = html.xpath(
- "//td[contains(text(),'品番:')]/following-sibling::td/text()"
- )[0]
- return result
-
-
-def getYear(getRelease):
- try:
- result = str(re.search(r"\d{4}", getRelease).group())
- return result
- except:
- return getRelease
-
-
-def getRelease(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
- )[0].lstrip("\n")
- except:
- result = html.xpath(
- "//td[contains(text(),'発売日:')]/following-sibling::td/text()"
- )[0].lstrip("\n")
- return result
-
-
-def getTag(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
- )
- except:
- result = html.xpath(
- "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
- )
- return result
-
-
-def getCover(text, number):
- html = etree.fromstring(text, etree.HTMLParser())
- cover_number = number
- try:
- result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
- except:
- # sometimes fanza modify _ to \u0005f for image id
- if "_" in cover_number:
- cover_number = cover_number.replace("_", r"\u005f")
- try:
- result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
- except:
- # (TODO) handle more edge case
- # print(html)
- # raise exception here, same behavior as before
- # people's major requirement is fetching the picture
- raise ValueError("can not find image")
- return result
-
-
-def getDirector(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
- )[0]
- except:
- result = html.xpath(
- "//td[contains(text(),'監督:')]/following-sibling::td/text()"
- )[0]
- return result
-
-
-def getOutline(text):
- html = etree.fromstring(text, etree.HTMLParser())
- try:
- result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
- "\n", ""
- )
- if result == "":
- result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
- "\n", ""
- )
- except:
- # (TODO) handle more edge case
- # print(html)
- return ""
- return result
-
-
-def main(number):
- # fanza allow letter + number + underscore, normalize the input here
- # @note: I only find the usage of underscore as h_test123456789
- fanza_search_number = number
- # AV_Data_Capture.py.getNumber() over format the input, restore the h_ prefix
- if fanza_search_number.startswith("h-"):
- fanza_search_number = fanza_search_number.replace("h-", "h_")
-
- fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()
-
- fanza_urls = [
- "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
- "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
- "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
- "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
- ]
- chosen_url = ""
- for url in fanza_urls:
- chosen_url = url + fanza_search_number
- htmlcode = get_html(chosen_url)
- if "404 Not Found" not in htmlcode:
- break
- if "404 Not Found" in htmlcode:
- return json.dumps({"title": "",})
- try:
- # for some old page, the input number does not match the page
- # for example, the url will be cid=test012
- # but the hinban on the page is test00012
- # so get the hinban first, and then pass it to following functions
- fanza_hinban = getNum(htmlcode)
- data = {
- "title": getTitle(htmlcode).strip(getActor(htmlcode)),
- "studio": getStudio(htmlcode),
- "outline": getOutline(htmlcode),
- "runtime": getRuntime(htmlcode),
- "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
- "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
- "release": getRelease(htmlcode),
- "number": fanza_hinban,
- "cover": getCover(htmlcode, fanza_hinban),
- "imagecut": 1,
- "tag": getTag(htmlcode),
- "label": getLabel(htmlcode),
- "year": getYear(
- getRelease(htmlcode)
- ), # str(re.search('\d{4}',getRelease(a)).group()),
- "actor_photo": "",
- "website": chosen_url,
- "source": "fanza.py",
- }
- except:
- data = {
- "title": "",
- }
- js = json.dumps(
- data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
- ) # .encode('UTF-8')
- return js
-
-
-if __name__ == "__main__":
- # print(main("DV-1562"))
- # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
- # print(main("ipx292"))
- pass
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import json
+import re
+
+from lxml import etree
+
+from ADC_function import *
+
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+
+def getTitle(text):
+ html = etree.fromstring(text, etree.HTMLParser())
+ result = html.xpath('//*[@id="title"]/text()')[0]
+ return result
+
+
+def getActor(text):
+ # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+ html = etree.fromstring(text, etree.HTMLParser())
+ result = (
+ str(
+ html.xpath(
+ "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
+ )
+ )
+ .strip(" ['']")
+ .replace("', '", ",")
+ )
+ return result
+
+
+def getStudio(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
+ )[0]
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
+ )[0]
+ return result
+
+
+def getRuntime(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
+ return re.search(r"\d+", str(result)).group()
+
+
+def getLabel(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
+ )[0]
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
+ )[0]
+ return result
+
+
+def getNum(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
+ )[0]
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'品番:')]/following-sibling::td/text()"
+ )[0]
+ return result
+
+
+def getYear(getRelease):
+ try:
+ result = str(re.search(r"\d{4}", getRelease).group())
+ return result
+ except:
+ return getRelease
+
+
+def getRelease(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
+ )[0].lstrip("\n")
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'発売日:')]/following-sibling::td/text()"
+ )[0].lstrip("\n")
+ return result
+
+
+def getTag(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
+ )
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
+ )
+ return result
+
+
+def getCover(text, number):
+ html = etree.fromstring(text, etree.HTMLParser())
+ cover_number = number
+ try:
+ result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
+ except:
+ # sometimes fanza modify _ to \u0005f for image id
+ if "_" in cover_number:
+ cover_number = cover_number.replace("_", r"\u005f")
+ try:
+ result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
+ except:
+ # (TODO) handle more edge case
+ # print(html)
+ # raise exception here, same behavior as before
+ # people's major requirement is fetching the picture
+ raise ValueError("can not find image")
+ return result
+
+
+def getDirector(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
+ )[0]
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'監督:')]/following-sibling::td/text()"
+ )[0]
+ return result
+
+
+def getOutline(text):
+ html = etree.fromstring(text, etree.HTMLParser())
+ try:
+ result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
+ "\n", ""
+ )
+ if result == "":
+ result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
+ "\n", ""
+ )
+ except:
+ # (TODO) handle more edge case
+ # print(html)
+ return ""
+ return result
+
+
+def main(number):
+ # fanza allow letter + number + underscore, normalize the input here
+ # @note: I only find the usage of underscore as h_test123456789
+ fanza_search_number = number
+ # AV_Data_Capture.py.getNumber() over format the input, restore the h_ prefix
+ if fanza_search_number.startswith("h-"):
+ fanza_search_number = fanza_search_number.replace("h-", "h_")
+
+ fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()
+
+ fanza_urls = [
+ "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
+ "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
+ "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
+ "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
+ ]
+ chosen_url = ""
+ for url in fanza_urls:
+ chosen_url = url + fanza_search_number
+ htmlcode = get_html(chosen_url)
+ if "404 Not Found" not in htmlcode:
+ break
+ if "404 Not Found" in htmlcode:
+ return json.dumps({"title": "",})
+ try:
+ # for some old page, the input number does not match the page
+ # for example, the url will be cid=test012
+ # but the hinban on the page is test00012
+ # so get the hinban first, and then pass it to following functions
+ fanza_hinban = getNum(htmlcode)
+ data = {
+ "title": getTitle(htmlcode).strip(getActor(htmlcode)),
+ "studio": getStudio(htmlcode),
+ "outline": getOutline(htmlcode),
+ "runtime": getRuntime(htmlcode),
+ "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
+ "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
+ "release": getRelease(htmlcode),
+ "number": fanza_hinban,
+ "cover": getCover(htmlcode, fanza_hinban),
+ "imagecut": 1,
+ "tag": getTag(htmlcode),
+ "label": getLabel(htmlcode),
+ "year": getYear(
+ getRelease(htmlcode)
+ ), # str(re.search('\d{4}',getRelease(a)).group()),
+ "actor_photo": "",
+ "website": chosen_url,
+ "source": "fanza.py",
+ }
+ except:
+ data = {
+ "title": "",
+ }
+ js = json.dumps(
+ data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
+ ) # .encode('UTF-8')
+ return js
+
+
+if __name__ == "__main__":
+ # print(main("DV-1562"))
+ # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
+ # print(main("ipx292"))
+ pass
diff --git a/fc2fans_club.py b/fc2fans_club.py
index 3215e49..9dfeb24 100755
--- a/fc2fans_club.py
+++ b/fc2fans_club.py
@@ -1,162 +1,162 @@
-import re
-from lxml import etree#need install
-import json
-import ADC_function
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-def getTitle(htmlcode): #获取厂商
- #print(htmlcode)
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
- result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1)
- #print(result2)
- return result2
-def getActor(htmlcode):
- try:
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
- return result
- except:
- return ''
-def getStudio(htmlcode): #获取厂商
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
- return result
-def getNum(htmlcode): #获取番号
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
- #print(result)
- return result
-def getRelease(htmlcode2): #
- #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
- html=etree.fromstring(htmlcode2,etree.HTMLParser())
- result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
- return result
-def getCover(htmlcode,number,htmlcode2): #获取厂商 #
- #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
- html = etree.fromstring(htmlcode2, etree.HTMLParser())
- result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
- if result == '':
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
- return 'https://fc2club.com' + result2
- return 'http:' + result
-def getOutline(htmlcode2): #获取番号 #
- html = etree.fromstring(htmlcode2, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
- return result
-def getTag(htmlcode): #获取番号
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
- return result.strip(" ['']").replace("'",'').replace(' ','')
-def getYear(release):
- try:
- result = re.search('\d{4}',release).group()
- return result
- except:
- return ''
-
-def getTitle_fc2com(htmlcode): #获取厂商
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
- return result
-def getActor_fc2com(htmlcode):
- try:
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
- return result
- except:
- return ''
-def getStudio_fc2com(htmlcode): #获取厂商
- try:
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
- return result
- except:
- return ''
-def getNum_fc2com(htmlcode): #获取番号
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
- return result
-def getRelease_fc2com(htmlcode2): #
- html=etree.fromstring(htmlcode2,etree.HTMLParser())
- result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
- return result
-def getCover_fc2com(htmlcode2): #获取厂商 #
- html = etree.fromstring(htmlcode2, etree.HTMLParser())
- result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
- return 'http:' + result
-def getOutline_fc2com(htmlcode2): #获取番号 #
- html = etree.fromstring(htmlcode2, etree.HTMLParser())
- result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
- return result
-def getTag_fc2com(number): #获取番号
- htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape'))
- result = re.findall('"tag":"(.*?)"', htmlcode)
- return result
-def getYear_fc2com(release):
- try:
- result = re.search('\d{4}',release).group()
- return result
- except:
- return ''
-
-def main(number):
- try:
- htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
- htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
- actor = getActor(htmlcode)
- if getActor(htmlcode) == '':
- actor = 'FC2系列'
- dic = {
- 'title': getTitle(htmlcode),
- 'studio': getStudio(htmlcode),
- 'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
- 'outline': '',#getOutline(htmlcode2),
- 'runtime': getYear(getRelease(htmlcode)),
- 'director': getStudio(htmlcode),
- 'actor': actor,
- 'release': getRelease(number),
- 'number': 'FC2-'+number,
- 'label': '',
- 'cover': getCover(htmlcode,number,htmlcode2),
- 'imagecut': 0,
- 'tag': getTag(htmlcode),
- 'actor_photo':'',
- 'website': 'https://fc2club.com//html/FC2-' + number + '.html',
- 'source':'https://fc2club.com//html/FC2-' + number + '.html',
- }
- if dic['title'] == '':
- htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'})
- actor = getActor(htmlcode)
- if getActor(htmlcode) == '':
- actor = 'FC2系列'
- dic = {
- 'title': getTitle_fc2com(htmlcode2),
- 'studio': getStudio_fc2com(htmlcode2),
- 'year': '', # str(re.search('\d{4}',getRelease(number)).group()),
- 'outline': getOutline_fc2com(htmlcode2),
- 'runtime': getYear_fc2com(getRelease(htmlcode2)),
- 'director': getStudio_fc2com(htmlcode2),
- 'actor': actor,
- 'release': getRelease_fc2com(number),
- 'number': 'FC2-' + number,
- 'cover': getCover_fc2com(htmlcode2),
- 'imagecut': 0,
- 'tag': getTag_fc2com(number),
- 'label': '',
- 'actor_photo': '',
- 'website': 'http://adult.contents.fc2.com/article/' + number + '/',
- 'source': 'http://adult.contents.fc2.com/article/' + number + '/',
- }
- except Exception as e:
- # (TODO) better handle this
- # print(e)
- dic = {"title": ""}
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
- return js
-
-
-#print(main('1252953'))
+import re
+from lxml import etree#need install
+import json
+import ADC_function
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(htmlcode): #获取厂商
+ #print(htmlcode)
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
+ result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1)
+ #print(result2)
+ return result2
+def getActor(htmlcode):
+ try:
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
+ return result
+ except:
+ return ''
+def getStudio(htmlcode): #获取厂商
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
+ return result
+def getNum(htmlcode): #获取番号
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+ #print(result)
+ return result
+def getRelease(htmlcode2): #
+ #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
+ html=etree.fromstring(htmlcode2,etree.HTMLParser())
+ result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
+ return result
+def getCover(htmlcode,number,htmlcode2): #获取厂商 #
+ #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
+ html = etree.fromstring(htmlcode2, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
+ if result == '':
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
+ return 'https://fc2club.com' + result2
+ return 'http:' + result
+def getOutline(htmlcode2): #获取番号 #
+ html = etree.fromstring(htmlcode2, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
+ return result
+def getTag(htmlcode): #获取番号
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
+ return result.strip(" ['']").replace("'",'').replace(' ','')
+def getYear(release):
+ try:
+ result = re.search('\d{4}',release).group()
+ return result
+ except:
+ return ''
+
+def getTitle_fc2com(htmlcode): #获取厂商
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
+ return result
+def getActor_fc2com(htmlcode):
+ try:
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
+ return result
+ except:
+ return ''
+def getStudio_fc2com(htmlcode): #获取厂商
+ try:
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
+ return result
+ except:
+ return ''
+def getNum_fc2com(htmlcode): #获取番号
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+ return result
+def getRelease_fc2com(htmlcode2): #
+ html=etree.fromstring(htmlcode2,etree.HTMLParser())
+ result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
+ return result
+def getCover_fc2com(htmlcode2): #获取厂商 #
+ html = etree.fromstring(htmlcode2, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
+ return 'http:' + result
+def getOutline_fc2com(htmlcode2): #获取番号 #
+ html = etree.fromstring(htmlcode2, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
+ return result
+def getTag_fc2com(number): #获取番号
+ htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape'))
+ result = re.findall('"tag":"(.*?)"', htmlcode)
+ return result
+def getYear_fc2com(release):
+ try:
+ result = re.search('\d{4}',release).group()
+ return result
+ except:
+ return ''
+
+def main(number):
+ try:
+ htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
+ htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
+ actor = getActor(htmlcode)
+ if getActor(htmlcode) == '':
+ actor = 'FC2系列'
+ dic = {
+ 'title': getTitle(htmlcode),
+ 'studio': getStudio(htmlcode),
+ 'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
+ 'outline': '',#getOutline(htmlcode2),
+ 'runtime': getYear(getRelease(htmlcode)),
+ 'director': getStudio(htmlcode),
+ 'actor': actor,
+ 'release': getRelease(number),
+ 'number': 'FC2-'+number,
+ 'label': '',
+ 'cover': getCover(htmlcode,number,htmlcode2),
+ 'imagecut': 0,
+ 'tag': getTag(htmlcode),
+ 'actor_photo':'',
+ 'website': 'https://fc2club.com//html/FC2-' + number + '.html',
+ 'source':'https://fc2club.com//html/FC2-' + number + '.html',
+ }
+ if dic['title'] == '':
+ htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'})
+ actor = getActor(htmlcode)
+ if getActor(htmlcode) == '':
+ actor = 'FC2系列'
+ dic = {
+ 'title': getTitle_fc2com(htmlcode2),
+ 'studio': getStudio_fc2com(htmlcode2),
+ 'year': '', # str(re.search('\d{4}',getRelease(number)).group()),
+ 'outline': getOutline_fc2com(htmlcode2),
+ 'runtime': getYear_fc2com(getRelease(htmlcode2)),
+ 'director': getStudio_fc2com(htmlcode2),
+ 'actor': actor,
+ 'release': getRelease_fc2com(number),
+ 'number': 'FC2-' + number,
+ 'cover': getCover_fc2com(htmlcode2),
+ 'imagecut': 0,
+ 'tag': getTag_fc2com(number),
+ 'label': '',
+ 'actor_photo': '',
+ 'website': 'http://adult.contents.fc2.com/article/' + number + '/',
+ 'source': 'http://adult.contents.fc2.com/article/' + number + '/',
+ }
+ except Exception as e:
+ # (TODO) better handle this
+ # print(e)
+ dic = {"title": ""}
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
+ return js
+
+
+#print(main('1252953'))
diff --git a/javbus.py b/javbus.py
index aa18d2a..83d61f8 100755
--- a/javbus.py
+++ b/javbus.py
@@ -1,138 +1,138 @@
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
-from bs4 import BeautifulSoup#need install
-import json
-from ADC_function import *
-
-def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = soup.find_all(attrs={'class': 'star-name'})
- d={}
- for i in a:
- l=i.a['href']
- t=i.get_text()
- html = etree.fromstring(get_html(l), etree.HTMLParser())
- p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
- p2={t:p}
- d.update(p2)
- return d
-def getTitle(htmlcode): #获取标题
- doc = pq(htmlcode)
- title=str(doc('div.container h3').text()).replace(' ','-')
- try:
- title2 = re.sub('n\d+-','',title)
- return title2
- except:
- return title
-def getStudio(htmlcode): #获取厂商
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
- return result
-def getYear(htmlcode): #获取年份
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
- return result
-def getCover(htmlcode): #获取封面链接
- doc = pq(htmlcode)
- image = doc('a.bigImage')
- return image.attr('href')
-def getRelease(htmlcode): #获取出版日期
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
- return result
-def getRuntime(htmlcode): #获取分钟
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = soup.find(text=re.compile('分鐘'))
- return a
-def getActor(htmlcode): #获取女优
- b=[]
- soup=BeautifulSoup(htmlcode,'lxml')
- a=soup.find_all(attrs={'class':'star-name'})
- for i in a:
- b.append(i.get_text())
- return b
-def getNum(htmlcode): #获取番号
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
- return result
-def getDirector(htmlcode): #获取导演
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
- return result
-def getOutline(htmlcode): #获取演员
- doc = pq(htmlcode)
- result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
- return result
-def getSerise(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
- return result
-def getTag(htmlcode): # 获取演员
- tag = []
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = soup.find_all(attrs={'class': 'genre'})
- for i in a:
- if 'onmouseout' in str(i):
- continue
- tag.append(i.get_text())
- return tag
-
-
-def main(number):
- try:
- htmlcode = get_html('https://www.javbus.com/' + number)
- try:
- dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
- except:
- dww_htmlcode = ''
- dic = {
- 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
- 'studio': getStudio(htmlcode),
- 'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
- 'outline': getOutline(dww_htmlcode),
- 'runtime': getRuntime(htmlcode),
- 'director': getDirector(htmlcode),
- 'actor': getActor(htmlcode),
- 'release': getRelease(htmlcode),
- 'number': getNum(htmlcode),
- 'cover': getCover(htmlcode),
- 'imagecut': 1,
- 'tag': getTag(htmlcode),
- 'label': getSerise(htmlcode),
- 'actor_photo': getActorPhoto(htmlcode),
- 'website': 'https://www.javbus.com/' + number,
- 'source' : 'javbus.py',
- }
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
- except:
- return main_uncensored(number)
-
-def main_uncensored(number):
- htmlcode = get_html('https://www.javbus.com/' + number)
- dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
- if getTitle(htmlcode) == '':
- htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
- dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
- dic = {
- 'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
- 'studio': getStudio(htmlcode),
- 'year': getYear(htmlcode),
- 'outline': getOutline(dww_htmlcode),
- 'runtime': getRuntime(htmlcode),
- 'director': getDirector(htmlcode),
- 'actor': getActor(htmlcode),
- 'release': getRelease(htmlcode),
- 'number': getNum(htmlcode),
- 'cover': getCover(htmlcode),
- 'tag': getTag(htmlcode),
- 'label': getSerise(htmlcode),
- 'imagecut': 0,
- 'actor_photo': '',
- 'website': 'https://www.javbus.com/' + number,
- 'source': 'javbus.py',
- }
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
-
+import re
+from pyquery import PyQuery as pq#need install
+from lxml import etree#need install
+from bs4 import BeautifulSoup#need install
+import json
+from ADC_function import *
+
+def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = soup.find_all(attrs={'class': 'star-name'})
+ d={}
+ for i in a:
+ l=i.a['href']
+ t=i.get_text()
+ html = etree.fromstring(get_html(l), etree.HTMLParser())
+ p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
+ p2={t:p}
+ d.update(p2)
+ return d
+def getTitle(htmlcode): #获取标题
+ doc = pq(htmlcode)
+ title=str(doc('div.container h3').text()).replace(' ','-')
+ try:
+ title2 = re.sub('n\d+-','',title)
+ return title2
+ except:
+ return title
+def getStudio(htmlcode): #获取厂商
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
+ return result
+def getYear(htmlcode): #获取年份
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
+ return result
+def getCover(htmlcode): #获取封面链接
+ doc = pq(htmlcode)
+ image = doc('a.bigImage')
+ return image.attr('href')
+def getRelease(htmlcode): #获取出版日期
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
+ return result
+def getRuntime(htmlcode): #获取分钟
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = soup.find(text=re.compile('分鐘'))
+ return a
+def getActor(htmlcode): #获取女优
+ b=[]
+ soup=BeautifulSoup(htmlcode,'lxml')
+ a=soup.find_all(attrs={'class':'star-name'})
+ for i in a:
+ b.append(i.get_text())
+ return b
+def getNum(htmlcode): #获取番号
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+ return result
+def getDirector(htmlcode): #获取导演
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
+ return result
+def getOutline(htmlcode): #获取演员
+ doc = pq(htmlcode)
+ result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
+ return result
+def getSerise(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
+ return result
+def getTag(htmlcode): # 获取演员
+ tag = []
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = soup.find_all(attrs={'class': 'genre'})
+ for i in a:
+ if 'onmouseout' in str(i):
+ continue
+ tag.append(i.get_text())
+ return tag
+
+
+def main(number):
+ try:
+ htmlcode = get_html('https://www.javbus.com/' + number)
+ try:
+ dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+ except:
+ dww_htmlcode = ''
+ dic = {
+ 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
+ 'studio': getStudio(htmlcode),
+ 'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
+ 'outline': getOutline(dww_htmlcode),
+ 'runtime': getRuntime(htmlcode),
+ 'director': getDirector(htmlcode),
+ 'actor': getActor(htmlcode),
+ 'release': getRelease(htmlcode),
+ 'number': getNum(htmlcode),
+ 'cover': getCover(htmlcode),
+ 'imagecut': 1,
+ 'tag': getTag(htmlcode),
+ 'label': getSerise(htmlcode),
+ 'actor_photo': getActorPhoto(htmlcode),
+ 'website': 'https://www.javbus.com/' + number,
+ 'source' : 'javbus.py',
+ }
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js
+ except:
+ return main_uncensored(number)
+
+def main_uncensored(number):
+ htmlcode = get_html('https://www.javbus.com/' + number)
+ dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+ if getTitle(htmlcode) == '':
+ htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
+ dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+ dic = {
+ 'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
+ 'studio': getStudio(htmlcode),
+ 'year': getYear(htmlcode),
+ 'outline': getOutline(dww_htmlcode),
+ 'runtime': getRuntime(htmlcode),
+ 'director': getDirector(htmlcode),
+ 'actor': getActor(htmlcode),
+ 'release': getRelease(htmlcode),
+ 'number': getNum(htmlcode),
+ 'cover': getCover(htmlcode),
+ 'tag': getTag(htmlcode),
+ 'label': getSerise(htmlcode),
+ 'imagecut': 0,
+ 'actor_photo': '',
+ 'website': 'https://www.javbus.com/' + number,
+ 'source': 'javbus.py',
+ }
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js
+
diff --git a/javdb.py b/javdb.py
index 727c992..180602a 100755
--- a/javdb.py
+++ b/javdb.py
@@ -1,123 +1,123 @@
-import re
-from lxml import etree
-import json
-from bs4 import BeautifulSoup
-from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-def getTitle(a):
- html = etree.fromstring(a, etree.HTMLParser())
- result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
- return result
-def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
-def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
- a = actor.split(',')
- d={}
- for i in a:
- p={i:''}
- d.update(p)
- return d
-def getStudio(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
-def getRuntime(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').rstrip('mi')
-def getLabel(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
-def getNum(a):
- html = etree.fromstring(a, etree.HTMLParser())
- result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result2 + result1).strip('+')
-def getYear(getRelease):
- try:
- result = str(re.search('\d{4}', getRelease).group())
- return result
- except:
- return getRelease
-def getRelease(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+')
-def getTag(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
-def getCover_small(a, index=0):
- # same issue mentioned below,
- # javdb sometime returns multiple results
- # DO NOT just get the firt one, get the one with correct index number
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
- if not 'https' in result:
- result = 'https:' + result
- return result
-def getCover(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']")
- return result
-def getDirector(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
-def getOutline(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
- return result
-def main(number):
- try:
- number = number.upper()
- query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
- html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- # javdb sometime returns multiple results,
- # and the first elememt maybe not the one we are looking for
- # iterate all candidates and find the match one
- urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
- ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
- correct_url = urls[ids.index(number)]
- detail_page = get_html('https://javdb.com' + correct_url)
- dic = {
- 'actor': getActor(detail_page),
- 'title': getTitle(detail_page),
- 'studio': getStudio(detail_page),
- 'outline': getOutline(detail_page),
- 'runtime': getRuntime(detail_page),
- 'director': getDirector(detail_page),
- 'release': getRelease(detail_page),
- 'number': getNum(detail_page),
- 'cover': getCover(detail_page),
- 'cover_small': getCover_small(query_result, index=ids.index(number)),
- 'imagecut': 3,
- 'tag': getTag(detail_page),
- 'label': getLabel(detail_page),
- 'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
- 'actor_photo': getActorPhoto(getActor(detail_page)),
- 'website': 'https://javdb.com' + correct_url,
- 'source': 'javdb.py',
- }
- except Exception as e:
- # print(e)
- dic = {"title": ""}
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
-
-# main('DV-1562')
-# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
-#print(main('ipx-292'))
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(a):
+ html = etree.fromstring(a, etree.HTMLParser())
+ result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
+ return result
+def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
+def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
+ a = actor.split(',')
+ d={}
+ for i in a:
+ p={i:''}
+ d.update(p)
+ return d
+def getStudio(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getRuntime(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').rstrip('mi')
+def getLabel(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getNum(a):
+ html = etree.fromstring(a, etree.HTMLParser())
+ result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result2 + result1).strip('+')
+def getYear(getRelease):
+ try:
+ result = str(re.search('\d{4}', getRelease).group())
+ return result
+ except:
+ return getRelease
+def getRelease(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+')
+def getTag(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
+def getCover_small(a, index=0):
+ # same issue mentioned below,
+ # javdb sometime returns multiple results
+ # DO NOT just get the firt one, get the one with correct index number
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
+ if not 'https' in result:
+ result = 'https:' + result
+ return result
+def getCover(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']")
+ return result
+def getDirector(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getOutline(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
+ return result
+def main(number):
+ try:
+ number = number.upper()
+ query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
+ html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ # javdb sometime returns multiple results,
+ # and the first elememt maybe not the one we are looking for
+ # iterate all candidates and find the match one
+ urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
+ ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
+ correct_url = urls[ids.index(number)]
+ detail_page = get_html('https://javdb.com' + correct_url)
+ dic = {
+ 'actor': getActor(detail_page),
+ 'title': getTitle(detail_page),
+ 'studio': getStudio(detail_page),
+ 'outline': getOutline(detail_page),
+ 'runtime': getRuntime(detail_page),
+ 'director': getDirector(detail_page),
+ 'release': getRelease(detail_page),
+ 'number': getNum(detail_page),
+ 'cover': getCover(detail_page),
+ 'cover_small': getCover_small(query_result, index=ids.index(number)),
+ 'imagecut': 3,
+ 'tag': getTag(detail_page),
+ 'label': getLabel(detail_page),
+ 'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
+ 'actor_photo': getActorPhoto(getActor(detail_page)),
+ 'website': 'https://javdb.com' + correct_url,
+ 'source': 'javdb.py',
+ }
+ except Exception as e:
+ # print(e)
+ dic = {"title": ""}
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
+ return js
+
+# main('DV-1562')
+# input("[+][+]Press enter key to exit; you can check the error message before you exit.\n")
+# print(main('ipx-292'))
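+
+# A minimal sketch (hypothetical data, not a live query) of the index-matching
+# step used in main() above:
+#
+#     urls = ['/v/k1', '/v/k2']        # hrefs scraped from the result list
+#     ids  = ['DV-1561', 'DV-1562']    # uid labels scraped alongside them
+#     ids.index('DV-1562')             # -> 1, so correct_url == '/v/k2'
+#
+# ids.index() raises ValueError when the number is absent, which lands in the
+# except branch above and returns the empty {"title": ""} payload.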
diff --git a/mgstage.py b/mgstage.py
index 8880f96..76665ab 100755
--- a/mgstage.py
+++ b/mgstage.py
@@ -1,111 +1,111 @@
-import re
-from lxml import etree
-import json
-from bs4 import BeautifulSoup
-from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-def getTitle(a):
- try:
- html = etree.fromstring(a, etree.HTMLParser())
- result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
- return result.replace('/', ',')
- except:
- return ''
-def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
- html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
- result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
-def getStudio(a):
- html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
- result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- return str(result1+result2).strip('+').replace("', '",'').replace('"','')
-def getRuntime(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- return str(result1 + result2).strip('+').rstrip('mi')
-def getLabel(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
-def getNum(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+')
-def getYear(getRelease):
- try:
- result = str(re.search('\d{4}',getRelease).group())
- return result
- except:
- return getRelease
-def getRelease(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+')
-def getTag(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','')
-def getCover(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
- # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
- return result
-def getDirector(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
-def getOutline(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//p/text()')).strip(" ['']")
- return result
-def main(number2):
- number=number2.upper()
- htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
- b = str(soup.find(attrs={'id': 'introduction'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
- print(b)
- dic = {
- 'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''),
- 'studio': getStudio(a),
- 'outline': getOutline(b),
- 'runtime': getRuntime(a),
- 'director': getDirector(a),
- 'actor': getActor(a),
- 'release': getRelease(a),
- 'number': getNum(a),
- 'cover': getCover(htmlcode),
- 'imagecut': 0,
- 'tag': getTag(a),
- 'label':getLabel(a),
- 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()),
- 'actor_photo': '',
- 'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
- 'source': 'mgstage.py',
- }
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
- #print(htmlcode)
-
-print(main('SIRO-3607'))
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(a):
+ try:
+ html = etree.fromstring(a, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
+ return result.replace('/', ',')
+ except:
+ return ''
+def getActor(a):
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '').replace('/', ',')
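+# The result1/result2 pair above is the pattern used throughout this file: a
+# spec row's value may sit inside an <a> tag or as bare text in the <td>, so
+# both are scraped and concatenated. A hypothetical row (not from a real page)
+# illustrates it:
+#
+#     <tr><th>出演:</th><td><a>NAME-A</a> / NAME-B</td></tr>
+#
+# result1 yields 'NAME-A' (link text), result2 yields '/ NAME-B' (bare text),
+# and the replace() chain collapses the stringified-list punctuation.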
+def getStudio(a):
+    # NOTE: this scrapes the シリーズ (series) row as a stand-in for a studio field
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getRuntime(a):
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    return str(result1 + result2).strip('+').rstrip('min')  # drop the 'min' unit
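+# Note: str.rstrip() takes a *set* of characters, not a suffix, so
+# '120min'.rstrip('min') == '120', whereas the earlier .rstrip('mi') stripped
+# nothing because the trailing 'n' was not in the character set.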
+def getLabel(a):
+    # NOTE: identical to getStudio(); both read the シリーズ (series) row
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getNum(a):
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    return str(result1 + result2).strip('+')
+def getYear(release):
+    try:
+        return str(re.search(r'\d{4}', release).group())
+    except:
+        return release
+def getRelease(a):
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    return str(result1 + result2).strip('+')
+def getTag(a):
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    return str(result1 + result2).strip('+').replace("', '\\n", ",").replace("', '", "").replace('"', '')
+def getCover(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
+ # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
+ return result
+def getDirector(a):
+    # NOTE: this also reads the シリーズ (series) row; the scraped layout appears
+    # to expose no director field, so the name is kept only for interface parity
+    # with the other scrapers
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getOutline(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//p/text()')).strip(" ['']")
+ return result
+def main(number2):
+    number = number2.upper()
+    htmlcode = str(get_html('https://www.mgstage.com/product/product_detail/' + str(number) + '/', cookies={'adc': '1'}))
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
+ b = str(soup.find(attrs={'id': 'introduction'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
+    # print(b)  # debug dump of the introduction block, left disabled
+ dic = {
+ 'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''),
+ 'studio': getStudio(a),
+ 'outline': getOutline(b),
+ 'runtime': getRuntime(a),
+ 'director': getDirector(a),
+ 'actor': getActor(a),
+ 'release': getRelease(a),
+ 'number': getNum(a),
+ 'cover': getCover(htmlcode),
+ 'imagecut': 0,
+ 'tag': getTag(a),
+        'label': getLabel(a),
+        'year': getYear(getRelease(a)),
+ 'actor_photo': '',
+        'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
+ 'source': 'mgstage.py',
+ }
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
+ return js
+ #print(htmlcode)
+
+# print(main('SIRO-3607'))  # manual test call, disabled so importing this module has no side effects
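+
+# Hypothetical usage sketch (SIRO-3607 is the sample number from the disabled
+# call above); main() returns a JSON string, so callers typically decode it:
+#
+#     data = json.loads(main('SIRO-3607'))
+#     print(data['title'], data['release'])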