76 Commits
0.11.4 ... 1.1

Author SHA1 Message Date
Yoshiko
9898932f09 Update update_check.json 2019-08-18 22:40:37 +08:00
Yoshiko
c4fc22054b Update 1.1 2019-08-18 22:40:11 +08:00
Yoshiko
449e900837 Update README.md 2019-08-18 14:52:02 +08:00
Yoshiko
e3ebbec947 Update README.md 2019-08-14 21:56:25 +08:00
Yoshiko
65a9521ab1 Update README.md 2019-08-14 21:55:51 +08:00
Yoshiko
b79a600c0d Update README.md 2019-08-14 19:29:18 +08:00
Yoshiko
30d33fe8f7 Update README.md 2019-08-14 11:50:42 +08:00
Yoshiko
b325fc1f01 Update README.md 2019-08-14 11:49:00 +08:00
Yoshiko
954fb02c0c Update README.md 2019-08-14 00:28:39 +08:00
Yoshiko
5ee398d6b5 Update Beta 11.9 2019-08-12 01:23:57 +08:00
Yoshiko
b754c11814 Update update_check.json 2019-08-12 01:21:46 +08:00
Yoshiko
5d19ae594d Update Beta 11.9 2019-08-12 01:21:34 +08:00
Yoshiko
bfa8ed3144 Update README.md 2019-08-11 00:41:01 +08:00
Yoshiko
0ec23aaa38 Update README.md 2019-08-11 00:39:31 +08:00
Yoshiko
878ae46d77 Update README.md 2019-08-11 00:31:24 +08:00
Yoshiko
766e6bbd88 Update update_check.json 2019-08-11 00:29:15 +08:00
Yoshiko
0107c7d624 Update README.md 2019-08-11 00:28:25 +08:00
Yoshiko
d0cf2d2193 Update README.md 2019-08-11 00:27:36 +08:00
Yoshiko
d1403af548 Update README.md 2019-08-10 22:28:35 +08:00
Yoshiko
bc20b09f60 Update Beta 11.8 2019-08-10 21:45:36 +08:00
Yoshiko
8e2c0c3686 Version fallback to Beta 11.6 2019-08-09 00:32:57 +08:00
Yoshiko
446e1bf7d0 Version fallback to Beta 11.6 2019-08-09 00:32:04 +08:00
Yoshiko
54437236f0 Update Beta 11.7 2019-08-07 00:19:22 +08:00
Yoshiko
9ed57a8ae9 Update README.md 2019-08-07 00:15:33 +08:00
Yoshiko
c66a53ade1 Update Beta 11.7 2019-08-06 16:46:21 +08:00
Yoshiko
7aec4c4b84 Update update_check.json 2019-08-06 16:37:16 +08:00
Yoshiko
cfb3511360 Update Beta 11.7 2019-08-06 16:36:45 +08:00
Yoshiko
2adcfacf27 Merge pull request #26 from RRRRRm/master
Fix the path error under Linux and specify Python3 as the runtime.
2019-08-05 22:52:57 +08:00
RRRRRm
09dc684ff6 Fix some bugs. 2019-08-05 20:39:41 +08:00
RRRRRm
1bc924a6ac Update README.md 2019-08-05 15:57:46 +08:00
RRRRRm
00db4741bc Calling core.py asynchronously. Allow to specify input and output paths. 2019-08-05 15:48:44 +08:00
RRRRRm
1086447369 Fix the path error under Linux. Specify Python3 as the runtime. 2019-08-05 03:00:35 +08:00
Yoshiko
642c8103c7 Update README.md 2019-07-24 08:51:40 +08:00
Yoshiko
b053ae614c Update README.md 2019-07-23 21:18:22 +08:00
Yoshiko
b7583afc9b Merge pull request #20 from biaji/master
Add encoding info to source
2019-07-21 10:28:03 +08:00
biAji
731b08f843 Add encoding info to source
According to PEP-263, add encoding info to source code
2019-07-18 09:22:28 +08:00
Yoshiko
64f235aaff Update README.md 2019-07-15 12:41:14 +08:00
Yoshiko
f0d5a2a45d Update 11.6 2019-07-14 15:07:04 +08:00
Yoshiko
01521fe390 Update 11.6 2019-07-14 10:06:49 +08:00
Yoshiko
a33b882592 Update update_check.json 2019-07-14 09:59:56 +08:00
Yoshiko
150b81453c Update 11.6 2019-07-14 09:58:46 +08:00
Yoshiko
a6df479b78 Update 11.6 2019-07-14 09:45:53 +08:00
Yoshiko
dd6445b2ba Update 11.6 2019-07-14 09:38:26 +08:00
Yoshiko
41051a915b Update README.md 2019-07-12 18:13:09 +08:00
Yoshiko
32ce390939 Update README.md 2019-07-12 18:08:45 +08:00
Yoshiko
8deec6a6c0 Update README.md 2019-07-12 18:08:20 +08:00
Yoshiko
0fab70ff3d Update README.md 2019-07-12 18:07:23 +08:00
Yoshiko
53bbb99a64 Update README.md 2019-07-12 17:59:46 +08:00
Yoshiko
0e712de805 Update README.md 2019-07-11 10:43:55 +08:00
Yoshiko
6f74254e96 Update README.md 2019-07-11 00:58:16 +08:00
Yoshiko
4220bd708b Update README.md 2019-07-11 00:49:23 +08:00
Yoshiko
3802d88972 Update README.md 2019-07-11 00:46:22 +08:00
Yoshiko
8cddbf1e1b Update README.md 2019-07-11 00:41:40 +08:00
Yoshiko
332326e5f6 Update README.md 2019-07-09 18:52:36 +08:00
Yoshiko
27f64a81d0 Update README.md 2019-07-09 17:57:09 +08:00
Yoshiko
7e3fa5ade8 Update README.md 2019-07-09 17:56:48 +08:00
Yoshiko
cc362a2a26 Beta 11.5 Update 2019-07-09 17:47:43 +08:00
Yoshiko
dde6167b05 Update update_check.json 2019-07-09 17:47:02 +08:00
Yoshiko
fe69f42f92 Update README.md 2019-07-09 17:11:09 +08:00
Yoshiko
6b050cef43 Update README.md 2019-07-09 17:09:32 +08:00
Yoshiko
c721c3c769 Update README.md 2019-07-09 16:51:06 +08:00
Yoshiko
9f8702ca12 Update README.md 2019-07-09 16:50:35 +08:00
Yoshiko
153b3a35b8 Update README.md 2019-07-09 15:58:44 +08:00
Yoshiko
88e543a16f Update README.md 2019-07-09 13:51:52 +08:00
Yoshiko
5906af6d95 Update README.md 2019-07-09 13:43:09 +08:00
Yoshiko
39953f1870 Update README.md 2019-07-09 13:17:41 +08:00
Yoshiko
047618a0df Update README.md 2019-07-09 01:55:43 +08:00
Yoshiko
2da51a51d0 Update README.md 2019-07-09 01:45:35 +08:00
Yoshiko
8c0e0a296d Update README.md 2019-07-09 01:45:05 +08:00
Yoshiko
ce0ac607c2 Update README.md 2019-07-04 14:41:29 +08:00
Yoshiko
f0437cf6af Delete py to exe.bat 2019-07-04 03:01:18 +08:00
Yoshiko
32bfc57eed Update README.md 2019-07-04 02:58:48 +08:00
Yoshiko
909ca96915 Update README.md 2019-07-04 02:57:21 +08:00
Yoshiko
341ab5b2bf Update README.md 2019-07-04 02:55:09 +08:00
Yoshiko
d899a19419 Update README.md 2019-07-04 02:54:24 +08:00
Yoshiko
61b0bc40de Update README.md 2019-07-04 02:42:31 +08:00
12 changed files with 843 additions and 456 deletions

70
ADC_function.py Normal file → Executable file

@@ -1,49 +1,85 @@
# -*- coding: utf-8 -*-
import requests
from configparser import ConfigParser
import os
import re
import time
import sys

config_file = 'config.ini'
config = ConfigParser()

if os.path.exists(config_file):
    try:
        config.read(config_file, encoding='UTF-8')
    except:
        print('[-]Config.ini read failed! Please use the offical file!')
else:
    print('[+]config.ini: not found, creating...')
    with open("config.ini", "wt", encoding='UTF-8') as code:
        print("[common]", file=code)
        print("main_mode=1", file=code)
        print("failed_output_folder=failed", file=code)
        print("success_output_folder=JAV_output", file=code)
        print("", file=code)
        print("[proxy]", file=code)
        print("proxy=127.0.0.1:1080", file=code)
        print("timeout=10", file=code)
        print("retry=3", file=code)
        print("", file=code)
        print("[Name_Rule]", file=code)
        print("location_rule=actor+'/'+number", file=code)
        print("naming_rule=number+'-'+title", file=code)
        print("", file=code)
        print("[update]", file=code)
        print("update_check=1", file=code)
        print("", file=code)
        print("[media]", file=code)
        print("media_warehouse=emby", file=code)
        print("#emby or plex", file=code)
        print("#plex only test!", file=code)
        print("", file=code)
        print("[directory_capture]", file=code)
        print("directory=", file=code)
        print("", file=code)
    time.sleep(2)
    print('[+]config.ini: created!')
    try:
        config.read(config_file, encoding='UTF-8')
    except:
        print('[-]Config.ini read failed! Please use the offical file!')

def ReadMediaWarehouse():
    return config['media']['media_warehouse']

def UpdateCheckSwitch():
    check = str(config['update']['update_check'])
    if check == '1':
        return '1'
    elif check == '0':
        return '0'
    elif check == '':
        return '0'

def get_html(url, cookies=None):  # 网页请求核心
    try:
        proxy = config['proxy']['proxy']
        timeout = int(config['proxy']['timeout'])
        retry_count = int(config['proxy']['retry'])
    except:
        print('[-]Proxy config error! Please check the config.')
    i = 0
    while i < retry_count:
        try:
            if not str(config['proxy']['proxy']) == '':
                proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
                getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies)
                getweb.encoding = 'utf-8'
                return getweb.text
            else:
                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
                getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
                getweb.encoding = 'utf-8'
                return getweb.text
        except requests.exceptions.RequestException:
@@ -58,6 +94,6 @@ def get_html(url,cookies = None):#网页请求核心
        except requests.exceptions.ConnectTimeout:
            i += 1
            print('[-]Connect retry ' + str(i) + '/' + str(retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')
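
For reference, a minimal standalone sketch of the same read-config-then-retry pattern that `get_html` uses. The `[proxy]` section and its `proxy`, `timeout` and `retry` keys match the config.ini shown above; the `fetch` helper name and the example URL are placeholders for illustration only.

```python
from configparser import ConfigParser
import requests

config = ConfigParser()
config.read('config.ini', encoding='UTF-8')

proxy = config['proxy']['proxy']             # e.g. 127.0.0.1:1080, may be empty
timeout = int(config['proxy']['timeout'])    # seconds per attempt
retry_count = int(config['proxy']['retry'])  # number of attempts

def fetch(url):
    # Route the request through the configured proxy only when one is set,
    # and retry on any requests exception up to retry_count times.
    proxies = {"http": "http://" + proxy, "https": "https://" + proxy} if proxy else None
    for attempt in range(1, retry_count + 1):
        try:
            r = requests.get(url, timeout=timeout, proxies=proxies)
            r.encoding = 'utf-8'
            return r.text
        except requests.exceptions.RequestException:
            print('[-]Connect retry %d/%d' % (attempt, retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')

# fetch('https://example.com')  # placeholder URL
```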

164
AV_Data_Capture.py Normal file → Executable file

@@ -1,46 +1,71 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import glob
import os
import time
import re
import sys
from ADC_function import *
import json
import shutil
from configparser import ConfigParser

os.chdir(os.getcwd())

# ============global var===========
version = '1.1'
config = ConfigParser()
config.read(config_file, encoding='UTF-8')
Platform = sys.platform
# ==========global var end=========

def UpdateCheck():
    if UpdateCheckSwitch() == '1':
        html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
        html = json.loads(str(html2))
        if not version == html['version']:
            print('[*] * New update ' + html['version'] + ' *')
            print('[*] * Download *')
            print('[*] ' + html['download'])
            print('[*]=====================================')
    else:
        print('[+]Update Check disabled!')

def movie_lists():
    directory = config['directory_capture']['directory']
    mp4 = []
    avi = []
    rmvb = []
    wmv = []
    mov = []
    mkv = []
    flv = []
    ts = []
    if directory == '*':
        for i in os.listdir(os.getcwd()):
            mp4 += glob.glob(r"./" + i + "/*.mp4")
            avi += glob.glob(r"./" + i + "/*.avi")
            rmvb += glob.glob(r"./" + i + "/*.rmvb")
            wmv += glob.glob(r"./" + i + "/*.wmv")
            mov += glob.glob(r"./" + i + "/*.mov")
            mkv += glob.glob(r"./" + i + "/*.mkv")
            flv += glob.glob(r"./" + i + "/*.flv")
            ts += glob.glob(r"./" + i + "/*.ts")
        total = mp4 + avi + rmvb + wmv + mov + mkv + flv + ts
        return total
    mp4 = glob.glob(r"./" + directory + "/*.mp4")
    avi = glob.glob(r"./" + directory + "/*.avi")
    rmvb = glob.glob(r"./" + directory + "/*.rmvb")
    wmv = glob.glob(r"./" + directory + "/*.wmv")
    mov = glob.glob(r"./" + directory + "/*.mov")
    mkv = glob.glob(r"./" + directory + "/*.mkv")
    flv = glob.glob(r"./" + directory + "/*.flv")
    ts = glob.glob(r"./" + directory + "/*.ts")
    total = mp4 + avi + rmvb + wmv + mov + mkv + flv + ts
    return total

def CreatFailedFolder():
    if not os.path.exists('failed/'):  # 新建failed文件夹
@@ -50,7 +75,6 @@ def CreatFailedFolder():
        print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)")
        os._exit(0)

def lists_from_test(custom_nuber):  # 电影列表
    a = []
    a.append(custom_nuber)
    return a
@@ -69,57 +93,51 @@ def rreplace(self, old, new, *max):
        count = max[0]
    return new.join(self.rsplit(old, count))

def getNumber(filepath):
    try:  # 普通提取番号 主要处理包含减号-的番号
        try:
            filepath1 = filepath.replace("_", "-")
            filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
            filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1))  # 去除文件名中时间
            file_number = re.search('\w+-\d+', filename).group()
            return file_number
        except:
            filepath1 = filepath.replace("_", "-")
            filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
            filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1))  # 去除文件名中时间
            file_number = re.search('\w+-\w+', filename).group()
            return file_number
    except:  # 提取不含减号-的番号
        try:
            filename1 = str(re.sub("ts6\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
            filename0 = str(re.sub(".*?\.com-\d+", "", filename1)).strip('_')
            file_number = str(re.search('\w+\d{4}', filename0).group(0))
            return file_number
        except:  # 提取无减号番号
            filename1 = str(re.sub("ts6\d", "", filepath))  # 去除ts64/265
            filename0 = str(re.sub(".*?\.com-\d+", "", filename1))
            file_number2 = str(re.match('\w+', filename0).group())
            file_number = str(file_number2.replace(re.match("^[A-Za-z]+", file_number2).group(), re.match("^[A-Za-z]+", file_number2).group() + '-'))
            return file_number

def RunCore():
    if Platform == 'win32':
        if os.path.exists('core.py'):
            os.system('python core.py' + ' "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动用于源码py
        elif os.path.exists('core.exe'):
            os.system('core.exe' + ' "' + i + '" --number "' + getNumber(i) + '"')  # 从exe启动用于EXE版程序
        elif os.path.exists('core.py') and os.path.exists('core.exe'):
            os.system('python core.py' + ' "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动用于源码py
    else:
        if os.path.exists('core.py'):
            os.system('python3 core.py' + ' "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动用于源码py
        elif os.path.exists('core.exe'):
            os.system('core.exe' + ' "' + i + '" --number "' + getNumber(i) + '"')  # 从exe启动用于EXE版程序
        elif os.path.exists('core.py') and os.path.exists('core.exe'):
            os.system('python3 core.py' + ' "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动用于源码py

if __name__ == '__main__':
    print('[*]===========AV Data Capture===========')
    print('[*] Version ' + version)
    print('[*]=====================================')
    CreatFailedFolder()
    UpdateCheck()
@@ -127,13 +145,21 @@ if __name__ =='__main__':
    count = 0
    count_all = str(len(movie_lists()))
    print('[+]Find', str(len(movie_lists())), 'movies')
    for i in movie_lists():  # 遍历电影列表 交给core处理
        count = count + 1
        percentage = str(count / int(count_all) * 100)[:4] + '%'
        print('[!] - ' + percentage + ' [' + str(count) + '/' + count_all + '] -')
        try:
            print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]")
            RunCore()
            print("[*]=====================================")
        except:  # 番号提取异常
            print('[-]' + i + ' Cannot catch the number :')
            print('[-]Move ' + i + ' to failed folder')
            shutil.move(i, str(os.getcwd()) + '/' + 'failed/')
            continue
    CEF('JAV_output')
    print("[+]All finished!!!")

444
README.md

@@ -1,183 +1,261 @@
# AV Data Capture
<a title="Hits" target="_blank" href="https://github.com/yoshiko2/AV_Data_Capture"><img src="https://hits.b3log.org/yoshiko2/AV_Data_Capture.svg"></a>
![](https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square)
![](https://img.shields.io/github/downloads/yoshiko2/av_data_capture/total.svg?style=flat-square)<br>
![](https://img.shields.io/github/license/yoshiko2/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/github/release/yoshiko2/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/badge/Python-3.7-yellow.svg?style=flat-square&logo=python)<br>

**A metadata scraper for Japanese movies.** Used together with a local media manager such as EMBY or KODI, it classifies your local movies and fetches their metadata so the library can be organized automatically.

# Table of contents
* [Disclaimer](#免责声明)
* [Notice](#注意)
* [FAQ](#你问我答-faq)
* [Screenshots](#效果图)
* [How to use](#如何使用)
* [Download](#下载)
* [Quick guide](#简要教程)
* [Installing the modules](#1请安装模块在cmd终端逐条输入以下命令安装)
* [Configuration](#2配置configini)
* [(Optional) custom folders and renaming rules](#3可选设置自定义目录和影片重命名规则)
* [Running the program](#5运行-av_data_capturepyexe)
* [Where to put the movies](#4建议把软件拷贝和电影的统一目录下)
* [Troubleshooting (important)](#51异常处理重要)
* [Importing into the media library](#7把jav_output文件夹导入到embykodi中等待元数据刷新完成)
* [About Synology NAS](#8关于群晖NAS)
* [Closing words](#9写在后面)

# Disclaimer
1. This software is for **technical and academic exchange** only; the project exists to learn Python3.<br>
2. Using it for any illegal purpose is forbidden.<br>
3. The user bears all legal consequences arising from using it.<br>
4. It must not be used for commercial or other private gain.<br>

# Notice
**Recommended usage: for movies whose metadata cannot be fetched, fall back to EverAver.**<br>
Multi-part movies are not supported yet.<br>

# FAQ
### Q: Can this software download movies?
**A**: It provides no download links at all; it only sorts and organizes movies you already have locally.
### Q: What is metadata?
**A**: Metadata covers the movie's cover, director, actors, summary, genre, and so on.
### Q: Does it cost anything?
**A**: The software is free forever. People who **resell it for profit** without the **author's** blessing deserve everything they have coming.
### Q: What if the program misbehaves?
**A**: Read [Troubleshooting (important)](#5异常处理重要) carefully.

# Screenshots
**The images come from the internet**; because of the relevant laws and regulations, please use your imagination for the real thing.
![](https://i.loli.net/2019/07/04/5d1cf9bb1b08b86592.jpg)
![](https://i.loli.net/2019/07/04/5d1cf9bb2696937880.jpg)<br>

# How to use
### Download
* The release build runs without a **Python environment**, so you can skip [installing the modules](#1请安装模块在cmd终端逐条输入以下命令安装).<br>Release download (**Windows only**):<br>[![](https://img.shields.io/badge/%E4%B8%8B%E8%BD%BD-windows-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yoshiko2/AV_Data_Capture/releases)<br>
* On Linux and macOS, download the source package and run from source.
* Windows Python environment: [download here](https://www.python.org/downloads/windows/) and pick the executable installer.
* macOS Python environment: [download here](https://www.python.org/downloads/mac-osx/)
* Linux Python environment: Linux users know the drill, no link needed.
### Quick guide:<br>
**1. Put the program in the same folder as the movies.<br>2. Set the proxy in the ini file (if your router already proxies traffic automatically, delete everything after proxy=).<br>3. Run the program and wait for it to finish.<br>4. Import JAV_output into KODI or EMBY.<br>The detailed steps follow below.**<br>
## 1. Install the modules: enter the following commands one by one in CMD/terminal
```python
pip install requests
```
###
```python
pip install pyquery
```
###
```python
pip install lxml
```
###
```python
pip install Beautifulsoup4
```
###
```python
pip install pillow
```
###
## 2. Configure config.ini
config.ini
>[common]<br>
>main_mode=1<br>
>failed_output_folder=failed<br>
>success_output_folder=JAV_output<br>
>
>[proxy]<br>
>proxy=127.0.0.1:1080<br>
>timeout=10<br>
>retry=3<br>
>
>[Name_Rule]<br>
>location_rule=actor+'/'+number<br>
>naming_rule=number+'-'+title<br>
>
>[update]<br>
>update_check=1<br>
>
>[media]<br>
>media_warehouse=emby<br>
>#emby or plex<br>
>
>[directory_capture]<br>
>directory=<br>

### Global settings
---
#### Operating mode
>[common]<br>
>main_mode=1<br>

1 is the normal mode; 2 is the organize-only mode, which simply renames each movie to its ID and files it into a folder named after the actress.
>failed_output_folder=failed<br>
>success_output_folder=JAV_output<br>

Sets the output folders for successful and failed movies.

---
### Network settings
#### * Proxy settings for "certain regions"
Open ```config.ini``` and set your local proxy address and port on the ```proxy``` line under ```[proxy]``` (local Shadowxxxx/X and V2XXX proxy ports are supported):<br>
Example: ```proxy=127.0.0.1:1080```<br>A Japanese proxy is recommended when scraping the amateur (素人) series.<br>
**(If your router proxies traffic automatically, delete everything after proxy=)**<br>
**The same applies if your local proxy client runs in global mode.**<br>
**If you hit a timeout error, delete the address and port after proxy= and enable the VPN's global mode, or restart the VPN network adapter.**<br>
#### Connection timeout
>[proxy]<br>
>timeout=10<br>

10 is the per-retry timeout, in seconds.

---
#### Connection retries
>[proxy]<br>
>retry=3<br>

3 is the number of retries.

---
#### Update check switch
>[update]<br>
>update_check=1<br>

0 turns it off, 1 turns it on (turning it off is not recommended).

---
#### Media library selection
>[media]<br>
>media_warehouse=emby<br>
>#emby or plex<br>

Choose emby or plex.<br>
For PLEX, install the ```XBMCnfoMoviesImporter``` plugin.

---
#### Capture directory
>[directory_capture]<br>
>directory=<br>

If directory is left empty, movies in the same folder as the program are processed; set it to ``` * ``` to process the movies in every subfolder under the program's folder.
## 3. (Optional) custom folders and renaming rules
>[Name_Rule]<br>
>location_rule=actor+'/'+number<br>
>naming_rule=number+'-'+title<br>

Sensible defaults are already provided.

---
#### Naming variables
>title = movie title<br>
>actor = actor<br>
>studio = studio<br>
>director = director<br>
>release = release date<br>
>year = release year<br>
>number = movie ID<br>
>cover = cover URL<br>
>tag = genre<br>
>outline = summary<br>
>runtime = runtime<br>

The parameters above are referred to as **variables** below.
#### Example:
A rule is built from two kinds of elements, variables and string literals, and every element must be joined with a plus sign **+**. For example ```'naming_rule=['+number+']-'+title```: text inside quotes ' ' is a literal, text without quotes is a variable.<br>
Folder structure rule: default ```location_rule=actor+'/'+number```<br> **Adding title here is not recommended**: overly long titles can make folder creation fail because of Windows API limits.<br>
Movie naming rule: default ```naming_rule=number+'-'+title```<br> **This is the title shown in EMBY, KODI and other media libraries; it does not change the movie file's own name**, which stays ID + extension.

---
### Update check switch
>[update]<br>update_check=1<br>

1 is on, 0 is off.
## 4. Preferably put the program in the same folder as the movies
This applies when ```directory=``` in ```config.ini``` is left empty.
## 5. Run ```AV_Data_capture.py/.exe```
When the file name contains 中文, 字幕, -c. or -C., a **中文字幕** (Chinese subtitles) tag is added while the metadata is processed.
## 5.1 Troubleshooting (important)
### Make sure your copy of the program is complete and that your ini file matches the one shipped with the download
---
### The program closes as soon as it opens
Open a cmd prompt, drag ```AV_Data_capture.py/.exe``` into the window, press Enter, read the error message and then **work through the items below**.

---
### Errors mentioning ```Updata_check``` or ```JSON```
See [Network settings](#网络设置).

---
### ```FileNotFoundError: [WinError 3] The system cannot find the path specified: 'JAV_output''```
Create a JAV_output folder next to the program; most likely you did not put the program in the same folder as the movies.

---
### Connection-refused errors
Configure the [proxy](#针对某些地区的代理设置) correctly.<br>

---
### Nonetype / xpath errors
Same as above.<br>

---
### The movie ID is not extracted, or is extracted incorrectly
**Currently supported: movies that have metadata on JAVBUS, the amateur series (300Maan, 259luxu, siro, ...), and the FC2 series.**<br>
>The image below comes from Pockies' blog (used with the original author's permission)<br>
![](https://raw.githubusercontent.com/Pockies/pic/master/741f9461gy1g1cxc31t41j20i804zdgo.jpg)

The ID extractor is now fairly robust and can handle file names like the ones above. If extraction still fails or misfires, rename the file following this pattern (and please stop feeding the program piles of obscure junk releases; it can't cope):
```
COSQ-004.mp4
```
For **non-standard IDs** you need to rename the file to exactly the ID used by the source site (file extension aside) and then drag the file onto core.exe/.py<br>
**Non-standard IDs**: releases such as ```XXX-XXX-1``` or ```1301XX-MINA_YUKA``` that do exist in databases like javbus but under an irregular ID.<br>**Important**: a **file name** like ```XXXX-XXX-C``` with a trailing -C means the movie has Chinese subtitles.<br>
Requirement: the file name should use an underscore or hyphen "_", "-" and ideally contain nothing but the ID, which gives the program the best chance of fetching the metadata.
For renaming many files at once, [ReNamer](http://www.den4b.com/products/renamer) works well.<br>

---
### PIL/image.py errors
No fix for now; it may be a network problem or a pillow packaging problem. You can run from source instead (install the modules from step 1 first).
## 6. Movies whose metadata was fetched successfully are moved into the JAV_output folder, sorted by actor; failed movies are moved into the failed folder.
## 7. Import the JAV_output folder into EMBY or KODI and wait for the metadata refresh to finish
## 8. About Synology NAS
Enable SMB and mount the share as a network drive on Windows, then run the program against it (the same applies to other NAS systems).
## 9. Closing words
How does it feel to see your Japanese movie collection managed this neatly? Pretty satisfying, right?<br>
**Official Telegram group: [click to join](https://t.me/AV_Data_Capture_Official)**<br>
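
The `location_rule`/`naming_rule` strings are evaluated as Python expressions (core.py calls `eval()` on them), so a rule really is just variables and quoted literals joined with `+`. A minimal sketch with made-up metadata values:

```python
# Values that would normally come from the scraped metadata (made up here).
actor = 'ActressName'
number = 'ABC-123'
title = 'Some Title'

location_rule = "actor+'/'+number"   # folder layout, as in config.ini
naming_rule = "number+'-'+title"     # title shown in EMBY/KODI

path = eval(location_rule)     # -> 'ActressName/ABC-123'
display = eval(naming_rule)    # -> 'ABC-123-Some Title'
print(path, display)
```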

112
avsox.py Normal file

@@ -0,0 +1,112 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'avatar-box'})
d = {}
for i in a:
l = i.img['src']
t = i.span.get_text()
p2 = {t: l}
d.update(p2)
return d
def getTitle(a):
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
return result.replace('/', '')
except:
return ''
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
soup = BeautifulSoup(a, 'lxml')
a = soup.find_all(attrs={'class': 'avatar-box'})
d = []
for i in a:
d.append(i.span.get_text())
return d
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
return result1
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
return result1
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
return result1
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
return result1
def getYear(release):
try:
result = str(re.search('\d{4}',release).group())
return result
except:
return release
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
return result1
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
return result
def getCover_small(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
return result
def getTag(a): # 获取演员
soup = BeautifulSoup(a, 'lxml')
a = soup.find_all(attrs={'class': 'genre'})
d = []
for i in a:
d.append(i.get_text())
return d
def main(number):
a = get_html('https://avsox.asia/cn/search/' + number)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html('https://avsox.asia/cn/search/' + number.replace('-', '_'))
print(a)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html('https://avsox.asia/cn/search/' + number.replace('_', ''))
print(a)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
web = get_html(result1)
soup = BeautifulSoup(web, 'lxml')
info = str(soup.find(attrs={'class': 'row movie'}))
dic = {
'actor': getActor(web),
'title': getTitle(web).strip(getNum(web)),
'studio': getStudio(info),
'outline': '',#
'runtime': getRuntime(info),
'director': '', #
'release': getRelease(info),
'number': getNum(info),
'cover': getCover(web),
'cover_small': getCover_small(a),
'imagecut': 3,
'tag': getTag(web),
'label': getLabel(info),
'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(web),
'website': result1,
'source': 'avsox.py',
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
#print(main('041516_541'))
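
avsox.main returns a JSON string (see the commented-out `print(main('041516_541'))` above); a short sketch of how a caller such as core.py consumes it. The movie ID is simply the example from that comment, and running this does hit the network.

```python
import json
import avsox  # the module shown above

raw = avsox.main('041516_541')   # returns a JSON string
data = json.loads(raw)           # parse it back into a dict, as core.py does
print(data['number'], data['title'])
print(data['cover'])             # full-size cover URL
print(data['cover_small'])       # small cover used when imagecut == 3
```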

config.ini

@@ -1,12 +1,23 @@
+[common]
+main_mode=1
+failed_output_folder=failed
+success_output_folder=JAV_output
 [proxy]
 proxy=127.0.0.1:1080
 timeout=10
 retry=3
 [Name_Rule]
-location_rule='JAV_output/'+actor+'/'+number
+location_rule=actor+'/'+number
 naming_rule=number+'-'+title
 [update]
 update_check=1
+#on=1,off=0
+[media]
+media_warehouse=emby
+#emby or plex
+[directory_capture]
+directory=
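
A minimal sketch of reading this file with `ConfigParser`, the same way ADC_function.py and core.py do; the variable names are only illustrative.

```python
from configparser import ConfigParser

config = ConfigParser()
config.read('config.ini', encoding='UTF-8')

main_mode = config['common']['main_mode']              # '1' = scrape, '2' = organise only
media_warehouse = config['media']['media_warehouse']   # 'emby' or 'plex'
directory = config['directory_capture']['directory']   # '', a folder name, or '*'

print(main_mode, media_warehouse, repr(directory))
```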

413
core.py Normal file → Executable file

@@ -1,17 +1,29 @@
# -*- coding: utf-8 -*-
import re
import os
import os.path
import shutil
from PIL import Image
import time
import json
from ADC_function import *
from configparser import ConfigParser
import argparse
# =========website========
import fc2fans_club
import siro
import avsox
import javbus
import javdb
# =========website========

Config = ConfigParser()
Config.read(config_file, encoding='UTF-8')
try:
    option = ReadMediaWarehouse()
except:
    print('[-]Config media_warehouse read failed!')

# 初始化全局变量
title = ''
@@ -33,10 +45,18 @@ houzhui=''
website = ''
json_data = {}
actor_photo = {}
cover_small = ''
naming_rule = ''   # eval(config['Name_Rule']['naming_rule'])
location_rule = ''  # eval(config['Name_Rule']['location_rule'])
program_mode = Config['common']['main_mode']
failed_folder = Config['common']['failed_output_folder']
success_folder = Config['common']['success_output_folder']

# =====================本地文件处理===========================
def moveFailedFolder():
    global filepath
    print('[-]Move to Failed output folder')
    shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/')
    os._exit(0)

def argparse_get_file():
    parser = argparse.ArgumentParser()
    parser.add_argument("--number", help="Enter Number on here", default='')
@@ -44,12 +64,17 @@ def argparse_get_file():
    args = parser.parse_args()
    return (args.file, args.number)

def CreatFailedFolder():
    if not os.path.exists(failed_folder + '/'):  # 新建failed文件夹
        try:
            os.makedirs(failed_folder + '/')
        except:
            print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)")
            os._exit(0)

def getDataState(json_data):  # 元数据获取失败检测
    if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
        return 0
    else:
        return 1

def getDataFromJSON(file_number):  # 从JSON返回元数据
    global title
    global studio
@@ -68,52 +93,81 @@ def getDataFromJSON(file_number): #从JSON返回元数据
    global cn_sub
    global website
    global actor_photo
    global cover_small
    global naming_rule
    global location_rule

    # ================================================网站规则添加开始================================================
    try:  # 添加 需要 正则表达式的规则
        if re.search('^\d{5,}', file_number).group() in file_number:
            json_data = json.loads(avsox.main(file_number))
            if getDataState(json_data) == 0:  # 如果元数据获取失败,请求番号至其他网站抓取
                json_data = json.loads(javdb.main(file_number))
        elif re.search('\d+\D+', file_number).group() in file_number:
            json_data = json.loads(siro.main(file_number))
            if getDataState(json_data) == 0:  # 如果元数据获取失败,请求番号至其他网站抓取
                json_data = json.loads(javbus.main(file_number))
        elif getDataState(json_data) == 0:  # 如果元数据获取失败,请求番号至其他网站抓取
            json_data = json.loads(javdb.main(file_number))
    except:  # 添加 无需 正则表达式的规则
        if 'fc2' in file_number:
            json_data = json.loads(fc2fans_club.main(file_number.strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-').strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-')))
        elif 'FC2' in file_number:
            json_data = json.loads(fc2fans_club.main(file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-').strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-')))
        elif 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number:
            json_data = json.loads(avsox.main(file_number))
        elif 'siro' in file_number or 'SIRO' in file_number or 'Siro' in file_number:
            json_data = json.loads(siro.main(file_number))
        else:
            json_data = json.loads(javbus.main(file_number))
            if getDataState(json_data) == 0:  # 如果元数据获取失败,请求番号至其他网站抓取
                json_data = json.loads(avsox.main(file_number))
            elif getDataState(json_data) == 0:  # 如果元数据获取失败,请求番号至其他网站抓取
                json_data = json.loads(javdb.main(file_number))
    # ================================================网站规则添加结束================================================

    title = str(json_data['title']).replace(' ', '')
    studio = json_data['studio']
    year = json_data['year']
    outline = json_data['outline']
    runtime = json_data['runtime']
    director = json_data['director']
    actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')  # 字符串转列表
    release = json_data['release']
    number = json_data['number']
    cover = json_data['cover']
    try:
        cover_small = json_data['cover_small']
    except:
        aaaaaaa = ''
    imagecut = json_data['imagecut']
    tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # 字符串转列表
    actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
    actor_photo = json_data['actor_photo']
    website = json_data['website']
    source = json_data['source']
    if title == '' or number == '':
        print('[-]Movie Data not found!')
        moveFailedFolder()
    if imagecut == '3':
        DownloadFileWithFilename()
    # ====================处理异常字符====================== #\/:*?"<>|
    if '\\' in title:
        title = title.replace('\\', ' ')
    elif r'/' in title:
        title = title.replace(r'/', '')
    elif ':' in title:
        title = title.replace(':', '')
    elif '*' in title:
        title = title.replace('*', '')
    elif '?' in title:
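
The branch above always falls back through `getDataState`: whenever one scraper returns an empty title, the next site is asked. A compact sketch of that idea; the scraper order here is an assumption for illustration, not the exact order used for every ID class.

```python
import json
import javbus, avsox, javdb  # scraper modules from this project

def fetch_metadata(file_number):
    # Ask each scraper in turn and accept the first answer with a usable title.
    for scraper in (javbus, avsox, javdb):
        data = json.loads(scraper.main(file_number))
        if data['title'] not in ('', 'None', 'null'):  # same test as getDataState
            return data
    return None
```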
@@ -130,53 +184,75 @@ def getDataFromJSON(file_number): #从JSON返回元数据
naming_rule = eval(config['Name_Rule']['naming_rule']) naming_rule = eval(config['Name_Rule']['naming_rule'])
location_rule = eval(config['Name_Rule']['location_rule']) location_rule = eval(config['Name_Rule']['location_rule'])
def smallCoverCheck():
if imagecut == 3:
if option == 'emby':
DownloadFileWithFilename(cover_small, '1.jpg', path)
img = Image.open(path + '/1.jpg')
w = img.width
h = img.height
img.save(path + '/' + number + '.png')
time.sleep(1)
os.remove(path + '/1.jpg')
if option == 'plex':
DownloadFileWithFilename(cover_small, '1.jpg', path)
img = Image.open(path + '/1.jpg')
w = img.width
h = img.height
img.save(path + '/poster.png')
os.remove(path + '/1.jpg')
def creatFolder(): #创建文件夹 def creatFolder(): #创建文件夹
global actor global actor
global path global path
if len(actor) > 240: #新建成功输出文件夹 if len(actor) > 240: #新建成功输出文件夹
path = location_rule.replace("'actor'","'超多人'",3).replace("actor","'超多人'",3) #path为影片+元数据所在目录 path = success_folder+'/'+location_rule.replace("'actor'","'超多人'",3).replace("actor","'超多人'",3) #path为影片+元数据所在目录
#print(path) #print(path)
else: else:
path = location_rule path = success_folder+'/'+location_rule
#print(path) #print(path)
if not os.path.exists(path): if not os.path.exists(path):
try: try:
os.makedirs(path) os.makedirs(path)
except: except:
path = location_rule.replace('/['+number+']-'+title,"/number") path = success_folder+'/'+location_rule.replace('/['+number+']-'+title,"/number")
#print(path) #print(path)
os.makedirs(path) os.makedirs(path)
#=====================资源下载部分=========================== #=====================资源下载部分===========================
def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder! def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder!
config = ConfigParser() try:
config.read('config.ini', encoding='UTF-8') proxy = Config['proxy']['proxy']
proxy = str(config['proxy']['proxy']) timeout = int(Config['proxy']['timeout'])
timeout = int(config['proxy']['timeout']) retry_count = int(Config['proxy']['retry'])
retry_count = int(config['proxy']['retry']) except:
print('[-]Proxy config error! Please check the config.')
i = 0 i = 0
while i < retry_count: while i < retry_count:
try: try:
if not str(config['proxy']['proxy']) == '': if not proxy == '':
if not os.path.exists(path): if not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
r = requests.get(url, headers=headers, timeout=timeout,proxies={"http": "http://" + str(proxy), "https": "https://" + str(proxy)}) r = requests.get(url, headers=headers, timeout=timeout,proxies={"http": "http://" + str(proxy), "https": "https://" + str(proxy)})
if r == '':
print('[-]Movie Data not found!')
os._exit(0)
with open(str(path) + "/" + filename, "wb") as code: with open(str(path) + "/" + filename, "wb") as code:
code.write(r.content) code.write(r.content)
return return
# print(bytes(r),file=code)
else: else:
if not os.path.exists(path): if not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
r = requests.get(url, timeout=timeout, headers=headers) r = requests.get(url, timeout=timeout, headers=headers)
if r == '':
print('[-]Movie Data not found!')
os._exit(0)
with open(str(path) + "/" + filename, "wb") as code: with open(str(path) + "/" + filename, "wb") as code:
code.write(r.content) code.write(r.content)
return return
# print(bytes(r),file=code)
except requests.exceptions.RequestException: except requests.exceptions.RequestException:
i += 1 i += 1
print('[-]Image Download : Connect retry '+str(i)+'/'+str(retry_count)) print('[-]Image Download : Connect retry '+str(i)+'/'+str(retry_count))
@@ -189,13 +265,19 @@ def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in
        except requests.exceptions.ConnectTimeout:
            i += 1
            print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')
    moveFailedFolder()

def imageDownload(filepath):  # 封面是否下载成功否则移动到failed
    if option == 'emby':
        if DownloadFileWithFilename(cover, number + '.jpg', path) == 'failed':
            moveFailedFolder()
        DownloadFileWithFilename(cover, number + '.jpg', path)
        print('[+]Image Downloaded!', path + '/' + number + '.jpg')
    elif option == 'plex':
        if DownloadFileWithFilename(cover, 'fanart.jpg', path) == 'failed':
            moveFailedFolder()
        DownloadFileWithFilename(cover, 'fanart.jpg', path)
        print('[+]Image Downloaded!', path + '/fanart.jpg')

def PrintFiles(filepath):
    # global path
    global title
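
Further down in this diff, `cutImage` crops the right part of the downloaded cover to produce a poster: it opens the landscape fanart, keeps roughly the right half (`w / 1.9`, where the front of the sleeve sits on these covers), and saves the result. A standalone sketch of that crop, with the file names as assumptions:

```python
from PIL import Image

def make_poster(fanart_path='fanart.jpg', poster_path='poster.png'):
    # Keep the right-hand portion of the wide cover image.
    img = Image.open(fanart_path)
    w, h = img.width, img.height
    poster = img.crop((w / 1.9, 0, w, h))  # (left, upper, right, lower)
    poster.save(poster_path)
```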
@@ -204,86 +286,166 @@ def PrintFiles(filepath):
try: try:
if not os.path.exists(path): if not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
with open(path + "/" + number + ".nfo", "wt", encoding='UTF-8') as code: if option == 'plex':
print("<movie>", file=code) with open(path + "/" + number + ".nfo", "wt", encoding='UTF-8') as code:
print(" <title>" + naming_rule + "</title>", file=code) print("<movie>", file=code)
print(" <set>", file=code) print(" <title>" + naming_rule + "</title>", file=code)
print(" </set>", file=code) print(" <set>", file=code)
print(" <studio>" + studio + "+</studio>", file=code) print(" </set>", file=code)
print(" <year>" + year + "</year>", file=code) print(" <studio>" + studio + "+</studio>", file=code)
print(" <outline>"+outline+"</outline>", file=code) print(" <year>" + year + "</year>", file=code)
print(" <plot>"+outline+"</plot>", file=code) print(" <outline>" + outline + "</outline>", file=code)
print(" <runtime>"+str(runtime).replace(" ","")+"</runtime>", file=code) print(" <plot>" + outline + "</plot>", file=code)
print(" <director>" + director + "</director>", file=code) print(" <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
print(" <poster>poster.png</poster>", file=code) print(" <director>" + director + "</director>", file=code)
print(" <thumb>thumb.png</thumb>", file=code) print(" <poster>poster.png</poster>", file=code)
print(" <fanart>fanart.jpg</fanart>", file=code) print(" <thumb>thumb.png</thumb>", file=code)
try: print(" <fanart>fanart.jpg</fanart>", file=code)
for key, value in actor_photo.items(): try:
print(" <actor>", file=code) for key, value in actor_photo.items():
print(" <name>" + key + "</name>", file=code) print(" <actor>", file=code)
if not actor_photo == '': # or actor_photo == []: print(" <name>" + key + "</name>", file=code)
print(" <thumb>" + value + "</thumb>", file=code) if not actor_photo == '': # or actor_photo == []:
print(" </actor>", file=code) print(" <thumb>" + value + "</thumb>", file=code)
except: print(" </actor>", file=code)
aaaa='' except:
print(" <maker>" + studio + "</maker>", file=code) aaaa = ''
print(" <label>", file=code) print(" <maker>" + studio + "</maker>", file=code)
print(" </label>", file=code) print(" <label>", file=code)
if cn_sub == '1': print(" </label>", file=code)
print(" <tag>中文字幕</tag>", file=code) if cn_sub == '1':
try: print(" <tag>中文字幕</tag>", file=code)
for i in tag: try:
print(" <tag>" + i + "</tag>", file=code) for i in tag:
except: print(" <tag>" + i + "</tag>", file=code)
aaaaa='' except:
try: aaaaa = ''
for i in tag: try:
print(" <genre>" + i + "</genre>", file=code) for i in tag:
except: print(" <genre>" + i + "</genre>", file=code)
aaaaaaaa='' except:
if cn_sub == '1': aaaaaaaa = ''
print(" <genre>中文字幕</genre>", file=code) if cn_sub == '1':
print(" <num>" + number + "</num>", file=code) print(" <genre>中文字幕</genre>", file=code)
print(" <release>" + release + "</release>", file=code) print(" <num>" + number + "</num>", file=code)
print(" <cover>"+cover+"</cover>", file=code) print(" <release>" + release + "</release>", file=code)
print(" <website>" + website + "</website>", file=code) print(" <cover>" + cover + "</cover>", file=code)
print("</movie>", file=code) print(" <website>" + website + "</website>", file=code)
print("[+]Writeed! "+path + "/" + number + ".nfo") print("</movie>", file=code)
print("[+]Writeed! " + path + "/" + number + ".nfo")
elif option == 'emby':
with open(path + "/" + number + ".nfo", "wt", encoding='UTF-8') as code:
print("<movie>", file=code)
print(" <title>" + naming_rule + "</title>", file=code)
print(" <set>", file=code)
print(" </set>", file=code)
print(" <studio>" + studio + "+</studio>", file=code)
print(" <year>" + year + "</year>", file=code)
print(" <outline>" + outline + "</outline>", file=code)
print(" <plot>" + outline + "</plot>", file=code)
print(" <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
print(" <director>" + director + "</director>", file=code)
print(" <poster>" + number + ".png</poster>", file=code)
print(" <thumb>" + number + ".png</thumb>", file=code)
print(" <fanart>" + number + '.jpg' + "</fanart>", file=code)
try:
for key, value in actor_photo.items():
print(" <actor>", file=code)
print(" <name>" + key + "</name>", file=code)
if not actor_photo == '': # or actor_photo == []:
print(" <thumb>" + value + "</thumb>", file=code)
print(" </actor>", file=code)
except:
aaaa = ''
print(" <maker>" + studio + "</maker>", file=code)
print(" <label>", file=code)
print(" </label>", file=code)
if cn_sub == '1':
print(" <tag>中文字幕</tag>", file=code)
try:
for i in tag:
print(" <tag>" + i + "</tag>", file=code)
except:
aaaaa = ''
try:
for i in tag:
print(" <genre>" + i + "</genre>", file=code)
except:
aaaaaaaa = ''
if cn_sub == '1':
print(" <genre>中文字幕</genre>", file=code)
print(" <num>" + number + "</num>", file=code)
print(" <release>" + release + "</release>", file=code)
print(" <cover>" + cover + "</cover>", file=code)
print(" <website>" + "https://www.javbus.com/" + number + "</website>", file=code)
print("</movie>", file=code)
print("[+]Writeed! " + path + "/" + number + ".nfo")
except IOError as e: except IOError as e:
print("[-]Write Failed!") print("[-]Write Failed!")
print(e) print(e)
shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/') moveFailedFolder()
os._exit(0)
except Exception as e1: except Exception as e1:
print(e1) print(e1)
print("[-]Write Failed!") print("[-]Write Failed!")
shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/') moveFailedFolder()
os._exit(0)
def cutImage(): def cutImage():
if imagecut == 1: if option == 'plex':
try: if imagecut == 1:
try:
img = Image.open(path + '/fanart.jpg')
imgSize = img.size
w = img.width
h = img.height
img2 = img.crop((w / 1.9, 0, w, h))
img2.save(path + '/poster.png')
except:
print('[-]Cover cut failed!')
elif imagecut == 0:
img = Image.open(path + '/fanart.jpg') img = Image.open(path + '/fanart.jpg')
imgSize = img.size
w = img.width w = img.width
h = img.height h = img.height
img2 = img.crop((w / 1.9, 0, w, h)) img.save(path + '/poster.png')
img2.save(path + '/poster.png') elif option == 'emby':
except: if imagecut == 1:
print('[-]Cover cut failed!') try:
else: img = Image.open(path + '/' + number + '.jpg')
img = Image.open(path + '/fanart.jpg') imgSize = img.size
w = img.width w = img.width
h = img.height h = img.height
img.save(path + '/poster.png') img2 = img.crop((w / 1.9, 0, w, h))
img2.save(path + '/' + number + '.png')
except:
print('[-]Cover cut failed!')
elif imagecut == 0:
img = Image.open(path + '/' + number + '.jpg')
w = img.width
h = img.height
img.save(path + '/' + number + '.png')
def pasteFileToFolder(filepath, path): #文件路径,番号,后缀,要移动至的位置 def pasteFileToFolder(filepath, path): #文件路径,番号,后缀,要移动至的位置
global houzhui global houzhui
houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group()) houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group())
os.rename(filepath, number + houzhui) try:
shutil.move(number + houzhui, path) os.rename(filepath, path + '/' + number + houzhui)
except FileExistsError:
print('[-]File Exists! Please check your movie!')
print('[-]move to the root folder of the program.')
os._exit(0)
def pasteFileToFolder_mode2(filepath, path): #文件路径,番号,后缀,要移动至的位置
global houzhui
houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group())
try:
os.rename(filepath, path + houzhui)
print('[+]Movie ' + number + ' move to target folder Finished!')
except:
print('[-]File Exists! Please check your movie!')
print('[-]move to the root folder of the program.')
os._exit(0)
def renameJpgToBackdrop_copy(): def renameJpgToBackdrop_copy():
shutil.copy(path+'/fanart.jpg', path+'/Backdrop.jpg') if option == 'plex':
shutil.copy(path + '/poster.png', path + '/thumb.png') shutil.copy(path + '/fanart.jpg', path + '/Backdrop.jpg')
shutil.copy(path + '/poster.png', path + '/thumb.png')
if option == 'emby':
shutil.copy(path + '/' + number + '.jpg', path + '/Backdrop.jpg')
if __name__ == '__main__':
    filepath = argparse_get_file()[0]  # path of the movie file
@@ -297,15 +459,18 @@ if __name__ == '__main__':
            print("[!]Making Data for [" + number + "]")
        except:
            print("[-]failed!Please rename the filename again!")
-           shutil.move(filepath, 'failed/')
+           moveFailedFolder()
    else:
        number = argparse_get_file()[1]
    CreatFailedFolder()
    getDataFromJSON(number)  # resolve the number
    creatFolder()  # create the folder
-   imageDownload(filepath)  # creatFolder returns the path for the number
-   PrintFiles(filepath)  # print the files
-   cutImage()  # crop the image
-   pasteFileToFolder(filepath, path)  # move the file
-   renameJpgToBackdrop_copy()
-   # time.sleep(20)
+   if program_mode == '1':
+       imageDownload(filepath)  # creatFolder returns the path for the number
+       PrintFiles(filepath)  # print the files
+       smallCoverCheck()
+       cutImage()  # crop the image
+       pasteFileToFolder(filepath, path)  # move the file
+       renameJpgToBackdrop_copy()
+   elif program_mode == '2':
+       pasteFileToFolder_mode2(filepath, path)  # move the file
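The core.py changes above do two things: the new option switch produces plex-style artwork (fanart.jpg/poster.png) or emby-style artwork (<number>.jpg/.png), and program_mode '1' runs the full scrape-and-organize flow while '2' only renames and moves the file. The poster itself comes from cropping roughly the right half of the downloaded cover. A minimal, self-contained sketch of that crop step, assuming Pillow is installed and a fanart.jpg already exists; crop_poster and folder are illustrative names, not the project's:

from PIL import Image

def crop_poster(folder, imagecut=1):
    # Open the full cover that the download step saved earlier.
    fanart = Image.open(folder + '/fanart.jpg')
    if imagecut == 1:
        w, h = fanart.size
        # Keep the right-hand part of the cover, from w/1.9 to w, as cutImage() does.
        poster = fanart.crop((w / 1.9, 0, w, h))
    else:
        # imagecut == 0: the cover is used as-is, only re-saved as PNG.
        poster = fanart
    poster.save(folder + '/poster.png')

The emby branch follows the same pattern but names both files after the number instead of fanart/poster.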

fc2fans_club.py (1 line changed) Normal file → Executable file

@@ -75,6 +75,7 @@ def main(number2):
        'tag': getTag(htmlcode),
        'actor_photo':'',
        'website': 'http://fc2fans.club/html/FC2-' + number + '.html',
+       'source': 'fc2fans_club.py',
    }
    #print(getTitle(htmlcode))
    #print(getNum(htmlcode))
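The only change to fc2fans_club.py is the new 'source' key: each scraper now stamps its JSON output with the module that produced it, presumably so the caller can tell which site a record came from. A small illustration of the resulting payload; the field values here are made up:

import json

dic = {
    'title': 'example title',
    'actor_photo': '',
    'website': 'http://fc2fans.club/html/FC2-1234567.html',
    'source': 'fc2fans_club.py',  # provenance tag added in this release
}
print(json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':')))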

javbus.py (53 lines changed) Normal file → Executable file

@@ -1,17 +1,9 @@
import re
-import requests #need install
from pyquery import PyQuery as pq#need install
from lxml import etree#need install
-import os
-import os.path
-import shutil
from bs4 import BeautifulSoup#need install
-from PIL import Image#need install
-import time
import json
from ADC_function import *
-import javdb
-import siro

def getActorPhoto(htmlcode):  #//*[@id="star_qdt"]/li/a/img
    soup = BeautifulSoup(htmlcode, 'lxml')
@@ -88,16 +80,12 @@ def getTag(htmlcode):  # get the actors
def main(number):
-   try:
-       if re.search('\d+\D+', number).group() in number:
-           js = siro.main(number)
-           return js
-   except:
-       aaaa=''
    try:
        htmlcode = get_html('https://www.javbus.com/' + number)
-       dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+       try:
+           dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+       except:
+           dww_htmlcode = ''
        dic = {
            'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
            'studio': getStudio(htmlcode),
@@ -114,35 +102,12 @@ def main(number):
            'label': getSerise(htmlcode),
            'actor_photo': getActorPhoto(htmlcode),
            'website': 'https://www.javbus.com/' + number,
+           'source' : 'javbus.py',
        }
        js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
-       if 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number:
-           htmlcode = get_html('https://www.javbus.com/' + number)
-           #dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
-           dic = {
-               'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
-               'studio': getStudio(htmlcode),
-               'year': getYear(htmlcode),
-               'outline': '',
-               'runtime': getRuntime(htmlcode),
-               'director': getDirector(htmlcode),
-               'actor': getActor(htmlcode),
-               'release': getRelease(htmlcode),
-               'number': getNum(htmlcode),
-               'cover': getCover(htmlcode),
-               'imagecut': 1,
-               'tag': getTag(htmlcode),
-               'label': getSerise(htmlcode),
-               'actor_photo': getActorPhoto(htmlcode),
-               'website': 'https://www.javbus.com/' + number,
-           }
-           js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
-                            separators=(',', ':'), )  # .encode('UTF-8')
-           return js2
        return js
    except:
-       a=javdb.main(number)
-       return a
+       return main_uncensored(number)

def main_uncensored(number):
    htmlcode = get_html('https://www.javbus.com/' + number)
@@ -166,11 +131,7 @@ def main_uncensored(number):
        'imagecut': 0,
        'actor_photo': '',
        'website': 'https://www.javbus.com/' + number,
+       'source': 'javbus.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
-   if getYear(htmlcode) == '' or getYear(htmlcode) == 'null':
-       js2 = javdb.main(number)
-       return js2
    return js
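javbus.py loses its unused imports, the siro pre-check, and the duplicated HEYZO branch; the DMM page is now fetched inside its own try so a failure there no longer aborts the lookup, and any other failure falls back to main_uncensored() instead of javdb. A minimal, self-contained sketch of that try-then-fall-back shape; parse_censored and parse_uncensored are stand-ins for the module's own main and main_uncensored:

import json

def parse_censored(number):
    # Stand-in: the real code scrapes https://www.javbus.com/<number> here.
    raise ValueError('censored page layout did not match')

def parse_uncensored(number):
    # Stand-in for main_uncensored(); returns a minimal record.
    return {'number': number, 'imagecut': 0, 'source': 'javbus.py'}

def lookup(number):
    try:
        dic = parse_censored(number)
    except Exception:
        # Any scrape or parse error falls through to the uncensored parser.
        dic = parse_uncensored(number)
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))

print(lookup('ABC-123'))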

javdb.py (17 lines changed) Normal file → Executable file

@@ -1,7 +1,6 @@
import re
from lxml import etree
import json
-import requests
from bs4 import BeautifulSoup
from ADC_function import *
@@ -79,7 +78,6 @@ def main(number):
    result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
    b = get_html('https://javdb1.com' + result1)
    soup = BeautifulSoup(b, 'lxml')
    a = str(soup.find(attrs={'class': 'panel'}))
    dic = {
        'actor': getActor(a),
@@ -99,6 +97,7 @@ def main(number):
        'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': '',
        'website': 'https://javdb1.com' + result1,
+       'source': 'javdb.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
@@ -106,19 +105,18 @@ def main(number):
    a = get_html('https://javdb.com/search?q=' + number + '&f=all')
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
-   if result1 == '':
+   if result1 == '' or result1 == 'null':
        a = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all')
        html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
        result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
    b = get_html('https://javdb.com' + result1)
    soup = BeautifulSoup(b, 'lxml')
    a = str(soup.find(attrs={'class': 'panel'}))
    dic = {
        'actor': getActor(a),
        'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace(getNum(a), '').replace('无码', '').replace('有码', '').lstrip(' '),
        'studio': getStudio(a),
        'outline': getOutline(a),
@@ -132,9 +130,10 @@ def main(number):
        'label': getLabel(a),
        'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': '',
-       'website':'https://javdb.com' + result1,
+       'website': 'https://javdb.com' + result1,
+       'source': 'javdb.py',
    }
-   js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
+   js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), )  # .encode('UTF-8')
    return js
#print(main('061519-861'))
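Besides the 'source' tag, javdb.py now treats a 'null' search result the same as an empty one, so the existing retry with '-' replaced by '_' also fires in that case. A rough sketch of that two-pass lookup; fetch stands in for the project's get_html, and find_detail_path is an illustrative name:

from lxml import etree

def find_detail_path(number, fetch):
    # Try the number as typed first, then with '-' swapped for '_', as main() does above.
    for query in (number, number.replace('-', '_')):
        page = fetch('https://javdb.com/search?q=' + query + '&f=all')
        html = etree.fromstring(page, etree.HTMLParser())
        href = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
        if href and href != 'null':
            return href
    return ''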

View File

@@ -1,2 +0,0 @@
-pyinstaller --onefile AV_Data_Capture.py
-pyinstaller --onefile core.py --hidden-import ADC_function.py --hidden-import fc2fans_club.py --hidden-import javbus.py --hidden-import siro.py

siro.py (4 lines changed) Normal file → Executable file

@@ -1,7 +1,6 @@
import re
from lxml import etree
import json
-import requests
from bs4 import BeautifulSoup
from ADC_function import *
@@ -97,8 +96,9 @@ def main(number2):
        'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': '',
        'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
+       'source': 'siro.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
    return js
-#print(main('300maan-401'))
+#print(main('300maan-373'))

View File

@@ -1,5 +1,5 @@
{
-   "version": "0.11.4",
-   "version_show":"Beta 11.4",
+   "version": "1.1",
+   "version_show":"Beta 1.1",
    "download": "https://github.com/wenead99/AV_Data_Capture/releases"
}
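update_check.json is presumably the file a released copy compares itself against; it now advertises version 1.1. The check itself lives elsewhere in the project, so the following is only a hypothetical sketch of how a client could consume these three fields (the raw URL is an assumption):

import json
import urllib.request

LOCAL_VERSION = '1.1'  # version shipped with this release
URL = 'https://raw.githubusercontent.com/wenead99/AV_Data_Capture/master/update_check.json'  # assumed raw path

def check_update():
    with urllib.request.urlopen(URL) as resp:
        info = json.loads(resp.read().decode('utf-8'))
    if info.get('version') != LOCAL_VERSION:
        print('[*] New version available: ' + str(info.get('version_show')))
        print('[*] Download: ' + str(info.get('download')))
    else:
        print('[+] Already up to date.')

check_update()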