209 Commits
0.8 ... 0.11.7

Author SHA1 Message Date
Yoshiko
c66a53ade1 Update Beta 11.7 2019-08-06 16:46:21 +08:00
Yoshiko
7aec4c4b84 Update update_check.json 2019-08-06 16:37:16 +08:00
Yoshiko
cfb3511360 Update Beta 11.7 2019-08-06 16:36:45 +08:00
Yoshiko
2adcfacf27 Merge pull request #26 from RRRRRm/master
Fix the path error under Linux and specify Python3 as the runtime.
2019-08-05 22:52:57 +08:00
RRRRRm
09dc684ff6 Fix some bugs. 2019-08-05 20:39:41 +08:00
RRRRRm
1bc924a6ac Update README.md 2019-08-05 15:57:46 +08:00
RRRRRm
00db4741bc Calling core.py asynchronously. Allow to specify input and output paths. 2019-08-05 15:48:44 +08:00
RRRRRm
1086447369 Fix the path error under Linux. Specify Python3 as the runtime. 2019-08-05 03:00:35 +08:00
Yoshiko
642c8103c7 Update README.md 2019-07-24 08:51:40 +08:00
Yoshiko
b053ae614c Update README.md 2019-07-23 21:18:22 +08:00
Yoshiko
b7583afc9b Merge pull request #20 from biaji/master
Add encoding info to source
2019-07-21 10:28:03 +08:00
biAji
731b08f843 Add encoding info to source
According to PEP-263, add encoding info to source code
2019-07-18 09:22:28 +08:00
Yoshiko
64f235aaff Update README.md 2019-07-15 12:41:14 +08:00
Yoshiko
f0d5a2a45d Update 11.6 2019-07-14 15:07:04 +08:00
Yoshiko
01521fe390 Update 11.6 2019-07-14 10:06:49 +08:00
Yoshiko
a33b882592 Update update_check.json 2019-07-14 09:59:56 +08:00
Yoshiko
150b81453c Update 11.6 2019-07-14 09:58:46 +08:00
Yoshiko
a6df479b78 Update 11.6 2019-07-14 09:45:53 +08:00
Yoshiko
dd6445b2ba Update 11.6 2019-07-14 09:38:26 +08:00
Yoshiko
41051a915b Update README.md 2019-07-12 18:13:09 +08:00
Yoshiko
32ce390939 Update README.md 2019-07-12 18:08:45 +08:00
Yoshiko
8deec6a6c0 Update README.md 2019-07-12 18:08:20 +08:00
Yoshiko
0fab70ff3d Update README.md 2019-07-12 18:07:23 +08:00
Yoshiko
53bbb99a64 Update README.md 2019-07-12 17:59:46 +08:00
Yoshiko
0e712de805 Update README.md 2019-07-11 10:43:55 +08:00
Yoshiko
6f74254e96 Update README.md 2019-07-11 00:58:16 +08:00
Yoshiko
4220bd708b Update README.md 2019-07-11 00:49:23 +08:00
Yoshiko
3802d88972 Update README.md 2019-07-11 00:46:22 +08:00
Yoshiko
8cddbf1e1b Update README.md 2019-07-11 00:41:40 +08:00
Yoshiko
332326e5f6 Update README.md 2019-07-09 18:52:36 +08:00
Yoshiko
27f64a81d0 Update README.md 2019-07-09 17:57:09 +08:00
Yoshiko
7e3fa5ade8 Update README.md 2019-07-09 17:56:48 +08:00
Yoshiko
cc362a2a26 Beta 11.5 Update 2019-07-09 17:47:43 +08:00
Yoshiko
dde6167b05 Update update_check.json 2019-07-09 17:47:02 +08:00
Yoshiko
fe69f42f92 Update README.md 2019-07-09 17:11:09 +08:00
Yoshiko
6b050cef43 Update README.md 2019-07-09 17:09:32 +08:00
Yoshiko
c721c3c769 Update README.md 2019-07-09 16:51:06 +08:00
Yoshiko
9f8702ca12 Update README.md 2019-07-09 16:50:35 +08:00
Yoshiko
153b3a35b8 Update README.md 2019-07-09 15:58:44 +08:00
Yoshiko
88e543a16f Update README.md 2019-07-09 13:51:52 +08:00
Yoshiko
5906af6d95 Update README.md 2019-07-09 13:43:09 +08:00
Yoshiko
39953f1870 Update README.md 2019-07-09 13:17:41 +08:00
Yoshiko
047618a0df Update README.md 2019-07-09 01:55:43 +08:00
Yoshiko
2da51a51d0 Update README.md 2019-07-09 01:45:35 +08:00
Yoshiko
8c0e0a296d Update README.md 2019-07-09 01:45:05 +08:00
Yoshiko
ce0ac607c2 Update README.md 2019-07-04 14:41:29 +08:00
Yoshiko
f0437cf6af Delete py to exe.bat 2019-07-04 03:01:18 +08:00
Yoshiko
32bfc57eed Update README.md 2019-07-04 02:58:48 +08:00
Yoshiko
909ca96915 Update README.md 2019-07-04 02:57:21 +08:00
Yoshiko
341ab5b2bf Update README.md 2019-07-04 02:55:09 +08:00
Yoshiko
d899a19419 Update README.md 2019-07-04 02:54:24 +08:00
Yoshiko
61b0bc40de Update README.md 2019-07-04 02:42:31 +08:00
Yoshiko
6fde3f98dd Delete proxy.ini 2019-07-04 02:26:42 +08:00
Yoshiko
838eb9c8db Update config.ini 2019-07-04 02:26:23 +08:00
Yoshiko
687bbfce10 Update update_check.json 2019-07-04 02:26:00 +08:00
Yoshiko
4b35113932 Beta 11.4 Update 2019-07-04 02:25:40 +08:00
Yoshiko
d672d4d0d7 Update README.md 2019-07-04 02:23:57 +08:00
Yoshiko
1d3845bb91 Update README.md 2019-07-04 02:22:06 +08:00
wenead99
e5effca854 Update README.md 2019-06-30 18:25:54 +08:00
wenead99
bae82898da Update README.md 2019-06-30 02:04:22 +08:00
wenead99
2e8e7151e3 Update README.md 2019-06-30 02:01:17 +08:00
wenead99
8db74bc34d Update README.md 2019-06-30 01:00:50 +08:00
wenead99
e18392d7d3 Update README.md 2019-06-30 00:58:08 +08:00
wenead99
e4e32c06df Update README.md 2019-06-30 00:54:56 +08:00
wenead99
09802c5632 Update README.md 2019-06-30 00:52:43 +08:00
wenead99
584db78fd0 Update README.md 2019-06-30 00:44:46 +08:00
wenead99
56a41604cb Update AV_Data_Capture.py 2019-06-29 19:03:27 +08:00
wenead99
8228084a1d Update README.md 2019-06-29 18:58:39 +08:00
wenead99
f16def5f3a Update update_check.json 2019-06-29 18:49:30 +08:00
wenead99
c0303a57a1 Beta 11.2 Update 2019-06-29 18:43:45 +08:00
wenead99
07c8a7fb0e Update README.md 2019-06-29 17:02:03 +08:00
wenead99
71691e1fe9 Beta 11.1 Update 2019-06-29 16:19:58 +08:00
wenead99
e2569e4541 Add files via upload 2019-06-29 10:37:29 +08:00
wenead99
51385491de Add files via upload 2019-06-29 10:34:40 +08:00
wenead99
bb049714cf Update update_check.json 2019-06-29 10:30:41 +08:00
wenead99
5dcaa20a6c Update README.md 2019-06-28 23:29:38 +08:00
wenead99
26652bf2ed Update README.md 2019-06-24 15:12:22 +08:00
wenead99
352d2fa28a Update README.md 2019-06-24 15:09:48 +08:00
wenead99
ff5ac0d599 Update README.md 2019-06-24 15:08:32 +08:00
wenead99
f34888d2e7 Update README.md 2019-06-23 14:27:39 +08:00
wenead99
f609e647b5 Update README.md 2019-06-23 14:26:27 +08:00
wenead99
ffc280a01c Update README.md 2019-06-23 14:24:13 +08:00
wenead99
fee0ae95b3 Update README.md 2019-06-23 11:18:26 +08:00
wenead99
cd7e254d2e Update README.md 2019-06-23 11:11:32 +08:00
wenead99
ce2995123d Update README.md 2019-06-23 01:08:27 +08:00
wenead99
46e676b592 Update README.md 2019-06-23 01:08:06 +08:00
wenead99
a435d645e4 Update README.md 2019-06-23 01:00:57 +08:00
wenead99
76eecd1e6f Update README.md 2019-06-23 01:00:33 +08:00
wenead99
3c296db204 Update README.md 2019-06-23 00:57:01 +08:00
wenead99
7d6408fe29 Update README.md 2019-06-23 00:56:44 +08:00
wenead99
337c84fd1c Update README.md 2019-06-23 00:55:02 +08:00
wenead99
ad220c1ca6 Update README.md 2019-06-23 00:54:48 +08:00
wenead99
37df711cdc Update README.md 2019-06-23 00:54:28 +08:00
wenead99
92dd9cb734 Update README.md 2019-06-23 00:51:40 +08:00
wenead99
64445b5105 Update README.md 2019-06-23 00:46:11 +08:00
wenead99
bfdb094ee3 Update README.md 2019-06-23 00:35:35 +08:00
wenead99
b38942a326 Update README.md 2019-06-23 00:34:55 +08:00
wenead99
7d03a1f7f9 Update README.md 2019-06-23 00:34:12 +08:00
wenead99
f9c0df7e06 Update README.md 2019-06-23 00:32:30 +08:00
wenead99
b1783d8c75 Update AV_Data_Capture.py 2019-06-22 19:22:23 +08:00
wenead99
908da6d006 Add files via upload 2019-06-22 19:20:54 +08:00
wenead99
9ec99143d4 Update update_check.json 2019-06-22 16:16:45 +08:00
wenead99
575a710ef8 Beta 10.6更新 2019-06-22 16:16:18 +08:00
wenead99
7c16307643 Update README.md 2019-06-22 16:11:07 +08:00
wenead99
e816529260 Update README.md 2019-06-22 16:10:40 +08:00
wenead99
8282e59a39 Update README.md 2019-06-22 16:08:20 +08:00
wenead99
a96bdb8d13 Update README.md 2019-06-22 16:05:29 +08:00
wenead99
f7f1c3e871 Update README.md 2019-06-22 16:05:01 +08:00
wenead99
632250083f Update README.md 2019-06-22 16:04:18 +08:00
wenead99
0ebfe43133 Update README.md 2019-06-22 16:03:03 +08:00
wenead99
bb367fe79e Update README.md 2019-06-22 15:56:56 +08:00
wenead99
3a4d405c8e Update README.md 2019-06-22 15:53:30 +08:00
wenead99
8f8adcddbb Update README.md 2019-06-22 15:52:06 +08:00
wenead99
394c831b05 Update README.md 2019-06-22 15:47:53 +08:00
wenead99
bb8b3a3bc3 Update update_check.json 2019-06-22 13:19:10 +08:00
wenead99
6c5c932b98 修改Ini文件导致的目录名无效BUG 2019-06-22 13:16:37 +08:00
wenead99
9a151a5d4c Update README.md 2019-06-22 01:44:28 +08:00
wenead99
f24595687b Beta 10.5 更新 2019-06-22 01:29:42 +08:00
wenead99
aa130d2d25 Update README.md 2019-06-22 01:18:44 +08:00
wenead99
bccc49508e Update README.md 2019-06-22 01:12:33 +08:00
wenead99
ad6db7ca97 Update README.md 2019-06-22 01:05:15 +08:00
wenead99
b95d35d6fa Update README.md 2019-06-22 01:04:38 +08:00
wenead99
3bf0cf5fbc Update README.md 2019-06-22 00:58:28 +08:00
wenead99
dbdc0c818d Update README.md 2019-06-22 00:57:45 +08:00
wenead99
e156c34e23 Update README.md 2019-06-22 00:55:46 +08:00
wenead99
ee782e3794 Update README.md 2019-06-22 00:55:01 +08:00
wenead99
90aa77a23a Update AV_Data_Capture.py 2019-06-22 00:46:43 +08:00
wenead99
d4251c8b44 Beta 10.5更新 2019-06-22 00:46:06 +08:00
wenead99
6f684e67e2 Beta 0.15 更新 2019-06-22 00:34:36 +08:00
wenead99
18cf202b5b Update README.md 2019-06-21 23:59:15 +08:00
wenead99
54b2b71472 Update README.md 2019-06-21 23:58:12 +08:00
wenead99
44ba47bafc Update README.md 2019-06-21 23:55:23 +08:00
wenead99
7eb72634d8 Update README.md 2019-06-21 20:07:44 +08:00
wenead99
5787d3470a Update README.md 2019-06-21 20:05:53 +08:00
wenead99
1fce045ac2 Update README.md 2019-06-21 20:05:09 +08:00
wenead99
794aa74782 Update README.md 2019-06-21 20:03:07 +08:00
wenead99
b2e49a99a7 Update README.md 2019-06-21 20:01:58 +08:00
wenead99
d208d53375 Update README.md 2019-06-21 20:00:15 +08:00
wenead99
7158378eca Update README.md 2019-06-21 19:59:55 +08:00
wenead99
0961d8cbe4 Update README.md 2019-06-21 19:59:41 +08:00
wenead99
6ef5d11742 Update README.md 2019-06-21 19:57:03 +08:00
wenead99
45e1d8370c Beta 10.4 更新 2019-06-21 18:27:21 +08:00
wenead99
420f995977 Update README.md 2019-06-21 18:26:25 +08:00
wenead99
dbe1f91bd9 Update README.md 2019-06-21 18:23:59 +08:00
wenead99
770c5fcb1f Update update_check.json 2019-06-21 17:54:41 +08:00
wenead99
665d1ffe43 Beta 10.4 2019-06-21 15:40:02 +08:00
wenead99
14ed221152 Update README.md 2019-06-21 10:53:34 +08:00
wenead99
c41b9c1e32 Update README.md 2019-06-21 10:16:14 +08:00
wenead99
17d4d68cbe Update README.md 2019-06-21 10:00:25 +08:00
wenead99
b5a23fe430 Beta 10.3 Update 2019.6.20 2019-06-21 00:03:43 +08:00
wenead99
2747be4a21 Update README.md 2019-06-20 20:49:40 +08:00
wenead99
02da503a2f Update update_check.json 2019-06-20 19:13:38 +08:00
wenead99
31c5d5c314 Update update_check.json 2019-06-20 19:10:28 +08:00
wenead99
22e5b9aa44 Update update_check.json 2019-06-20 19:07:42 +08:00
wenead99
400e8c9678 Update update_check.json 2019-06-20 19:03:24 +08:00
wenead99
b06e744c0c Beta 0.10.3更新检测 2019-06-19 20:53:10 +08:00
wenead99
ddbfe7765b Beta 10.3更新检测 2019-06-19 20:50:44 +08:00
wenead99
c0f47fb712 Update README.md 2019-06-19 18:22:31 +08:00
wenead99
7b0e8bf5f7 Beta 10.2 Update 2019-06-19 18:21:19 +08:00
wenead99
fa8ea58fe6 Beta 10.2 Update 2019-06-19 18:20:30 +08:00
wenead99
8c824e5d29 Beta 10.2 Update 2019-06-19 18:20:02 +08:00
wenead99
764fba74ec Beta 10.2 Update 2019-06-19 18:19:34 +08:00
wenead99
36c436772c Update README.md 2019-06-19 13:43:04 +08:00
wenead99
897a621adc Update README.md 2019-06-19 13:42:19 +08:00
wenead99
1f5802cdb4 Update README.md 2019-06-19 13:41:05 +08:00
wenead99
0a57e2bab6 Update README.md 2019-06-19 11:03:44 +08:00
wenead99
3ddfe94f2b Update README.md 2019-06-19 11:02:31 +08:00
wenead99
c6fd5ac565 Update README.md 2019-06-19 00:05:01 +08:00
wenead99
2a7cdcf12d Update README.md 2019-06-18 23:56:34 +08:00
wenead99
759e546534 Beta 10.1 修复FC2元数据提取异常 2019-06-18 18:11:04 +08:00
wenead99
222337a5f0 修改FC2提取异常 2019-06-18 18:02:01 +08:00
wenead99
9fb6122a9d Update AV_Data_Capture.py 2019-06-18 16:58:32 +08:00
wenead99
9f0c01d62e Update README.md 2019-06-18 16:57:39 +08:00
wenead99
6ed79d8fcb Update README.md 2019-06-18 16:56:22 +08:00
wenead99
abb53c3219 Update README.md 2019-06-18 16:55:43 +08:00
wenead99
6578d807ca Update README.md 2019-06-18 16:55:10 +08:00
wenead99
e9acd32fd7 Update README.md 2019-06-18 16:54:49 +08:00
wenead99
0c64165b49 Update README.md 2019-06-18 16:53:45 +08:00
wenead99
6278659e55 Update README.md 2019-06-18 16:53:11 +08:00
wenead99
ca2c97a98f Update README.md 2019-06-17 23:45:00 +08:00
wenead99
164cc464dc Update README.md 2019-06-17 23:40:17 +08:00
wenead99
faa99507ad Update README.md 2019-06-17 19:11:54 +08:00
wenead99
d7a48d2829 Update README.md 2019-06-17 19:11:35 +08:00
wenead99
c40936f1c4 Update README.md 2019-06-17 19:10:22 +08:00
wenead99
38b26d4161 Update README.md 2019-06-17 19:09:55 +08:00
wenead99
e17dffba4e Update README.md 2019-06-17 18:34:26 +08:00
wenead99
ae1a91bf28 Update README.md 2019-06-17 18:31:46 +08:00
wenead99
208c24b606 Update README.md 2019-06-17 18:31:10 +08:00
wenead99
751450ebad Update README.md 2019-06-17 18:30:46 +08:00
wenead99
e429ca3c7d Update README.md 2019-06-17 18:29:31 +08:00
wenead99
9e26558666 Update README.md 2019-06-17 18:26:11 +08:00
wenead99
759b30ec5c Update README.md 2019-06-17 18:24:20 +08:00
wenead99
b7c195b76e Update README.md 2019-06-17 18:17:37 +08:00
wenead99
7038fcf8ed Update README.md 2019-06-17 18:12:38 +08:00
wenead99
54041313dc Add files via upload 2019-06-17 18:04:04 +08:00
wenead99
47a29f6628 Update README.md 2019-06-17 18:03:14 +08:00
wenead99
839610d230 Update README.md 2019-06-17 16:53:03 +08:00
wenead99
a0b324c1a8 Update README.md 2019-06-17 16:52:23 +08:00
wenead99
1996807702 Add files via upload 2019-06-17 16:28:07 +08:00
wenead99
e91b7a85bf 0.10 Beta10 Update 2019-06-17 16:14:17 +08:00
wenead99
dddaf5c74f Update README.md 2019-06-16 17:08:58 +08:00
wenead99
2a3935b221 Update README.md 2019-06-16 17:07:36 +08:00
wenead99
a5becea6c9 Update README.md 2019-06-16 15:39:06 +08:00
wenead99
1381b66619 Update README.md 2019-06-16 12:40:21 +08:00
wenead99
eb946d948f Update 0.9 2019-06-15 20:40:13 +08:00
wenead99
46087ba886 Update README.md 2019-06-11 19:10:57 +08:00
wenead99
f8764d1b81 Update README.md 2019-06-11 19:10:01 +08:00
wenead99
b9095452da Update README.md 2019-06-11 19:09:34 +08:00
wenead99
be8d23e782 Update README.md 2019-06-11 19:08:45 +08:00
11 changed files with 1193 additions and 448 deletions

101
ADC_function.py Normal file → Executable file
View File

@@ -1,10 +1,97 @@
import requests #!/usr/bin/env python3
# -*- coding: utf-8 -*-
def get_html(url):#网页请求核心 import requests
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} from configparser import ConfigParser
getweb = requests.get(str(url),timeout=5,headers=headers) import os
getweb.encoding='utf-8' import re
import time
import sys
config_file='config.ini'
config = ConfigParser()
if os.path.exists(config_file):
try: try:
return getweb.text config.read(config_file, encoding='UTF-8')
except: except:
print("[-]Connect Failed! Please check your Proxy.") print('[-]Config.ini read failed! Please use the offical file!')
else:
print('[+]config.ini: not found, creating...')
with open("config.ini", "wt", encoding='UTF-8') as code:
print("[proxy]",file=code)
print("proxy=127.0.0.1:1080",file=code)
print("timeout=10", file=code)
print("retry=3", file=code)
print("", file=code)
print("[Name_Rule]", file=code)
print("location_rule='JAV_output/'+actor+'/'+number",file=code)
print("naming_rule=number+'-'+title",file=code)
print("", file=code)
print("[update]",file=code)
print("update_check=1",file=code)
print("", file=code)
print("[media]", file=code)
print("media_warehouse=emby", file=code)
print("#emby or plex", file=code)
print("#plex only test!", file=code)
print("", file=code)
print("[directory_capture]", file=code)
print("switch=0", file=code)
print("directory=", file=code)
print("", file=code)
print("everyone switch:1=on, 0=off", file=code)
time.sleep(2)
print('[+]config.ini: created!')
try:
config.read(config_file, encoding='UTF-8')
except:
print('[-]Config.ini read failed! Please use the offical file!')
def ReadMediaWarehouse():
return config['media']['media_warehouse']
def UpdateCheckSwitch():
check=str(config['update']['update_check'])
if check == '1':
return '1'
elif check == '0':
return '0'
elif check == '':
return '0'
def get_html(url,cookies = None):#网页请求核心
try:
proxy = config['proxy']['proxy']
timeout = int(config['proxy']['timeout'])
retry_count = int(config['proxy']['retry'])
except:
print('[-]Proxy config error! Please check the config.')
i = 0
while i < retry_count:
try:
if not str(config['proxy']['proxy']) == '':
proxies = {"http": "http://" + proxy,"https": "https://" + proxy}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
getweb = requests.get(str(url), headers=headers, timeout=timeout,proxies=proxies, cookies=cookies)
getweb.encoding = 'utf-8'
return getweb.text
else:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
getweb.encoding = 'utf-8'
return getweb.text
except requests.exceptions.RequestException:
i += 1
print('[-]Connect retry '+str(i)+'/'+str(retry_count))
except requests.exceptions.ConnectionError:
i += 1
print('[-]Connect retry '+str(i)+'/'+str(retry_count))
except requests.exceptions.ProxyError:
i += 1
print('[-]Connect retry '+str(i)+'/'+str(retry_count))
except requests.exceptions.ConnectTimeout:
i += 1
print('[-]Connect retry '+str(i)+'/'+str(retry_count))
print('[-]Connect Failed! Please check your Proxy or Network!')

176
AV_Data_Capture.py Normal file → Executable file
View File

@@ -1,33 +1,78 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import glob import glob
import os import os
import time import time
import re import re
import sys
from ADC_function import *
import json
import subprocess
import shutil
from configparser import ConfigParser
version='0.11.7'
os.chdir(os.getcwd())
input_dir='.' # 电影的读取与输出路径, 默认为当前路径
config = ConfigParser()
config.read(config_file, encoding='UTF-8')
def UpdateCheck():
if UpdateCheckSwitch() == '1':
html = json.loads(get_html('https://raw.githubusercontent.com/wenead99/AV_Data_Capture/master/update_check.json'))
if not version == html['version']:
print('[*] * New update ' + html['version'] + ' *')
print('[*] * Download *')
print('[*] ' + html['download'])
print('[*]=====================================')
else:
print('[+]Update Check disabled!')
def set_directory(): # 设置读取与存放路径
global input_dir
# 配置项switch为1且定义了新的路径时, 更改默认存取路径
if config['directory_capture']['switch'] == '1':
custom_input = config['directory_capture']['input_directory']
if custom_input != '': # 自定义了输入路径
input_dir = format_path(custom_input)
# 若自定义了输入路径, 输出路径默认在输入路径下
CreatFolder(input_dir)
#print('[+]Working directory is "' + os.getcwd() + '".')
#print('[+]Using "' + input_dir + '" as input directory.')
def format_path(path): # 使路径兼容Linux与MacOS
if path.find('\\'): # 是仅兼容Windows的路径格式
path_list=path.split('\\')
path='/'.join(path_list) # 转换为可移植的路径格式
return path
def movie_lists(): def movie_lists():
#MP4 a2 = glob.glob( input_dir + "/*.mp4")
a2 = glob.glob(os.getcwd() + r"\*.mp4") b2 = glob.glob( input_dir + "/*.avi")
# AVI c2 = glob.glob( input_dir + "/*.rmvb")
b2 = glob.glob(os.getcwd() + r"\*.avi") d2 = glob.glob( input_dir + "/*.wmv")
# RMVB e2 = glob.glob( input_dir + "/*.mov")
c2 = glob.glob(os.getcwd() + r"\*.rmvb") f2 = glob.glob( input_dir + "/*.mkv")
# WMV g2 = glob.glob( input_dir + "/*.flv")
d2 = glob.glob(os.getcwd() + r"\*.wmv") h2 = glob.glob( input_dir + "/*.ts")
# MOV total = a2 + b2 + c2 + d2 + e2 + f2 + g2 + h2
e2 = glob.glob(os.getcwd() + r"\*.mov")
# MKV
f2 = glob.glob(os.getcwd() + r"\*.mkv")
# FLV
g2 = glob.glob(os.getcwd() + r"\*.flv")
total = a2+b2+c2+d2+e2+f2+g2
return total return total
def CreatFolder(folder_path):
if not os.path.exists(folder_path): # 新建文件夹
try:
print('[+]Creating ' + folder_path)
os.makedirs(folder_path)
except:
print("[-]failed!can not be make folder '"+folder_path+"'\n[-](Please run as Administrator)")
os._exit(0)
def lists_from_test(custom_nuber): #电影列表 def lists_from_test(custom_nuber): #电影列表
a=[] a=[]
a.append(custom_nuber) a.append(custom_nuber)
return a return a
def CEF(path): def CEF(path):
files = os.listdir(path) # 获取路径下的子文件(夹)列表 files = os.listdir(path) # 获取路径下的子文件(夹)列表
for file in files: for file in files:
@@ -36,25 +81,96 @@ def CEF(path):
print('[+]Deleting empty folder',path + '/' + file) print('[+]Deleting empty folder',path + '/' + file)
except: except:
a='' a=''
def rreplace(self, old, new, *max): def rreplace(self, old, new, *max):
#从右开始替换文件名中内容,源字符串,将被替换的子字符串, 新字符串用于替换old子字符串可选字符串, 替换不超过 max 次 #从右开始替换文件名中内容,源字符串,将被替换的子字符串, 新字符串用于替换old子字符串可选字符串, 替换不超过 max 次
count = len(self) count = len(self)
if max and str(max[0]).isdigit(): if max and str(max[0]).isdigit():
count = max[0] count = max[0]
return new.join(self.rsplit(old, count)) return new.join(self.rsplit(old, count))
def getNumber(filepath):
try: # 试图提取番号
# ====番号获取主程序====
try: # 普通提取番号 主要处理包含减号-的番号
filepath1 = filepath.replace("_", "-")
filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1)) # 去除文件名中时间
file_number = re.search('\w+-\d+', filename).group()
return file_number
except: # 提取不含减号-的番号
try: # 提取东京热番号格式 n1087
filename1 = str(re.sub("h26\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
filename0 = str(re.sub(".*?\.com-\d+", "", filename1)).strip('_')
if '-C.' in filepath or '-c.' in filepath:
cn_sub = '1'
file_number = str(re.search('n\d{4}', filename0).group(0))
return file_number
except: # 提取无减号番号
filename1 = str(re.sub("h26\d", "", filepath)) # 去除h264/265
filename0 = str(re.sub(".*?\.com-\d+", "", filename1))
file_number2 = str(re.match('\w+', filename0).group())
if '-C.' in filepath or '-c.' in filepath:
cn_sub = '1'
file_number = str(file_number2.replace(re.match("^[A-Za-z]+", file_number2).group(),
re.match("^[A-Za-z]+", file_number2).group() + '-'))
return file_number
# if not re.search('\w-', file_number).group() == 'None':
# file_number = re.search('\w+-\w+', filename).group()
# 上面是插入减号-到番号中
# ====番号获取主程序=结束===
except Exception as e: # 番号提取异常
print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :')
print('[-]' + str(os.path.basename(filepath)) + ' :', e)
print('[-]Move ' + os.path.basename(filepath) + ' to failed folder')
#print('[-]' + filepath + ' -> ' + output_dir + '/failed/')
#shutil.move(filepath, output_dir + '/failed/')
except IOError as e2:
print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :')
print('[-]' + str(os.path.basename(filepath)) + ' :', e2)
#print('[-]' + filepath + ' -> ' + output_dir + '/failed/')
#shutil.move(filepath, output_dir + '/failed/')
def RunCore(movie):
# 异步调用core.py, core.py作为子线程执行, 本程序继续执行.
if os.path.exists('core.py'):
cmd_arg=[sys.executable,'core.py',movie,'--number',getNumber(movie)] #从py文件启动用于源码py
elif os.path.exists('core.exe'):
cmd_arg=['core.exe',movie,'--number',getNumber(movie)] #从exe启动用于EXE版程序
elif os.path.exists('core.py') and os.path.exists('core.exe'):
cmd_arg=[sys.executable,'core.py',movie,'--number',getNumber(movie)] #从py文件启动用于源码py
process=subprocess.Popen(cmd_arg)
return process
if __name__ =='__main__': if __name__ =='__main__':
print('[*]===========AV Data Capture===========')
print('[*] Version '+version)
print('[*]=====================================')
UpdateCheck()
os.chdir(os.getcwd()) os.chdir(os.getcwd())
for i in movie_lists(): #遍历电影列表 交给core处理 set_directory()
if '_' in i:
os.rename(re.search(r'[^\\/:*?"<>|\r\n]+$', i).group(), rreplace(re.search(r'[^\\/:*?"<>|\r\n]+$', i).group(), '_', '-', 1))
i = rreplace(re.search(r'[^\\/:*?"<>|\r\n]+$', i).group(), '_', '-', 1)
os.system('python core.py' + ' "' + i + '"') #选择从py文件启动 用于源码py
#os.system('core.exe' + ' "' + i + '"') #选择从exe文件启动用于EXE版程序
print("[*]=====================================")
print("[!]Cleaning empty folders")
CEF('JAV_output') count = 0
movies = movie_lists()
count_all = str(len(movies))
print('[+]Find ' + str(len(movies)) + ' movies.')
process_list=[]
for movie in movies: #遍历电影列表 交给core处理
num=getNumber(movie) # 获取番号
if num is None:
movies.remove(movie) # 未获取到番号, 则将影片从列表移除
count_all=count_all-1
continue
print("[!]Making Data for [" + movie + "], the number is [" + num + "]")
process=RunCore(movie)
process_list.append(process)
print("[*]=====================================")
for i in range(len(movies)):
process_list[i].communicate()
percentage = str((i+1)/int(count_all)*100)[:4]+'%'
print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -')
print("[!]The [" + getNumber(movies[i]) + "] process is done.")
print("[*]=====================================")
CEF(input_dir)
print("[+]All finished!!!") print("[+]All finished!!!")
time.sleep(3) input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")

287
README.md
View File

@@ -1,77 +1,210 @@
# 日本AV元数据抓取工具 (刮削器) # AV Data Capture
## 关于本软件 ~路star谢谢
<a title="Hits" target="_blank" href="https://github.com/yoshiko2/AV_Data_Capture"><img src="https://hits.b3log.org/yoshiko2/AV_Data_Capture.svg"></a>
**#0.5重大更新新增对FC2,259LUXU,SIRO,300MAAN系列影片抓取支持,优化对无码视频抓取** ![](https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square)
![](https://img.shields.io/github/downloads/yoshiko2/av_data_capture/total.svg?style=flat-square)<br>
目前我下的AV越来越多也意味着AV要集中地管理形成媒体库。现在有两款主流的AV元数据获取器"EverAver"和"Javhelper"。前者的优点是元数据获取比较全,缺点是不能批量处理;后者优点是可以批量处理,但是元数据不够全。 ![](https://img.shields.io/github/license/yoshiko2/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/github/release/yoshiko2/av_data_capture.svg?style=flat-square)<br>
为此综合上述软件特点我写出了本软件为了方便的管理本地AV和更好的手冲体验。没女朋友怎么办ʅ(‾◡◝)ʃ
**预计本周末适配DS Video暂时只支持Kodi,EMBY** **日本电影元数据 抓取工具 | 刮削器**配合本地影片管理软件EMBY,KODI管理本地影片该软件起到分类与元数据抓取作用利用元数据信息来分类供本地影片分类整理使用。
**tg官方电报群:https://t.me/AV_Data_Capture_Official** # 目录
* [免责声明](#免责声明)
### **请认真阅读下面使用说明再使用** * [如何使用](#如何使用) * [注意](#注意)
* [你问我答 FAQ](#你问我答-faq)
![](https://i.loli.net/2019/06/02/5cf2b5d0bbecf69019.png) * [效果图](#效果图)
* [如何使用](#如何使用)
* [下载](#下载)
## 软件流程图 * [简明教程](#简要教程)
![](https://i.loli.net/2019/06/02/5cf2bb9a9e2d997635.png) * [模块安装](#1请安装模块在cmd终端逐条输入以下命令安装)
* [配置](#2配置configini)
## 如何使用 * [运行软件](#4运行-av_data_capturepyexe)
### **请认真阅读下面使用说明** * [异常处理(重要)](#5异常处理重要)
**release的程序可脱离python环境运行可跳过第一步仅限windows平台)** * [导入至媒体库](#7把jav_output文件夹导入到embykodi中根据封面选片子享受手冲乐趣)
**下载地址(Windows):https://github.com/wenead99/AV_Data_Capture/releases** * [写在后面](#8写在后面)
1. 请安装requests,pyquery,lxml,Beautifulsoup4,pillow模块,可在CMD逐条输入以下命令安装
```python # 免责声明
pip install requests 1.本软件仅供**技术交流,学术交流**使用,本项目旨在学习 Python3<br>
``` 2.本软件禁止用于任何非法用途<br>
### 3.使用者使用该软件产生的一切法律后果由使用者承担<br>
```python 4.不可使用于商业和个人其他意图<br>
pip install pyquery
``` # 注意
### **推荐用法: 使用该软件后,对于不能正常获取元数据的电影可以用 Everaver 来补救**<br>
```python 暂不支持多P电影<br>
pip install lxml
``` # 你问我答 FAQ
### ### F这软件能下片吗
```python **Q**:该软件不提供任何影片下载地址,仅供本地影片分类整理使用。
pip install Beautifulsoup4 ### F什么是元数据
``` **Q**:元数据包括了影片的:封面,导演,演员,简介,类型......
### ### F软件收费吗
```python **Q**:软件永久免费。除了 **作者** 钦点以外,给那些 **利用本软件牟利** 的人送上 **骨灰盒-全家族 | 崭新出厂**
pip install pillow ### F软件运行异常怎么办
``` **Q**:认真看 [异常处理(重要)](#5异常处理重要)
2. 你的AV在被软件管理前最好命名为番号: # 效果图
``` **图片来自网络**,由于相关法律法规,具体效果请自行联想
COSQ-004.mp4 ![](https://i.loli.net/2019/07/04/5d1cf9bb1b08b86592.jpg)
``` ![](https://i.loli.net/2019/07/04/5d1cf9bb2696937880.jpg)<br>
或者
``` # 如何使用
COSQ_004.mp4 ### 下载
``` * release的程序可脱离**python环境**运行,可跳过 [模块安装](#1请安装模块在cmd终端逐条输入以下命令安装)<br>Release 下载地址(**仅限Windows**):<br>[![](https://img.shields.io/badge/%E4%B8%8B%E8%BD%BD-windows-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yoshiko2/AV_Data_Capture/releases/download/0.11.6/Beta11.6.zip)<br>
文件名中间要有下划线或者减号"_","-",没有多余的内容只有番号为最佳,可以让软件更好获取元数据 * Linux,MacOS请下载源码包运行
对于多影片重命名可以用ReNamer来批量重命名
软件官网:http://www.den4b.com/products/renamer * Windows Python环境:[点击前往](https://www.python.org/downloads/windows/) 选中executable installer下载
* MacOS Python环境[点击前往](https://www.python.org/downloads/mac-osx/)
![](https://i.loli.net/2019/06/02/5cf2b5cfbfe1070559.png) * Linux Python环境Linux用户懂的吧不解释下载地址
### 简要教程:<br>
3. 把软件拷贝到AV的所在目录下运行程序中国大陆用户必须挂VPNShsadowsocks开全局代理 **1.把软件拉到和电影的同一目录<br>2.设置ini文件的代理路由器拥有自动代理功能的可以把proxy=后面内容去掉)<br>3.运行软件等待完成<br>4.把JAV_output导入至KODI,EMBY中。<br>详细请看以下教程**<br>
4. 运行AV_Data_capture.py
5. **你也可以把单个影片拖动到core程序** ## 1.请安装模块,在CMD/终端逐条输入以下命令安装
```python
![](https://i.loli.net/2019/06/02/5cf2b5d03640e73201.gif) pip install requests
```
6. 软件会自动把元数据获取成功的电影移动到JAV_output文件夹中根据女优分类失败的电影移动到failed文件夹中。 ###
```python
7. 把JAV_output文件夹导入到EMBY,KODI中根据封面选片子享受手冲乐趣 pip install pyquery
```
![](https://i.loli.net/2019/06/02/5cf2b5cfd1b0226763.png) ###
![](https://i.loli.net/2019/06/02/5cf2b5cfd1b0246492.png) ```python
![](https://i.loli.net/2019/06/02/5cf2b5d009e4930666.png) pip install lxml
```
###
```python
pip install Beautifulsoup4
```
###
```python
pip install pillow
```
###
## 2.配置config.ini
config.ini
>[proxy]<br>
>proxy=127.0.0.1:1080<br>
>timeout=10<br>
>retry=3<br>
>
>[Name_Rule]<br>
>location_rule='JAV_output/'+actor+'/['+number+']-'+title<br>
>naming_rule=number+'-'+title<br>
>
>[update]<br>
>update_check=1<br>
>
>[media]<br>
>media_warehouse=emby<br>
>#emby or plex<br>
>#plex only test!<br>
>
>[directory_capture]<br>
>switch=0<br>
>directory=<br>
>
>#everyone switch:1=on, 0=off<br>
### 1.网络设置
#### * 针对“某些地区”的代理设置
打开```config.ini```,在```[proxy]```下的```proxy```行设置本地代理地址和端口支持Shadowxxxx/X,V2XXX本地代理端口:<br>
例子:```proxy=127.0.0.1:1080```<br>素人系列抓取建议使用日本代理<br>
**路由器拥有自动代理功能的可以把proxy=后面内容去掉**<br>
**本地代理软件开全局模式的同志同上**<br>
**如果遇到tineout错误可以把文件的proxy=后面的地址和端口删除并开启vpn全局模式或者重启电脑vpn网卡**<br>
#### 连接超时重试设置
>[proxy]<br>
>timeout=10<br>
10为超时重试时间 单位:秒
#### 连接重试次数设置
>[proxy]<br>
>retry=3<br>
3即为重试次数
#### 检查更新开关
>[update]<br>
>update_check=1<br>
0为关闭1为开启不建议关闭
PLEX请安装插件```XBMCnfoMoviesImporter```
##### 媒体库选择
>[media]<br>
>media_warehouse=emby<br>
>#emby or plex<br>
>#plex only test!<br>
建议选择emby, plex不完善
#### 抓取目录选择
>[directory_capture]<br>
>switch=0<br>
>input_directory=<br>
>output_directory=<br>
switch为1时目录自定义才会被触发此时可以指定抓取任意目录下的影片, 并指定存放的目录如果为0则不触发抓取和程序同一目录下的影片directory不生效. 如果仅指定input_directory, output_directory默认与input_directory相同.
### (可选)设置自定义目录和影片重命名规则
**已有默认配置**<br>
##### 命名参数<br>
>title = 片名<br>
>actor = 演员<br>
>studio = 公司<br>
>director = 导演<br>
>release = 发售日<br>
>year = 发行年份<br>
>number = 番号<br>
>cover = 封面链接<br>
>tag = 类型<br>
>outline = 简介<br>
>runtime = 时长<br>
##### **例子**:<br>
目录结构规则:```location_rule='JAV_output/'+actor+'/'+number```<br> **不推荐修改时在这里添加title**有时title过长因为Windows API问题抓取数据时新建文件夹容易出错。<br>
影片命名规则:```naming_rule='['+number+']-'+title```<br> **在EMBY,KODI等本地媒体库显示的标题不影响目录结构下影片文件的命名**,依旧是 番号+后缀。
### 3.更新开关
>[update]<br>update_check=1<br>
1为开0为关
## 3.把软件拷贝和电影的统一目录下
## 4.运行 ```AV_Data_capture.py/.exe```
当文件名包含:<br>
中文,字幕,-c., -C., 处理元数据时会加上**中文字幕**标签
## 5.异常处理(重要)
### 请确保软件是完整地确保ini文件内容是和下载提供ini文件内容的一致的
### 关于软件打开就闪退
可以打开cmd命令提示符把 ```AV_Data_capture.py/.exe```拖进cmd窗口回车运行查看错误出现的错误信息**依据以下条目解决**
### 关于 ```Updata_check``` 和 ```JSON``` 相关的错误
跳转 [网络设置](#1网络设置)
### 关于```FileNotFoundError: [WinError 3] 系统找不到指定的路径。: 'JAV_output''```
在软件所在文件夹下新建 JAV_output 文件夹,可能是你没有把软件拉到和电影的同一目录
### 关于连接拒绝的错误
请设置好[代理](#1针对某些地区的代理设置)<br>
### 关于Nonetype,xpath报错
同上<br>
### 关于番号提取失败或者异常
**目前可以提取元素的影片:JAVBUS上有元数据的电影素人系列:300Maan,259luxu,siro等,FC2系列**<br>
>下一张图片来自Pockies的blog 原作者已授权<br>
![](https://raw.githubusercontent.com/Pockies/pic/master/741f9461gy1g1cxc31t41j20i804zdgo.jpg)
目前作者已经完善了番号提取机制,功能较为强大,可提取上述文件名的的番号,如果出现提取失败或者异常的情况,请用以下规则命名<br>
**妈蛋不要喂软件那么多野鸡片子,不让软件好好活了,操**
```
COSQ-004.mp4
```
针对 **野鸡番号** 你需要把文件名命名为与抓取网站提供的番号一致文件拓展名除外然后把文件拖拽至core.exe/.py<br>
**野鸡番号**:比如 ```XXX-XXX-1```, ```1301XX-MINA_YUKA``` 这种**野鸡**番号在javbus等资料库存在的作品。<br>**重要**:除了 **影片文件名** ```XXXX-XXX-C```,后面这种-C的是指电影有中文字幕<br>
条件:文件名中间要有下划线或者减号"_","-",没有多余的内容只有番号为最佳,可以让软件更好获取元数据
对于多影片重命名,可以用[ReNamer](http://www.den4b.com/products/renamer)来批量重命名<br>
### 关于PIL/image.py
暂时无解可能是网络问题或者pillow模块打包问题你可以用源码运行要安装好第一步的模块
## 6.软件会自动把元数据获取成功的电影移动到JAV_output文件夹中根据演员分类失败的电影移动到failed文件夹中。
## 7.把JAV_output文件夹导入到EMBY,KODI中等待元数据刷新完成
## 8.写在后面
怎么样,看着自己的日本电影被这样完美地管理,是不是感觉成就感爆棚呢?<br>
**tg官方电报群:[ 点击进群](https://t.me/AV_Data_Capture_Official)**<br>

21
config.ini Normal file
View File

@@ -0,0 +1,21 @@
[proxy]
proxy=127.0.0.1:1080
timeout=10
retry=3
[Name_Rule]
location_rule='JAV_output/'+actor+'/'+number
naming_rule=number+'-'+title
[update]
update_check=1
[media]
media_warehouse=emby
#emby or plex
#plex only test!
[directory_capture]
input_directory=
#everyone switch:1=on, 0=off

522
core.py Normal file → Executable file
View File

@@ -1,3 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re import re
import os import os
import os.path import os.path
@@ -8,6 +11,10 @@ import javbus
import json import json
import fc2fans_club import fc2fans_club
import siro import siro
from ADC_function import *
from configparser import ConfigParser
import argparse
import javdb
#初始化全局变量 #初始化全局变量
title='' title=''
@@ -16,213 +23,394 @@ year=''
outline='' outline=''
runtime='' runtime=''
director='' director=''
actor=[] actor_list=[]
actor=''
release='' release=''
number='' number=''
cover='' cover=''
imagecut='' imagecut=''
tag=[] tag=[]
cn_sub=''
path=''
houzhui=''
website=''
json_data={}
actor_photo={}
naming_rule =''#eval(config['Name_Rule']['naming_rule'])
location_rule=''#eval(config['Name_Rule']['location_rule'])
#=====================资源下载部分=========================== Config = ConfigParser()
def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder! Config.read(config_file, encoding='UTF-8')
import requests try:
try: option = ReadMediaWarehouse()
if not os.path.exists(path): except:
os.makedirs(path) print('[-]Config media_warehouse read failed!')
r = requests.get(url)
with open(str(path) + "/"+str(filename), "wb") as code:
code.write(r.content)
except IOError as e:
print("[-]Movie not found in All website!")
#print("[*]=====================================")
return "failed"
except Exception as e1:
print(e1)
print("[-]Download Failed2!")
time.sleep(3)
os._exit(0)
def PrintFiles(path):
try:
if not os.path.exists(path):
os.makedirs(path)
with open(path + "/" + number + ".nfo", "wt", encoding='UTF-8') as code:
print("<movie>", file=code)
print(" <title>" + title + "</title>", file=code)
print(" <set>", file=code)
print(" </set>", file=code)
print(" <studio>" + studio + "+</studio>", file=code)
print(" <year>" + year + "</year>", file=code)
print(" <outline>"+outline+"</outline>", file=code)
print(" <plot>"+outline+"</plot>", file=code)
print(" <runtime>"+str(runtime).replace(" ","")+"</runtime>", file=code)
print(" <director>" + director + "</director>", file=code)
print(" <poster>" + number + ".png</poster>", file=code)
print(" <thumb>" + number + ".png</thumb>", file=code)
print(" <fanart>"+number + '.jpg'+"</fanart>", file=code)
try:
for u in actor:
print(" <actor>", file=code)
print(" <name>" + u + "</name>", file=code)
print(" </actor>", file=code)
except:
aaaa=''
print(" <maker>" + studio + "</maker>", file=code)
print(" <label>", file=code)
print(" </label>", file=code)
try:
for i in tag:
print(" <tag>" + i + "</tag>", file=code)
except:
aaaa=''
print(" <num>" + number + "</num>", file=code)
print(" <release>" + release + "</release>", file=code)
print(" <cover>"+cover+"</cover>", file=code)
print(" <website>" + "https://www.javbus.com/"+number + "</website>", file=code)
print("</movie>", file=code)
print("[+]Writeed! "+path + "/" + number + ".nfo")
except IOError as e:
print("[-]Write Failed!")
print(e)
except Exception as e1:
print(e1)
print("[-]Write Failed!")
#=====================本地文件处理=========================== #=====================本地文件处理===========================
def moveFailedFolder():
    """Report the current movie as failed and terminate the whole script.

    NOTE(review): the actual move to the failed folder is commented out
    below, so at present this only prints the message and exits — the
    movie file is left where it was. Confirm whether the move should be
    re-enabled.
    """
    global filepath  # kept for the commented-out move below; unused while it stays disabled
    print('[-]Move to "failed"')
    #print('[-]' + filepath + ' -> ' + output_dir + '/failed/')
    #os.rename(filepath, output_dir + '/failed/')
    # os._exit stops the interpreter immediately, skipping cleanup handlers.
    os._exit(0)
def argparse_get_file():
    """Parse the command-line arguments of the scraper.

    Returns:
        tuple[str, str]: (movie file path, movie number). The number is ''
        when ``--number`` was not supplied, in which case the caller parses
        the number out of the file name instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("file", help="Write the file path on here")
    parser.add_argument("--number", help="Enter Number on here", default='')
    args = parser.parse_args()
    return (args.file, args.number)
def CreatFailedFolder():
    """Ensure the 'failed' folder exists next to the program.

    Bug fix: the original checked/created the absolute path '/failed/'
    (the filesystem root on Linux, requiring root permissions), while the
    rest of the program consistently uses the relative 'failed/' folder.
    """
    if not os.path.exists('failed/'):  # 新建failed文件夹
        try:
            os.makedirs('failed/')
        except:
            print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)")
            os._exit(0)
def getDataFromJSON(file_number): #从JSON返回元数据
global title global title
global studio global studio
global year global year
global outline global outline
global runtime global runtime
global director global director
global actor_list
global actor global actor
global release global release
global number global number
global cover global cover
global imagecut global imagecut
global tag global tag
global image_main
global cn_sub
global website
global actor_photo
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", os.path.basename(filepath))) global naming_rule
print("[!]Making Data for ["+filename+"]") global location_rule
file_number = str(re.search('\w+-\w+', filename).group())
#print(a)
try:
# ================================================网站规则添加开始================================================
try: # 添加 需要 正则表达式的规则
# =======================javdb.py=======================
if re.search('^\d{5,}', file_number).group() in file_number:
json_data = json.loads(javbus.main_uncensored(file_number))
except: # 添加 无需 正则表达式的规则
# ====================fc2fans_club.py====================
if 'fc2' in file_number:
json_data = json.loads(fc2fans_club.main(
file_number.strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-').strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-')))
elif 'FC2' in file_number:
json_data = json.loads(fc2fans_club.main(
file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-').strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-')))
elif 'siro' in number or 'SIRO' in number or 'Siro' in number:
json_data = json.loads(siro.main(file_number))
# =======================javbus.py=======================
else:
json_data = json.loads(javbus.main(file_number))
# ================================================网站规则添加结束================================================
#================================================网站规则添加开始================================================ title = str(json_data['title']).replace(' ','')
studio = json_data['studio']
year = json_data['year']
outline = json_data['outline']
runtime = json_data['runtime']
director = json_data['director']
actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表
release = json_data['release']
number = json_data['number']
cover = json_data['cover']
imagecut = json_data['imagecut']
tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
actor_photo = json_data['actor_photo']
website = json_data['website']
if title == '' or number == '':
print('[-]Movie Data not found!')
moveFailedFolder()
try: #添加 需要 正则表达式的规则 # ====================处理异常字符====================== #\/:*?"<>|
if re.search('^\d{5,}', file_number).group() in filename: if '\\' in title:
json_data = json.loads(javbus.main_uncensored(file_number)) title=title.replace('\\', ' ')
except: #添加 无需 正则表达式的规则 elif '/' in title:
if 'fc2' in filename: title=title.replace('/', '')
json_data = json.loads(fc2fans_club.main(file_number)) elif ':' in title:
elif 'FC2' in filename: title=title.replace('/', '')
json_data = json.loads(fc2fans_club.main(file_number)) elif '*' in title:
elif 'siro' in filename: title=title.replace('*', '')
json_data = json.loads(siro.main(file_number)) elif '?' in title:
elif 'SIRO' in filename: title=title.replace('?', '')
json_data = json.loads(siro.main(file_number)) elif '"' in title:
elif '259luxu' in filename: title=title.replace('"', '')
json_data = json.loads(siro.main(file_number)) elif '<' in title:
elif '259LUXU' in filename: title=title.replace('<', '')
json_data = json.loads(siro.main(file_number)) elif '>' in title:
else: title=title.replace('>', '')
json_data = json.loads(javbus.main(file_number)) elif '|' in title:
title=title.replace('|', '')
# ====================处理异常字符 END================== #\/:*?"<>|
naming_rule = eval(config['Name_Rule']['naming_rule'])
#================================================网站规则添加结束================================================ location_rule = eval(config['Name_Rule']['location_rule'])
def creatFolder(): #创建文件夹
global actor
title = json_data['title']
studio = json_data['studio']
year = json_data['year']
outline = json_data['outline']
runtime = json_data['runtime']
director = json_data['director']
actor = str(json_data['actor']).strip("[ ]").replace("'",'').replace(" ",'').split(',')
release = json_data['release']
number = json_data['number']
cover = json_data['cover']
imagecut = json_data['imagecut']
tag = str(json_data['tag']).strip("[ ]").replace("'",'').replace(" ",'').split(',')
except:
print('[-]File '+filename+'`s number can not be caught')
print('[-]Move ' + filename + ' to failed folder')
if not os.path.exists('failed/'): # 新建failed文件夹
os.makedirs('failed/')
if not os.path.exists('failed/'):
print("[-]failed!Dirs can not be make (Please run as Administrator)")
time.sleep(3)
os._exit(0)
shutil.move(filepath, str(os.getcwd())+'/'+'failed/')
os._exit(0)
path = '' #设置path为全局变量后面移动文件要用
def creatFolder():
actor2 = str(actor).strip("[ ]").replace("'",'').replace(" ",'')
global path global path
if not os.path.exists('failed/'): #新建failed文件夹 if len(actor) > 240: #新建成功输出文件夹
os.makedirs('failed/') path = location_rule.replace("'actor'","'超多人'",3).replace("actor","'超多人'",3) #path为影片+元数据所在目录
if not os.path.exists('failed/'): #print(path)
print("[-]failed!Dirs can not be make (Please run as Administrator)")
os._exit(0)
if len(actor2) > 240: #新建成功输出文件夹
path = 'JAV_output' + '/' + '超多人' + '/' + number #path为影片+元数据所在目录
else: else:
path = 'JAV_output' + '/' + str(actor2) + '/' + str(number) path = location_rule
#print(path)
if not os.path.exists(path): if not os.path.exists(path):
os.makedirs(path)
path = str(os.getcwd())+'/'+path
def imageDownload(filepath): #封面是否下载成功否则移动到failed
if DownloadFileWithFilename(cover,str(number) + '.jpg', path) == 'failed':
shutil.move(filepath, 'failed/')
os._exit(0)
DownloadFileWithFilename(cover, number + '.jpg', path)
print('[+]Image Downloaded!', path +'/'+number+'.jpg')
def cutImage():
if imagecut == 1:
try: try:
img = Image.open(path + '/' + number + '.jpg') os.makedirs(path)
imgSize = img.size except:
path = location_rule.replace('/['+number+']-'+title,"/number")
#print(path)
os.makedirs(path)
#=====================资源下载部分===========================
def DownloadFileWithFilename(url, filename, path):  # path = example: photo, video — inside the project folder
    """Download `url` into `path`/`filename`, retrying on network errors.

    Reads proxy/timeout/retry settings from the [proxy] section of the
    config. On success returns None; after exhausting all retries it
    reports the failure and hands the movie to moveFailedFolder() (which
    terminates the process).

    Fixes over the original:
    * a config-read failure used to leave proxy/timeout/retry_count
      undefined and crash with NameError at the retry loop — now sane
      defaults are applied after the warning;
    * the proxy and no-proxy branches were near-duplicates — merged;
    * ConnectionError/ProxyError/ConnectTimeout are subclasses of
      RequestException, so the extra except clauses were unreachable;
    * the `if r == ''` check could never be true for a Response object
      and has been dropped.
    """
    try:
        proxy = Config['proxy']['proxy']
        timeout = int(Config['proxy']['timeout'])
        retry_count = int(Config['proxy']['retry'])
    except:
        print('[-]Proxy config error! Please check the config.')
        # Fall back to defaults matching the shipped config.ini instead of
        # crashing with NameError below.
        proxy = ''
        timeout = 10
        retry_count = 3
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
    # Only pass a proxies mapping when a proxy is actually configured.
    proxies = None
    if not proxy == '':
        proxies = {"http": "http://" + str(proxy), "https": "https://" + str(proxy)}
    for attempt in range(1, retry_count + 1):
        try:
            if not os.path.exists(path):
                os.makedirs(path)
            if proxies is not None:
                r = requests.get(url, headers=headers, timeout=timeout, proxies=proxies)
            else:
                r = requests.get(url, timeout=timeout, headers=headers)
            with open(str(path) + "/" + filename, "wb") as code:
                code.write(r.content)
            return
        except requests.exceptions.RequestException:
            # Covers ConnectionError, ProxyError and ConnectTimeout too.
            print('[-]Image Download : Connect retry ' + str(attempt) + '/' + str(retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')
    moveFailedFolder()
def imageDownload(filepath):
    """Download the cover image into `path`, named per the media warehouse.

    Emby wants <number>.jpg, Plex wants fanart.jpg. Fix over the original:
    DownloadFileWithFilename was called twice per branch (once for the
    'failed' check, then unconditionally again), fetching the cover image
    twice from the network — a single call is sufficient, and a failure
    already routes the movie to the failed folder.
    """
    if option == 'emby':
        if DownloadFileWithFilename(cover, number + '.jpg', path) == 'failed':
            moveFailedFolder()
        print('[+]Image Downloaded!', path + '/' + number + '.jpg')
    elif option == 'plex':
        if DownloadFileWithFilename(cover, 'fanart.jpg', path) == 'failed':
            moveFailedFolder()
        print('[+]Image Downloaded!', path + '/fanart.jpg')
def PrintFiles(filepath):
    """Write the <number>.nfo metadata file for the movie into `path`.

    The NFO layout is identical for both media warehouses except for the
    artwork file names and the <website> value, so those differences are
    resolved up front and a single writer emits the file (the original
    duplicated the whole ~50-line body per branch).

    Bug fix: the original wrote '<studio>NAME+</studio>' — a stray '+'
    was concatenated into the studio value in both branches.

    Relies on module globals filled in by getDataFromJSON()/creatFolder():
    path, option, number, naming_rule, studio, year, outline, runtime,
    director, actor_photo, cn_sub, tag, release, cover, website.
    """
    global title
    global cn_sub
    global actor_photo
    try:
        if not os.path.exists(path):
            os.makedirs(path)
        if option == 'plex':
            poster, thumb, fanart = 'poster.png', 'thumb.png', 'fanart.jpg'
            site = website
        elif option == 'emby':
            poster, thumb, fanart = number + '.png', number + '.png', number + '.jpg'
            site = "https://www.javbus.com/" + number
        else:
            return  # unknown media_warehouse: original if/elif also wrote nothing
        with open(path + "/" + number + ".nfo", "wt", encoding='UTF-8') as code:
            print("<movie>", file=code)
            print(" <title>" + naming_rule + "</title>", file=code)
            print(" <set>", file=code)
            print(" </set>", file=code)
            print(" <studio>" + studio + "</studio>", file=code)
            print(" <year>" + year + "</year>", file=code)
            print(" <outline>" + outline + "</outline>", file=code)
            print(" <plot>" + outline + "</plot>", file=code)
            print(" <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
            print(" <director>" + director + "</director>", file=code)
            print(" <poster>" + poster + "</poster>", file=code)
            print(" <thumb>" + thumb + "</thumb>", file=code)
            print(" <fanart>" + fanart + "</fanart>", file=code)
            try:
                # actor_photo may be '' (no .items()); the except skips actors then.
                for name, photo in actor_photo.items():
                    print(" <actor>", file=code)
                    print(" <name>" + name + "</name>", file=code)
                    if not actor_photo == '':
                        print(" <thumb>" + photo + "</thumb>", file=code)
                    print(" </actor>", file=code)
            except:
                pass
            print(" <maker>" + studio + "</maker>", file=code)
            print(" <label>", file=code)
            print(" </label>", file=code)
            if cn_sub == '1':
                print(" <tag>中文字幕</tag>", file=code)
            try:
                for t in tag:
                    print(" <tag>" + t + "</tag>", file=code)
            except:
                pass
            try:
                for t in tag:
                    print(" <genre>" + t + "</genre>", file=code)
            except:
                pass
            if cn_sub == '1':
                print(" <genre>中文字幕</genre>", file=code)
            print(" <num>" + number + "</num>", file=code)
            print(" <release>" + release + "</release>", file=code)
            print(" <cover>" + cover + "</cover>", file=code)
            print(" <website>" + site + "</website>", file=code)
            print("</movie>", file=code)
            print("[+]Writeed! " + path + "/" + number + ".nfo")
    except IOError as e:
        print("[-]Write Failed!")
        print(e)
        moveFailedFolder()
    except Exception as e1:
        print(e1)
        print("[-]Write Failed!")
        moveFailedFolder()
def cutImage():
if option == 'plex':
if imagecut == 1:
try:
img = Image.open(path + '/fanart.jpg')
imgSize = img.size
w = img.width
h = img.height
img2 = img.crop((w / 1.9, 0, w, h))
img2.save(path + '/poster.png')
except:
print('[-]Cover cut failed!')
else:
img = Image.open(path + '/fanart.jpg')
w = img.width w = img.width
h = img.height h = img.height
img2 = img.crop((w / 1.9, 0, w, h)) img.save(path + '/poster.png')
img2.save(path + '/' + number + '.png') elif option == 'emby':
except: if imagecut == 1:
print('[-]Cover cut failed!') try:
else: img = Image.open(path + '/' + number + '.jpg')
img = Image.open(path + '/' + number + '.jpg') imgSize = img.size
w = img.width w = img.width
h = img.height h = img.height
img.save(path + '/' + number + '.png') img2 = img.crop((w / 1.9, 0, w, h))
img2.save(path + '/' + number + '.png')
except:
print('[-]Cover cut failed!')
else:
img = Image.open(path + '/' + number + '.jpg')
w = img.width
h = img.height
img.save(path + '/' + number + '.png')
def pasteFileToFolder(filepath, path): #文件路径,番号,后缀,要移动至的位置 def pasteFileToFolder(filepath, path): #文件路径,番号,后缀,要移动至的位置
houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|avi|rmvb|wmv|mov|mp4|mkv|flv)$', filepath).group()) global houzhui
houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group())
os.rename(filepath, number + houzhui) os.rename(filepath, number + houzhui)
shutil.move(number + houzhui, path) try:
shutil.move(number + houzhui, path)
except:
print('[-]File Exists! Please check your movie!')
print('[-]move to the root folder of the program.')
os._exit(0)
def moveJpgToBackdrop_copy():
    """Duplicate the downloaded artwork to the extra file names each media server expects.

    Relies on module globals: `option` (media warehouse), `path` (output
    folder for this movie) and `number` (movie id). Assumes the artwork
    was already downloaded by imageDownload()/cutImage().
    """
    if option == 'plex':
        shutil.copy(path + '/fanart.jpg', path + '/Backdrop.jpg')
        shutil.copy(path + '/poster.png', path + '/thumb.png')
    if option == 'emby':
        shutil.copy(path + '/' + number + '.jpg', path + '/Backdrop.jpg')
if __name__ == '__main__': if __name__ == '__main__':
filepath=argparse_get_file() #影片的路径 filepath=argparse_get_file()[0] #影片的路径
getNumberFromFilename(filepath) #定义番号
creatFolder() #创建文件夹 if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
imageDownload(filepath) #creatFoder会返回番号路径 cn_sub='1'
PrintFiles(path)#打印文件
cutImage() #裁剪图 if argparse_get_file()[1] == '': #获取手动拉去影片获取的番号
pasteFileToFolder(filepath,path) #移动文件 try:
number = str(re.findall(r'(.+?)\.',str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$',filepath).group()))).strip("['']").replace('_','-')
print("[!]Making Data for [" + number + "]")
except:
print("[-]failed!Please move the filename again!")
moveFailedFolder()
else:
number = argparse_get_file()[1]
CreatFailedFolder()
getDataFromJSON(number) # 定义番号
creatFolder() # 创建文件夹
imageDownload(filepath) # creatFoder会返回番号路径
PrintFiles(filepath) # 打印文件
cutImage() # 裁剪图
pasteFileToFolder(filepath, path) # 移动文件
moveJpgToBackdrop_copy()

85
fc2fans_club.py Normal file → Executable file
View File

@@ -1,3 +1,4 @@
#!/usr/bin/env python3
import re import re
from lxml import etree#need install from lxml import etree#need install
import json import json
@@ -7,7 +8,16 @@ def getTitle(htmlcode): #获取厂商
#print(htmlcode) #print(htmlcode)
html = etree.fromstring(htmlcode,etree.HTMLParser()) html = etree.fromstring(htmlcode,etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
return result result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1)
#print(result2)
return result2
def getActor(htmlcode):
    """Return the actor name text from an fc2fans.club page, or '' on any failure."""
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        nodes = tree.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')
        return str(nodes).strip(" ['']")
    except:
        return ''
def getStudio(htmlcode): #获取厂商 def getStudio(htmlcode): #获取厂商
html = etree.fromstring(htmlcode,etree.HTMLParser()) html = etree.fromstring(htmlcode,etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
@@ -15,44 +25,61 @@ def getStudio(htmlcode): #获取厂商
def getNum(htmlcode): #获取番号 def getNum(htmlcode): #获取番号
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
#print(result)
return result return result
def getRelease(number): def getRelease(htmlcode2): #
a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
html=etree.fromstring(a,etree.HTMLParser()) html=etree.fromstring(htmlcode2,etree.HTMLParser())
result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']") result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
return result return result
def getCover(htmlcode,number): #获取厂商 def getCover(htmlcode,number,htmlcode2): #获取厂商 #
a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
html = etree.fromstring(a, etree.HTMLParser()) html = etree.fromstring(htmlcode2, etree.HTMLParser())
result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']") result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
return 'http:'+result if result == '':
def getOutline(htmlcode,number): #获取番号 html = etree.fromstring(htmlcode, etree.HTMLParser())
a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
html = etree.fromstring(a, etree.HTMLParser()) return 'http://fc2fans.club' + result2
return 'http:' + result
def getOutline(htmlcode2,number): #获取番号 #
#a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
html = etree.fromstring(htmlcode2, etree.HTMLParser())
result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).replace("\\n",'',10000).strip(" ['']").replace("'",'',10000) result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).replace("\\n",'',10000).strip(" ['']").replace("'",'',10000)
return result return result
# def getTag(htmlcode,number): #获取番号 def getTag(htmlcode): #获取番号
# a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') html = etree.fromstring(htmlcode, etree.HTMLParser())
# html = etree.fromstring(a, etree.HTMLParser()) result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
# result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).replace("\\n",'',10000).strip(" ['']").replace("'",'',10000) return result.strip(" ['']").replace("'",'').replace(' ','')
def getYear(release):
    """Extract a 4-digit year from a release date string; '' when none is found."""
    try:
        return re.search(r'\d{4}', release).group()
    except:
        # No match (AttributeError) or non-string input — treat as unknown year.
        return ''
def main(number): def main(number2):
str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") number=number2.replace('PPV','').replace('ppv','')
htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html') htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html')
dic = { dic = {
'title': getTitle(htmlcode), 'title': getTitle(htmlcode),
'studio': getStudio(htmlcode), 'studio': getStudio(htmlcode),
'year': getRelease(number), 'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
'outline': getOutline(htmlcode,number), 'outline': getOutline(htmlcode,number),
'runtime': '', 'runtime': getYear(getRelease(htmlcode)),
'director': getStudio(htmlcode), 'director': getStudio(htmlcode),
'actor': '', 'actor': getActor(htmlcode),
'release': getRelease(number), 'release': getRelease(number),
'number': number, 'number': 'FC2-'+number,
'cover': getCover(htmlcode,number), 'cover': getCover(htmlcode,number,htmlcode2),
'imagecut': 0, 'imagecut': 0,
'tag':" ", 'tag': getTag(htmlcode),
'actor_photo':'',
'website': 'http://fc2fans.club/html/FC2-' + number + '.html',
} }
#print(getTitle(htmlcode))
#print(getNum(htmlcode))
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
return js return js
#print(main('1051725'))

185
javbus.py Normal file → Executable file
View File

@@ -1,3 +1,4 @@
#!/usr/bin/env python3
import re import re
import requests #need install import requests #need install
from pyquery import PyQuery as pq#need install from pyquery import PyQuery as pq#need install
@@ -9,19 +10,30 @@ from bs4 import BeautifulSoup#need install
from PIL import Image#need install from PIL import Image#need install
import time import time
import json import json
from ADC_function import *
import javdb
import siro
def get_html(url):#网页请求核心 def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} soup = BeautifulSoup(htmlcode, 'lxml')
getweb = requests.get(str(url),timeout=5,headers=headers).text a = soup.find_all(attrs={'class': 'star-name'})
try: d={}
return getweb for i in a:
except: l=i.a['href']
print("[-]Connect Failed! Please check your Proxy.") t=i.get_text()
html = etree.fromstring(get_html(l), etree.HTMLParser())
p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
p2={t:p}
d.update(p2)
return d
def getTitle(htmlcode): #获取标题 def getTitle(htmlcode): #获取标题
doc = pq(htmlcode) doc = pq(htmlcode)
title=str(doc('div.container h3').text()).replace(' ','-') title=str(doc('div.container h3').text()).replace(' ','-')
return title try:
title2 = re.sub('n\d+-','',title)
return title2
except:
return title
def getStudio(htmlcode): #获取厂商 def getStudio(htmlcode): #获取厂商
html = etree.fromstring(htmlcode,etree.HTMLParser()) html = etree.fromstring(htmlcode,etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
@@ -34,7 +46,6 @@ def getCover(htmlcode): #获取封面链接
doc = pq(htmlcode) doc = pq(htmlcode)
image = doc('a.bigImage') image = doc('a.bigImage')
return image.attr('href') return image.attr('href')
print(image.attr('href'))
def getRelease(htmlcode): #获取出版日期 def getRelease(htmlcode): #获取出版日期
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
@@ -62,8 +73,10 @@ def getOutline(htmlcode): #获取演员
doc = pq(htmlcode) doc = pq(htmlcode)
result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text()) result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
return result return result
def getSerise(htmlcode):
    """Return the series/label text from a javbus detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')
    return str(nodes).strip(" ['']")
def getTag(htmlcode): # 获取演员 def getTag(htmlcode): # 获取演员
tag = [] tag = []
soup = BeautifulSoup(htmlcode, 'lxml') soup = BeautifulSoup(htmlcode, 'lxml')
@@ -76,10 +89,70 @@ def getTag(htmlcode): # 获取演员
def main(number): def main(number):
htmlcode=get_html('https://www.javbus.com/'+number) try:
dww_htmlcode=get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) if re.search('\d+\D+', number).group() in number or 'siro' in number or 'SIRO' in number or 'Siro' in number:
js = siro.main(number)
return js
except:
aaaa=''
try:
htmlcode = get_html('https://www.javbus.com/' + number)
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
dic = {
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
'studio': getStudio(htmlcode),
'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
'outline': getOutline(dww_htmlcode),
'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode),
'actor': getActor(htmlcode),
'release': getRelease(htmlcode),
'number': getNum(htmlcode),
'cover': getCover(htmlcode),
'imagecut': 1,
'tag': getTag(htmlcode),
'label': getSerise(htmlcode),
'actor_photo': getActorPhoto(htmlcode),
'website': 'https://www.javbus.com/' + number,
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
if 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number:
htmlcode = get_html('https://www.javbus.com/' + number)
#dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
dic = {
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
'studio': getStudio(htmlcode),
'year': getYear(htmlcode),
'outline': '',
'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode),
'actor': getActor(htmlcode),
'release': getRelease(htmlcode),
'number': getNum(htmlcode),
'cover': getCover(htmlcode),
'imagecut': 1,
'tag': getTag(htmlcode),
'label': getSerise(htmlcode),
'actor_photo': getActorPhoto(htmlcode),
'website': 'https://www.javbus.com/' + number,
}
js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
separators=(',', ':'), ) # .encode('UTF-8')
return js2
return js
except:
a=javdb.main(number)
return a
def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/' + number)
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
if getTitle(htmlcode) == '':
htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
dic = { dic = {
'title': getTitle(htmlcode), 'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
'studio': getStudio(htmlcode), 'studio': getStudio(htmlcode),
'year': getYear(htmlcode), 'year': getYear(htmlcode),
'outline': getOutline(dww_htmlcode), 'outline': getOutline(dww_htmlcode),
@@ -89,84 +162,16 @@ def main(number):
'release': getRelease(htmlcode), 'release': getRelease(htmlcode),
'number': getNum(htmlcode), 'number': getNum(htmlcode),
'cover': getCover(htmlcode), 'cover': getCover(htmlcode),
'imagecut': 1,
'tag':getTag(htmlcode)
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
return js
def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/' + number)
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
#print('un')
#print('https://www.javbus.com/' + number)
dic = {
'title': getTitle(htmlcode),
'studio': getStudio(htmlcode),
'year': getYear(htmlcode),
'outline': getOutline(htmlcode),
'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode),
'actor': getActor(htmlcode),
'release': getRelease(htmlcode),
'number': getNum(htmlcode),
'cover': getCover(htmlcode),
'tag': getTag(htmlcode), 'tag': getTag(htmlcode),
'label': getSerise(htmlcode),
'imagecut': 0, 'imagecut': 0,
'actor_photo': '',
'website': 'https://www.javbus.com/' + number,
} }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
if getYear(htmlcode) == '': if getYear(htmlcode) == '' or getYear(htmlcode) == 'null':
#print('un2') js2 = javdb.main(number)
number2 = number.replace('-', '_') return js2
htmlcode = get_html('https://www.javbus.com/' + number2)
#print('https://www.javbus.com/' + number2)
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number2.replace("_", ''))
dic = {
'title': getTitle(htmlcode),
'studio': getStudio(htmlcode),
'year': getYear(htmlcode),
'outline': getOutline(htmlcode),
'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode),
'actor': getActor(htmlcode),
'release': getRelease(htmlcode),
'number': getNum(htmlcode),
'cover': getCover(htmlcode),
'tag': getTag(htmlcode),
'imagecut': 0,
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
#print(js)
return js
else:
bbb=''
return js
# def return1():
# json_data=json.loads(main('ipx-292'))
#
# title = str(json_data['title'])
# studio = str(json_data['studio'])
# year = str(json_data['year'])
# outline = str(json_data['outline'])
# runtime = str(json_data['runtime'])
# director = str(json_data['director'])
# actor = str(json_data['actor'])
# release = str(json_data['release'])
# number = str(json_data['number'])
# cover = str(json_data['cover'])
# tag = str(json_data['tag'])
#
# print(title)
# print(studio)
# print(year)
# print(outline)
# print(runtime)
# print(director)
# print(actor)
# print(release)
# print(number)
# print(cover)
# print(tag)
# return1()

141
javdb.py Executable file
View File

@@ -0,0 +1,141 @@
#!/usr/bin/env python3
import re
from lxml import etree
import json
import requests
from bs4 import BeautifulSoup
from ADC_function import *
def getTitle(a):
    """Return the video title from a javdb detail page, cleaned up.

    Strips the leading '[NUMBER] ' prefix, turns '/' into ',', and removes
    non-breaking-space artifacts and ' : ' separators.

    Returns '' when the page cannot be parsed or the xpath matches nothing —
    the original except branch referenced `result`, which is unbound when
    parsing itself raised, so failures surfaced as NameError instead.
    """
    try:
        html = etree.fromstring(a, etree.HTMLParser())
        result = str(html.xpath('/html/body/section/div/h2/strong/text()')).strip(" ['']")
        # drop everything up to '] ' (the bracketed product number), then tidy
        return re.sub(r'.*\] ', '', result.replace('/', ',').replace('\\xa0', '').replace(' : ', ''))
    except Exception:
        return ''
def getActor(a):
    """Return the actor list (演員) as a comma-separated string."""
    tree = etree.fromstring(a, etree.HTMLParser())
    row = '//strong[contains(text(),"演員")]/../following-sibling::span'
    plain = str(tree.xpath(row + '/text()')).strip(" ['']")
    linked = str(tree.xpath(row + '/a/text()')).strip(" ['']")
    names = (plain + linked).strip('+')
    # scrub list-repr artifacts left by str()-ing the xpath result
    for old, new in ((",\\xa0", ""), ("'", ""), (' ', ''), (',,', '')):
        names = names.replace(old, new)
    return names.lstrip(',').replace(',', ', ')
def getStudio(a):
    """Return the maker/studio string (製作) from the detail panel."""
    tree = etree.fromstring(a, etree.HTMLParser())
    row = '//strong[contains(text(),"製作")]/../following-sibling::span'
    plain = str(tree.xpath(row + '/text()')).strip(" ['']")
    linked = str(tree.xpath(row + '/a/text()')).strip(" ['']")
    # scrub list-repr artifacts left by str()-ing the xpath result
    return (plain + linked).strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
    """Return the runtime string (時長), trailing 'm'/'i' characters removed."""
    tree = etree.fromstring(a, etree.HTMLParser())
    row = '//strong[contains(text(),"時長")]/../following-sibling::span'
    plain = str(tree.xpath(row + '/text()')).strip(" ['']")
    linked = str(tree.xpath(row + '/a/text()')).strip(" ['']")
    # rstrip('mi') drops any trailing run of 'm'/'i' chars (e.g. a 'mi' unit)
    return (plain + linked).strip('+').rstrip('mi')
def getLabel(a):
    """Return the series/label string (系列) from the detail panel."""
    tree = etree.fromstring(a, etree.HTMLParser())
    row = '//strong[contains(text(),"系列")]/../following-sibling::span'
    plain = str(tree.xpath(row + '/text()')).strip(" ['']")
    linked = str(tree.xpath(row + '/a/text()')).strip(" ['']")
    # scrub list-repr artifacts left by str()-ing the xpath result
    return (plain + linked).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
    """Return the product number (番號) from the detail panel."""
    tree = etree.fromstring(a, etree.HTMLParser())
    row = '//strong[contains(text(),"番號")]/../following-sibling::span'
    plain = str(tree.xpath(row + '/text()')).strip(" ['']")
    linked = str(tree.xpath(row + '/a/text()')).strip(" ['']")
    return (plain + linked).strip('+')
def getYear(getRelease):
    """Extract the first 4-digit run (the year) from a release-date string.

    Returns the input unchanged when no 4-digit run is found or the input
    is not a string.

    NOTE(review): the parameter name shadows the module-level getRelease()
    function; it is kept for backward compatibility with keyword callers.
    """
    try:
        # raw string avoids the invalid-escape-sequence deprecation of '\d'
        return str(re.search(r'\d{4}', getRelease).group())
    except (AttributeError, TypeError):
        # AttributeError: no match (search returned None); TypeError: non-str
        return getRelease
def getRelease(a):
    """Return the release-date string (時間) from the detail panel."""
    tree = etree.fromstring(a, etree.HTMLParser())
    row = '//strong[contains(text(),"時間")]/../following-sibling::span'
    plain = str(tree.xpath(row + '/text()')).strip(" ['']")
    linked = str(tree.xpath(row + '/a/text()')).strip(" ['']")
    return (plain + linked).strip('+')
def getTag(a):
    """Return the genre/tag list (类别) as a comma-separated string."""
    tree = etree.fromstring(a, etree.HTMLParser())
    row = '//strong[contains(text(),"类别")]/../following-sibling::span'
    plain = str(tree.xpath(row + '/text()')).strip(" ['']")
    linked = str(tree.xpath(row + '/a/text()')).strip(" ['']")
    tags = (plain + linked).strip('+')
    # scrub list-repr artifacts left by str()-ing the xpath result
    for old, new in ((",\\xa0", ""), ("'", ""), (' ', ''), (',,', '')):
        tags = tags.replace(old, new)
    return tags.lstrip(',')
def getCover(htmlcode):
    """Return the cover image URL, trying both known page layouts."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    # layout variant A first, then variant B; '' when neither matches
    candidates = (
        '/html/body/section/div/div[2]/div[1]/a/img/@src',
        '/html/body/section/div/div[3]/div[1]/a/img/@src',
    )
    src = ''
    for xpath in candidates:
        src = str(tree.xpath(xpath)).strip(" ['']")
        if src != '':
            break
    return src
def getDirector(a):
    """Return the director string (導演) from the detail panel."""
    tree = etree.fromstring(a, etree.HTMLParser())
    row = '//strong[contains(text(),"導演")]/../following-sibling::span'
    plain = str(tree.xpath(row + '/text()')).strip(" ['']")
    linked = str(tree.xpath(row + '/a/text()')).strip(" ['']")
    # scrub list-repr artifacts left by str()-ing the xpath result
    return (plain + linked).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode):
    """Return the first synopsis paragraph, or '' when absent."""
    dom = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(dom.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
def _search_detail_path(number):
    """Search javdb.com for *number* and return the first result's relative URL.

    Retries with '-' replaced by '_' (uncensored numbering style) when the
    first search yields nothing.  Returns '' when neither search matches.
    """
    page = get_html('https://javdb.com/search?q=' + number + '&f=all')
    tree = etree.fromstring(page, etree.HTMLParser())
    path = str(tree.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
    if path == '':
        page = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all')
        tree = etree.fromstring(page, etree.HTMLParser())
        path = str(tree.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
    return path


def _scrape(number, base_url):
    """Fetch the detail page for *number* at *base_url*; return metadata JSON."""
    result1 = _search_detail_path(number)
    b = get_html(base_url + result1)
    soup = BeautifulSoup(b, 'lxml')
    # the 'panel' div holds the whole info table; getters parse this fragment
    a = str(soup.find(attrs={'class': 'panel'}))
    dic = {
        'actor': getActor(a),
        # title with actor names, product number and mosaic markers removed
        'title': getTitle(b).replace("\\n", '').replace(' ', '')
                            .replace(getActor(a), '').replace(getNum(a), '')
                            .replace('无码', '').replace('有码', '').lstrip(' '),
        'studio': getStudio(a),
        'outline': getOutline(a),
        'runtime': getRuntime(a),
        'director': getDirector(a),
        'release': getRelease(a),
        'number': getNum(a),
        'cover': getCover(b),
        'imagecut': 0,
        'tag': getTag(a),
        'label': getLabel(a),
        'year': getYear(getRelease(a)),
        'actor_photo': '',
        'website': base_url + result1,
    }
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
                      separators=(',', ':'))


def main(number):
    """Return metadata JSON for *number*, preferring the javdb1.com mirror.

    The original duplicated ~30 lines of scraping between a try body
    (detail page on javdb1.com) and a bare except (detail page on
    javdb.com); this keeps the same two-host fallback without the
    duplication.  Falls back when the mirror scrape raises for any reason
    (network error, layout change).
    """
    try:
        return _scrape(number, 'https://javdb1.com')
    except Exception:
        return _scrape(number, 'https://javdb.com')
#print(main('061519-861'))

View File

@@ -1,2 +0,0 @@
pyinstaller --onefile AV_Data_Capture.py
pyinstaller --onefile core.py --hidden-import ADC_function.py --hidden-import fc2fans_club.py --hidden-import javbus.py --hidden-import siro.py

116
siro.py Normal file → Executable file
View File

@@ -1,81 +1,105 @@
#!/usr/bin/env python3
import re import re
from lxml import etree from lxml import etree
import json import json
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ADC_function import *
def get_html(url):#网页请求核心
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
cookies = {'adc':'1'}
getweb = requests.get(str(url),timeout=5,cookies=cookies,headers=headers).text
try:
return getweb
except:
print("[-]Connect Failed! Please check your Proxy.")
def getTitle(a): def getTitle(a):
html = etree.fromstring(a, etree.HTMLParser()) try:
result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']") html = etree.fromstring(a, etree.HTMLParser())
return result result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']")
return result.replace('/', ',')
except:
return ''
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(a, etree.HTMLParser()) html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
result=str(html.xpath('//table[2]/tr[1]/td/a/text()')).strip(" ['\\n ']") result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return result result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
def getStudio(a): def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
result=str(html.xpath('//table[2]/tr[2]/td/a/text()')).strip(" ['\\n ']") result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return result result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1+result2).strip('+').replace("', '",'').replace('"','')
def getRuntime(a): def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result=str(html.xpath('//table[2]/tr[3]/td/text()')).strip(" ['\\n ']") result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return result result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
def getNum(a): def getNum(a):
html = etree.fromstring(a, etree.HTMLParser()) html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result=str(html.xpath('//table[2]/tr[4]/td/text()')).strip(" ['\\n ']") result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
return result '\\n')
def getYear(a): result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
html = etree.fromstring(a, etree.HTMLParser()) '\\n')
#result=str(html.xpath('//table[2]/tr[5]/td/text()')).strip(" ['\\n ']") return str(result1 + result2).strip('+')
result=str(html.xpath('//table[2]/tr[5]/td/text()')).strip(" ['\\n ']") def getYear(getRelease):
return result try:
result = str(re.search('\d{4}',getRelease).group())
return result
except:
return getRelease
def getRelease(a): def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result=str(html.xpath('//table[2]/tr[5]/td/text()')).strip(" ['\\n ']") result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
return result '\\n')
result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+')
def getTag(a): def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result=str(html.xpath('//table[2]/tr[9]/td/text()')).strip(" ['\\n ']") result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
return result '\\n')
result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','')
def getCover(htmlcode): def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="center_column"]/div[2]/div[1]/div/div/h2/img/@src')).strip(" ['']") result = str(html.xpath('//*[@id="center_column"]/div[2]/div[1]/div/div/h2/img/@src')).strip(" ['']")
return result return result
def getDirector(a): def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result = str(html.xpath('//table[2]/tr[7]/td/a/text()')).strip(" ['\\n ']") result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
return result '\\n')
result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
def getOutline(htmlcode): def getOutline(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
return result return result
def main(number2):
def main(number): number=number2.upper()
htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number)) htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'})
soup = BeautifulSoup(htmlcode, 'lxml') soup = BeautifulSoup(htmlcode, 'lxml')
a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','') a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
dic = { dic = {
'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''), 'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''),
'studio': getStudio(a), 'studio': getStudio(a),
'year': getYear(a),
'outline': getOutline(htmlcode), 'outline': getOutline(htmlcode),
'runtime': getRuntime(a), 'runtime': getRuntime(a),
'director': getDirector(a), 'director': getDirector(a),
'actor': getActor(a), 'actor': getActor(a),
'release': getRelease(a), 'release': getRelease(a),
'number': number, 'number': getNum(a),
'cover': getCover(htmlcode), 'cover': getCover(htmlcode),
'imagecut': 0, 'imagecut': 0,
'tag':' ', 'tag': getTag(a),
'label':getLabel(a),
'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': '',
'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
} }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
return js return js
#print(main('300maan-373'))

5
update_check.json Normal file
View File

@@ -0,0 +1,5 @@
{
"version": "0.11.7",
"version_show":"Beta 11.7",
"download": "https://github.com/wenead99/AV_Data_Capture/releases"
}