Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f34888d2e7 | ||
|
|
f609e647b5 | ||
|
|
ffc280a01c | ||
|
|
fee0ae95b3 | ||
|
|
cd7e254d2e | ||
|
|
ce2995123d | ||
|
|
46e676b592 | ||
|
|
a435d645e4 | ||
|
|
76eecd1e6f | ||
|
|
3c296db204 | ||
|
|
7d6408fe29 | ||
|
|
337c84fd1c | ||
|
|
ad220c1ca6 | ||
|
|
37df711cdc | ||
|
|
92dd9cb734 | ||
|
|
64445b5105 | ||
|
|
bfdb094ee3 | ||
|
|
b38942a326 | ||
|
|
7d03a1f7f9 | ||
|
|
f9c0df7e06 | ||
|
|
b1783d8c75 | ||
|
|
908da6d006 |
@@ -78,4 +78,4 @@ if __name__ =='__main__':
|
||||
print("[!]Cleaning empty folders")
|
||||
CEF('JAV_output')
|
||||
print("[+]All finished!!!")
|
||||
input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看错误信息。")
|
||||
input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看错误信息。")
|
||||
|
||||
21
README.md
21
README.md
@@ -1,4 +1,19 @@
|
||||
# AV Data Capture 日本AV元数据刮削器
|
||||
|
||||
|
||||
<a title="Hits" target="_blank" href="https://github.com/b3log/hits"><img src="https://hits.b3log.org/b3log/hits.svg"></a>
|
||||

|
||||
<br>
|
||||

|
||||

|
||||

|
||||
<br>
|
||||

|
||||

|
||||

|
||||
|
||||
|
||||
|
||||
# 目录
|
||||
* [前言](#前言)
|
||||
* [捐助二维码](#捐助二维码)
|
||||
@@ -83,7 +98,7 @@ pip install pillow
|
||||
## 2.配置proxy.ini
|
||||
#### 1.针对网络审查国家或地区的代理设置
|
||||
打开```proxy.ini```,在```[proxy]```下的```proxy```行设置本地代理地址和端口,支持Shadowsocks/R,V2RAY本地代理端口:<br>
|
||||
例子:```proxy=127.0.0.1:1080```<br>
|
||||
例子:```proxy=127.0.0.1:1080```<br>素人系列抓取建议使用日本代理<br>
|
||||
**(路由器拥有自动代理功能的可以把proxy=后面内容去掉)**<br>
|
||||
**如果遇到timeout错误,可以把文件的proxy=后面的地址和端口删除,并开启vpn全局模式,或者重启电脑,vpn,网卡**<br>
|
||||
[回到目录](#目录)
|
||||
@@ -114,8 +129,8 @@ pip install pillow
|
||||
## 5.异常处理(重要)
|
||||
### 关于连接拒绝的错误
|
||||
请设置好[代理](#1针对网络审查国家或地区的代理设置)<br>
|
||||
|
||||
|
||||
### 关于Nonetype,xpath报错
|
||||
同上<br>
|
||||
[回到目录](#目录)
|
||||
### 关于番号提取失败或者异常
|
||||
**目前可以提取元素的影片:JAVBUS上有元数据的电影,素人系列:300Maan,259luxu,siro等,FC2系列**<br>
|
||||
|
||||
13
core.py
13
core.py
@@ -108,9 +108,9 @@ def getNumberFromFilename(filepath):
|
||||
except: #添加 无需 正则表达式的规则
|
||||
# ====================fc2fans_club.py===================
|
||||
if 'fc2' in filename:
|
||||
json_data = json.loads(fc2fans_club.main(file_number.strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-')))
|
||||
json_data = json.loads(fc2fans_club.main(file_number.strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-').strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-')))
|
||||
elif 'FC2' in filename:
|
||||
json_data = json.loads(fc2fans_club.main(file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-')))
|
||||
json_data = json.loads(fc2fans_club.main(file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-').strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-')))
|
||||
#print(file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-'))
|
||||
#=======================javbus.py=======================
|
||||
else:
|
||||
@@ -156,6 +156,7 @@ def getNumberFromFilename(filepath):
|
||||
os._exit(0)
|
||||
path = '' #设置path为全局变量,后面移动文件要用
|
||||
def creatFolder():
|
||||
global actor
|
||||
global path
|
||||
if len(actor) > 240: #新建成功输出文件夹
|
||||
path = location_rule.replace("'actor'","'超多人'",3).replace("actor","'超多人'",3) #path为影片+元数据所在目录
|
||||
@@ -164,7 +165,13 @@ def creatFolder():
|
||||
path = location_rule
|
||||
#print(path)
|
||||
if not os.path.exists(path):
|
||||
os.makedirs(path)
|
||||
try:
|
||||
os.makedirs(path)
|
||||
except:
|
||||
path = location_rule.replace(actor,"'其他'")
|
||||
os.makedirs(path)
|
||||
|
||||
|
||||
#=====================资源下载部分===========================
|
||||
def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder!
|
||||
config = ConfigParser()
|
||||
|
||||
11
siro.py
11
siro.py
@@ -6,9 +6,12 @@ from bs4 import BeautifulSoup
|
||||
from ADC_function import *
|
||||
|
||||
def getTitle(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser())
|
||||
result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']")
|
||||
return result.replace('/',',')
|
||||
try:
|
||||
html = etree.fromstring(a, etree.HTMLParser())
|
||||
result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']")
|
||||
return result.replace('/', ',')
|
||||
except:
|
||||
return ''
|
||||
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
|
||||
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
|
||||
result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
@@ -96,4 +99,4 @@ def main(number2):
|
||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
|
||||
return js
|
||||
|
||||
#print(main('200GANA-1624'))
|
||||
#print(main('200GANA-1581'))
|
||||
Reference in New Issue
Block a user