22 Commits

Author SHA1 Message Date
wenead99
f34888d2e7 Update README.md 2019-06-23 14:27:39 +08:00
wenead99
f609e647b5 Update README.md 2019-06-23 14:26:27 +08:00
wenead99
ffc280a01c Update README.md 2019-06-23 14:24:13 +08:00
wenead99
fee0ae95b3 Update README.md 2019-06-23 11:18:26 +08:00
wenead99
cd7e254d2e Update README.md 2019-06-23 11:11:32 +08:00
wenead99
ce2995123d Update README.md 2019-06-23 01:08:27 +08:00
wenead99
46e676b592 Update README.md 2019-06-23 01:08:06 +08:00
wenead99
a435d645e4 Update README.md 2019-06-23 01:00:57 +08:00
wenead99
76eecd1e6f Update README.md 2019-06-23 01:00:33 +08:00
wenead99
3c296db204 Update README.md 2019-06-23 00:57:01 +08:00
wenead99
7d6408fe29 Update README.md 2019-06-23 00:56:44 +08:00
wenead99
337c84fd1c Update README.md 2019-06-23 00:55:02 +08:00
wenead99
ad220c1ca6 Update README.md 2019-06-23 00:54:48 +08:00
wenead99
37df711cdc Update README.md 2019-06-23 00:54:28 +08:00
wenead99
92dd9cb734 Update README.md 2019-06-23 00:51:40 +08:00
wenead99
64445b5105 Update README.md 2019-06-23 00:46:11 +08:00
wenead99
bfdb094ee3 Update README.md 2019-06-23 00:35:35 +08:00
wenead99
b38942a326 Update README.md 2019-06-23 00:34:55 +08:00
wenead99
7d03a1f7f9 Update README.md 2019-06-23 00:34:12 +08:00
wenead99
f9c0df7e06 Update README.md 2019-06-23 00:32:30 +08:00
wenead99
b1783d8c75 Update AV_Data_Capture.py 2019-06-22 19:22:23 +08:00
wenead99
908da6d006 Add files via upload 2019-06-22 19:20:54 +08:00
4 changed files with 36 additions and 11 deletions

View File

@@ -78,4 +78,4 @@ if __name__ =='__main__':
print("[!]Cleaning empty folders")
CEF('JAV_output')
print("[+]All finished!!!")
input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看错误信息。")
input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看错误信息。")

View File

@@ -1,4 +1,19 @@
# AV Data Capture 日本AV元数据刮削器
<a title="Hits" target="_blank" href="https://github.com/b3log/hits"><img src="https://hits.b3log.org/b3log/hits.svg"></a>
![](https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square)
![](https://img.shields.io/github/downloads/wenead99/av_data_capture/total.svg?style=flat-square)<br>
![](https://img.shields.io/github/languages/code-size/wenead99/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/github/issues/wenead99/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/github/license/wenead99/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/github/release/wenead99/av_data_capture.svg?style=flat-square)<br>
![](https://img.shields.io/github/forks/wenead99/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/github/stars/wenead99/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/github/watchers/wenead99/av_data_capture.svg?style=flat-square)
# 目录
* [前言](#前言)
* [捐助二维码](#捐助二维码)
@@ -83,7 +98,7 @@ pip install pillow
## 2.配置proxy.ini
#### 1.针对网络审查国家或地区的代理设置
打开```proxy.ini```,在```[proxy]```下的```proxy```行设置本地代理地址和端口,支持Shadowsocks/R,V2RAY本地代理端口:<br>
例子:```proxy=127.0.0.1:1080```<br>
例子:```proxy=127.0.0.1:1080```<br>素人系列抓取建议使用日本代理<br>
**(路由器拥有自动代理功能的可以把proxy=后面内容去掉)**<br>
**如果遇到timeout错误可以把文件的proxy=后面的地址和端口删除并开启vpn全局模式或者重启电脑vpn网卡**<br>
[回到目录](#目录)
@@ -114,8 +129,8 @@ pip install pillow
## 5.异常处理(重要)
### 关于连接拒绝的错误
请设置好[代理](#1针对网络审查国家或地区的代理设置)<br>
### 关于Nonetype,xpath报错
同上<br>
[回到目录](#目录)
### 关于番号提取失败或者异常
**目前可以提取元素的影片:JAVBUS上有元数据的电影,素人系列:300Maan,259luxu,siro等,FC2系列**<br>

13
core.py
View File

@@ -108,9 +108,9 @@ def getNumberFromFilename(filepath):
except: #添加 无需 正则表达式的规则
# ====================fc2fans_club.py===================
if 'fc2' in filename:
json_data = json.loads(fc2fans_club.main(file_number.strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-')))
json_data = json.loads(fc2fans_club.main(file_number.strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-').strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-')))
elif 'FC2' in filename:
json_data = json.loads(fc2fans_club.main(file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-')))
json_data = json.loads(fc2fans_club.main(file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-').strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-')))
#print(file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-'))
#=======================javbus.py=======================
else:
@@ -156,6 +156,7 @@ def getNumberFromFilename(filepath):
os._exit(0)
path = '' #设置path为全局变量后面移动文件要用
def creatFolder():
global actor
global path
if len(actor) > 240: #新建成功输出文件夹
path = location_rule.replace("'actor'","'超多人'",3).replace("actor","'超多人'",3) #path为影片+元数据所在目录
@@ -164,7 +165,13 @@ def creatFolder():
path = location_rule
#print(path)
if not os.path.exists(path):
os.makedirs(path)
try:
os.makedirs(path)
except:
path = location_rule.replace(actor,"'其他'")
os.makedirs(path)
#=====================资源下载部分===========================
def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder!
config = ConfigParser()

11
siro.py
View File

@@ -6,9 +6,12 @@ from bs4 import BeautifulSoup
from ADC_function import *
def getTitle(a):
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']")
return result.replace('/',',')
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']")
return result.replace('/', ',')
except:
return ''
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
@@ -96,4 +99,4 @@ def main(number2):
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
return js
#print(main('200GANA-1624'))
#print(main('200GANA-1581'))