Update 3.1
This commit is contained in:
@@ -29,14 +29,19 @@ def check_update(current_version):
|
|||||||
else:
|
else:
|
||||||
print("[+]Update Check disabled!")
|
print("[+]Update Check disabled!")
|
||||||
|
|
||||||
def argparse_get_file():
|
def argparse_function(switch):
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("file", default='',nargs='?', help="Write the file path on here")
|
parser.add_argument("file", default='',nargs='?', help="Single Movie file path.")
|
||||||
|
parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.")
|
||||||
|
parser.add_argument("-e", "--exit", default='1', nargs='?', help="Exit Switch 1:Press enter key to exit. 2:Auto exit.")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.file == '':
|
if switch == 1:
|
||||||
return ''
|
if args.file == '':
|
||||||
else:
|
return ''
|
||||||
return args.file
|
elif switch == 2:
|
||||||
|
return args.config
|
||||||
|
elif switch == 3:
|
||||||
|
return args.exit
|
||||||
|
|
||||||
def movie_lists(root, escape_folder):
|
def movie_lists(root, escape_folder):
|
||||||
for folder in escape_folder:
|
for folder in escape_folder:
|
||||||
@@ -95,8 +100,8 @@ def getNumber(filepath,absolute_path = False):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
version = '3.0'
|
version = '3.1'
|
||||||
config_file = 'config.ini'
|
config_file = argparse_function(2)
|
||||||
config = ConfigParser()
|
config = ConfigParser()
|
||||||
config.read(config_file, encoding='UTF-8')
|
config.read(config_file, encoding='UTF-8')
|
||||||
success_folder = config['common']['success_output_folder']
|
success_folder = config['common']['success_output_folder']
|
||||||
@@ -114,7 +119,7 @@ if __name__ == '__main__':
|
|||||||
movie_list = movie_lists('.', escape_folder)
|
movie_list = movie_lists('.', escape_folder)
|
||||||
|
|
||||||
#========== 野鸡番号拖动 ==========
|
#========== 野鸡番号拖动 ==========
|
||||||
number_argparse=argparse_get_file()
|
number_argparse = argparse_function(1)
|
||||||
if not number_argparse == '':
|
if not number_argparse == '':
|
||||||
print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,absolute_path = True) + "]")
|
print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,absolute_path = True) + "]")
|
||||||
core_main(number_argparse, getNumber(number_argparse,absolute_path = True))
|
core_main(number_argparse, getNumber(number_argparse,absolute_path = True))
|
||||||
@@ -140,7 +145,7 @@ if __name__ == '__main__':
|
|||||||
# print("[*]======================================================")
|
# print("[*]======================================================")
|
||||||
try:
|
try:
|
||||||
print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]")
|
print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]")
|
||||||
core_main(i, getNumber(i))
|
core_main(i, getNumber(i), config_file=config_file)
|
||||||
print("[*]======================================================")
|
print("[*]======================================================")
|
||||||
except Exception as e: # 番号提取异常
|
except Exception as e: # 番号提取异常
|
||||||
print('[-]' + i + ' ERRPR :')
|
print('[-]' + i + ' ERRPR :')
|
||||||
@@ -159,4 +164,6 @@ if __name__ == '__main__':
|
|||||||
CEF(success_folder)
|
CEF(success_folder)
|
||||||
CEF(failed_folder)
|
CEF(failed_folder)
|
||||||
print("[+]All finished!!!")
|
print("[+]All finished!!!")
|
||||||
|
if argparse_function(3) == '2':
|
||||||
|
os._exit(0)
|
||||||
input("[+][+]Press enter key exit, you can check the error messge before you exit.")
|
input("[+][+]Press enter key exit, you can check the error messge before you exit.")
|
||||||
|
|||||||
9
core.py
9
core.py
@@ -10,6 +10,8 @@ import json
|
|||||||
from ADC_function import *
|
from ADC_function import *
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
import argparse
|
import argparse
|
||||||
|
import requests
|
||||||
|
import random
|
||||||
# =========website========
|
# =========website========
|
||||||
import fc2fans_club
|
import fc2fans_club
|
||||||
import mgstage
|
import mgstage
|
||||||
@@ -18,8 +20,6 @@ import javbus
|
|||||||
import javdb
|
import javdb
|
||||||
import fanza
|
import fanza
|
||||||
import jav321
|
import jav321
|
||||||
import requests
|
|
||||||
import random
|
|
||||||
|
|
||||||
|
|
||||||
# =====================本地文件处理===========================
|
# =====================本地文件处理===========================
|
||||||
@@ -335,7 +335,7 @@ def cutImage(imagecut, path, number, c_word):
|
|||||||
print('[-]Cover cut failed!')
|
print('[-]Cover cut failed!')
|
||||||
elif imagecut == 0:
|
elif imagecut == 0:
|
||||||
shutil.copyfile(path + '/' + number + c_word + '-fanart.jpg',path + '/' + number + c_word + '-poster.jpg')
|
shutil.copyfile(path + '/' + number + c_word + '-fanart.jpg',path + '/' + number + c_word + '-poster.jpg')
|
||||||
print('[+]Image Copyed! ' + path + '/' + number + c_word + '-poster.jpg')
|
print('[+]Image Copyed! ' + path + '/' + number + c_word + '-poster.jpg')
|
||||||
|
|
||||||
|
|
||||||
def pasteFileToFolder(filepath, path, number, c_word): # 文件路径,番号,后缀,要移动至的位置
|
def pasteFileToFolder(filepath, path, number, c_word): # 文件路径,番号,后缀,要移动至的位置
|
||||||
@@ -418,14 +418,13 @@ def debug_mode(json_data):
|
|||||||
aaa = ''
|
aaa = ''
|
||||||
|
|
||||||
|
|
||||||
def core_main(file_path, number_th):
|
def core_main(file_path, number_th, config_file):
|
||||||
# =======================================================================初始化所需变量
|
# =======================================================================初始化所需变量
|
||||||
multi_part = 0
|
multi_part = 0
|
||||||
part = ''
|
part = ''
|
||||||
c_word = ''
|
c_word = ''
|
||||||
cn_sub = ''
|
cn_sub = ''
|
||||||
liuchu = ''
|
liuchu = ''
|
||||||
config_file = 'config.ini'
|
|
||||||
Config = ConfigParser()
|
Config = ConfigParser()
|
||||||
Config.read(config_file, encoding='UTF-8')
|
Config.read(config_file, encoding='UTF-8')
|
||||||
program_mode = Config['common']['main_mode'] # 运行模式
|
program_mode = Config['common']['main_mode'] # 运行模式
|
||||||
|
|||||||
28
fanza.py
28
fanza.py
@@ -223,6 +223,34 @@ def main(number):
|
|||||||
) # .encode('UTF-8')
|
) # .encode('UTF-8')
|
||||||
return js
|
return js
|
||||||
|
|
||||||
|
def main_htmlcode(number):
    """Fetch the raw fanza (DMM) detail page for ``number``.

    The number is normalised into a fanza content id (letters, digits and
    underscore only, lower-cased) and appended to each known detail-page URL
    prefix in turn. The first response that does not contain
    "404 Not Found" is returned.

    :param number: movie number string; a leading "h-" is restored to "h_".
    :return: the page text of the first matching URL, or the JSON string
        ``{"title": ""}`` when every URL answers with "404 Not Found".
    """
    # fanza allows letter + number + underscore, normalize the input here
    # @note: the only observed use of underscore is the h_test123456789 form
    fanza_search_number = number
    # AV_Data_Capture.py getNumber() over-formats the input; restore the h_
    # prefix. Only the leading "h-" is rewritten: str.replace() would also
    # convert any later "h-" inside the number and leave a stray underscore
    # in the cid, because underscores survive the filter below.
    if fanza_search_number.startswith("h-"):
        fanza_search_number = "h_" + fanza_search_number[2:]

    fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()

    fanza_urls = [
        "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=",
    ]
    for url in fanza_urls:
        htmlcode = get_html(url + fanza_search_number)
        if "404 Not Found" not in htmlcode:
            # first category that knows this cid wins
            return htmlcode

    # every category answered 404: hand back an empty-title JSON document
    return json.dumps({"title": "",})
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# print(main("DV-1562"))
|
# print(main("DV-1562"))
|
||||||
|
|||||||
93
javbus.py
93
javbus.py
@@ -4,6 +4,7 @@ from lxml import etree#need install
|
|||||||
from bs4 import BeautifulSoup#need install
|
from bs4 import BeautifulSoup#need install
|
||||||
import json
|
import json
|
||||||
from ADC_function import *
|
from ADC_function import *
|
||||||
|
import fanza
|
||||||
|
|
||||||
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
||||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
soup = BeautifulSoup(htmlcode, 'lxml')
|
||||||
@@ -60,10 +61,19 @@ def getDirector(htmlcode): #获取导演
|
|||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
|
||||||
return result
|
return result
|
||||||
def getOutline(htmlcode): #获取演员
|
def getCID(htmlcode):
|
||||||
doc = pq(htmlcode)
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
|
#print(htmlcode)
|
||||||
|
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
|
||||||
|
result = re.sub('/.*?.jpg','',string)
|
||||||
return result
|
return result
|
||||||
|
def getOutline(htmlcode):  # extract the outline (synopsis) text
    """Return the outline text found in a fanza detail page.

    The text is the string value of the first ``div`` whose class attribute
    contains ``mg-b20 lh4``, with newlines removed.

    :param htmlcode: page source as a string; may be empty when the fanza
        lookup failed upstream.
    :return: the outline text, or ``''`` when the input is empty or the page
        cannot be parsed / queried.
    """
    # Callers pass '' when no fanza page could be fetched; answer directly
    # instead of handing an empty document to the parser.
    if not htmlcode:
        return ''
    try:
        # Parsing is kept inside the try so a malformed document degrades to
        # an empty outline rather than failing the whole scrape.
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        return html.xpath("string(//div[contains(@class,'mg-b20 lh4')])").replace('\n', '')
    except Exception:
        return ''
|
||||||
def getSerise(htmlcode):
|
def getSerise(htmlcode):
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
|
||||||
@@ -78,43 +88,14 @@ def getTag(htmlcode): # 获取演员
|
|||||||
tag.append(i.get_text())
|
tag.append(i.get_text())
|
||||||
return tag
|
return tag
|
||||||
|
|
||||||
|
|
||||||
def main(number):
|
|
||||||
try:
|
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
|
||||||
try:
|
|
||||||
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
|
|
||||||
except:
|
|
||||||
dww_htmlcode = ''
|
|
||||||
dic = {
|
|
||||||
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
|
|
||||||
'studio': getStudio(htmlcode),
|
|
||||||
'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
|
|
||||||
'outline': getOutline(dww_htmlcode),
|
|
||||||
'runtime': getRuntime(htmlcode),
|
|
||||||
'director': getDirector(htmlcode),
|
|
||||||
'actor': getActor(htmlcode),
|
|
||||||
'release': getRelease(htmlcode),
|
|
||||||
'number': getNum(htmlcode),
|
|
||||||
'cover': getCover(htmlcode),
|
|
||||||
'imagecut': 1,
|
|
||||||
'tag': getTag(htmlcode),
|
|
||||||
'label': getSerise(htmlcode),
|
|
||||||
'actor_photo': getActorPhoto(htmlcode),
|
|
||||||
'website': 'https://www.javbus.com/' + number,
|
|
||||||
'source' : 'javbus.py',
|
|
||||||
}
|
|
||||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
|
||||||
return js
|
|
||||||
except:
|
|
||||||
return main_uncensored(number)
|
|
||||||
|
|
||||||
def main_uncensored(number):
|
def main_uncensored(number):
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
htmlcode = get_html('https://www.javbus.com/' + number)
|
||||||
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
|
|
||||||
if getTitle(htmlcode) == '':
|
if getTitle(htmlcode) == '':
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
|
htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
|
||||||
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
|
try:
|
||||||
|
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
||||||
|
except:
|
||||||
|
dww_htmlcode = ''
|
||||||
dic = {
|
dic = {
|
||||||
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
|
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
|
||||||
'studio': getStudio(htmlcode),
|
'studio': getStudio(htmlcode),
|
||||||
@@ -136,3 +117,43 @@ def main_uncensored(number):
|
|||||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
||||||
return js
|
return js
|
||||||
|
|
||||||
|
|
||||||
|
def _scrape_javbus(number):
    """Build the JSON metadata document for ``number`` from its javbus page."""
    htmlcode = get_html('https://www.javbus.com/' + number)
    try:
        # The outline comes from the matching fanza page, looked up by the
        # cid extracted from the javbus page.
        dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
    except Exception:
        # best effort: no fanza page simply means no outline
        dww_htmlcode = ''
    dic = {
        'title': str(re.sub(r'\w+-\d+-', '', getTitle(htmlcode))),
        'studio': getStudio(htmlcode),
        # re.search() returns None when no 4-digit year is present; the
        # resulting AttributeError is what sends main() to its fallback.
        'year': str(re.search(r'\d{4}', getYear(htmlcode)).group()),
        'outline': getOutline(dww_htmlcode),
        'runtime': getRuntime(htmlcode),
        'director': getDirector(htmlcode),
        'actor': getActor(htmlcode),
        'release': getRelease(htmlcode),
        'number': getNum(htmlcode),
        'cover': getCover(htmlcode),
        'imagecut': 1,
        'tag': getTag(htmlcode),
        'label': getSerise(htmlcode),
        'actor_photo': getActorPhoto(htmlcode),
        'website': 'https://www.javbus.com/' + number,
        'source': 'javbus.py',
    }
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
                      separators=(',', ':'), )  # .encode('UTF-8')


def main(number):
    """Return javbus metadata for ``number`` as a JSON string.

    The regular javbus page is scraped first. If anything in that path
    raises, ``main_uncensored(number)`` is tried; if that raises as well, a
    JSON document containing only an empty ``title`` is returned.

    :param number: movie number appended to the javbus base URL.
    :return: JSON text (4-space indent, sorted keys).
    """
    # ``except Exception`` instead of a bare ``except`` so that Ctrl-C and
    # interpreter exit are not swallowed by the fallback chain.
    try:
        try:
            return _scrape_javbus(number)
        except Exception:
            return main_uncensored(number)
    except Exception:
        data = {
            "title": "",
        }
        return json.dumps(
            data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
        )
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"version": "3.0",
|
"version": "3.1",
|
||||||
"version_show": "3.0",
|
"version_show": "3.1",
|
||||||
"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
|
"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user