Fix carib and 1pondo number issues
This commit is contained in:
17
ADC_function.py
Executable file → Normal file
17
ADC_function.py
Executable file → Normal file
@@ -331,7 +331,7 @@ def translateTag_to_sc(tag):
|
||||
'女装人妖', '及膝襪': '及膝袜', '泡泡襪': '泡泡袜', '空中小姐': '空中小姐', '旗袍': '旗袍', '兔女郎': '兔女郎',
|
||||
'女祭司': '女祭司', '動畫人物': '动画人物', '迷你裙警察': '迷你裙警察', '成熟的女人': '成熟的女人', '巨乳': '巨乳',
|
||||
'蘿莉塔': '萝莉塔', '無毛': '无毛', '屁股': '屁股', '苗條': '苗条', '素人': '素人', '乳房': '乳房',
|
||||
'巨大陰莖': '巨大阴茎', '胖女人': '胖女人', '平胸': '平胸', '高': '高', '美腳': '美脚', '孕婦': '孕妇',
|
||||
'巨大陰莖': '巨大阴茎', '胖女人': '胖女人', '平胸': '平胸', '高': '高', '美腳': '美腿', '孕婦': '孕妇',
|
||||
'巨大屁股': '巨大屁股', '瘦小身型': '瘦小身型', '變性者': '变性者', '肌肉': '肌肉', '超乳': '超乳', '乳交':
|
||||
'乳交', '中出': '中出', '多P': '多P', '69': '69', '淫語': '淫语', '女上位': '女上位', '自慰': '自慰',
|
||||
'顏射': '颜射', '潮吹': '潮吹', '口交': '口交', '舔陰': '舔阴', '肛交': '肛交', '手指插入': '手指插入',
|
||||
@@ -359,7 +359,7 @@ def translateTag_to_sc(tag):
|
||||
'動画': '视频', '電子書籍': '电子书', '同人': '同人志', 'アダルトPCゲーム': '成人PC游戏', 'DVD/CD':
|
||||
' DVD/CD', 'コミック': '漫画', 'いろいろレンタル': '各种租赁', '通販': '购物', 'マーケットプレイス': '市场',
|
||||
'3Dプリント': ' 3D打印', 'ロボット': '机器人', '巨乳': '巨乳', '熟女': '熟女', 'ギャル': '美少女',
|
||||
'人妻・主婦': '人妻', '女子校生': '高中女生', '中出し': '中出', 'アナル': '肛门', 'ニューハーフ': '变性人',
|
||||
'人妻・主婦': '人妻', '女子校生': '高中女生', '中出し': '中出', 'アナル': '肛交', 'ニューハーフ': '变性人',
|
||||
'VR専用': 'VR专用', 'ハイクオリティVR': '高质量VR', 'アイドル・芸能人': '偶像/名人', 'アクメ・オーガズム':
|
||||
'性高潮', 'アスリート': '运动员', '姉・妹': '姐妹', 'イタズラ': '恶作剧', 'インストラクター': '指导员',
|
||||
'ウェイトレス': '服务员', '受付嬢': '接待员', 'エステ': '美容院', 'M男': 'M男', 'M女': 'M女', 'OL':
|
||||
@@ -381,7 +381,7 @@ def translateTag_to_sc(tag):
|
||||
'面接': '面试', 'モデル': '模特', '野外・露出': '野外・露出', 'ヨガ': '瑜伽', '乱交': '狂欢', '旅行': '旅行',
|
||||
'レースクィーン': '种族女王', '若妻・幼妻': '年轻妻子/年轻妻子', 'アジア女優': '亚洲女演员', '巨尻': '大屁股', '筋肉':
|
||||
'肌肉', '小柄': '娇小', '黒人男優': '黑人演员', '処女': '处女', '女装・男の娘': '伪娘', 'スレンダー':
|
||||
'苗条', '早漏': '早泄', 'そっくりさん': '相似', '長身': '高大', '超乳': '巨乳', 'デカチン・巨根':
|
||||
'苗条', '早漏': '早泄', 'そっくりさん': '相似', '長身': '高个', '超乳': '巨乳', 'デカチン・巨根':
|
||||
'大鸡巴', '童貞': '处女', '軟体': '柔软的身体', '妊婦': '孕妇', '白人女優': '白人女演员', 'パイパン': '剃光',
|
||||
'日焼け': '晒伤', '貧乳・微乳': '贫乳/小乳房', '美少女': '美少女', '美乳': ' 美乳',
|
||||
'ふたなり': ' 双胞胎', 'ぽっちゃり': ' 丰满', 'ミニ系': ' 迷你系', '学生服':
|
||||
@@ -433,7 +433,7 @@ def translateTag_to_sc(tag):
|
||||
'競泳・スクール水着': '游泳学校的游泳衣', '素人': '素人', 'ベスト・総集編': '精选集', '美乳': '美乳', '美少女': '美少女',
|
||||
'職業色々': '各种职业', '配信専用': '配信专用', '電マ': '电码', '顔射': '颜射', 'アイドル・芸能人': '偶像艺人',
|
||||
'アクション・格闘': '格斗动作', '足コキ': '足交', '脚フェチ': '脚控', 'アジア女優': '亚洲女演员', '汗だく': '满头大汗',
|
||||
'アナルセックス': '肛门性爱', 'アナル': '肛门', '姉・妹': '姐姐、妹妹', 'Eカップ': 'E罩杯', 'イタズラ': '恶作剧',
|
||||
'アナルセックス': '肛门性爱', 'アナル': '肛交', '姉・妹': '姐姐、妹妹', 'Eカップ': 'E罩杯', 'イタズラ': '恶作剧',
|
||||
'異物挿入': '插入异物', 'イメージビデオ': '视频图像', '色白': '白皙', '淫語': '淫语', '淫語モノ': '淫语故事',
|
||||
'インストラクター': '教练', '飲尿': '饮尿', '淫乱・ハード系': '淫乱硬系', 'ウェイトレス': '女服务生', 'Hカップ':
|
||||
'H罩杯', 'SF': 'SF', 'SM': 'SM', 'Fカップ': 'F罩杯', 'M男': 'M男', 'お母さん': '妈妈',
|
||||
@@ -449,7 +449,7 @@ def translateTag_to_sc(tag):
|
||||
'女医生', '女教師': '女教师', '女子アナ': '女主播', '女子校生': '女学生', '女子大生': '女大学生', '女性向け':
|
||||
'面向女性', '女装・男の娘': '伪娘', 'Gカップ': 'G罩杯', 'スカトロ': '蹲', 'スチュワーデス・CA': '空姐CA',
|
||||
'スポーツ': '体育运动', '清楚': '清秀', '制服': '制服', 'その他フェチ': '其他恋物癖', '体操着・ブルマ': '运动服',
|
||||
'多人数': '很多人', '着エロ': '色情', '長身': '高个子', '痴漢': '痴汉', '手コキ': '手锯', '手マン': '手艺人',
|
||||
'多人数': '很多人', '着エロ': '色情', '長身': '高个子', '痴漢': '痴汉', '手コキ': '手淫', '手マン': '手艺人',
|
||||
'Dカップ': 'D罩杯', '泥酔': '烂醉如泥', 'デカチン・巨根': '巨根', '盗撮': '偷拍', '盗撮・のぞき': '偷拍', '童貞':
|
||||
'处男', 'ドキュメンタリー': '记录片', 'ドラッグ・媚薬': '药局', 'ドラマ': '电视剧', 'ニューハーフ': '变性人',
|
||||
'ニーソックス': '过膝袜', '妊婦': '孕妇', '寝取り・寝取られ': '睡下', 'HowTo': 'HowTo',
|
||||
@@ -460,7 +460,12 @@ def translateTag_to_sc(tag):
|
||||
'3P・乱交': '3P・乱交', '野外・露出': '野外露出', '海外': '国外', 'レズ': '女士', 'アニメ': '动画',
|
||||
'アダルト': '成人', 'アイドル': '空闲', '個人撮影': '个人摄影', '無修正': '无修正', 'コスプレ': '角色扮演',
|
||||
'下着': '内衣', '水着': '游泳衣', 'パンチラ': '小册子', 'フェラ': '口交', 'モデル': '模型', '中出し': '中出', '可愛い': '可爱',
|
||||
'オリジナル': '原始', '貧乳': '贫乳', 'オナニー': '自慰', 'パイパン': '菠萝', 'ロリ': '萝莉', '生ハメ': '第一人称'
|
||||
'オリジナル': '原始', '貧乳': '贫乳', 'オナニー': '自慰', 'パイパン': '菠萝', 'ロリ': '萝莉', '生ハメ': '第一人称',
|
||||
|
||||
|
||||
#caribbeancom
|
||||
'青姦': '野战', '初裏': '破处', 'ブルマー': '体操服', 'クンニ ベスト/オムニバス':'汇编', 'クンニ': '舔阴',
|
||||
|
||||
}
|
||||
try:
|
||||
return dict_gen[tag]
|
||||
|
||||
93
WebCrawler/carib.py
Normal file
93
WebCrawler/carib.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import sys
|
||||
sys.path.append('../')
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
from lxml import html
|
||||
import re
|
||||
import urllib.request
|
||||
import socket
|
||||
from ADC_function import *
|
||||
|
||||
def get_html(url):
    """Download *url* and return its body decoded as EUC-JP text.

    The 10 second timeout is passed to this request only, instead of
    changing the process-wide socket default, and the response object is
    closed as soon as the body has been read.

    Raises whatever ``urllib.request.urlopen`` raises when the request
    fails, and ``UnicodeDecodeError`` when the body is not valid EUC-JP.
    """
    # The context manager closes the underlying connection even if read() fails.
    with urllib.request.urlopen(url, timeout=10) as page:
        raw = page.read()
    return raw.decode("euc_jp")
|
||||
|
||||
def main(number: str) -> str:
    """Scrape the Caribbeancom movie page for *number* and return JSON text.

    Args:
        number: Movie id as used in the site URL, e.g. ``"041721-001"``.

    Returns:
        A JSON string. When the page cannot be fetched or parsed, or the
        movie-info section is missing, the JSON holds only ``{"title": ""}``
        (presumably callers treat an empty title as "not found" -- verify
        against the caller). Otherwise it holds the full metadata dict
        built below.

    Raises:
        Exceptions raised by the individual ``get_*`` field extractors are
        not caught here and propagate to the caller.
    """
    page_url = 'https://www.caribbeancom.com/moviepages/' + number + '/index.html'
    try:
        caribhtml = get_html(page_url)

        soup = BeautifulSoup(caribhtml, "html.parser")
        # Re-serialise through BeautifulSoup before handing the markup to lxml.
        lx = html.fromstring(str(soup))

        if not soup.select_one("#moviepages > div > div:nth-child(1) > div.movie-info.section"):
            raise ValueError("page info not found")
    except Exception:
        # Best-effort scraper: any fetch/parse failure yields an empty result.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        dic = {"title": ""}
        return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))

    dic = {
        'title': get_title(lx),
        'studio': '加勒比',
        'year': get_year(lx),
        'outline': '',
        'runtime': get_runtime(lx),
        'director': '',
        'actor': get_actor(lx),
        'release': get_release(lx),
        'number': number,
        'cover': 'https://www.caribbeancom.com/moviepages/' + number + '/images/l_l.jpg',
        'tag': get_tag(lx),
        'extrafanart': get_extrafanart(lx),
        'label': '',
        'imagecut': 0,
        'actor_photo': '',
        'website': page_url,
        'source': 'carib.py',
        'series': '',
    }
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
|
||||
|
||||
def get_title(lx: html.HtmlElement) -> str:
    """Return the stripped text of the movie heading (``h1[itemprop=name]``).

    Raises ``IndexError`` when the heading is not present in *lx*.
    """
    heading_texts = lx.xpath(
        "//div[@class='movie-info section']/div[@class='heading']/h1[@itemprop='name']/text()"
    )
    first_heading = heading_texts[0]
    return str(first_heading).strip()
|
||||
|
||||
def get_year(lx: html.HtmlElement) -> str:
    """Return the first four characters of the second spec item's text.

    Presumably that item is the release date in ``YYYY/MM/DD`` form, making
    the result the year -- confirm against the live page layout.
    Raises ``IndexError`` when the item is not present in *lx*.
    """
    spec_texts = lx.xpath("//li[2]/span[@class='spec-content']/text()")
    date_text = spec_texts[0]
    return date_text[:4]
|
||||
|
||||
def get_release(lx: html.HtmlElement) -> str:
    """Return the second spec item's text with every ``/`` replaced by ``-``.

    Presumably that item is the release date (``YYYY/MM/DD`` becomes
    ``YYYY-MM-DD``) -- confirm against the live page layout.
    Raises ``IndexError`` when the item is not present in *lx*.
    """
    spec_texts = lx.xpath("//li[2]/span[@class='spec-content']/text()")
    date_text = spec_texts[0]
    return date_text.replace('/', '-')
|
||||
|
||||
def get_actor(lx: html.HtmlElement) -> list:
    """Return the list of actor names found in the page's spec section.

    Entries whose text is exactly ``'他'`` are dropped (presumably an
    "and others" placeholder rather than a real name). The result is an
    empty list when no actor nodes match.
    """
    actors = lx.xpath("//span[@class='spec-content']/a[@itemprop='actor']/span/text()")
    return [act for act in actors if str(act) != '他']
|
||||
|
||||
def get_tag(lx: html.HtmlElement) -> list:
    """Return the page's genre labels, each passed through ``translateTag_to_sc``.

    The result is an empty list when no genre links match.
    """
    genres = lx.xpath("//span[@class='spec-content']/a[@itemprop='genre']/text()")
    return [translateTag_to_sc(str(g)) for g in genres]
|
||||
|
||||
def get_extrafanart(lx: html.HtmlElement) -> list:
    """Return absolute URLs of the sample images linked from the gallery grid.

    Links are collected in document order and collection stops at the first
    href containing ``'/member/'`` (presumably members-only images; that
    link and everything after it are skipped). The result is an empty list
    when the gallery is absent.
    """
    hrefs = lx.xpath("//*[@id='sampleexclude']/div[2]/div/div[@class='grid-item']/div/a/@href")
    urls = []
    for href in hrefs:
        path = str(href)
        if '/member/' in path:
            # Stop entirely, not just skip: nothing after this link is collected.
            break
        urls.append('https://www.caribbeancom.com' + path)
    return urls
|
||||
|
||||
def get_runtime(lx: html.HtmlElement) -> str:
    """Return the stripped text of the ``span[itemprop=duration]`` spec entry.

    Raises ``IndexError`` when the duration entry is not present in *lx*.
    """
    duration_texts = lx.xpath("//span[@class='spec-content']/span[@itemprop='duration']/text()")
    first_duration = duration_texts[0]
    return str(first_duration).strip()
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: scrape two sample movie ids and print the JSON results.
    for sample_number in ("041721-001", "080520-001"):
        print(main(sample_number))
|
||||
@@ -26,7 +26,7 @@ max_title_len= 50
|
||||
update_check=1
|
||||
|
||||
[priority]
|
||||
website=javbus,javdb,airav,fanza,xcity,mgstage,fc2,avsox,jav321,dlsite
|
||||
website=javbus,javdb,airav,fanza,xcity,mgstage,fc2,avsox,jav321,dlsite,carib
|
||||
|
||||
[escape]
|
||||
literals=\()/
|
||||
|
||||
40
core.py
Executable file → Normal file
40
core.py
Executable file → Normal file
@@ -23,6 +23,7 @@ from WebCrawler import mgstage
|
||||
from WebCrawler import xcity
|
||||
# from WebCrawler import javlib
|
||||
from WebCrawler import dlsite
|
||||
from WebCrawler import carib
|
||||
|
||||
|
||||
def escape_path(path, escape_literals: str): # Remove escape literals
|
||||
@@ -63,6 +64,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON
|
||||
"xcity": xcity.main,
|
||||
# "javlib": javlib.main,
|
||||
"dlsite": dlsite.main,
|
||||
"carib": carib.main,
|
||||
}
|
||||
|
||||
# default fetch order list, from the beginning to the end
|
||||
@@ -70,19 +72,25 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON
|
||||
|
||||
# if the input file name matches certain rules,
|
||||
# move some web service to the beginning of the list
|
||||
if "avsox" in sources and (re.match(r"^\d{5,}", file_number) or
|
||||
"HEYZO" in file_number or "heyzo" in file_number or "Heyzo" in file_number
|
||||
lo_file_number = file_number.lower()
|
||||
if "carib" in sources and (re.match(r"^\d{6}-\d{3}", file_number)
|
||||
):
|
||||
sources.insert(0, sources.pop(sources.index("carib")))
|
||||
elif "avsox" in sources and (re.match(r"^\d{5,}", file_number) or
|
||||
"heyzo" in lo_file_number
|
||||
):
|
||||
# if conf.debug() == True:
|
||||
# print('[+]select avsox')
|
||||
sources.insert(0, sources.pop(sources.index("avsox")))
|
||||
elif "mgstage" in sources and (re.match(r"\d+\D+", file_number) or
|
||||
"siro" in file_number or "SIRO" in file_number or "Siro" in file_number
|
||||
"siro" in lo_file_number
|
||||
):
|
||||
sources.insert(0, sources.pop(sources.index("mgstage")))
|
||||
elif "fc2" in sources and ("fc2" in file_number or "FC2" in file_number
|
||||
elif "fc2" in sources and ("fc2" in lo_file_number
|
||||
):
|
||||
sources.insert(0, sources.pop(sources.index("fc2")))
|
||||
elif "dlsite" in sources and (
|
||||
"RJ" in file_number or "rj" in file_number or "VJ" in file_number or "vj" in file_number
|
||||
"rj" in lo_file_number
|
||||
):
|
||||
sources.insert(0, sources.pop(sources.index("dlsite")))
|
||||
|
||||
@@ -364,7 +372,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa
|
||||
r = requests.get(url, headers=headers, timeout=timeout, proxies=proxies)
|
||||
if r == '':
|
||||
print('[-]Movie Data not found!')
|
||||
return
|
||||
return
|
||||
with open(str(path) + "/" + filename, "wb") as code:
|
||||
code.write(r.content)
|
||||
return
|
||||
@@ -376,7 +384,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa
|
||||
r = requests.get(url, timeout=timeout, headers=headers)
|
||||
if r == '':
|
||||
print('[-]Movie Data not found!')
|
||||
return
|
||||
return
|
||||
with open(str(path) + "/" + filename, "wb") as code:
|
||||
code.write(r.content)
|
||||
return
|
||||
@@ -636,14 +644,14 @@ def paste_file_to_folder(filepath, path, number, leak_word, c_word, conf: config
|
||||
os.rename(filepath.replace(houzhui, subname), path + '/' + number + leak_word + c_word + subname)
|
||||
print('[+]Sub moved!')
|
||||
return True
|
||||
|
||||
|
||||
except FileExistsError:
|
||||
print('[-]File Exists! Please check your movie!')
|
||||
print('[-]move to the root folder of the program.')
|
||||
return
|
||||
return
|
||||
except PermissionError:
|
||||
print('[-]Error! Please run as administrator!')
|
||||
return
|
||||
return
|
||||
|
||||
|
||||
def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, conf): # 文件路径,番号,后缀,要移动至的位置
|
||||
@@ -667,7 +675,7 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
|
||||
except FileExistsError:
|
||||
print('[-]File Exists! Please check your movie!')
|
||||
print('[-]move to the root folder of the program.')
|
||||
return
|
||||
return
|
||||
except PermissionError:
|
||||
print('[-]Error! Please run as administrator!')
|
||||
return
|
||||
@@ -736,7 +744,7 @@ def core_main(file_path, number_th, conf: config.Config):
|
||||
if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
|
||||
cn_sub = '1'
|
||||
c_word = '-C' # 中文字幕影片后缀
|
||||
|
||||
|
||||
# 判断是否无码
|
||||
if is_uncensored(number):
|
||||
uncensored = 1
|
||||
@@ -761,7 +769,7 @@ def core_main(file_path, number_th, conf: config.Config):
|
||||
# main_mode
|
||||
# 1: 刮削模式 / Scraping mode
|
||||
# 2: 整理模式 / Organizing mode
|
||||
# 3:不改变路径刮削
|
||||
# 3:不改变路径刮削
|
||||
if conf.main_mode() == 1:
|
||||
# 创建文件夹
|
||||
path = create_folder(conf.success_folder(), json_data.get('location_rule'), json_data, conf)
|
||||
@@ -780,7 +788,7 @@ def core_main(file_path, number_th, conf: config.Config):
|
||||
trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath, conf, conf.failed_folder())
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
try:
|
||||
# 下载剧照 data, path, conf: config.Config, filepath, failed_folder
|
||||
if json_data.get('extrafanart'):
|
||||
@@ -800,7 +808,7 @@ def core_main(file_path, number_th, conf: config.Config):
|
||||
thumb_path = path + '/' + number + leak_word + c_word + '-thumb.jpg'
|
||||
if conf.is_watermark():
|
||||
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)
|
||||
|
||||
|
||||
elif conf.main_mode() == 2:
|
||||
# 创建文件夹
|
||||
path = create_folder(conf.success_folder(), json_data.get('location_rule'), json_data, conf)
|
||||
@@ -810,7 +818,7 @@ def core_main(file_path, number_th, conf: config.Config):
|
||||
thumb_path = path + '/' + number + leak_word + c_word + '-thumb.jpg'
|
||||
if conf.is_watermark():
|
||||
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)
|
||||
|
||||
|
||||
elif conf.main_mode() == 3:
|
||||
path = file_path.rsplit('/', 1)[0]
|
||||
path = path.rsplit('\\', 1)[0]
|
||||
|
||||
@@ -36,12 +36,16 @@ def get_number(debug,filepath: str) -> str:
|
||||
#filepath = filepath.replace("_", "-")
|
||||
filepath.strip('22-sht.me').strip('-HD').strip('-hd')
|
||||
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
|
||||
if 'FC2' or 'fc2' in filename:
|
||||
filename = filename.replace('PPV', '').replace('ppv', '').replace('--', '-').replace('_', '-')
|
||||
lower_check = filename.lower()
|
||||
if 'fc2' in lower_check:
|
||||
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
|
||||
file_number = re.search(r'\w+-\w+', filename, re.A).group()
|
||||
tokyo_hot_check = filename.lower()
|
||||
if "tokyo" in tokyo_hot_check and "hot" in tokyo_hot_check:
|
||||
file_number = re.search(r'(cz|k|n|red-|se)\d{3,4}', tokyo_hot_check, re.A).group()
|
||||
if "tokyo" in lower_check and "hot" in lower_check:
|
||||
file_number = re.search(r'(cz|k|n|red-|se)\d{3,4}', lower_check, re.A).group()
|
||||
if "carib" in lower_check:
|
||||
file_number = str(re.search(r'\d{6}(-|_)\d{3}', lower_check, re.A).group()).replace('_', '-')
|
||||
if "1pon" in lower_check:
|
||||
file_number = str(re.search(r'\d{6}(-|_)\d{3}', lower_check, re.A).group()).replace('-', '_')
|
||||
return file_number
|
||||
else: # 提取不含减号-的番号,FANZA CID
|
||||
# 欧美番号匹配规则
|
||||
@@ -64,12 +68,16 @@ def get_number(debug,filepath: str) -> str:
|
||||
#filepath = filepath.replace("_", "-")
|
||||
filepath.strip('22-sht.me').strip('-HD').strip('-hd')
|
||||
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
|
||||
if 'FC2' or 'fc2' in filename:
|
||||
filename = filename.replace('PPV', '').replace('ppv', '').replace('--', '-').replace('_', '-')
|
||||
lower_check = filename.lower()
|
||||
if 'fc2' in lower_check:
|
||||
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
|
||||
file_number = re.search(r'\w+-\w+', filename, re.A).group()
|
||||
tokyo_hot_check = filename.lower()
|
||||
if "tokyo" in tokyo_hot_check and "hot" in tokyo_hot_check:
|
||||
file_number = re.search(r'(cz|k|n|red-|se)\d{3,4}', tokyo_hot_check, re.A).group()
|
||||
if "tokyo" in lower_check and "hot" in lower_check:
|
||||
file_number = re.search(r'(cz|k|n|red-|se)\d{3,4}', lower_check, re.A).group()
|
||||
if "carib" in lower_check:
|
||||
file_number = str(re.search(r'\d{6}(-|_)\d{3}', lower_check, re.A).group()).replace('_', '-')
|
||||
if "1pon" in lower_check:
|
||||
file_number = str(re.search(r'\d{6}(-|_)\d{3}', lower_check, re.A).group()).replace('-', '_')
|
||||
return file_number
|
||||
else: # 提取不含减号-的番号,FANZA CID
|
||||
# 欧美番号匹配规则
|
||||
|
||||
Reference in New Issue
Block a user