yjlmiss
2021-11-27 21:19:14 +08:00
36 changed files with 20299 additions and 1401 deletions

2
.github/FUNDING.yml vendored
View File

@@ -1,3 +1,3 @@
# These are supported funding model platforms
custom: ['https://github.com/yoshiko2/AV_Data_Capture/blob/master/donate.png']
custom: ['https://i.postimg.cc/qBmD1v9p/donate.png']

View File

@@ -19,10 +19,10 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Setup Python 3.7
- name: Setup Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.8
- name: Install dependencies
run: |
@@ -42,6 +42,9 @@ jobs:
--hidden-import ADC_function.py \
--hidden-import core.py \
--add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
--add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
--add-data "Img:Img" \
--add-data "config.ini:." \
- name: Build with PyInstaller for windows
if: matrix.os == 'windows-latest'
@@ -51,6 +54,9 @@ jobs:
--hidden-import ADC_function.py `
--hidden-import core.py `
--add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" `
--add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1);opencc" `
--add-data "Img;Img" `
--add-data "config.ini;." `
- name: Copy config.ini
run: |
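The two `--add-data` pairs added above bundle third-party data directories into the PyInstaller build by asking Python where each package lives on disk. A minimal sketch of that lookup, assuming cloudscraper and opencc are installed:

```python
# Sketch of the path lookup the workflow's --add-data flags rely on:
# PyInstaller needs the installed package directory so that non-code assets
# (cloudscraper's browser data, opencc's conversion dictionaries) get bundled.
import cloudscraper
import opencc

print(cloudscraper.__path__[0])  # e.g. .../site-packages/cloudscraper
print(opencc.__path__[0])        # e.g. .../site-packages/opencc
```

The `src:dest` separator is platform-specific, which is why the Linux step uses `:cloudscraper` while the Windows step uses `;cloudscraper`.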

23
.vscode/launch.json vendored
View File

@@ -8,11 +8,26 @@
"name": "Python: 当前文件",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"args": [
"-p","/Volumes/movie/a b"
]
"env": {
"PYTHONIOENCODING": "utf-8"
},
"program": "${workspaceFolder}/AV_Data_capture.py",
"program1": "${workspaceFolder}/WebCrawler/javbus.py",
"program2": "${workspaceFolder}/WebCrawler/javdb.py",
"program3": "${workspaceFolder}/WebCrawler/xcity.py",
"program4": "${workspaceFolder}/number_parser.py",
"program5": "${workspaceFolder}/config.py",
"cwd0": "${fileDirname}",
"cwd1": "${workspaceFolder}/dist",
"cwd2": "${env:HOME}${env:USERPROFILE}/.avdc",
"args0": ["-a","-p","J:/Downloads","-o","J:/log"],
"args1": ["-g","-m","3","-c","1","-d","0"],
"args2": ["-igd0", "-m3", "-p", "J:/JAV_output", "-q", "121220_001"],
"args3": ["-agd0","-m3", "-q", ".*","-p","J:/#JAV_output3"],
"args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/JAV_output"],
"args5": ["-gic1", "-d0", "-m1", "-o", "avlog", "-p", "J:/Downloads"],
"args6": ["-z", "-o", "J:/log"]
}
]
}

716
ADC_function.py Executable file → Normal file
View File

@@ -1,8 +1,8 @@
from os import replace
import requests
import hashlib
#import hashlib
from pathlib import Path
import random
import secrets
import os.path
import uuid
import json
@@ -12,6 +12,10 @@ import re
import config
from urllib.parse import urljoin
import mechanicalsoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from cloudscraper import create_scraper
from concurrent.futures import ThreadPoolExecutor
def getXpathSingle(htmlcode, xpath):
@@ -20,18 +24,15 @@ def getXpathSingle(htmlcode, xpath):
return result1
G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36'
# Core logic for web page requests
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None):
verify = config.Config().cacert_file()
configProxy = config.Config().proxy()
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
verify = config.getInstance().cacert_file()
configProxy = config.getInstance().proxy()
errors = ""
if ua is None:
headers = {"User-Agent": G_USER_AGENT} # noqa
else:
headers = {"User-Agent": ua}
headers = {"User-Agent": ua or G_USER_AGENT} # noqa
for i in range(configProxy.retry):
try:
@@ -42,26 +43,26 @@ def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None)
else:
result = requests.get(str(url), headers=headers, timeout=configProxy.timeout, cookies=cookies)
result.encoding = "utf-8"
if return_type == "object":
return result
elif return_type == "content":
return result.content
else:
result.encoding = encoding or result.apparent_encoding
return result.text
except requests.exceptions.ProxyError:
print("[-]Proxy error! Please check your Proxy")
return
raise requests.exceptions.ProxyError
except Exception as e:
print("[-]Connect retry {}/{}".format(i + 1, configProxy.retry))
errors = str(e)
print('[-]Connect Failed! Please check your Proxy or Network!')
print("[-]" + errors)
raise Exception('Connect Failed')
def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
configProxy = config.Config().proxy()
configProxy = config.getInstance().proxy()
errors = ""
headers_ua = {"User-Agent": G_USER_AGENT}
if headers is None:
@@ -84,15 +85,79 @@ def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
print("[-]" + errors)
def get_html_by_browser(url, cookies: dict = None, ua: str = None, return_type: str = None):
browser = mechanicalsoup.StatefulBrowser(user_agent=G_USER_AGENT if ua is None else ua)
configProxy = config.Config().proxy()
G_DEFAULT_TIMEOUT = 10 # seconds
class TimeoutHTTPAdapter(HTTPAdapter):
def __init__(self, *args, **kwargs):
self.timeout = G_DEFAULT_TIMEOUT
if "timeout" in kwargs:
self.timeout = kwargs["timeout"]
del kwargs["timeout"]
super().__init__(*args, **kwargs)
def send(self, request, **kwargs):
timeout = kwargs.get("timeout")
if timeout is None:
kwargs["timeout"] = self.timeout
return super().send(request, **kwargs)
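A short usage sketch of the adapter above: once mounted, every request through the session inherits the default timeout, while an explicit `timeout=` on a single call still wins (the URL is a placeholder):

```python
# Sketch: mount TimeoutHTTPAdapter so all session requests get a timeout.
import requests

session = requests.Session()
adapter = TimeoutHTTPAdapter(timeout=10)
session.mount("https://", adapter)
session.mount("http://", adapter)
r1 = session.get("https://example.com")             # sent with timeout=10
r2 = session.get("https://example.com", timeout=3)  # explicit value still wins
```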
# with keep-alive feature
def get_html_session(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
configProxy = config.getInstance().proxy()
session = requests.Session()
if isinstance(cookies, dict) and len(cookies):
requests.utils.add_dict_to_cookiejar(session.cookies, cookies)
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
if configProxy.enable:
browser.session.proxies = configProxy.proxies()
result = browser.open(url)
session.verify = config.getInstance().cacert_file()
session.proxies = configProxy.proxies()
headers = {"User-Agent": ua or G_USER_AGENT}
session.headers = headers
try:
if isinstance(url, str) and len(url):
result = session.get(str(url))
else: # an empty url just returns the reusable session object; no return_type needed
return session
if not result.ok:
return ''
result.encoding = "utf-8"
return None
if return_type == "object":
return result
elif return_type == "content":
return result.content
elif return_type == "session":
return result, session
else:
result.encoding = encoding or "utf-8"
return result.text
except requests.exceptions.ProxyError:
print("[-]get_html_session() Proxy error! Please check your Proxy")
except Exception as e:
print(f"[-]get_html_session() failed. {e}")
return None
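A hedged sketch of the calling conventions implemented above (example.com is a placeholder):

```python
# get_html_session() usage sketch.
s = get_html_session()                           # no url: reusable keep-alive session
html = get_html_session("https://example.com")   # default: decoded page text
resp, s2 = get_html_session("https://example.com",
                            return_type="session")  # response plus session for follow-ups
```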
def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None, use_scraper: bool = False):
configProxy = config.getInstance().proxy()
s = create_scraper(browser={'custom': ua or G_USER_AGENT,}) if use_scraper else requests.Session()
if isinstance(cookies, dict) and len(cookies):
requests.utils.add_dict_to_cookiejar(s.cookies, cookies)
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
s.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
s.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
if configProxy.enable:
s.verify = config.getInstance().cacert_file()
s.proxies = configProxy.proxies()
try:
browser = mechanicalsoup.StatefulBrowser(user_agent=ua or G_USER_AGENT, session=s)
if isinstance(url, str) and len(url):
result = browser.open(url)
else:
return browser
if not result.ok:
return None
if return_type == "object":
return result
elif return_type == "content":
@@ -100,25 +165,37 @@ def get_html_by_browser(url, cookies: dict = None, ua: str = None, return_type:
elif return_type == "browser":
return result, browser
else:
result.encoding = encoding or "utf-8"
return result.text
except requests.exceptions.ProxyError:
print("[-]get_html_by_browser() Proxy error! Please check your Proxy")
except Exception as e:
print(f'[-]get_html_by_browser() Failed! {e}')
return None
def get_html_by_form(url, form_name: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None):
browser = mechanicalsoup.StatefulBrowser(user_agent=G_USER_AGENT if ua is None else ua)
if isinstance(cookies, dict):
requests.utils.add_dict_to_cookiejar(browser.session.cookies, cookies)
configProxy = config.Config().proxy()
def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
configProxy = config.getInstance().proxy()
s = requests.Session()
if isinstance(cookies, dict) and len(cookies):
requests.utils.add_dict_to_cookiejar(s.cookies, cookies)
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
s.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
s.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
if configProxy.enable:
browser.session.proxies = configProxy.proxies()
s.verify = config.getInstance().cacert_file()
s.proxies = configProxy.proxies()
try:
browser = mechanicalsoup.StatefulBrowser(user_agent=ua or G_USER_AGENT, session=s)
result = browser.open(url)
if not result.ok:
return ''
form = browser.select_form() if form_name is None else browser.select_form(form_name)
return None
form = browser.select_form() if form_select is None else browser.select_form(form_select)
if isinstance(fields, dict):
for k, v in fields.items():
browser[k] = v
response = browser.submit_selected()
response.encoding = "utf-8"
if return_type == "object":
return response
elif return_type == "content":
@@ -126,12 +203,52 @@ def get_html_by_form(url, form_name: str = None, fields: dict = None, cookies: d
elif return_type == "browser":
return response, browser
else:
result.encoding = encoding or "utf-8"
return response.text
except requests.exceptions.ProxyError:
print("[-]get_html_by_form() Proxy error! Please check your Proxy")
except Exception as e:
print(f'[-]get_html_by_form() Failed! {e}')
return None
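For illustration, a hypothetical login call against the new `form_select` parameter (the URL, CSS selector, and field names are invented):

```python
# Hypothetical sketch: fill and submit a form, keep the browser for follow-ups.
response, browser = get_html_by_form(
    "https://example.com/login",
    form_select='form[action="/login"]',   # CSS selector passed to select_form()
    fields={"username": "user", "password": "secret"},
    return_type="browser",
)
```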
def get_html_by_scraper(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
configProxy = config.getInstance().proxy()
session = create_scraper(browser={'custom': ua or G_USER_AGENT,})
if isinstance(cookies, dict) and len(cookies):
requests.utils.add_dict_to_cookiejar(session.cookies, cookies)
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
if configProxy.enable:
session.verify = config.getInstance().cacert_file()
session.proxies = configProxy.proxies()
try:
if isinstance(url, str) and len(url):
result = session.get(str(url))
else: # an empty url just returns the reusable scraper object; no return_type needed
return session
if not result.ok:
return None
if return_type == "object":
return result
elif return_type == "content":
return result.content
elif return_type == "scraper":
return result, session
else:
result.encoding = encoding or "utf-8"
return result.text
except requests.exceptions.ProxyError:
print("[-]get_html_by_scraper() Proxy error! Please check your Proxy")
except Exception as e:
print(f"[-]get_html_by_scraper() failed. {e}")
return None
# def get_javlib_cookie() -> [dict, str]:
# import cloudscraper
# switch, proxy, timeout, retry_count, proxytype = config.Config().proxy()
# switch, proxy, timeout, retry_count, proxytype = config.getInstance().proxy()
# proxies = get_proxy(proxy, proxytype)
#
# raw_cookie = {}
@@ -157,345 +274,6 @@ def get_html_by_form(url, form_name: str = None, fields: dict = None, cookies: d
# return raw_cookie, user_agent
def translateTag_to_sc(tag):
tranlate_to_sc = config.Config().transalte_to_sc()
if tranlate_to_sc:
dict_gen = {'中文字幕': '中文字幕',
'高清': 'XXXX', '字幕': 'XXXX', '推薦作品': '推荐作品', '通姦': '通奸', '淋浴': '淋浴', '舌頭': '舌头',
'下流': '下流', '敏感': '敏感', '變態': '变态', '願望': '愿望', '慾求不滿': '慾求不满', '服侍': '服侍',
'外遇': '外遇', '訪問': '访问', '性伴侶': '性伴侣', '保守': '保守', '購物': '购物', '誘惑': '诱惑',
'出差': '出差', '煩惱': '烦恼', '主動': '主动', '再會': '再会', '戀物癖': '恋物癖', '問題': '问题',
'騙奸': '骗奸', '鬼混': '鬼混', '高手': '高手', '順從': '顺从', '密會': '密会', '做家務': '做家务',
'秘密': '秘密', '送貨上門': '送货上门', '壓力': '压力', '處女作': '处女作', '淫語': '淫语', '問卷': '问卷',
'住一宿': '住一宿', '眼淚': '眼泪', '跪求': '跪求', '求職': '求职', '婚禮': '婚礼', '第一視角': '第一视角',
'洗澡': '洗澡', '首次': '首次', '劇情': '剧情', '約會': '约会', '實拍': '实拍', '同性戀': '同性恋',
'幻想': '幻想', '淫蕩': '淫荡', '旅行': '旅行', '面試': '面试', '喝酒': '喝酒', '尖叫': '尖叫',
'新年': '新年', '借款': '借款', '不忠': '不忠', '檢查': '检查', '羞恥': '羞耻', '勾引': '勾引',
'新人': '新人', '推銷': '推销', 'ブルマ': '运动短裤',
'AV女優': 'AV女优', '情人': '情人', '丈夫': '丈夫', '辣妹': '辣妹', 'S級女優': 'S级女优', '白領': '白领',
'偶像': '偶像', '兒子': '儿子', '女僕': '女仆', '老師': '老师', '夫婦': '夫妇', '保健室': '保健室',
'朋友': '朋友', '工作人員': '工作人员', '明星': '明星', '同事': '同事', '面具男': '面具男', '上司': '上司',
'睡眠系': '睡眠系', '奶奶': '奶奶', '播音員': '播音员', '鄰居': '邻居', '親人': '亲人', '店員': '店员',
'魔女': '魔女', '視訊小姐': '视讯小姐', '大學生': '大学生', '寡婦': '寡妇', '小姐': '小姐', '秘書': '秘书',
'人妖': '人妖', '啦啦隊': '啦啦队', '美容師': '美容师', '岳母': '岳母', '警察': '警察', '熟女': '熟女',
'素人': '素人', '人妻': '人妻', '痴女': '痴女', '角色扮演': '角色扮演', '蘿莉': '萝莉', '姐姐': '姐姐',
'模特': '模特', '教師': '教师', '學生': '学生', '少女': '少女', '新手': '新手', '男友': '男友',
'護士': '护士', '媽媽': '妈妈', '主婦': '主妇', '孕婦': '孕妇', '女教師': '女教师', '年輕人妻': '年轻人妻',
'職員': '职员', '看護': '看护', '外觀相似': '外观相似', '色狼': '色狼', '醫生': '医生', '新婚': '新婚',
'黑人': '黑人', '空中小姐': '空中小姐', '運動系': '运动系', '女王': '女王', '西裝': '西装', '旗袍': '旗袍',
'兔女郎': '兔女郎', '白人': '白人',
'制服': '制服', '內衣': '内衣', '休閒裝': '休閒装', '水手服': '水手服', '全裸': '全裸', '不穿內褲': '不穿内裤',
'和服': '和服', '不戴胸罩': '不戴胸罩', '連衣裙': '连衣裙', '打底褲': '打底裤', '緊身衣': '紧身衣', '客人': '客人',
'晚禮服': '晚礼服', '治癒系': '治癒系', '大衣': '大衣', '裸體襪子': '裸体袜子', '絲帶': '丝带', '睡衣': '睡衣',
'面具': '面具', '牛仔褲': '牛仔裤', '喪服': '丧服', '極小比基尼': '极小比基尼', '混血': '混血', '毛衣': '毛衣',
'頸鏈': '颈链', '短褲': '短裤', '美人': '美人', '連褲襪': '连裤袜', '裙子': '裙子', '浴衣和服': '浴衣和服',
'泳衣': '泳衣', '網襪': '网袜', '眼罩': '眼罩', '圍裙': '围裙', '比基尼': '比基尼', '情趣內衣': '情趣内衣',
'迷你裙': '迷你裙', '套裝': '套装', '眼鏡': '眼镜', '丁字褲': '丁字裤', '陽具腰帶': '阳具腰带', '男装': '男装',
'': '',
'美肌': '美肌', '屁股': '屁股', '美穴': '美穴', '黑髮': '黑发', '嬌小': '娇小', '曬痕': '晒痕',
'F罩杯': 'F罩杯', 'E罩杯': 'E罩杯', 'D罩杯': 'D罩杯', '素顏': '素颜', '貓眼': '猫眼', '捲髮': '捲发',
'虎牙': '虎牙', 'C罩杯': 'C罩杯', 'I罩杯': 'I罩杯', '小麥色': '小麦色', '大陰蒂': '大阴蒂', '美乳': '美乳',
'巨乳': '巨乳', '豐滿': '丰满', '苗條': '苗条', '美臀': '美臀', '美腿': '美腿', '無毛': '无毛',
'美白': '美白', '微乳': '微乳', '性感': '性感', '高個子': '高个子', '爆乳': '爆乳', 'G罩杯': 'G罩杯',
'多毛': '多毛', '巨臀': '巨臀', '軟體': '软体', '巨大陽具': '巨大阳具', '長發': '长发', 'H罩杯': 'H罩杯',
'舔陰': '舔阴', '電動陽具': '电动阳具', '淫亂': '淫乱', '射在外陰': '射在外阴', '猛烈': '猛烈', '後入內射': '后入内射',
'足交': '足交', '射在胸部': '射在胸部', '側位內射': '侧位内射', '射在腹部': '射在腹部', '騎乘內射': '骑乘内射', '射在頭髮': '射在头发',
'母乳': '母乳', '站立姿勢': '站立姿势', '肛射': '肛射', '陰道擴張': '阴道扩张', '內射觀察': '内射观察', '射在大腿': '射在大腿',
'精液流出': '精液流出', '射在屁股': '射在屁股', '內射潮吹': '内射潮吹', '首次肛交': '首次肛交', '射在衣服上': '射在衣服上', '首次內射': '首次内射',
'早洩': '早洩', '翻白眼': '翻白眼', '舔腳': '舔脚', '喝尿': '喝尿', '口交': '口交', '內射': '内射',
'自慰': '自慰', '後入': '后入', '騎乘位': '骑乘位', '顏射': '颜射', '口內射精': '口内射精', '手淫': '手淫',
'潮吹': '潮吹', '輪姦': '轮奸', '亂交': '乱交', '乳交': '乳交', '小便': '小便', '吸精': '吸精',
'深膚色': '深肤色', '指法': '指法', '騎在臉上': '骑在脸上', '連續內射': '连续内射', '打樁機': '打桩机', '肛交': '肛交',
'吞精': '吞精', '鴨嘴': '鸭嘴', '打飛機': '打飞机', '剃毛': '剃毛', '站立位': '站立位', '高潮': '高潮',
'二穴同入': '二穴同入', '舔肛': '舔肛', '多人口交': '多人口交', '痙攣': '痉挛', '玩弄肛門': '玩弄肛门', '立即口交': '立即口交',
'舔蛋蛋': '舔蛋蛋', '口射': '口射', '陰屁': '阴屁', '失禁': '失禁', '大量潮吹': '大量潮吹', '69': '69',
'振動': '振动', '搭訕': '搭讪', '奴役': '奴役', '打屁股': '打屁股', '潤滑油': '润滑油',
'按摩': '按摩', '散步': '散步', '扯破連褲襪': '扯破连裤袜', '手銬': '手铐', '束縛': '束缚', '調教': '调教',
'假陽具': '假阳具', '變態遊戲': '变态游戏', '注視': '注视', '蠟燭': '蜡烛', '電鑽': '电钻', '亂搞': '乱搞',
'摩擦': '摩擦', '項圈': '项圈', '繩子': '绳子', '灌腸': '灌肠', '監禁': '监禁', '車震': '车震',
'鞭打': '鞭打', '懸掛': '悬挂', '喝口水': '喝口水', '精液塗抹': '精液涂抹', '舔耳朵': '舔耳朵', '女體盛': '女体盛',
'便利店': '便利店', '插兩根': '插两根', '開口器': '开口器', '暴露': '暴露', '陰道放入食物': '阴道放入食物', '大便': '大便',
'經期': '经期', '惡作劇': '恶作剧', '電動按摩器': '电动按摩器', '凌辱': '凌辱', '玩具': '玩具', '露出': '露出',
'肛門': '肛门', '拘束': '拘束', '多P': '多P', '潤滑劑': '润滑剂', '攝影': '摄影', '野外': '野外',
'陰道觀察': '阴道观察', 'SM': 'SM', '灌入精液': '灌入精液', '受虐': '受虐', '綁縛': '绑缚', '偷拍': '偷拍',
'異物插入': '异物插入', '電話': '电话', '公寓': '公寓', '遠程操作': '远程操作', '偷窺': '偷窥', '踩踏': '踩踏',
'無套': '无套',
'企劃物': '企划物', '獨佔動畫': '独佔动画', '10代': '10代', '1080p': 'XXXX', '人氣系列': '人气系列', '60fps': 'XXXX',
'超VIP': '超VIP', '投稿': '投稿', 'VIP': 'VIP', '椅子': '椅子', '風格出眾': '风格出众', '首次作品': '首次作品',
'更衣室': '更衣室', '下午': '下午', 'KTV': 'KTV', '白天': '白天', '最佳合集': '最佳合集', 'VR': 'VR',
'動漫': '动漫',
'酒店': '酒店', '密室': '密室', '': '', '': '', '陽台': '阳台', '公園': '公园',
'家中': '家中', '公交車': '公交车', '公司': '公司', '門口': '门口', '附近': '附近', '學校': '学校',
'辦公室': '办公室', '樓梯': '楼梯', '住宅': '住宅', '公共廁所': '公共厕所', '旅館': '旅馆', '教室': '教室',
'廚房': '厨房', '桌子': '桌子', '大街': '大街', '農村': '农村', '和室': '和室', '地下室': '地下室',
'牢籠': '牢笼', '屋頂': '屋顶', '游泳池': '游泳池', '電梯': '电梯', '拍攝現場': '拍摄现场', '別墅': '别墅',
'房間': '房间', '愛情旅館': '爱情旅馆', '車內': '车内', '沙發': '沙发', '浴室': '浴室', '廁所': '厕所',
'溫泉': '温泉', '醫院': '医院', '榻榻米': '榻榻米',
'中文字幕': '中文字幕', '无码流出': '无码流出',
'折磨': '折磨', '嘔吐': '呕吐', '觸手': '触手', '蠻橫嬌羞': '蛮横娇羞', '處男': '处男', '正太控': '正太控',
'出軌': '出轨', '瘙癢': '瘙痒', '運動': '运动', '女同接吻': '女同接吻', '性感的x': '性感的', '美容院': '美容院',
'處女': '处女', '爛醉如泥的': '烂醉如泥的', '殘忍畫面': '残忍画面', '妄想': '妄想', '惡作劇': '恶作剧', '學校作品': '学校作品',
'粗暴': '粗暴', '通姦': '通奸', '姐妹': '姐妹', '雙性人': '双性人', '跳舞': '跳舞', '性奴': '性奴',
'倒追': '倒追', '性騷擾': '性骚扰', '其他': '其他', '戀腿癖': '恋腿癖', '偷窥': '偷窥', '花癡': '花痴',
'男同性恋': '男同性恋', '情侶': '情侣', '戀乳癖': '恋乳癖', '亂倫': '乱伦', '其他戀物癖': '其他恋物癖', '偶像藝人': '偶像艺人',
'野外・露出': '野外・露出', '獵豔': '猎艳', '女同性戀': '女同性恋', '企畫': '企画', '10枚組': '10枚组', '性感的': '性感的',
'科幻': '科幻', '女優ベスト・総集編': '演员的总编', '温泉': '温泉', 'M男': 'M男', '原作コラボ': '原作协作',
'16時間以上作品': '16时间以上作品', 'デカチン・巨根': '巨根', 'ファン感謝・訪問': '感恩祭', '動画': '动画', '巨尻': '巨尻', 'ハーレム': '后宫',
'日焼け': '晒黑', '早漏': '早漏', 'キス・接吻': '接吻.', '汗だく': '汗流浃背', 'スマホ専用縦動画': '智能手机的垂直视频', 'Vシネマ': '电影放映',
'Don Cipote\'s choice': 'Don Cipote\'s choice', 'アニメ': '日本动漫', 'アクション': '动作',
'イメージビデオ(男性)': '(视频)男性',
'孕ませ': '孕育', 'ボーイズラブ': '男孩恋爱',
'ビッチ': 'bitch', '特典ありAVベースボール': '特典AV棒球', 'コミック雑誌': '漫画雑志', '時間停止': '时间停止',
'黑幫成員': '黑帮成员', '童年朋友': '童年朋友', '公主': '公主', '亞洲女演員': '亚洲女演员', '伴侶': '伴侣', '講師': '讲师',
'婆婆': '婆婆', '格鬥家': '格斗家', '女檢察官': '女检察官', '明星臉': '明星脸', '女主人、女老板': '女主人、女老板', '模特兒': '模特',
'秘書': '秘书', '美少女': '美少女', '新娘、年輕妻子': '新娘、年轻妻子', '姐姐': '姐姐', '車掌小姐': '车掌小姐',
'寡婦': '寡妇', '千金小姐': '千金小姐', '白人': '白人', '已婚婦女': '已婚妇女', '女醫生': '女医生', '各種職業': '各种职业',
'妓女': '妓女', '賽車女郎': '赛车女郎', '女大學生': '女大学生', '展場女孩': '展场女孩', '女教師': '女教师', '母親': '母亲',
'家教': '家教', '护士': '护士', '蕩婦': '荡妇', '黑人演員': '黑人演员', '女生': '女生', '女主播': '女主播',
'高中女生': '高中女生', '服務生': '服务生', '魔法少女': '魔法少女', '學生(其他)': '学生(其他)', '動畫人物': '动画人物', '遊戲的真人版': '游戏真人版',
'超級女英雄': '超级女英雄',
'角色扮演': '角色扮演', '制服': '制服', '女戰士': '女战士', '及膝襪': '及膝袜', '娃娃': '娃娃', '女忍者': '女忍者',
'女裝人妖': '女装人妖', '內衣': '內衣', '猥褻穿著': '猥亵穿着', '兔女郎': '兔女郎', '貓耳女': '猫耳女', '女祭司': '女祭司',
'泡泡襪': '泡泡袜', '緊身衣': '紧身衣', '裸體圍裙': '裸体围裙', '迷你裙警察': '迷你裙警察', '空中小姐': '空中小姐',
'連褲襪': '连裤袜', '身體意識': '身体意识', 'OL': 'OL', '和服・喪服': '和服・丧服', '體育服': '体育服', '内衣': '内衣',
'水手服': '水手服', '學校泳裝': '学校泳装', '旗袍': '旗袍', '女傭': '女佣', '迷你裙': '迷你裙', '校服': '校服',
'泳裝': '泳装', '眼鏡': '眼镜', '哥德蘿莉': '哥德萝莉', '和服・浴衣': '和服・浴衣',
'超乳': '超乳', '肌肉': '肌肉', '乳房': '乳房', '嬌小的': '娇小的', '屁股': '屁股', '': '',
'變性者': '变性人', '無毛': '无毛', '胖女人': '胖女人', '苗條': '苗条', '孕婦': '孕妇', '成熟的女人': '成熟的女人',
'蘿莉塔': '萝莉塔', '貧乳・微乳': '贫乳・微乳', '巨乳': '巨乳',
'顏面騎乘': '颜面骑乘', '食糞': '食粪', '足交': '足交', '母乳': '母乳', '手指插入': '手指插入', '按摩': '按摩',
'女上位': '女上位', '舔陰': '舔阴', '拳交': '拳交', '深喉': '深喉', '69': '69', '淫語': '淫语',
'潮吹': '潮吹', '乳交': '乳交', '排便': '排便', '飲尿': '饮尿', '口交': '口交', '濫交': '滥交',
'放尿': '放尿', '打手槍': '打手枪', '吞精': '吞精', '肛交': '肛交', '顏射': '颜射', '自慰': '自慰',
'顏射x': '颜射', '中出': '中出', '肛内中出': '肛内中出',
'立即口交': '立即口交', '女優按摩棒': '演员按摩棒', '子宮頸': '子宫颈', '催眠': '催眠', '乳液': '乳液', '羞恥': '羞耻',
'凌辱': '凌辱', '拘束': '拘束', '輪姦': '轮奸', '插入異物': '插入异物', '鴨嘴': '鸭嘴', '灌腸': '灌肠',
'監禁': '监禁', '紧缚': '紧缚', '強姦': '强奸', '藥物': '药物', '汽車性愛': '汽车性爱', 'SM': 'SM',
'糞便': '粪便', '玩具': '玩具', '跳蛋': '跳蛋', '緊縛': '紧缚', '按摩棒': '按摩棒', '多P': '多P',
'性愛': '性爱', '假陽具': '假阳具', '逆強姦': '逆强奸',
'合作作品': '合作作品', '恐怖': '恐怖', '給女性觀眾': '女性向', '教學': '教学', 'DMM專屬': 'DMM专属', 'R-15': 'R-15',
'R-18': 'R-18', '戲劇': '戏剧', '3D': '3D', '特效': '特效', '故事集': '故事集', '限時降價': '限时降价',
'複刻版': '复刻版', '戲劇x': '戏剧', '戀愛': '恋爱', '高畫質': 'xxx', '主觀視角': '主观视角', '介紹影片': '介绍影片',
'4小時以上作品': '4小时以上作品', '薄馬賽克': '薄马赛克', '經典': '经典', '首次亮相': '首次亮相', '數位馬賽克': '数位马赛克', '投稿': '投稿',
'纪录片': '纪录片', '國外進口': '国外进口', '第一人稱攝影': '第一人称摄影', '業餘': '业余', '局部特寫': '局部特写', '獨立製作': '独立制作',
'DMM獨家': 'DMM独家', '單體作品': '单体作品', '合集': '合集', '高清': 'xxx', '字幕': 'xxx', '天堂TV': '天堂TV',
'DVD多士爐': 'DVD多士炉', 'AV OPEN 2014 スーパーヘビー': 'AV OPEN 2014 S级',
'AV OPEN 2014 ヘビー級': 'AV OPEN 2014重量级',
'AV OPEN 2014 ミドル級': 'AV OPEN 2014中量级',
'AV OPEN 2015 マニア/フェチ部門': 'AV OPEN 2015 狂热者/恋物癖部门', 'AV OPEN 2015 熟女部門': 'AV OPEN 2015 熟女部门',
'AV OPEN 2015 企画部門': 'AV OPEN 2015 企画部门', 'AV OPEN 2015 乙女部門': 'AV OPEN 2015 少女部',
'AV OPEN 2015 素人部門': 'AV OPEN 2015 素人部门', 'AV OPEN 2015 SM/ハード部門': 'AV OPEN 2015 SM/硬件',
'AV OPEN 2015 女優部門': 'AV OPEN 2015 演员部门', 'AVOPEN2016人妻・熟女部門': 'AVOPEN2016人妻・熟女部门',
'AVOPEN2016企画部門': 'AVOPEN2016企画部', 'AVOPEN2016ハード部門': 'AVOPEN2016ハード部',
'AVOPEN2016マニア・フェチ部門': 'AVOPEN2016疯狂恋物科', 'AVOPEN2016乙女部門': 'AVOPEN2016少女部',
'AVOPEN2016女優部門': 'AVOPEN2016演员部', 'AVOPEN2016ドラマ・ドキュメンタリー部門': 'AVOPEN2016电视剧纪录部',
'AVOPEN2016素人部門': 'AVOPEN2016素人部', 'AVOPEN2016バラエティ部門': 'AVOPEN2016娱乐部',
'VR専用': 'VR専用', '堵嘴·喜劇': '堵嘴·喜剧', '幻想': '幻想', '性別轉型·女性化': '性别转型·女性化',
'為智能手機推薦垂直視頻': '为智能手机推荐垂直视频', '設置項目': '设置项目', '迷你係列': '迷你系列',
'體驗懺悔': '体验忏悔', '黑暗系統': '黑暗系统',
'オナサポ': '手淫', 'アスリート': '运动员', '覆面・マスク': '蒙面具', 'ハイクオリティVR': '高品质VR', 'ヘルス・ソープ': '保健香皂', 'ホテル': '旅馆',
'アクメ・オーガズム': '绝顶高潮', '花嫁': '花嫁', 'デート': '约会', '軟体': '软体', '娘・養女': '养女', 'スパンキング': '打屁股',
'スワッピング・夫婦交換': '夫妇交换', '部下・同僚': '部下・同僚', '旅行': '旅行', '胸チラ': '露胸', 'バック': '后卫', 'エロス': '爱的欲望',
'男の潮吹き': '男人高潮', '女上司': '女上司', 'セクシー': '性感美女', '受付嬢': '接待小姐', 'ノーブラ': '不穿胸罩',
'白目・失神': '白眼失神', 'M女': 'M女', '女王様': '女王大人', 'ノーパン': '不穿内裤', 'セレブ': '名流', '病院・クリニック': '医院诊所',
'面接': '面试', 'お風呂': '浴室', '叔母さん': '叔母阿姨', '罵倒': '骂倒', 'お爺ちゃん': '爷爷', '逆レイプ': '强奸小姨子',
'ディルド': 'ディルド', 'ヨガ': '瑜伽', '飲み会・合コン': '酒会、联谊会', '部活・マネージャー': '社团经理', 'お婆ちゃん': '外婆',
'ビジネススーツ': '商务套装',
'チアガール': '啦啦队女孩', 'ママ友': '妈妈的朋友', 'エマニエル': '片商Emanieru熟女塾', '妄想族': '妄想族', '蝋燭': '蜡烛', '鼻フック': '鼻钩儿',
'放置': '放置', 'サンプル動画': '范例影片', 'サイコ・スリラー': '心理惊悚片', 'ラブコメ': '爱情喜剧', 'オタク': '御宅族',
## JAVDB
'可播放': '可播放', '可下載': '可下载', '含字幕': '含字幕', '單體影片': '单体影片', '含預覽圖': '含预览图',
'含預覽視頻': '含预览视频', '2020': '2020', '2019': '2019', '2018': '2018', '2017':
'2017', '2016': '2016', '2015': '2015', '2014': '2014', '2013': '2013', '2012':
'2012', '2011': '2011', '2010': '2010', '2009': '2009', '2008': '2008', '2007':
'2007', '2006': '2006', '2005': '2005', '2004': '2004', '2003': '2003', '2002':
'2002', '2001': '2001', '淫亂,真實': '淫乱,真实', '出軌': '出轨', '強姦': '强奸', '亂倫': '乱伦',
'溫泉': '温泉', '女同性戀': '女同性恋', '企畫': '企画', '戀腿癖': '恋腿癖', '獵豔': '猎艳', '偷窺': '偷窥',
'洗澡': '洗澡', '其他戀物癖': '其他恋物癖', '處女': '处女', '性愛': '性爱', '男同性戀': '男同性恋', '學校作品':
'学校作品', '妄想': '妄想', '韓國': '韩国', '形象俱樂部': '形象俱乐部', '友誼': '友谊', '亞洲': '亚洲', '暗黑系':
'暗黑系', 'M男': 'M男', '天賦': '天赋', '跳舞': '跳舞', '被外國人幹': '被外国人干', '戀物癖': '恋物癖',
'戀乳癖': '恋乳癖', '惡作劇': '恶作剧', '運動': '运动', '倒追': '倒追', '女同接吻': '女同接吻', '美容院':
'美容院', '奴隸': '奴隶', '白天出軌': '白天出轨', '流汗': '流汗', '性騷擾': '性骚扰', '情侶': '情侣',
'爛醉如泥的': '烂醉如泥的', '魔鬼系': '魔鬼系', '處男': '处男', '殘忍畫面': '残忍画面', '性感的': '性感的', '曬黑':
'晒黑', '雙性人': '双性人', '全裸': '全裸', '正太控': '正太控', '觸手': '触手', '正常': '正常', '奇異的':
'奇异的', '蠻橫嬌羞': '蛮横娇羞', '高中女生': '高中女生', '美少女': '美少女', '已婚婦女': '已婚妇女', '藝人': '艺人',
'姐姐': '姐姐', '各種職業': '各种职业', '蕩婦': '荡妇', '母親': '母亲', '女生': '女生', '妓女': '妓女',
'新娘,年輕妻子': '新娘,年轻妻子', '女教師': '女教师', '白人': '白人', '公主': '公主', '童年朋友': '童年朋友',
'婆婆': '婆婆', '飛特族': '飞特族', '亞洲女演員': '亚洲女演员', '女大學生': '女大学生', '偶像': '偶像', '明星臉':
'明星脸', '痴漢': '痴汉', '大小姐': '大小姐', '秘書': '秘书', '護士': '护士', '角色扮演者': '角色扮演者',
'賽車女郎': '赛车女郎', '家教': '家教', '黑人演員': '黑人演员', '妹妹': '妹妹', '寡婦': '寡妇', '女醫生':
'女医生', '老闆娘,女主人': '老板娘,女主人', '女主播': '女主播', '其他學生': '其他学生', '模特兒': '模特儿', '格鬥家':
'格斗家', '展場女孩': '展场女孩', '禮儀小姐': '礼仪小姐', '女檢察官': '女检察官', '講師': '讲师', '服務生': '服务生',
'伴侶': '伴侣', '車掌小姐': '车掌小姐', '女兒': '女儿', '年輕女孩': '年轻女孩', '眼鏡': '眼镜', '角色扮演':
'角色扮演', '內衣': '内衣', '制服': '制服', '水手服': '水手服', '泳裝': '泳装', '和服,喪服': '和服,丧服',
'連褲襪': '连裤袜', '女傭': '女佣', '運動短褲': '运动短裤', '女戰士': '女战士', '校服': '校服', '制服外套':
'制服外套', '修女': '修女', 'COSPLAY服飾': 'COSPLAY服饰', '裸體圍裙': '裸体围裙', '女忍者': '女忍者',
'身體意識': '身体意识', 'OL': 'OL', '貓耳女': '猫耳女', '學校泳裝': '学校泳装', '迷你裙': '迷你裙', '浴衣':
'浴衣', '猥褻穿著': '猥亵穿着', '緊身衣': '紧身衣', '娃娃': '娃娃', '蘿莉角色扮演': '萝莉角色扮演', '女裝人妖':
'女装人妖', '及膝襪': '及膝袜', '泡泡襪': '泡泡袜', '空中小姐': '空中小姐', '旗袍': '旗袍', '兔女郎': '兔女郎',
'女祭司': '女祭司', '動畫人物': '动画人物', '迷你裙警察': '迷你裙警察', '成熟的女人': '成熟的女人', '巨乳': '巨乳',
'蘿莉塔': '萝莉塔', '無毛': '无毛', '屁股': '屁股', '苗條': '苗条', '素人': '素人', '乳房': '乳房',
'巨大陰莖': '巨大阴茎', '胖女人': '胖女人', '平胸': '平胸', '': '', '美腳': '美腿', '孕婦': '孕妇',
'巨大屁股': '巨大屁股', '瘦小身型': '瘦小身型', '變性者': '变性者', '肌肉': '肌肉', '超乳': '超乳', '乳交':
'乳交', '中出': '中出', '多P': '多P', '69': '69', '淫語': '淫语', '女上位': '女上位', '自慰': '自慰',
'顏射': '颜射', '潮吹': '潮吹', '口交': '口交', '舔陰': '舔阴', '肛交': '肛交', '手指插入': '手指插入',
'手淫': '手淫', '放尿': '放尿', '足交': '足交', '按摩': '按摩', '吞精': '吞精', '剃毛': '剃毛',
'二穴同時挿入': '二穴同时插入', '母乳': '母乳', '濫交': '滥交', '深喉': '深喉', '接吻': '接吻', '拳交': '拳交',
'飲尿': '饮尿', '騎乗位': '骑乘位', '排便': '排便', '食糞': '食粪', '凌辱': '凌辱', '捆綁': '捆绑', '緊縛':
'紧缚', '輪姦': '轮奸', '玩具': '玩具', 'SM': 'SM', '戶外': '户外', '乳液': '乳液', '羞恥': '羞耻',
'女優按摩棒': '女优按摩棒', '拘束': '拘束', '調教': '调教', '立即口交': '立即口交', '跳蛋': '跳蛋', '監禁':
'监禁', '導尿': '导尿', '按摩棒': '按摩棒', '插入異物': '插入异物', '灌腸': '灌肠', '藥物': '药物', '露出':
'露出', '汽車性愛': '汽车性爱', '催眠': '催眠', '鴨嘴': '鸭嘴', '糞便': '粪便', '脫衣': '脱衣', '子宮頸':
'子宫颈', '4小時以上作品': '4小时以上作品', '戲劇': '戏剧', '第一人稱攝影': '第一人称摄影', 'HDTV': 'HDTV',
'首次亮相': '首次亮相', '薄馬賽克': '薄马赛克', '數位馬賽克': '数位马赛克', '業餘': '业余', '故事集': '故事集',
'經典': '经典', '戀愛': '恋爱', 'VR': 'VR', '給女性觀眾': '给女性观众', '精選,綜合': '精选,综合', '國外進口':
'国外进口', '科幻': '科幻', '行動': '行动', '成人電影': '成人电影', '綜合短篇': '综合短篇', '滑稽模仿': '滑稽模仿',
'男性': '男性', '介紹影片': '介绍影片', '冒險': '冒险', '模擬': '模拟', '愛好,文化': '爱好,文化', '懸疑':
'悬疑', 'R-15': 'R-15', '美少女電影': '美少女电影', '感官作品': '感官作品', '觸摸打字': '触摸打字', '投稿':
'投稿', '紀錄片': '纪录片', '去背影片': '去背影片', '獨立製作': '独立制作', '主觀視角': '主观视角', '戰鬥行動':
'战斗行动', '特效': '特效', '16小時以上作品': '16小时以上作品', '局部特寫': '局部特写', '重印版': '重印版', '歷史劇':
'历史剧', '寫真偶像': '写真偶像', '3D': '3D', '訪問': '访问', '教學': '教学', '恐怖': '恐怖', '西洋片':
'西洋片', '45分鍾以內': '45分钟以内', '45-90分鍾': '45-90分钟', '90-120分鍾': '90-120分钟',
'120分鍾以上': '120分钟以上',
# FANZA
'動画': '视频', '電子書籍': '电子书', '同人': '同人志', 'アダルトPCゲーム': '成人PC游戏', 'DVD/CD':
' DVD/CD', 'コミック': '漫画', 'いろいろレンタル': '各种租赁', '通販': '购物', 'マーケットプレイス': '市场',
'3Dプリント': ' 3D打印', 'ロボット': '机器人', '巨乳': '巨乳', '熟女': '熟女', 'ギャル': '美少女',
'人妻・主婦': '人妻', '女子校生': '高中女生', '中出し': '中出', 'アナル': '肛交', 'ニューハーフ': '变性人',
'VR専用': 'VR专用', 'ハイクオリティVR': '高质量VR', 'アイドル・芸能人': '偶像/名人', 'アクメ・オーガズム':
'性高潮', 'アスリート': '运动员', '姉・妹': '姐妹', 'イタズラ': '恶作剧', 'インストラクター': '指导员',
'ウェイトレス': '服务员', '受付嬢': '接待员', 'エステ': '美容院', 'M男': 'M男', 'M女': 'M女', 'OL':
'OL', 'お母さん': '妈妈', '女将・女主人': '房东/情妇', '幼なじみ': '儿时的朋友', 'お爺ちゃん': '爷爷', 'お嬢様・令嬢':
'女士/女儿', 'オタク': '极客', 'オナサポ': 'XXXX', 'お姉さん': '姐姐', 'お婆ちゃん': '祖母', '叔母さん': '姑母',
'お姫様': '公主', 'お風呂': '浴室', '温泉': '温泉', '女教師': '女老师', '女上司': '女上司', '女戦士': '女战士',
'女捜査官': '女调查员', 'カーセックス': '汽车做爱', '格闘家': '格斗家', 'カップル': '情侣', '家庭教師': '家庭教师',
'看護婦・ナース': '看护', 'キャバ嬢・風俗嬢': '陪酒女郎', 'キャンギャル': '辣妹', '近親相姦': '乱伦',
'義母': '岳母', '逆ナン': '逆搭讪', 'くノ一': '国一', 'コンパニオン': '同伴', '主観': '主观', '職業色々':
'各种职业', 'ショタ': '正太', '白目・失神': '白眼', '時間停止': '时间停止', '女医': '女医生', '女王様':
'女王', '女子アナ': '女主播', '女子大生': '女大学生', 'スチュワーデス': '空姐', 'スワッピング・夫婦交換': '交换夫妇',
'性転換・女体化': '性/女性化', 'セレブ': '名人', 'チアガール': '拉拉队女孩', '痴女': '荡妇', 'ツンデレ': '傲娇',
'デート': '约会', '盗撮・のぞき': '偷拍', 'ドール': '玩具娃娃', '寝取り・寝取られ・NTR': '睡觉NTR',
'ノーパン': '不穿内裤', 'ノーブラ': '无胸罩', '飲み会・合コン': '酒会,联谊', 'ハーレム': '后宫', '花嫁': '新娘',
'バスガイド': '巴士指南', '秘書': '秘书', 'ビッチ': 'XXXX', '病院・クリニック': '医院/诊所', 'ファン感謝・訪問':
'粉丝感谢访问', '不倫': '不伦', '部活・マネージャー': '俱乐部/经理', '部下・同僚': '下属/同事', 'ヘルス・ソープ':
'健康皂', '変身ヒロイン': '转型女主人公', 'ホテル': '酒店', 'マッサージ・リフレ': '按摩咨询', '魔法少女': '魔术女郎',
'ママ友': '妈妈朋友', '未亡人': '女人', '娘・養女': '女儿/被收养的女人', '胸チラ': '胸部希拉', 'メイド': '制作',
'面接': '面试', 'モデル': '模特', '野外・露出': '野外・露出', 'ヨガ': '瑜伽', '乱交': '狂欢', '旅行': '旅行',
'レースクィーン': '种族女王', '若妻・幼妻': '年轻妻子/年轻妻子', 'アジア女優': '亚洲女演员', '巨尻': '大屁股', '筋肉':
'肌肉', '小柄': '娇小', '黒人男優': '黑人演员', '処女': '处女', '女装・男の娘': '伪娘', 'スレンダー':
'苗条', '早漏': '早泄', 'そっくりさん': '相似', '長身': '高个', '超乳': '巨乳', 'デカチン・巨根':
'大鸡巴', '童貞': '处女', '軟体': '柔软的身体', '妊婦': '孕妇', '白人女優': '白人女演员', 'パイパン': '剃光',
'日焼け': '晒伤', '貧乳・微乳': '贫乳/小乳房', '美少女': '美少女', '美乳': ' 美乳',
'ふたなり': ' 双胞胎', 'ぽっちゃり': ' 丰满', 'ミニ系': ' 迷你系', '学生服':
'学生服', '競泳・スクール水着': ' 游泳学校/游泳衣', 'コスプレ':
'Cosplay', 'COSPLAY服饰': ' COSPLAY服饰', '制服': ' 制服', '体操着・ブルマ':
'运动服', 'チャイナドレス': '中国服饰', 'ニーソックス': '过膝袜', 'ネコミミ・獣系':
'猫耳兽系', '裸エプロン': '裸围裙', 'バニーガール': '兔女郎', 'パンスト・タイツ': '连裤袜/裤袜',
'ビジネススーツ': '西装', '覆面・マスク': '面罩', 'ボディコン': ' 体型控制', 'ボンテージ': ' 气瓶',
'巫女': '神社少女', '水着': '泳装', 'ミニスカ': '超短裙', 'ミニスカポリス': '超短裙警察', 'めがね': '眼镜',
'ランジェリー': '女用贴身内衣裤', 'ルーズソックス': '松散的袜子', 'レオタード': '紧身连衣裤', '和服・浴衣': '日式服装/浴衣',
'アクション・格闘': '动作/格斗', '脚フェチ': '腿恋物癖', 'アニメ': '动漫', 'イメージビデオ': '图像视频',
'イメージビデオ(男性)': '图像视频(男)', '淫乱・ハード系': '讨厌/困难', 'SF': ' SF', 'SM': ' SM', '学園もの':
'学校事物', '企画': '计划', '局部アップ': '本地化', '巨乳フェチ': '大恋物癖', 'ギャグ・コメディ': '堵嘴喜剧',
'クラシック': '经典', 'ゲイ': '同性恋', '原作コラボ': '原始协作', 'コラボ作品': '协作工作', 'サイコ・スリラー':
'心理惊悚片', '残虐表現': '残酷表情', '尻フェチ': '屁眼恋物癖', '素人': '素人', '女性向け': '女士',
'女優ベスト・総集編': '女演员最佳/摘要', 'スポーツ': '运动', 'セクシー': '性感', 'その他フェチ': '其他恋物癖', '体験告白':
'自白', '単体作品': '单身', 'ダーク系': '黑暗', 'ダンス': '舞蹈', '着エロ': '穿着色情', 'デビュー作品': '首次亮相',
'特撮': '特殊效果', 'ドキュメンタリー': '纪录片', 'ドラマ': '戏剧', 'ナンパ': '南帕', 'HowTo': ' HowTo',
'パンチラ': '内衣', 'ファンタジー': '幻想', '復刻': ' 复刻', 'Vシネマ': '电影', 'ベスト・総集編':
'最佳/摘要', 'ホラー': '恐怖', 'ボーイズラブ': '男子的爱', '妄想': ' 妄想', '洋ピン・海外輸入':
' 进口洋销', 'レズ': ' 女士', '恋愛': ' 恋爱', '足コキ':
' 足交', '汗だく': ' 满头大汗', 'アナルセックス': '肛交', '異物挿入': '异物插入', 'イラマチオ':
'深喉', '淫語': '脏话', '飲尿': '喝尿', '男の潮吹き': '人喷', 'オナニー': '手淫', 'おもちゃ': '玩具',
'監禁': '禁闭', '浣腸': '灌肠', '顔射': '恋物癖', '顔面騎乗': '面部骑行', '騎乗位': '女牛仔', 'キス・接吻':
'接吻和亲吻', '鬼畜': '恶魔', 'くすぐり': '发痒', 'クスコ': '库斯科', 'クンニ': '坤妮', 'ゲロ': '下吕', '拘束':
'拘束', '拷問': '酷刑', 'ごっくん': '', '潮吹き': '潮吹', 'シックスナイン': '六十九', '縛り・緊縛': '绑/束缚',
'羞恥': '羞耻', '触手': '触觉', '食糞': '食粪', 'スカトロ': '', 'スパンキング': '打屁股', '即ハメ':
'立即鞍', '脱糞': '排便', '手コキ': '打手枪', 'ディルド': '假阳具', '電マ': '电码', 'ドラッグ': '拖动', '辱め':
'屈辱', '鼻フック': '鼻子钩', 'ハメ撮り': '颜射', '孕ませ': '构思', 'バイブ': '盛传', 'バック': '后背', '罵倒':
'辱骂', 'パイズリ': '乳交', 'フィスト': '拳头', 'フェラ': '', 'ぶっかけ': '颜射', '放置': '离开',
'放尿・お漏らし': '小便/泄漏', '母乳': '母乳', 'ポルチオ': '色情', '指マン': '手指男人', 'ラブコメ': '爱来',
'レズキス': '女同性恋之吻', 'ローション・オイル': '乳液油', 'ローター': '转子', '蝋燭': '蜡烛', '3P・4P':
' 3P・4P', 'インディーズ': '印度', 'エマニエル': '伊曼妮尔', '期間限定セール': '限时特卖', 'ギリモザ': '最小马赛克',
'ゲーム実写版': '真人版游戏', '新人ちゃん続々デビュー': '新人陆续出道', 'スマホ推奨縦動画': '手机推荐视频',
'セット商品': '固定产品', 'その他': '其他', 'デジモ': ' 数码相机', '投稿': '投稿', '独占配信': '独家发行',
'ハイビジョン': '高清', 'パラダイスTV': '天堂电视', 'FANZA配信限定': ' FANZA发行有限公司', '複数話': '多个情节',
'妄想族': '妄想族', '16時間以上作品': '16小时或以上的工作', '3D': '3D', '4時間以上作品':
'4小时或以上的工作', 'プレステージ30OFF': 'XXXX', '豊彦・山と空・ヒプシスRASH他30OFF':
'XXXX', '熟女JAPAN・人妻援護会他30OFF':
'XXXX', 'ブランドストア30OFF': 'XXXX',
# mgstage
'ギャル': '辣妹', 'ナンパ': '搭讪', 'カップル': '情侣', 'スレンダー': '身材苗条', 'エステ・マッサージ': '美容按摩',
'3P・4P': '3P・4P', '4時間以上作品': '曝光', 'MGSだけのおまけ映像付き': '只附带MGS的赠品影像', '中出し': '中出',
'乱交': '乱交', '人妻': '人妻', '企画': '计划', 'デビュー作品': '出道作品', '初撮り': '首次拍摄', '単体作品':
'单体作品', '即ハメ': '马上就发', 'キャバ嬢・風俗嬢': '陪酒女郎', '巨乳': '巨乳', '投稿': '投稿', 'ハメ撮り':
'新娘子', '潮吹き': '潮吹', '熟女': '熟女', '独占配信': '独家发布', '痴女': '痴女', '童顔': '童颜',
'競泳・スクール水着': '游泳学校的游泳衣', '素人': '素人', 'ベスト・総集編': '精选集', '美乳': '美乳', '美少女': '美少女',
'職業色々': '各种职业', '配信専用': '配信专用', '電マ': '电码', '顔射': '颜射', 'アイドル・芸能人': '偶像艺人',
'アクション・格闘': '格斗动作', '足コキ': '足交', '脚フェチ': '脚控', 'アジア女優': '亚洲女演员', '汗だく': '满头大汗',
'アナルセックス': '肛门性爱', 'アナル': '肛交', '姉・妹': '姐姐、妹妹', 'Eカップ': 'E罩杯', 'イタズラ': '恶作剧',
'異物挿入': '插入异物', 'イメージビデオ': '视频图像', '色白': '白皙', '淫語': '淫语', '淫語モノ': '淫语故事',
'インストラクター': '教练', '飲尿': '饮尿', '淫乱・ハード系': '淫乱硬系', 'ウェイトレス': '女服务生', 'Hカップ':
'H罩杯', 'SF': 'SF', 'SM': 'SM', 'Fカップ': 'F罩杯', 'M男': 'M男', 'お母さん': '妈妈',
'女将・女主人': '女主人', 'お嬢様・令嬢': '大小姐', 'オナニー': '自慰', 'お姉さん': '姐姐', 'オモチャ': '玩具',
'温泉': '温泉', '女戦士': '女战士', '女捜査官': '女搜查官', 'カーセックス': '汽车做爱', '介護': '看护', '格闘家':
'格斗家', '家庭教師': '家庭教师', '監禁': '监禁', '看護婦・ナース': '看护', '浣腸': '灌肠', '学園もの': '校园剧',
'顔面騎乗': '颜面骑乘', '局部アップ': '局部提高', '巨尻': '巨臀', '巨乳フェチ': '巨乳恋物癖', '騎乗位': '骑乘位',
'キス': '沙鮻', 'キス・接吻': '接吻', '鬼畜': '鬼畜', '着物・浴衣': '和服、浴衣', '近親相姦': '近亲通奸', '筋肉':
'肌肉', '金髪・ブロンド': '金发', '逆ナン': '逆搭讪', '義母': '岳母', 'くノ一': '九一', 'ゲイ・ホモ': '同性恋',
'拘束': '拘束', '口内射精': '口内射精', '口内発射': '口内射精', '黒人男優': '黑人男演员', 'コスプレ': 'COSPLAY',
'コンパニオン': '接待员', 'ごっくん': '捉迷藏', '羞恥': '羞耻', '羞恥・辱め': '羞辱', '主観': '主观', '触手':
'触手', '食糞': '饭桶', '処女': '处女', 'ショタ': '正太', '縛り・緊縛': '束缚', '尻フェチ': '屁股恋物癖', '女医':
'女医生', '女教師': '女教师', '女子アナ': '女主播', '女子校生': '女学生', '女子大生': '女大学生', '女性向け':
'面向女性', '女装・男の娘': '伪娘', 'Gカップ': 'G罩杯', 'スカトロ': '', 'スチュワーデス・CA': '空姐CA',
'スポーツ': '体育运动', '清楚': '清秀', '制服': '制服', 'その他フェチ': '其他恋物癖', '体操着・ブルマ': '运动服',
'多人数': '很多人', '着エロ': '色情', '長身': '高个子', '痴漢': '痴汉', '手コキ': '手淫', '手マン': '手艺人',
'Dカップ': 'D罩杯', '泥酔': '烂醉如泥', 'デカチン・巨根': '巨根', '盗撮': '偷拍', '盗撮・のぞき': '偷拍', '童貞':
'处男', 'ドキュメンタリー': '记录片', 'ドラッグ・媚薬': '药局', 'ドラマ': '电视剧', 'ニューハーフ': '变性人',
'ニーソックス': '过膝袜', '妊婦': '孕妇', '寝取り・寝取られ': '睡下', 'HowTo': 'HowTo',
# fc2
'美人': '美女', 'ハメ撮り': '拍鸽子', 'フェチ': '恋物癖',
'コスプレ・制服': 'COSPLAY制服', '自分撮り': '自拍', 'その他': '其他', 'OL・お姉さん': 'OL姐姐', 'ゲイ': '同性恋',
'P・乱交': '3P・乱交', '野外・露出': '野外露出', '海外': '国外', 'レズ': '女士', 'アニメ': '动画',
'アダルト': '成人', 'アイドル': '空闲', '個人撮影': '个人摄影', '無修正': '无修正', 'コスプレ': '角色扮演',
'下着': '内衣', '水着': '游泳衣', 'パンチラ': '小册子', 'フェラ': '口交', 'モデル': '模型', '中出し': '中出', '可愛い': '可爱',
'オリジナル': '原始', '貧乳': '贫乳', 'オナニー': '自慰', 'パイパン': '菠萝', 'ロリ': '萝莉', '生ハメ': '第一人称',
#caribbeancom
'青姦': '野战', '初裏': '破处', 'ブルマー': '体操服', 'クンニ ベスト/オムニバス':'汇编', 'クンニ': '舔阴',
}
try:
return dict_gen[tag]
except:
return tag
else:
return tag
def translate(
src: str,
target_language: str = "zh_cn",
@@ -505,8 +283,11 @@ def translate(
delay: int = 0,
):
trans_result = ""
# Chinese sentences containing symbols such as & get truncated by Google Translate and lose content, and translating Chinese into Chinese is pointless anyway, so skip those and only translate strings containing Japanese kana
if not is_japanese(src):
return src
if engine == "google-free":
gsite = config.Config().get_translate_service_site()
gsite = config.getInstance().get_translate_service_site()
if not re.match('^translate\.google\.(com|com\.\w{2}|\w{2})$', gsite):
gsite = 'translate.google.cn'
url = (
@@ -519,27 +300,6 @@ f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={t
translate_list = [i["trans"] for i in result.json()["sentences"]]
trans_result = trans_result.join(translate_list)
# elif engine == "baidu":
# url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
# salt = random.randint(1, 1435660288)
# sign = app_id + src + str(salt) + key
# sign = hashlib.md5(sign.encode()).hexdigest()
# url += (
# "?appid="
# + app_id
# + "&q="
# + src
# + "&from=auto&to="
# + target_language
# + "&salt="
# + str(salt)
# + "&sign="
# + sign
# )
# result = get_html(url=url, return_type="object")
#
# translate_list = [i["dst"] for i in result.json()["trans_result"]]
# trans_result = trans_result.join(translate_list)
elif engine == "azure":
url = "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0&to=" + target_language
headers = {
@@ -560,17 +320,6 @@ f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={t
return trans_result
# ======================================================================== uncensored check
def is_uncensored(number):
if re.match('^\d{4,}', number) or re.match('n\d{4}', number) or 'HEYZO' in number.upper():
return True
configs = config.Config().get_uncensored()
prefix_list = str(configs).split(',')
for pre in prefix_list:
if pre.upper() in number.upper():
return True
return False
# Cookies exported from a browser's logged-in session let the scraper open member-only pages that guests cannot reach
# Example: FC2-755670 url https://javdb9.com/v/vO8Mn
# JSON file format:
@@ -593,20 +342,20 @@ def load_cookies(filename):
filename = os.path.basename(filename)
if not len(filename):
return None, None
path_search_order = [
f"./{filename}",
os.path.join(Path.home(), filename),
os.path.join(Path.home(), f".avdc/{filename}"),
os.path.join(Path.home(), f".local/share/avdc/{filename}")
]
path_search_order = (
Path.cwd() / filename,
Path.home() / filename,
Path.home() / f".avdc/{filename}",
Path.home() / f".local/share/avdc/{filename}"
)
cookies_filename = None
try:
for p in path_search_order:
if os.path.exists(p):
cookies_filename = os.path.abspath(p)
if p.is_file():
cookies_filename = str(p.resolve())
break
if not cookies_filename:
return None, None
try:
return json.load(open(cookies_filename)), cookies_filename
except:
return None, None
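A hedged sketch of combining the loader with the session helper above ('javdb9.json' is a hypothetical filename; the URL is the example from the comment above):

```python
# Sketch: javdb9.json is searched in cwd, $HOME, ~/.avdc, ~/.local/share/avdc;
# the first existing file wins, and its cookies open member-only pages.
cookies, cookies_path = load_cookies("javdb9.json")
if cookies:
    html = get_html_session("https://javdb9.com/v/vO8Mn", cookies=cookies)
```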
@@ -623,10 +372,139 @@ def file_modification_days(filename) -> int:
return 9999
return days
# Check whether a file is a link
def is_link(filename: str):
if os.path.islink(filename):
return True # symlink
elif os.stat(filename).st_nlink > 1:
return True # hard link Linux MAC OSX Windows NTFS
return False
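A small sketch of the two branches above, run in an empty scratch directory on a POSIX system (symlink creation on Windows needs extra privileges):

```python
# Symlinks are reported directly; a regular file counts as a link once a
# second directory entry (hard link) points at the same inode.
import os

open("a.mp4", "w").close()     # sample file, st_nlink == 1
assert not is_link("a.mp4")
os.symlink("a.mp4", "b.mp4")
assert is_link("b.mp4")        # symlink branch
os.link("a.mp4", "c.mp4")      # st_nlink of a.mp4 becomes 2
assert is_link("a.mp4")        # hard-link branch
```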
def file_not_exist_or_empty(filepath) -> bool:
return not os.path.isfile(filepath) or os.path.getsize(filepath) == 0
# Simple Japanese-text detection
def is_japanese(s) -> bool:
return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', s, re.UNICODE))
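The character classes cover hiragana, katakana, and halfwidth katakana only, so Chinese-only strings fall through — exactly what the translate() gate above relies on. A quick illustration:

```python
# Only kana ranges count as Japanese; CJK ideographs alone do not.
assert is_japanese("素人ナンパ")     # katakana present -> True
assert not is_japanese("中文字幕")   # Chinese-only -> False, skipped by translate()
```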
# Usage: python ./ADC_function.py https://cn.bing.com/
if __name__ == "__main__":
import sys, timeit
from http.client import HTTPConnection
def benchmark(t, url):
print(f"HTTP GET Benchmark times:{t} url:{url}")
tm = timeit.timeit(f"_ = session1.get('{url}')",
"from __main__ import get_html_session;session1=get_html_session()",
number=t)
print(f' *{tm:>10.5f}s get_html_session() Keep-Alive enable')
tm = timeit.timeit(f"_ = scraper1.get('{url}')",
"from __main__ import get_html_by_scraper;scraper1=get_html_by_scraper()",
number=t)
print(f' *{tm:>10.5f}s get_html_by_scraper() Keep-Alive enable')
tm = timeit.timeit(f"_ = browser1.open('{url}')",
"from __main__ import get_html_by_browser;browser1=get_html_by_browser()",
number=t)
print(f' *{tm:>10.5f}s get_html_by_browser() Keep-Alive enable')
tm = timeit.timeit(f"_ = get_html('{url}')",
"from __main__ import get_html",
number=t)
print(f' *{tm:>10.5f}s get_html()')
t = 100
#url = "https://www.189.cn/"
url = "http://www.chinaunicom.com"
HTTPConnection.debuglevel = 1
s = get_html_session()
_ = s.get(url)
HTTPConnection.debuglevel = 0
if len(sys.argv)>1:
url = sys.argv[1]
benchmark(t, url)
def download_file_with_filename(url, filename, path):
conf = config.getInstance()
configProxy = conf.proxy()
for i in range(configProxy.retry):
try:
if configProxy.enable:
if not os.path.exists(path):
try:
os.makedirs(path)
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
proxies = configProxy.proxies()
headers = {
'User-Agent': G_USER_AGENT}
r = requests.get(url, headers=headers, timeout=configProxy.timeout, proxies=proxies)
if r == '':
print('[-]Movie Download Data not found!')
return
with open(os.path.join(path, filename), "wb") as code:
code.write(r.content)
return
else:
if not os.path.exists(path):
try:
os.makedirs(path)
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
headers = {
'User-Agent': G_USER_AGENT}
r = requests.get(url, timeout=configProxy.timeout, headers=headers)
if r == '':
print('[-]Movie Download Data not found!')
return
with open(os.path.join(path, filename), "wb") as code:
code.write(r.content)
return
except requests.exceptions.RequestException:
i += 1
print('[-]Download : Connect retry ' + str(i) + '/' + str(configProxy.retry))
except requests.exceptions.ConnectionError:
i += 1
print('[-]Download : Connect retry ' + str(i) + '/' + str(configProxy.retry))
except requests.exceptions.ProxyError:
i += 1
print('[-]Download : Connect retry ' + str(i) + '/' + str(configProxy.retry))
except requests.exceptions.ConnectTimeout:
i += 1
print('[-]Download : Connect retry ' + str(i) + '/' + str(configProxy.retry))
except IOError:
raise ValueError(f"[-]Create Directory '{path}' failed!")
return
print('[-]Connect Failed! Please check your Proxy or Network!')
raise ValueError('[-]Connect Failed! Please check your Proxy or Network!')
return
def download_one_file(args):
def _inner(url: str, save_path: Path):
filebytes = get_html(url, return_type='content')
if isinstance(filebytes, bytes) and len(filebytes):
if len(filebytes) == save_path.open('wb').write(filebytes):
return str(save_path)
return _inner(*args)
'''Usage example: download two different files on 2 threads in parallel and save them to different paths. The target directories may not exist yet, but write permission to the target directories and files is required:
parallel_download_files([
('https://site1/img/p1.jpg', 'C:/temp/img/p1.jpg'),
('https://site2/cover/n1.xml', 'C:/tmp/cover/n1.xml')
])
'''
# dn_list can be a tuple or a list: ((url1, save_fullpath1), (url2, save_fullpath2), ...)
# parallel: thread count of the download pool; 0 lets the function decide
def parallel_download_files(dn_list, parallel: int = 0):
mp_args = []
for url, fullpath in dn_list:
if url and isinstance(url, str) and url.startswith('http') and fullpath and isinstance(fullpath, (str, Path)) and len(str(fullpath)):
fullpath = Path(fullpath)
fullpath.parent.mkdir(parents=True, exist_ok=True)
mp_args.append((url, fullpath))
if not len(mp_args):
return []
if not isinstance(parallel, int) or parallel not in range(1,200):
parallel = min(5, len(mp_args))
with ThreadPoolExecutor(parallel) as pool:
results = list(pool.map(download_one_file, mp_args))
return results
def delete_all_elements_in_list(string,lists):
new_lists = []
for i in lists:
if i != string:
new_lists.append(i)
return new_lists
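The loop above is equivalent to a single comprehension; a sketch of the same behavior:

```python
# Equivalent one-liner: drop every element equal to `string`.
def delete_all_elements_in_list(string, lists):
    return [i for i in lists if i != string]
```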

AV_Data_Capture.py
View File

@@ -6,12 +6,16 @@ import sys
import shutil
import typing
import urllib3
import signal
import platform
from opencc import OpenCC
import ADC_function
import config
from datetime import datetime, timedelta
import time
from pathlib import Path
from ADC_function import file_modification_days, get_html, is_link
from ADC_function import file_modification_days, get_html, parallel_download_files
from number_parser import get_number
from core import core_main, moveFailedFolder
@@ -34,31 +38,55 @@ def check_update(local_version):
print("[*]======================================================")
def argparse_function(ver: str) -> typing.Tuple[str, str, bool]:
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
conf = config.getInstance()
parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.")
# parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.")
default_logdir = os.path.join(Path.home(),'.avlogs')
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
default_logdir = str(Path.home() / '.avlogs')
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
help=f"""Duplicate stdout and stderr to logfiles
in logging folder, default on.
default for current user: {default_logdir}
Use --log-dir= to turn off logging feature.""")
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number")
parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true",
help="Auto exit after program complete")
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
default folder for current user: '{default_logdir}'. Change default folder to an empty file,
or use --log-dir= to turn log off.""")
parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.")
parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.")
parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.")
parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
parser.add_argument("-a", "--auto-exit", action="store_true",
help="Auto exit after program complete")
parser.add_argument("-g","--debug", action="store_true",
help="Turn on debug mode to generate diagnostic log for issue report.")
parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true",
help="""Only show job list of files and numbers, and **NO** actual operation
is performed. It may help you correct wrong numbers before real job.""")
parser.add_argument("-v", "--version", action="version", version=ver)
args = parser.parse_args()
def get_natural_number_or_none(value):
return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None
def get_str_or_none(value):
return value if isinstance(value, str) and len(value) else None
def get_bool_or_none(value):
return True if isinstance(value, bool) and value else None
config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
config.G_conf_override["common:nfo_skip_days"] = get_natural_number_or_none(args.days)
config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt)
config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list)
config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug)
return args.file, args.path, args.number, args.autoexit, args.logdir, args.regexstr
return args.file, args.number, args.logdir, args.regexstr, args.zero_op
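The three converters map absent or empty CLI values to None, so only explicitly-passed options override config.ini. Illustrative behavior (they are nested inside argparse_function, so this is a doctest-style sketch, not directly importable):

```python
# "-1".isnumeric() is False, so negatives also fall back to None.
assert get_natural_number_or_none("3") == 3
assert get_natural_number_or_none("") is None
assert get_natural_number_or_none("-1") is None
assert get_str_or_none("") is None
assert get_bool_or_none(False) is None   # only True overrides the config
```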
class OutLogger(object):
def __init__(self, logfile) -> None:
self.term = sys.stdout
self.log = open(logfile,"w",encoding='utf-8',buffering=1)
self.filepath = logfile
def __del__(self):
self.close()
def __enter__(self):
@@ -85,6 +113,7 @@ class ErrLogger(OutLogger):
def __init__(self, logfile) -> None:
self.term = sys.stderr
self.log = open(logfile,"w",encoding='utf-8',buffering=1)
self.filepath = logfile
def close(self):
if self.term != None:
sys.stderr = self.term
@@ -97,14 +126,18 @@ class ErrLogger(OutLogger):
def dupe_stdout_to_logfile(logdir: str):
if not isinstance(logdir, str) or len(logdir) == 0:
return
if not os.path.isdir(logdir):
os.makedirs(logdir)
if not os.path.isdir(logdir):
return
log_dir = Path(logdir)
if not log_dir.exists():
try:
log_dir.mkdir(parents=True,exist_ok=True)
except:
pass
if not log_dir.is_dir():
return # tip: logging can be disabled by replacing the log directory with a same-name empty regular file
abslog_dir = log_dir.resolve()
log_tmstr = datetime.now().strftime("%Y%m%dT%H%M%S")
logfile = os.path.join(logdir, f'avdc_{log_tmstr}.txt')
errlog = os.path.join(logdir, f'avdc_{log_tmstr}_err.txt')
logfile = abslog_dir / f'avdc_{log_tmstr}.txt'
errlog = abslog_dir / f'avdc_{log_tmstr}_err.txt'
sys.stdout = OutLogger(logfile)
sys.stderr = ErrLogger(errlog)
@@ -113,28 +146,126 @@ def dupe_stdout_to_logfile(logdir: str):
def close_logfile(logdir: str):
if not isinstance(logdir, str) or len(logdir) == 0 or not os.path.isdir(logdir):
return
sys.stdout.close()
sys.stderr.close()
# clean up empty files
for current_dir, subdirs, files in os.walk(logdir, topdown=False):
# save the log file path before the logs are closed
filepath = None
try:
for f in files:
full_name = os.path.join(current_dir, f)
if os.path.getsize(full_name) == 0:
os.remove(full_name)
filepath = sys.stdout.filepath
except:
pass
sys.stdout.close()
sys.stderr.close()
log_dir = Path(logdir).resolve()
if isinstance(filepath, Path):
print(f"Log file '{filepath}' saved.")
assert(filepath.parent.samefile(log_dir))
# clean up empty files
for f in log_dir.glob(r'*_err.txt'):
if f.stat().st_size == 0:
try:
f.unlink(missing_ok=True)
except:
pass
# Merge logs. Only text logs directly inside the log directory are scanned; subdirectories are ignored. Logs older
# than three days are merged into a single log per day, logs older than three months into a single log per month,
# and last year's (and older) monthly logs are merged into yearly logs after April of the current year.
# Test steps:
"""
LOGDIR=/tmp/avlog
mkdir -p $LOGDIR
for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done
for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done
for f in {00..23};do;echo 20211001T$f>$LOGDIR/avdc_20211001T${f}5959.txt;done
echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
# 1932 files in /tmp/avlog
avdc -zgic1 -d0 -m3 -o $LOGDIR
# python3 ./AV_Data_Capture.py -zgic1 -o $LOGDIR
ls $LOGDIR
# rm -rf $LOGDIR
"""
today = datetime.today()
# Step 1: merge into daily logs. Among logs older than 3 days, files whose names share the same day are merged into one log.
for i in range(1):
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}T\d{6}$', f.stem, re.A)]
if not txts or not len(txts):
break
e = [f for f in txts if '_err' in f.stem]
txts.sort()
tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
deadline_day = f'avdc_{tmstr_3_days_ago}'
day_merge = [f for f in txts if f.stem < deadline_day]
if not day_merge or not len(day_merge):
break
cutday = len('T235959.txt') # cut length avdc_20201201|T235959.txt
for f in day_merge:
try:
day_file_name = str(f)[:-cutday] + '.txt' # avdc_20201201.txt
with open(day_file_name, 'a', encoding='utf-8') as m:
m.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
except:
pass
# Step 2: merge into monthly logs
for i in range(1): # a single-pass loop whose break jumps to the next step, avoiding a large indented if-block or goto-style flow
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}$', f.stem, re.A)]
if not txts or not len(txts):
break
txts.sort()
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
deadline_month = f'avdc_{tmstr_3_month_ago}'
month_merge = [f for f in txts if f.stem < deadline_month]
if not month_merge or not len(month_merge):
break
tomonth = len('01.txt') # cut length avdc_202012|01.txt
for f in month_merge:
try:
month_file_name = str(f)[:-tomonth] + '.txt' # avdc_202012.txt
with open(month_file_name, 'a', encoding='utf-8') as m:
m.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
except:
pass
# Step 3: merge monthly logs into yearly logs
if today.month < 4:
return
mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{6}$', f.stem, re.A)]
if not mons or not len(mons):
return
mons.sort()
deadline_year = f'avdc_{today.year-1}13'
year_merge = [f for f in mons if f.stem < deadline_year]
if not year_merge or not len(year_merge):
return
toyear = len('12.txt') # cut length avdc_2020|12.txt
for f in year_merge:
try:
year_file_name = str(f)[:-toyear] + '.txt' # avdc_2020.txt
with open(year_file_name, 'a', encoding='utf-8') as y:
y.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
except:
pass
# Step 4: compressing yearly logs. If you need compression, do it manually or with an external scheduled script.
# nongnu's lzip is recommended: for text logs of this granularity it currently has the best compression ratio.
# At lzip -9 it compresses better than xz -9 while using less memory, utilizes multiple cores better (plzip, the
# multithreaded build), and decompresses faster. Compressed output is roughly 2.4% to 3.7% of the original size;
# a 100MB log file shrinks to about 3.7MB.
# Rewritten video file scan: recursion removed, global variables dropped, added skip handling via the failed-file list
def movie_lists(root, conf, regexstr):
escape_folder = re.split("[,]", conf.escape_folder())
def signal_handler(*args):
print('[!]Ctrl+C detected, Exit.')
sys.exit(9)
def sigdebug_handler(*args):
config.G_conf_override["debug_mode:switch"] = not config.G_conf_override["debug_mode:switch"]
print('[!]Debug {}'.format('On' if config.getInstance().debug() else 'oFF'))
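Note the platform split set up at the bottom of this file: on Windows the toggle rides on SIGBREAK (Ctrl+Break), while elsewhere it is bound to SIGWINCH, so it can also be flipped from another shell. A hedged sketch (the pid is hypothetical):

```python
# Flip debug logging on a running instance (POSIX; pid is hypothetical).
import os, signal

pid = 12345                    # pid of the running AV_Data_Capture process
os.kill(pid, signal.SIGWINCH)  # invokes sigdebug_handler in that process
```

Since SIGWINCH also fires on terminal resize, resizing the window of a running instance toggles debug output as well.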
# Added: skip handling via the failed-file list and by .nfo modification age, report the total of skipped videos, list each skipped file in debug mode (-g), and skip small ad files
def movie_lists(source_folder, regexstr):
conf = config.getInstance()
main_mode = conf.main_mode()
debug = conf.debug()
nfo_skip_days = conf.nfo_skip_days()
soft_link = conf.soft_link()
total = []
file_type = conf.media_type().upper().split(",")
file_type = conf.media_type().lower().split(",")
trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
cliRE = None
if isinstance(regexstr, str) and len(regexstr):
@@ -142,72 +273,94 @@ def movie_lists(root, conf, regexstr):
cliRE = re.compile(regexstr, re.IGNORECASE)
except:
pass
failed_list_txt_path = Path(conf.failed_folder()).resolve() / 'failed_list.txt'
failed_set = set()
if main_mode == 3 or soft_link:
if (main_mode == 3 or soft_link) and not conf.ignore_failed_list():
try:
with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'r', encoding='utf-8') as flt:
flist = flt.read().splitlines()
flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines()
failed_set = set(flist)
flt.close()
if len(flist) != len(failed_set):
with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'w', encoding='utf-8') as flt:
flt.writelines([line + '\n' for line in failed_set])
flt.close()
if len(flist) != len(failed_set): # dedup and write back without changing the order of entries in failed_list.txt; for duplicates only the last occurrence is kept
fset = failed_set.copy()
for i in range(len(flist)-1, -1, -1):
fset.remove(flist[i]) if flist[i] in fset else flist.pop(i)
failed_list_txt_path.write_text('\n'.join(flist) + '\n', encoding='utf-8')
assert len(fset) == 0 and len(flist) == len(failed_set)
except:
pass
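A standalone sketch of the dedup above — it keeps each entry's last occurrence while preserving the survivors' relative order:

```python
# Dedup keeping the LAST occurrence of each entry, order preserved.
flist = ["a", "b", "a", "c", "b"]
fset = set(flist)
for i in range(len(flist) - 1, -1, -1):
    fset.remove(flist[i]) if flist[i] in fset else flist.pop(i)
assert flist == ["a", "c", "b"] and len(fset) == 0
```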
for current_dir, subdirs, files in os.walk(root, topdown=False):
if len(set(current_dir.replace("\\","/").split("/")) & set(escape_folder)) > 0:
if not Path(source_folder).is_dir():
print('[-]Source folder not found!')
return []
total = []
source = Path(source_folder).resolve()
skip_failed_cnt, skip_nfo_days_cnt = 0, 0
escape_folder_set = set(re.split("[,]", conf.escape_folder()))
for full_name in source.glob(r'**/*'):
if main_mode != 3 and set(full_name.parent.parts) & escape_folder_set:
continue
for f in files:
full_name = os.path.join(current_dir, f)
if not os.path.splitext(full_name)[1].upper() in file_type:
if not full_name.suffix.lower() in file_type:
continue
absf = os.path.abspath(full_name)
absf = str(full_name)
if absf in failed_set:
skip_failed_cnt += 1
if debug:
print('[!]Skip failed file:', absf)
print('[!]Skip failed movie:', absf)
continue
if cliRE and not cliRE.search(absf):
is_sym = full_name.is_symlink()
if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # short-circuit boolean: never stat() a symlink, since it may point at a nonexistent target
continue # file is symlink or hardlink(Linux/NTFS/Darwin)
# zero-byte debug samples are allowed through; files under 120MB are treated as ads and dropped, e.g. '苍老师强力推荐.mp4' (102.2MB), '黑道总裁.mp4' (98.4MB), '有趣的妹子激情表演.MP4' (95MB), '有趣的臺灣妹妹直播.mp4' (15.1MB)
movie_size = 0 if is_sym else full_name.stat().st_size # as above: skip stat()/st_size for symlinks and assign 0, which also bypasses the small-video check
if movie_size > 0 and movie_size < 125829120: # 1024*1024*120=125829120
continue
if main_mode == 3 and nfo_skip_days > 0:
nfo = Path(absf).with_suffix('.nfo')
if file_modification_days(nfo) <= nfo_skip_days:
if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
continue
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days:
skip_nfo_days_cnt += 1
if debug:
print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
continue
if (main_mode == 3 or not is_link(absf)) and not trailerRE.search(f):
total.append(absf)
if skip_failed_cnt:
print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.")
if skip_nfo_days_cnt:
print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
return total
# In soft-link mode, movies already scraped successfully must also be checked against the success folder's .nfo modification age, skipping those updated within N days
skip_numbers = set()
success_folder = conf.success_folder()
for current_dir, subdirs, files in os.walk(success_folder, topdown=False):
for f in files:
f_obj = Path(f)
if f_obj.suffix.lower() != '.nfo':
success_folder = Path(conf.success_folder()).resolve()
for f in success_folder.glob(r'**/*'):
if not re.match(r'\.nfo', f.suffix, re.IGNORECASE):
continue
if file_modification_days(Path(current_dir) / f_obj) > nfo_skip_days:
if file_modification_days(f) > nfo_skip_days:
continue
number = get_number(False, f_obj.stem)
if number:
skip_numbers.add(number.upper())
number = get_number(False, f.stem)
if not number:
continue
skip_numbers.add(number.lower())
rm_list = []
for f in total:
n_number = get_number(False, os.path.basename(f))
if n_number and n_number.upper() in skip_numbers:
if n_number and n_number.lower() in skip_numbers:
rm_list.append(f)
for f in rm_list:
total.remove(f)
if debug:
print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'")
if len(rm_list):
print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
return total
def create_failed_folder(failed_folder):
    if not os.path.exists(failed_folder):  # create the 'failed' folder
        try:
            os.makedirs(failed_folder)
            if not os.path.isdir(failed_folder):
                raise
        except:
            print(f"[-]Fatal error! Cannot make folder '{failed_folder}'")
            sys.exit(0)
@@ -227,24 +380,29 @@ def rm_empty_folder(path):
pass
def create_data_and_move(file_path: str, zero_op, oCC):
    # Normalized number, e.g. 111xxx-222.mp4 -> xxx-222.mp4
    debug = config.getInstance().debug()
    n_number = get_number(debug, os.path.basename(file_path))
    file_path = os.path.abspath(file_path)

    if debug:
        print(f"[!] [{n_number}] As Number making data for '{file_path}'")
        if zero_op:
            return
        if n_number:
            core_main(file_path, n_number, oCC)
        else:
            print("[-] number empty ERROR")
            moveFailedFolder(file_path)
        print("[*]======================================================")
    else:
        try:
            print(f"[!] [{n_number}] As Number making data for '{file_path}'")
            if zero_op:
                return
            if n_number:
                core_main(file_path, n_number, oCC)
            else:
                raise ValueError("number empty")
            print("[*]======================================================")
@@ -253,22 +411,26 @@ def create_data_and_move(file_path: str, c: config.Config, debug):
            print('[-]', err)
            try:
                moveFailedFolder(file_path)
            except Exception as err:
                print('[!]', err)
def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
    conf = config.getInstance()
    file_name = os.path.basename(file_path)
    try:
        print("[!] [{1}] As Number making data for '{0}'".format(file_path, custom_number))
        if custom_number:
            core_main(file_path, custom_number, oCC)
        else:
            print("[-] number empty ERROR")
        print("[*]======================================================")
    except Exception as err:
        print("[-] [{}] ERROR:".format(file_path))
        print('[-]', err)

        if conf.soft_link():
            print("[-]Link {} to failed folder".format(file_path))
            os.symlink(file_path, os.path.join(conf.failed_folder(), file_name))
        else:
@@ -279,69 +441,113 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu
print('[!]', err)

def main():
    version = '5.0.5'
    urllib3.disable_warnings()  # Ignore http proxy warning
    # Read config.ini first; argparse_function() needs conf.failed_folder()
    conf = config.Config("config.ini")
    # Parse command line args
    single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)

    main_mode = conf.main_mode()
    if not main_mode in (1, 2, 3):
        print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
        sys.exit(4)

    signal.signal(signal.SIGINT, signal_handler)
    if sys.platform == 'win32':
        signal.signal(signal.SIGBREAK, sigdebug_handler)
    else:
        signal.signal(signal.SIGWINCH, sigdebug_handler)
    dupe_stdout_to_logfile(logdir)

    platform_total = str(' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version())
    print('[*]================== AV Data Capture ===================')
    print('[*]' + version.center(54))
    print('[*]======================================================')
    print('[*]' + platform_total)
    print('[*]======= Do not promote this project inside the GFW =======')

    start_time = time.time()
    print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S"))

    if conf.update_check():
        check_update(version)
    # Download mapping tables, parallel version
    def fmd(f):
        return ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/' + f,
                Path.home() / '.local' / 'share' / 'avdc' / f)
    map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
    res = parallel_download_files(((k, v) for k, v in map_tab if not v.exists()))
    for i, fp in enumerate(res, start=1):
        if fp and len(fp):
            print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
        else:
            print(f"[-] [{i}/{len(res)}] Mapping Table Download failed")
print(f"[+]Load Config file '{conf.ini_path}'.")
if conf.debug():
print('[+]Enable debug')
if conf.soft_link():
print('[!]Enable soft link')
#print('[!]CmdLine:'," ".join(sys.argv[1:]))
if len(sys.argv)>1:
print('[!]CmdLine:'," ".join(sys.argv[1:]))
print('[+]Main Working mode ## {}: {} ## {}{}{}'
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1],
"" if not conf.multi_threading() else ", multi_threading on",
"" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
"" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"
) if not single_file_path else ('-','Single File', '','',''))
)
    create_failed_folder(conf.failed_folder())
    # Create the OpenCC converter
    ccm = conf.cc_convert_mode()
    try:
        oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm == 1 else 's2t.json')
    except:
        # Some platforms have no OpenCC CPython build; fall back to opencc-python-reimplemented:
        # pip uninstall opencc && pip install opencc-python-reimplemented
        oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t')
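    # Illustrative example (not part of the commit): with ccm == 1 the converter maps
    # traditional to simplified Chinese, e.g. oCC.convert('繁體') returns '繁体'.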
    if not single_file_path == '':  # Single File
        print('[+]==================== Single File =====================')
        if custom_number == '':
            create_data_and_move_with_custom_number(single_file_path,
                get_number(conf.debug(), os.path.basename(single_file_path)), oCC)
        else:
            create_data_and_move_with_custom_number(single_file_path, custom_number, oCC)
    else:
        folder_path = conf.source_folder()
        if not isinstance(folder_path, str) or folder_path == '':
            folder_path = os.path.abspath(".")
        movie_list = movie_lists(folder_path, regexstr)
        count = 0
        count_all = str(len(movie_list))
        print('[+]Find', count_all, 'movies.')
        print('[*]======================================================')
        stop_count = conf.stop_counter()
        if stop_count < 1:
            stop_count = 999999
        else:
            count_all = str(min(len(movie_list), stop_count))

        if main_mode == 3:
            print(f'[!]Running mode: **maintenance**. The program will stop after processing {count_all} video files; add the -a flag to auto-exit for unattended background runs.')
        for movie_path in movie_list:  # iterate over the movie list and hand each file to the core
            count = count + 1
            percentage = str(count / int(count_all) * 100)[:4] + '%'
            print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S")))
            create_data_and_move(movie_path, zero_op, oCC)
            if count >= stop_count:
                print("[!]Stop counter triggered!")
                break
    if conf.del_empty_folder() and not zero_op:
        rm_empty_folder(conf.success_folder())
        rm_empty_folder(conf.failed_folder())
    if len(folder_path):
@@ -353,9 +559,15 @@ if __name__ == '__main__':
" End at", time.strftime("%Y-%m-%d %H:%M:%S"))
print("[+]All finished!!!")
if not (conf.auto_exit() or auto_exit):
input("Press enter key exit, you can check the error message before you exit...")
close_logfile(logdir)
if not conf.auto_exit():
input("Press enter key exit, you can check the error message before you exit...")
sys.exit(0)
import multiprocessing

if __name__ == '__main__':
    multiprocessing.freeze_support()
    main()

BIN  Img/HACK.png: new file, 20 KiB (binary file not shown)
BIN  (image, name not captured): modified, 13 KiB before, 29 KiB after (binary file not shown)
BIN  (image, name not captured): modified, 10 KiB before, 13 KiB after (binary file not shown)
BIN  Img/UMR.png: new file, 20 KiB (binary file not shown)
View File

@@ -16,7 +16,11 @@ make:
#export cloudscraper_path=$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1)
@echo "[+]Pyinstaller make"
	pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
		--add-data "`python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1`:cloudscraper" \
		--add-data "`python3 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1`:opencc" \
		--add-data "Img:Img" \
		--add-data "config.ini:."
@echo "[+]Move to bin"
if [ ! -d "./bin" ];then mkdir bin; fi

MappingTable/c_number.json: new file, 14598 lines (file diff suppressed because it is too large)

(Another file's diff is likewise suppressed because it is too large.)

View File

@@ -0,0 +1,627 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- Note: this file can be opened and edited with any text editor.
"keyword" matches keywords for tags/directors/series/studios/publishers; every name must have a comma on both sides. When a scraped keyword is contained in an entry, the word for the matching language is output.
zh_cn/zh_tw/jp are the output words for the corresponding language. An output word of "删除" means the keyword is deleted from that field. -->
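<!-- Illustrative example (not part of the mapping data): a scraped tag "中文字幕" is contained in
     keyword=",字幕,中文字幕,中文," below, whose zh_cn output is "删除", so that tag is dropped
     from the simplified-Chinese output. -->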
<info>
<!-- ======================== -->
<!-- Deleted keywords -->
<!-- ======================== -->
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",成人奖,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",觸摸打字,触摸打字,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",10枚組,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",Don Cipote's choice,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",DVD多士爐,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",R-18,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",Vシネマ,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",イメージビデオ(男性),"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",サンプル動画,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",其他,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",放置,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",獨立製作,独立制作,独占配信,配信専用,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",特典ありAVベースボール,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",天堂TV,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",性愛,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",限時降價,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",亞洲女演員,亚洲女演员,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",字幕,中文字幕,中文,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",AV女优,女优,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",HDTV,HD DVD,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",MicroSD,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",R-15,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",UMD,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",VHS,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",愛好,文化,爱好、文化,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",訪問,访问,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",感官作品,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",高畫質,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",高清,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",素人作品,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",友誼,友谊,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",正常,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",蓝光,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",冒險,冒险,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",模擬,模拟,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",年輕女孩,年轻女孩,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",去背影片,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",天賦,天赋,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",形象俱樂部,形象俱乐部,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",懸疑,悬疑,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",亞洲,亚洲,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",ハロウィーンキャンペーン,"/>
<!-- ======================== -->
<!-- Tags -->
<!-- ======================== -->
<a zh_cn="16小时+" zh_tw="16小時+" jp="16時間以上作品" keyword=",16小時以上作品,16小时以上作品,16時間以上作品,16小时+,16小時+,"/>
<a zh_cn="3D" zh_tw="3D" jp="3D" keyword=",3D,"/>
<a zh_cn="3D卡通" zh_tw="3D卡通" jp="3Dエロアニメ" keyword=",3D卡通,3Dエロアニメ,"/>
<a zh_cn="4K" zh_tw="4K" jp="4K" keyword=",4K,"/>
<a zh_cn="DMM独家" zh_tw="DMM獨家" jp="DMM獨家" keyword=",DMM獨家,DMM独家,DMM專屬,DMM专属,"/>
<a zh_cn="M女" zh_tw="M女" jp="M女" keyword=",M女,"/>
<a zh_cn="SM" zh_tw="SM" jp="SM" keyword=",SM,"/>
<a zh_cn="轻虐" zh_tw="輕虐" jp="微SM" keyword=",微SM,轻虐,輕虐,"/>
<a zh_cn="VR" zh_tw="VR" jp="VR" keyword=",VR,VR専用,高品质VR,ハイクオリティVR,"/>
<a zh_cn="武术格斗" zh_tw="武術格鬥" jp="アクション" keyword=",格鬥家,格斗家,戰鬥行動,战斗行动,アクション,武术格斗,武術格鬥,"/>
<a zh_cn="绝顶高潮" zh_tw="絕頂高潮" jp="アクメ・オーガズム" keyword=",极致·性高潮,アクメ・オーガズム,绝顶高潮,絕頂高潮,"/>
<a zh_cn="运动" zh_tw="運動" jp="アスリート" keyword=",运动员,アスリート,運動,运动,"/>
<a zh_cn="COSPLAY" zh_tw="COSPLAY" jp="COSPLAY" keyword=",COSPLAY,COSPLAY服飾,COSPLAY服饰,アニメ,"/>
<a zh_cn="动画角色" zh_tw="動畫角色" jp="動畫人物" keyword=",动漫,動画,動畫人物,动画人物,动画角色,動畫角色,"/>
<a zh_cn="角色扮演" zh_tw="角色扮演" jp="角色扮演" keyword=",角色扮演者,角色扮演,コスプレ,"/>
<a zh_cn="萝莉Cos" zh_tw="蘿莉Cos" jp="蘿莉Cos" keyword=",蘿莉角色扮演,萝莉角色扮演,萝莉Cos,蘿莉Cos,"/>
<a zh_cn="纯欲" zh_tw="純欲" jp="エロス" keyword=",エロス,纯欲,純欲,"/>
<a zh_cn="御宅族" zh_tw="御宅族" jp="オタク" keyword=",御宅族,オタク,"/>
<a zh_cn="辅助自慰" zh_tw="輔助自慰" jp="オナサポ" keyword=",自慰辅助,オナサポ,辅助自慰,輔助自慰,"/>
<a zh_cn="自慰" zh_tw="自慰" jp="自慰" keyword=",自慰,オナニー,"/>
<a zh_cn="洗浴" zh_tw="洗浴" jp="お風呂" keyword=",淋浴,お風呂,洗浴,洗澡,"/>
<a zh_cn="温泉" zh_tw="溫泉" jp="溫泉" keyword=",温泉,溫泉,"/>
<a zh_cn="寝取" zh_tw="寢取" jp="寝取られ" keyword=",寝取,寢取,寝取られ,寝取り·寝取られ·ntr,"/>
<a zh_cn="老太婆" zh_tw="老太婆" jp="お婆ちゃん" keyword=",お婆ちゃん,老太婆,"/>
<a zh_cn="老年男性" zh_tw="老年男性" jp="お爺ちゃん" keyword=",高龄男,お爺ちゃん,老年男性,"/>
<a zh_cn="接吻" zh_tw="接吻" jp="キス・接吻" keyword=",接吻,キス・接吻,"/>
<a zh_cn="女同接吻" zh_tw="女同接吻" jp="女同接吻" keyword=",女同接吻,"/>
<a zh_cn="介绍影片" zh_tw="介紹影片" jp="コミック雑誌" keyword=",コミック雑誌,介绍影片,介紹影片,"/>
<a zh_cn="心理惊悚" zh_tw="心理驚悚" jp="サイコ・スリラー" keyword=",サイコ・スリラー,心理惊悚,心理驚悚,"/>
<a zh_cn="打屁股" zh_tw="打屁股" jp="スパンキング" keyword=",虐打,スパンキング,打屁股,"/>
<a zh_cn="夫妻交换" zh_tw="夫妻交換" jp="スワッピング・夫婦交換" keyword=",夫妻交换,スワッピング・夫婦交換,夫妻交換,"/>
<a zh_cn="性感" zh_tw="性感" jp="セクシー" keyword=",性感的,性感的x,セクシー,"/>
<a zh_cn="性感内衣" zh_tw="性感内衣" jp="性感内衣" keyword=",性感内衣,內衣,内衣,ランジェリー,"/>
<a zh_cn="养尊处优" zh_tw="養尊處優" jp="セレブ" keyword=",セレブ,养尊处优,養尊處優,"/>
<a zh_cn="拉拉队" zh_tw="拉拉隊" jp="チアガール" keyword=",拉拉队长,チアガール,拉拉隊,"/>
<a zh_cn="假阳具" zh_tw="假陽具" jp="ディルド" keyword=",ディルド,假阳具,假陽具,"/>
<a zh_cn="约会" zh_tw="約會" jp="デート" keyword=",约会,デート,約會,"/>
<a zh_cn="巨根" zh_tw="巨根" jp="デカチン・巨根" keyword=",巨大陰莖,巨大阴茎,デカチン・巨根,"/>
<a zh_cn="不戴套" zh_tw="不戴套" jp="生ハメ" keyword=",不戴套,生ハメ,"/>
<a zh_cn="不穿内裤" zh_tw="不穿內褲" jp="ノーパン" keyword=",无内裤,ノーパン,不穿内裤,不穿內褲,"/>
<a zh_cn="不穿胸罩" zh_tw="不穿胸罩" jp="ノーブラ" keyword=",无胸罩,ノーブラ,不穿胸罩,"/>
<a zh_cn="后宫" zh_tw="後宮" jp="ハーレム" keyword=",ハーレム,后宫,後宮,"/>
<a zh_cn="后入" zh_tw="後入" jp="バック" keyword=",背后,バック,后入,後入,"/>
<a zh_cn="妓女" zh_tw="妓女" jp="ビッチ" keyword=",ビッチ,妓女,风俗女郎(性工作者),"/>
<a zh_cn="感谢祭" zh_tw="感謝祭" jp="ファン感謝・訪問" keyword=",粉丝感谢,ファン感謝・訪問,感谢祭,感謝祭,"/>
<a zh_cn="大保健" zh_tw="大保健" jp="ヘルス・ソープ" keyword=",ヘルス・ソープ,大保健,按摩,マッサージ,"/>
<a zh_cn="按摩棒" zh_tw="按摩棒" jp="按摩棒" keyword=",女優按摩棒,女优按摩棒,按摩棒,电动按摩棒,電動按摩棒,電マ,バイブ,"/>
<a zh_cn="男同性恋" zh_tw="男同性戀" jp="ボーイ ズラブ" keyword=",ボーイズラブ,男同,男同性戀,男同性恋,"/>
<a zh_cn="酒店" zh_tw="酒店" jp="ホテル" keyword=",ホテル,酒店,飯店,"/>
<a zh_cn="酒店小姐" zh_tw="酒店小姐" jp="キャバ嬢" keyword=",キャバ嬢,酒店小姐,"/>
<a zh_cn="妈妈的朋友" zh_tw="媽媽的朋友" jp="ママ友" keyword=",ママ友,妈妈的朋友,媽媽的朋友,"/>
<a zh_cn="喜剧" zh_tw="喜劇" jp="ラブコメ" keyword=",喜剧,爱情喜剧,ラブコメ,喜劇,滑稽模仿,堵嘴·喜劇,整人・喜剧,"/>
<a zh_cn="恶搞" zh_tw="惡搞" jp="パロディ" keyword=",パロディ,惡搞,整人,"/>
<a zh_cn="白眼失神" zh_tw="白眼失神" jp="白目・失神" keyword=",翻白眼・失神,白目・失神,白眼失神,"/>
<a zh_cn="白人" zh_tw="白人" jp="白人" keyword=",白人,"/>
<a zh_cn="招待小姐" zh_tw="招待小姐" jp="受付嬢" keyword=",招待小姐,受付嬢,接待员,"/>
<a zh_cn="薄马赛克" zh_tw="薄馬賽克" jp="薄馬賽克" keyword=",薄馬賽克,薄马赛克,"/>
<a zh_cn="鼻钩" zh_tw="鼻鉤" jp="鼻フック" keyword=",鼻勾,鼻フック,鼻钩,鼻鉤,"/>
<a zh_cn="变性人" zh_tw="變性人" jp="變性者" keyword=",變性者,变性者,变性人,變性人,"/>
<a zh_cn="医院诊所" zh_tw="醫院診所" jp="病院・クリニック" keyword=",医院・诊所,病院・クリニック,医院诊所,醫院診所,"/>
<a zh_cn="社团经理" zh_tw="社團經理" jp="部活・マネージャー" keyword=",社团・经理,部活・マネージャー,社团经理,社團經理,"/>
<a zh_cn="下属·同事" zh_tw="下屬·同事" jp="部下・同僚" keyword=",下属・同事,部下・同僚,下属·同事,下屬·同事,同事,下屬,下属,"/>
<a zh_cn="残忍" zh_tw="殘忍" jp="殘忍" keyword=",殘忍,殘忍畫面,残忍画面,奇異的,奇异的,"/>
<a zh_cn="插入异物" zh_tw="插入異物" jp="插入異物" keyword=",插入異物,插入异物,"/>
<a zh_cn="超乳" zh_tw="超乳" jp="超乳" keyword=",超乳,"/>
<a zh_cn="潮吹" zh_tw="潮吹" jp="潮吹" keyword=",潮吹,潮吹き,"/>
<a zh_cn="男优潮吹" zh_tw="男優潮吹" jp="男の潮吹き" keyword=",男潮吹,男の潮吹き,男优潮吹,男優潮吹,"/>
<a zh_cn="巴士导游" zh_tw="巴士導遊" jp="車掌小姐" keyword=",車掌小姐,车掌小姐,巴士乘务员,巴士乘務員,巴士导游,巴士導遊,バスガイド,"/>
<a zh_cn="熟女" zh_tw="熟女" jp="熟女" keyword=",熟女,成熟的女人,"/>
<a zh_cn="出轨" zh_tw="出軌" jp="出軌" keyword=",出軌,出轨,"/>
<a zh_cn="白天出轨" zh_tw="白天出軌" jp="白天出轨" keyword=",白天出軌,白天出轨,通姦,"/>
<a zh_cn="处男" zh_tw="處男" jp="處男" keyword=",處男,处男,"/>
<a zh_cn="处女" zh_tw="處女" jp="處女" keyword=",處女,处女,処女,童貞,"/>
<a zh_cn="触手" zh_tw="觸手" jp="觸手" keyword=",觸手,触手,"/>
<a zh_cn="胁迫" zh_tw="胁迫" jp="胁迫" keyword=",魔鬼系,粗暴,胁迫,"/>
<a zh_cn="催眠" zh_tw="催眠" jp="催眠" keyword=",催眠,"/>
<a zh_cn="打手枪" zh_tw="打手槍" jp="打手槍" keyword=",手淫,打手枪,打手槍,手コキ,"/>
<a zh_cn="单体作品" zh_tw="單體作品" jp="單體作品" keyword=",单体作品,單體作品,単体作品,AV女优片,"/>
<a zh_cn="荡妇" zh_tw="蕩婦" jp="蕩婦" keyword=",蕩婦,荡妇,"/>
<a zh_cn="搭讪" zh_tw="搭訕" jp="搭訕" keyword=",倒追,女方搭讪,女方搭訕,搭讪,搭訕,ナンパ,"/>
<a zh_cn="女医师" zh_tw="女醫師" jp="女醫師" keyword=",女医师,女醫師,女医,"/>
<a zh_cn="主观视角" zh_tw="主觀視角" jp="主觀視角" keyword=",第一人稱攝影,第一人称摄影,主观视角,主觀視角,第一人称视点,主観,"/>
<a zh_cn="多P" zh_tw="多P" jp="多P" keyword=",多P,"/>
<a zh_cn="恶作剧" zh_tw="惡作劇" jp="惡作劇" keyword=",惡作劇,恶作剧,"/>
<a zh_cn="放尿" zh_tw="放尿" jp="放尿" keyword=",放尿,"/>
<a zh_cn="女服务生" zh_tw="女服務生" jp="ウェイトレス" keyword=",服務生,服务生,女服务生,女服務生,ウェイトレス,"/>
<a zh_cn="蒙面" zh_tw="蒙面" jp="覆面・マスク" keyword=",蒙面・面罩,蒙面・面具,覆面・マスク,"/>
<a zh_cn="肛交" zh_tw="肛交" jp="肛交" keyword=",肛交,アナル,"/>
<a zh_cn="肛内中出" zh_tw="肛內中出" jp="肛內中出" keyword=",肛内中出,肛內中出,"/>
<a zh_cn="个子高" zh_tw="個子高" jp="个子高" keyword=",高,个子高,個子高,"/>
<a zh_cn="高中生" zh_tw="高中生" jp="高中生" keyword=",高中女生,高中生,"/>
<a zh_cn="歌德萝莉" zh_tw="歌德蘿莉" jp="哥德蘿莉" keyword=",歌德萝莉,哥德蘿莉,歌德蘿莉,"/>
<a zh_cn="各种职业" zh_tw="各種職業" jp="各種職業" keyword=",各種職業,各种职业,多種職業,多种职业,職業色々,"/>
<a zh_cn="职业装" zh_tw="職業裝" jp="職業裝" keyword=",OL,洽公服装,职业装,職業裝,ビジネススーツ,"/>
<a zh_cn="女性向" zh_tw="女性向" jp="女性向け" keyword=",給女性觀眾,给女性观众,女性向,女性向け,"/>
<a zh_cn="公主" zh_tw="公主" jp="公主" keyword=",公主,"/>
<a zh_cn="故事集" zh_tw="故事集" jp="故事集" keyword=",故事集,"/>
<a zh_cn="寡妇" zh_tw="寡婦" jp="寡婦" keyword=",寡婦,寡妇,"/>
<a zh_cn="灌肠" zh_tw="灌腸" jp="灌腸" keyword=",灌腸,灌肠,"/>
<a zh_cn="进口" zh_tw="進口" jp="國外進口" keyword=",海外,進口,进口,國外進口,国外进口,"/>
<a zh_cn="流汗" zh_tw="流汗" jp="汗だく" keyword=",流汗,汗だく,"/>
<a zh_cn="共演" zh_tw="共演" jp="合作作品" keyword=",合作作品,共演,"/>
<a zh_cn="和服・丧服" zh_tw="和服・喪服" jp="和服・喪服" keyword=",和服・丧服,和服,喪服,和服、丧服,和服・喪服,和服·丧服,和服·喪服,"/>
<a zh_cn="和服・浴衣" zh_tw="和服・浴衣" jp="和服・浴衣" keyword=",浴衣,和服・浴衣,和服、浴衣,"/>
<a zh_cn="调教・奴隶" zh_tw="調教・奴隸" jp="調教・奴隸" keyword=",奴隸,奴隶,奴隷,調教・奴隷,調教,调教,调教・奴隶,调教·奴隶,調教·奴隸,調教・奴隸."/>
<a zh_cn="黑帮成员" zh_tw="黑幫成員" jp="黑幫成員" keyword=",黑幫成員,黑帮成员,"/>
<a zh_cn="黑人" zh_tw="黑人" jp="黑人演員" keyword=",黑人,黑人演員,黑人演员,黒人男優,"/>
<a zh_cn="护士" zh_tw="護士" jp="ナース" keyword=",護士,护士,ナース,"/>
<a zh_cn="痴汉" zh_tw="痴漢" jp="痴漢" keyword=",痴漢,痴汉,"/>
<a zh_cn="痴女" zh_tw="癡女" jp="癡女" keyword=",花癡,痴女,癡女,"/>
<a zh_cn="新娘" zh_tw="新娘" jp="新娘" keyword=",花嫁,新娘,新娘,年輕妻子,新娘、年轻妻子,新娘、年輕妻子,新娘、少妇,新娘、少婦,花嫁・若妻,"/>
<a zh_cn="少妇" zh_tw="少婦" jp="少婦" keyword=",少妇,少婦,"/>
<a zh_cn="妄想" zh_tw="妄想" jp="妄想" keyword=",幻想,妄想,妄想族,"/>
<a zh_cn="肌肉" zh_tw="肌肉" jp="肌肉" keyword=",肌肉,"/>
<a zh_cn="及膝袜" zh_tw="及膝襪" jp="及膝襪" keyword=",及膝襪,及膝袜,"/>
<a zh_cn="纪录片" zh_tw="紀錄片" jp="纪录片" keyword=",紀錄片,纪录片,"/>
<a zh_cn="家庭教师" zh_tw="家庭教師" jp="家庭教師" keyword=",家教,家庭教师,家庭教師,"/>
<a zh_cn="娇小" zh_tw="嬌小" jp="嬌小的" keyword=",迷你系,迷你係列,娇小,嬌小,瘦小身型,嬌小的,迷你系‧小隻女,ミニ系・小柄,"/>
<a zh_cn="性教学" zh_tw="性教學" jp="性教學" keyword=",教學,教学,性教学,性教學,"/>
<a zh_cn="姐姐" zh_tw="姐姐" jp="姐姐" keyword=",姐姐,姐姐系,お姉さん,"/>
<a zh_cn="姐·妹" zh_tw="姐·妹" jp="姐·妹" keyword=",妹妹,姐妹,姐·妹,姊妹,"/>
<a zh_cn="穿衣幹砲" zh_tw="穿衣幹砲" jp="着エロ" keyword=",穿衣幹砲,着エロ,"/>
<a zh_cn="紧缚" zh_tw="緊縛" jp="緊縛" keyword=",緊縛,紧缚,縛り・緊縛,紧缚,"/>
<a zh_cn="紧身衣" zh_tw="緊身衣" jp="緊身衣" keyword=",緊身衣,紧身衣,紧缚皮衣,緊縛皮衣,紧身衣激凸,緊身衣激凸,ボディコン,"/>
<a zh_cn="经典老片" zh_tw="經典老片" jp="經典" keyword=",經典,经典,经典老片,經典老片,"/>
<a zh_cn="拘束" zh_tw="拘束" jp="拘束" keyword=",拘束,"/>
<a zh_cn="监禁" zh_tw="監禁" jp="監禁" keyword=",監禁,监禁,"/>
<a zh_cn="强奸" zh_tw="強姦" jp="強姦" keyword=",強姦,强奸,強暴,强暴,レイプ,"/>
<a zh_cn="轮奸" zh_tw="輪姦" jp="輪姦" keyword=",輪姦,轮奸,轮姦,"/>
<a zh_cn="私处近拍" zh_tw="私處近拍" jp="私處近拍" keyword=",私处近拍,私處近拍,局部特寫,局部特写,局部アップ,"/>
<a zh_cn="巨尻" zh_tw="巨尻" jp="巨尻" keyword=",大屁股,巨大屁股,巨尻,"/>
<a zh_cn="美尻" zh_tw="美尻" jp="美尻" keyword=",美尻,"/>
<a zh_cn="巨乳" zh_tw="巨乳" jp="巨乳" keyword=",巨乳,巨乳爆乳,爱巨乳,愛巨乳,巨乳フェチ,"/>
<a zh_cn="窈窕" zh_tw="窈窕" jp="スレンダー" keyword=",窈窕,スレンダー,"/>
<a zh_cn="美腿" zh_tw="美腿" jp="美腿" keyword=",美腿,美脚,爱美腿,愛美腿,脚フェチ,"/>
<a zh_cn="修长" zh_tw="修長" jp="長身" keyword=",修長,長身,"/>
<a zh_cn="爱美臀" zh_tw="愛美臀" jp="尻フェチ" keyword=",爱美臀,愛美臀,尻フェチ,"/>
<a zh_cn="奇幻" zh_tw="奇幻" jp="科幻" keyword=",科幻,奇幻,"/>
<a zh_cn="空姐" zh_tw="空姐" jp="スチュワーデス" keyword=",空中小姐,空姐,スチュワーデス,"/>
<a zh_cn="恐怖" zh_tw="恐怖" jp="恐怖" keyword=",恐怖,"/>
<a zh_cn="口交" zh_tw="口交" jp="フェラ" keyword=",口交,フェラ,双重口交,雙重口交,Wフェラ,"/>
<a zh_cn="强迫口交" zh_tw="強迫口交" jp="強迫口交" keyword=",强迫口交,強迫口交,イラマチオ,"/>
<a zh_cn="偷拍" zh_tw="偷拍" jp="盗撮" keyword=",偷拍,盗撮,"/>
<a zh_cn="蜡烛" zh_tw="蠟燭" jp="蝋燭" keyword=",蜡烛,蝋燭,蠟燭,"/>
<a zh_cn="滥交" zh_tw="濫交" jp="濫交" keyword=",濫交,滥交,乱交,亂交,"/>
<a zh_cn="酒醉" zh_tw="酒醉" jp="爛醉如泥的" keyword=",爛醉如泥的,烂醉如泥的,酒醉,"/>
<a zh_cn="立即插入" zh_tw="立即插入" jp="立即插入" keyword=",立即口交,即兴性交,立即插入,马上幹,馬上幹,即ハメ,"/>
<a zh_cn="连裤袜" zh_tw="連褲襪" jp="連褲襪" keyword=",連褲襪,连裤袜,"/>
<a zh_cn="连发" zh_tw="連發" jp="連発" keyword=",连发,連發,連発,"/>
<a zh_cn="恋爱" zh_tw="戀愛" jp="戀愛" keyword=",戀愛,恋爱,恋愛,"/>
<a zh_cn="恋乳癖" zh_tw="戀乳癖" jp="戀乳癖" keyword=",戀乳癖,恋乳癖,"/>
<a zh_cn="恋腿癖" zh_tw="戀腿癖" jp="戀腿癖" keyword=",戀腿癖,恋腿癖,"/>
<a zh_cn="猎艳" zh_tw="獵艷" jp="獵豔" keyword=",獵豔,猎艳,獵艷,"/>
<a zh_cn="乱伦" zh_tw="亂倫" jp="亂倫" keyword=",亂倫,乱伦,"/>
<a zh_cn="萝莉" zh_tw="蘿莉" jp="蘿莉塔" keyword=",蘿莉塔,萝莉塔,ロリ,"/>
<a zh_cn="裸体围裙" zh_tw="裸體圍裙" jp="裸體圍裙" keyword=",裸體圍裙,裸体围裙,真空围裙,真空圍裙,裸エプロン,"/>
<a zh_cn="旅行" zh_tw="旅行" jp="旅行" keyword=",旅行,"/>
<a zh_cn="骂倒" zh_tw="罵倒" jp="罵倒" keyword=",罵倒,骂倒,"/>
<a zh_cn="蛮横娇羞" zh_tw="蠻橫嬌羞" jp="蠻橫嬌羞" keyword=",蠻橫嬌羞,蛮横娇羞,"/>
<a zh_cn="猫耳" zh_tw="貓耳" jp="貓耳女" keyword=",貓耳女,猫耳女,"/>
<a zh_cn="美容院" zh_tw="美容院" jp="美容院" keyword=",美容院,エステ,"/>
<a zh_cn="短裙" zh_tw="短裙" jp="短裙" keyword=",短裙,"/>
<a zh_cn="美少女" zh_tw="美少女" jp="美少女" keyword=",美少女,美少女電影,美少女电影,"/>
<a zh_cn="迷你裙" zh_tw="迷你裙" jp="迷你裙" keyword=",迷你裙,ミニスカ,"/>
<a zh_cn="迷你裙警察" zh_tw="迷你裙警察" jp="迷你裙警察" keyword=",迷你裙警察,"/>
<a zh_cn="秘书" zh_tw="秘書" jp="秘書" keyword=",秘書,秘书,"/>
<a zh_cn="面试" zh_tw="面試" jp="面接" keyword=",面试,面接,面試,"/>
<a zh_cn="苗条" zh_tw="苗條" jp="苗條" keyword=",苗條,苗条,"/>
<a zh_cn="明星脸" zh_tw="明星臉" jp="明星臉" keyword=",明星臉,明星脸,"/>
<a zh_cn="模特" zh_tw="模特" jp="模特兒" keyword=",模特兒,模特儿,モデル,"/>
<a zh_cn="魔法少女" zh_tw="魔法少女" jp="魔法少女" keyword=",魔法少女,"/>
<a zh_cn="母亲" zh_tw="母親" jp="母親" keyword=",母親,母亲,妈妈系,媽媽系,お母さん,"/>
<a zh_cn="义母" zh_tw="義母" jp="母親" keyword=",义母,義母,"/>
<a zh_cn="母乳" zh_tw="母乳" jp="母乳" keyword=",母乳,"/>
<a zh_cn="女强男" zh_tw="女强男" jp="逆レイプ" keyword=",逆レイプ,女强男,"/>
<a zh_cn="养女" zh_tw="養女" jp="娘・養女" keyword=",养女,娘・養女,"/>
<a zh_cn="女大学生" zh_tw="女大學生" jp="女子大生" keyword=",女大學生,女大学生,女子大生,"/>
<a zh_cn="女祭司" zh_tw="女祭司" jp="女祭司" keyword=",女祭司,"/>
<a zh_cn="女搜查官" zh_tw="女搜查官" jp="女檢察官" keyword=",女檢察官,女检察官,女搜查官,"/>
<a zh_cn="女教师" zh_tw="女教師" jp="女教師" keyword=",女教師,女教师,"/>
<a zh_cn="女忍者" zh_tw="女忍者" jp="女忍者" keyword=",女忍者,くノ一,"/>
<a zh_cn="女上司" zh_tw="女上司" jp="女上司" keyword=",女上司,"/>
<a zh_cn="骑乘位" zh_tw="騎乘位" jp="騎乗位" keyword=",女上位,骑乘,騎乘,骑乘位,騎乘位,騎乗位,"/>
<a zh_cn="辣妹" zh_tw="辣妹" jp="辣妹" keyword=",女生,辣妹,ギャル,"/>
<a zh_cn="女同性恋" zh_tw="女同性戀" jp="女同性戀" keyword=",女同性戀,女同性恋,女同志,レズ,"/>
<a zh_cn="女王" zh_tw="女王" jp="女王様" keyword=",女王,女王様,"/>
<a zh_cn="女医生" zh_tw="女醫生" jp="女醫生" keyword=",女醫生,女医生,"/>
<a zh_cn="女仆" zh_tw="女僕" jp="メイド" keyword=",女傭,女佣,女仆,女僕,メイド,"/>
<a zh_cn="女优最佳合集" zh_tw="女優最佳合集" jp="女優ベスト・総集編" keyword=",女優ベスト・総集編,女优最佳合集,女優最佳合集,"/>
<a zh_cn="女战士" zh_tw="女戰士" jp="超級女英雄" keyword=",行動,行动,超級女英雄,女战士,女戰士,"/>
<a zh_cn="女主播" zh_tw="女主播" jp="女子アナ" keyword=",女主播,女子アナ,"/>
<a zh_cn="女主人" zh_tw="女主人" jp="老闆娘" keyword=",女主人,老闆娘,女主人,老板娘、女主人,女主人、女老板,女将・女主人,"/>
<a zh_cn="女装人妖" zh_tw="女裝人妖" jp="女裝人妖" keyword=",女裝人妖,女装人妖,"/>
<a zh_cn="呕吐" zh_tw="嘔吐" jp="嘔吐" keyword=",呕吐,嘔吐,"/>
<a zh_cn="粪便" zh_tw="糞便" jp="糞便" keyword=",排便,粪便,糞便,食糞,食粪,"/>
<a zh_cn="坦克" zh_tw="坦克" jp="胖女人" keyword=",胖女人,坦克,"/>
<a zh_cn="泡泡袜" zh_tw="泡泡襪" jp="泡泡襪" keyword=",泡泡袜,泡泡襪,"/>
<a zh_cn="泡沫浴" zh_tw="泡沫浴" jp="泡沫浴" keyword=",泡沫浴,"/>
<a zh_cn="美臀" zh_tw="美臀" jp="屁股" keyword=",美臀,屁股,"/>
<a zh_cn="平胸" zh_tw="平胸" jp="貧乳・微乳" keyword=",平胸,貧乳・微乳,"/>
<a zh_cn="丈母娘" zh_tw="丈母娘" jp="婆婆" keyword=",婆婆,后母,丈母娘,"/>
<a zh_cn="恋物癖" zh_tw="戀物癖" jp="戀物癖" keyword="戀物癖,恋物癖,其他戀物癖,其他恋物癖,"/>
<a zh_cn="其他癖好" zh_tw="其他癖好" jp="その他フェチ" keyword="其他癖好,その他フェチ,"/>
<a zh_cn="旗袍" zh_tw="旗袍" jp="旗袍" keyword=",旗袍,"/>
<a zh_cn="企画" zh_tw="企畫" jp="企畫" keyword=",企畫,企画,"/>
<a zh_cn="车震" zh_tw="車震" jp="汽車性愛" keyword=",汽車性愛,汽车性爱,车震,車震,车床族,車床族,カーセックス,"/>
<a zh_cn="大小姐" zh_tw="大小姐" jp="千金小姐" keyword=",大小姐,千金小姐,"/>
<a zh_cn="情侣" zh_tw="情侶" jp="情侶" keyword=",情侶,情侣,伴侶,伴侣,カップル,"/>
<a zh_cn="拳交" zh_tw="拳交" jp="拳交" keyword=",拳交,"/>
<a zh_cn="晒黑" zh_tw="曬黑" jp="日焼け" keyword=",曬黑,晒黑,日焼け,"/>
<a zh_cn="美乳" zh_tw="美乳" jp="美乳" keyword=",乳房,美乳,"/>
<a zh_cn="乳交" zh_tw="乳交" jp="乳交" keyword=",乳交,パイズリ,"/>
<a zh_cn="乳液" zh_tw="乳液" jp="乳液" keyword=",乳液,ローション・オイル,ローション·オイル,"/>
<a zh_cn="软体" zh_tw="軟體" jp="軟体" keyword=",软体,軟体,軟體,"/>
<a zh_cn="搔痒" zh_tw="搔癢" jp="瘙癢" keyword=",搔痒,瘙癢,搔癢,"/>
<a zh_cn="设计环节" zh_tw="設計環節" jp="設置項目" keyword=",設置項目,设计环节,設計環節,"/>
<a zh_cn="丰乳肥臀" zh_tw="豐乳肥臀" jp="身體意識" keyword=",身體意識,身体意识,丰乳肥臀,豐乳肥臀,"/>
<a zh_cn="深喉" zh_tw="深喉" jp="深喉" keyword=",深喉,"/>
<a zh_cn="时间停止" zh_tw="時間停止" jp="時間停止" keyword=",时间停止,時間停止,"/>
<a zh_cn="插入手指" zh_tw="插入手指" jp="手指插入" keyword=",手指插入,插入手指,"/>
<a zh_cn="首次亮相" zh_tw="首次亮相" jp="首次亮相" keyword=",首次亮相,"/>
<a zh_cn="叔母" zh_tw="叔母" jp="叔母さん" keyword=",叔母,叔母さん,"/>
<a zh_cn="数位马赛克" zh_tw="數位馬賽克" jp="數位馬賽克" keyword=",數位馬賽克,数位马赛克,"/>
<a zh_cn="双性人" zh_tw="雙性人" jp="雙性人" keyword=",雙性人,双性人,"/>
<a zh_cn="韵律服" zh_tw="韻律服" jp="レオタード" keyword=",韵律服,韻律服,レオタード,"/>
<a zh_cn="水手服" zh_tw="水手服" jp="セーラー服" keyword=",水手服,セーラー服,"/>
<a zh_cn="丝袜" zh_tw="絲襪" jp="絲襪" keyword=",丝袜,絲襪,パンスト,"/>
<a zh_cn="特摄" zh_tw="特攝" jp="特攝" keyword=",特效,特摄,特攝,"/>
<a zh_cn="经历告白" zh_tw="經歷告白" jp="體驗懺悔" keyword=",體驗懺悔,经历告白,經歷告白,"/>
<a zh_cn="体操服" zh_tw="體操服" jp="體育服" keyword=",体操服,體育服,體操服,"/>
<a zh_cn="舔阴" zh_tw="舔陰" jp="舔陰" keyword=",舔陰,舔阴,舔鲍,クンニ,"/>
<a zh_cn="跳蛋" zh_tw="跳蛋" jp="ローター" keyword=",跳蛋,ローター,"/>
<a zh_cn="跳舞" zh_tw="跳舞" jp="跳舞" keyword=",跳舞,"/>
<a zh_cn="青梅竹马" zh_tw="青梅竹馬" jp="童年朋友" keyword=",童年朋友,青梅竹马,青梅竹馬,"/>
<a zh_cn="偷窥" zh_tw="偷窺" jp="偷窥" keyword=",偷窺,偷窥,"/>
<a zh_cn="投稿" zh_tw="投稿" jp="投稿" keyword=",投稿,"/>
<a zh_cn="赛车女郎" zh_tw="賽車女郎" jp="レースクィーン" keyword=",賽車女郎,赛车女郎,レースクィーン,"/>
<a zh_cn="兔女郎" zh_tw="兔女郎" jp="兔女郎" keyword=",兔女郎,バニーガール,"/>
<a zh_cn="吞精" zh_tw="吞精" jp="吞精" keyword=",吞精,ごっくん,"/>
<a zh_cn="成人动画" zh_tw="成人動畫" jp="アニメ" keyword=",成人动画,成人動畫,アニメ,"/>
<a zh_cn="成人娃娃" zh_tw="成人娃娃" jp="娃娃" keyword=",娃娃,成人娃娃,"/>
<a zh_cn="玩物" zh_tw="玩物" jp="玩具" keyword=",玩具,玩物,"/>
<a zh_cn="适合手机垂直播放" zh_tw="適合手機垂直播放" jp="為智能手機推薦垂直視頻" keyword=",スマホ専用縦動画,為智能手機推薦垂直視頻,适合手机垂直播放,適合手機垂直播放,"/>
<a zh_cn="猥亵穿着" zh_tw="猥褻穿着" jp="猥褻穿著" keyword=",猥褻穿著,猥亵穿着,猥褻穿着,"/>
<a zh_cn="无码流出" zh_tw="無碼流出" jp="无码流出" keyword=",無碼流出,无码流出,"/>
<a zh_cn="无码破解" zh_tw="無碼破解" jp="無碼破解" keyword=",無碼破解,无码破解,"/>
<a zh_cn="无毛" zh_tw="無毛" jp="無毛" keyword=",無毛,无毛,剃毛,白虎,パイパン,"/>
<a zh_cn="剧情" zh_tw="劇情" jp="戲劇" keyword=",戲劇,戏剧,剧情,劇情,戲劇x,戏剧、连续剧,戲劇、連續劇,ドラマ,"/>
<a zh_cn="性转换·男变女" zh_tw="性轉換·男變女" jp="性別轉型·女性化" keyword=",性转换・女体化,性別轉型·女性化,性转换·男变女,性轉換·男變女,"/>
<a zh_cn="性奴" zh_tw="性奴" jp="性奴" keyword=",性奴,"/>
<a zh_cn="性骚扰" zh_tw="性騷擾" jp="性騷擾" keyword=",性騷擾,性骚扰,"/>
<a zh_cn="故意露胸" zh_tw="故意露胸" jp="胸チラ" keyword=",胸チラ,故意露胸,"/>
<a zh_cn="羞耻" zh_tw="羞恥" jp="羞恥" keyword=",羞恥,羞耻,"/>
<a zh_cn="学生" zh_tw="學生" jp="學生" keyword=",學生,其他學生,其他学生,學生(其他),学生,"/>
<a zh_cn="学生妹" zh_tw="學生妹" jp="學生妹" keyword=",学生妹,學生妹,女子校生,"/>
<a zh_cn="学生服" zh_tw="學生服" jp="學生服" keyword=",学生服,學生服,"/>
<a zh_cn="学生泳装" zh_tw="學生泳裝" jp="學校泳裝" keyword=",學校泳裝,学校泳装,学生泳装,學生泳裝,校园泳装,校園泳裝,競泳・スクール水着,"/>
<a zh_cn="泳装" zh_tw="泳裝" jp="水着" keyword=",泳裝,泳装,水着,"/>
<a zh_cn="校园" zh_tw="校園" jp="學校作品" keyword=",學校作品,学校作品,校园,校園,校园物语,校園物語,学園もの,"/>
<a zh_cn="肛检" zh_tw="肛檢" jp="鴨嘴" keyword=",鴨嘴,鸭嘴,肛检,肛檢,"/>
<a zh_cn="骑脸" zh_tw="騎臉" jp="顏面騎乘" keyword=",騎乗位,颜面骑乘,顏面騎乘,骑脸,騎臉,"/>
<a zh_cn="颜射" zh_tw="顏射" jp="顔射" keyword=",顏射,颜射,顏射x,顔射,"/>
<a zh_cn="眼镜" zh_tw="眼鏡" jp="眼鏡" keyword=",眼鏡,眼镜,メガネ,"/>
<a zh_cn="药物" zh_tw="藥物" jp="藥物" keyword=",藥物,药物,药物、迷姦,藥物、迷姦,ドラッグ,"/>
<a zh_cn="野外露出" zh_tw="野外露出" jp="野外・露出" keyword=",野外・露出,野外露出,野外,"/>
<a zh_cn="业余" zh_tw="業餘" jp="業餘" keyword=",業餘,业余,素人,"/>
<a zh_cn="人妻" zh_tw="人妻" jp="已婚婦女" keyword=",已婚婦女,已婚妇女,人妻,"/>
<a zh_cn="近亲相姦" zh_tw="近親相姦" jp="近親相姦" keyword=",近亲相姦,近親相姦,"/>
<a zh_cn="自拍" zh_tw="自拍" jp="ハメ撮り" keyword=",自拍,ハメ撮り,個人撮影,个人撮影,"/>
<a zh_cn="淫语" zh_tw="淫語" jp="淫語" keyword=",淫語,淫语,"/>
<a zh_cn="酒会" zh_tw="酒會" jp="飲み会・合コン" keyword=",饮酒派对,飲み会・合コン,酒会,酒會,"/>
<a zh_cn="饮尿" zh_tw="飲尿" jp="飲尿" keyword=",飲尿,饮尿,"/>
<a zh_cn="游戏改" zh_tw="遊戲改" jp="遊戲的真人版" keyword=",遊戲的真人版,游戏改,遊戲改,"/>
<a zh_cn="漫改" zh_tw="漫改" jp="原作コラボ" keyword=",原作改編,原作改编,原作コラボ,漫改,"/>
<a zh_cn="受孕" zh_tw="受孕" jp="孕ませ" keyword=",受孕,孕ませ,"/>
<a zh_cn="孕妇" zh_tw="孕婦" jp="孕婦" keyword=",孕婦,孕妇,"/>
<a zh_cn="早泄" zh_tw="早泄" jp="早漏" keyword=",早洩,早漏,早泄,"/>
<a zh_cn="Show Girl" zh_tw="Show Girl" jp="展場女孩" keyword=",展場女孩,展场女孩,Show Girl,"/>
<a zh_cn="正太控" zh_tw="正太控" jp="正太控" keyword=",正太控,"/>
<a zh_cn="制服" zh_tw="制服" jp="制服" keyword=",制服,"/>
<a zh_cn="中出" zh_tw="中出" jp="中出" keyword=",中出,中出し,"/>
<a zh_cn="子宫颈" zh_tw="子宮頸" jp="子宮頸" keyword=",子宮頸,子宫颈,"/>
<a zh_cn="足交" zh_tw="足交" jp="足交" keyword=",足交,足コキ,"/>
<a zh_cn="4小时+" zh_tw="4小時+" jp="4小時以上作品" keyword=",4小時以上作品,4小时以上作品,4小时+,4小時+,"/>
<a zh_cn="69" zh_tw="69" jp="69" keyword=",69,"/>
<a zh_cn="学生" zh_tw="學生" jp="學生" keyword=",C学生,學生,"/>
<a zh_cn="M男" zh_tw="M男" jp="M男" keyword=",M男,"/>
<a zh_cn="暗黑系" zh_tw="暗黑系" jp="暗黑系" keyword=",暗黑系,黑暗系統,"/>
<a zh_cn="成人电影" zh_tw="成人電影" jp="成人電影" keyword=",成人電影,成人电影,"/>
<a zh_cn="成人动漫" zh_tw="成人動漫" jp="成人動漫" keyword=",成人动漫,成人動漫,"/>
<a zh_cn="导尿" zh_tw="導尿" jp="導尿" keyword=",導尿,导尿,"/>
<a zh_cn="法国" zh_tw="法國" jp="法國" keyword=",法国,法國,"/>
<a zh_cn="飞特族" zh_tw="飛特族" jp="飛特族" keyword=",飛特族,飞特族,"/>
<a zh_cn="韩国" zh_tw="韓國" jp="韓國" keyword=",韓國,韩国,"/>
<a zh_cn="户外" zh_tw="戶外" jp="戶外" keyword=",戶外,户外,"/>
<a zh_cn="角色对换" zh_tw="角色對換" jp="角色對換" keyword=",角色对换,角色對換,"/>
<a zh_cn="精选综合" zh_tw="精選綜合" jp="合集" keyword=",精選,綜合,精选、综合,合集,精选综合,精選綜合,"/>
<a zh_cn="捆绑" zh_tw="捆綁" jp="捆綁" keyword=",捆綁,捆绑,折磨,"/>
<a zh_cn="礼仪小姐" zh_tw="禮儀小姐" jp="禮儀小姐" keyword=",禮儀小姐,礼仪小姐,"/>
<a zh_cn="历史剧" zh_tw="歷史劇" jp="歷史劇" keyword=",歷史劇,历史剧,"/>
<a zh_cn="露出" zh_tw="露出" jp="露出" keyword=",露出,"/>
<a zh_cn="母狗" zh_tw="母狗" jp="母狗" keyword=",母犬,母狗,"/>
<a zh_cn="男优介绍" zh_tw="男優介紹" jp="男優介紹" keyword=",男性,男优介绍,男優介紹,"/>
<a zh_cn="女儿" zh_tw="女兒" jp="女兒" keyword=",女兒,女儿,"/>
<a zh_cn="全裸" zh_tw="全裸" jp="全裸" keyword=",全裸,"/>
<a zh_cn="窥乳" zh_tw="窺乳" jp="窺乳" keyword=",乳房偷窺,窥乳,窺乳,"/>
<a zh_cn="羞辱" zh_tw="羞辱" jp="辱め" keyword=",凌辱,羞辱,辱め,辱骂,辱罵,"/>
<a zh_cn="脱衣" zh_tw="脫衣" jp="脫衣" keyword=",脫衣,脱衣,"/>
<a zh_cn="西洋片" zh_tw="西洋片" jp="西洋片" keyword=",西洋片,"/>
<a zh_cn="写真偶像" zh_tw="寫真偶像" jp="寫真偶像" keyword=",寫真偶像,写真偶像,"/>
<a zh_cn="修女" zh_tw="修女" jp="修女" keyword=",修女,"/>
<a zh_cn="偶像艺人" zh_tw="偶像藝人" jp="アイドル芸能人" keyword=",藝人,艺人,偶像,偶像藝人,偶像艺人,偶像‧藝人,偶像‧艺人,アイドル・芸能人,"/>
<a zh_cn="淫乱真实" zh_tw="淫亂真實" jp="淫亂真實" keyword=",淫亂,真實,淫乱、真实,淫乱真实,淫亂真實,淫乱・ハード系,"/>
<a zh_cn="瑜伽·健身" zh_tw="瑜伽·健身" jp="瑜伽·健身" keyword=",瑜伽,瑜伽·健身,ヨガ,講師,讲师"/>
<a zh_cn="运动短裤" zh_tw="運動短褲" jp="運動短褲" keyword=",運動短褲,运动短裤,"/>
<a zh_cn="JK制服" zh_tw="JK制服" jp="JK制服" keyword=",制服外套,JK制服,校服,"/>
<a zh_cn="重制版" zh_tw="重製版" jp="複刻版" keyword=",重印版,複刻版,重制版,重製版,"/>
<a zh_cn="综合短篇" zh_tw="綜合短篇" jp="綜合短篇" keyword=",綜合短篇,综合短篇,"/>
<a zh_cn="被外国人干" zh_tw="被外國人乾" jp="被外國人乾" keyword=",被外國人幹,被外国人干,被外國人乾,"/>
<a zh_cn="二穴同入" zh_tw="二穴同入" jp="二穴同入" keyword=",二穴同時挿入,二穴同入,"/>
<a zh_cn="美脚" zh_tw="美腳" jp="美腳" keyword=",美腳,美脚,"/>
<a zh_cn="过膝袜" zh_tw="過膝襪" jp="過膝襪" keyword=",絲襪、過膝襪,过膝袜,"/>
<a zh_cn="名人" zh_tw="名人" jp="名人" keyword=",名人,"/>
<a zh_cn="黑白配" zh_tw="黑白配" jp="黑白配" keyword=",黑白配,"/>
<a zh_cn="欲女" zh_tw="欲女" jp="エマニエル" keyword=",エマニエル,欲女,"/>
<a zh_cn="高筒靴" zh_tw="高筒靴" jp="高筒靴" keyword=",靴子,高筒靴,"/>
<a zh_cn="双飞" zh_tw="雙飛" jp="雙飛" keyword=",兩女一男,双飞,雙飛,"/>
<a zh_cn="两女两男" zh_tw="兩女兩男" jp="兩女兩男" keyword=",兩男兩女,两女两男,兩女兩男,"/>
<a zh_cn="两男一女" zh_tw="兩男一女" jp="兩男一女" keyword=",兩男一女,两男一女,"/>
<a zh_cn="3P" zh_tw="3P" jp="3P" keyword=",3P,3p,P,p,"/>
<a zh_cn="唾液敷面" zh_tw="唾液敷面" jp="唾液敷面" keyword=",唾液敷面,"/>
<!-- 2021-11-27 Update -->
<a zh_cn="刪除" zh_tw="刪除" jp="刪除" keyword=",指定なし,"/>
<a zh_cn="刪除" zh_tw="刪除" jp="刪除" keyword=",一般,"/>
<a zh_cn="刪除" zh_tw="刪除" jp="刪除" keyword=",R指定,"/>
<a zh_cn="刪除" zh_tw="刪除" jp="刪除" keyword=",18禁,"/>
<a zh_cn="同人游戏原创" zh_tw="同人遊戲原創" jp="同人ゲームオリジナル" keyword=",同人ゲームオリジナル,"/>
<a zh_cn="同人游戏模仿" zh_tw="同人遊戲模仿" jp="同人ゲームパロディ" keyword=",同人ゲームパロディ,"/>
<a zh_cn="同人软原" zh_tw="同人軟原" jp="同人ソフトオリジナル" keyword=",同人ソフトオリジナル,"/>
<a zh_cn="同人软模仿" zh_tw="同人軟模仿" jp="同人ソフトパロディ" keyword=",同人ソフトパロディ,"/>
<a zh_cn="同人志原创" zh_tw="同人誌原創" jp="同人誌オリジナル" keyword=",同人誌オリジナル,"/>
<a zh_cn="同人志恶搞" zh_tw="同人誌惡搞" jp="同人誌パロディ" keyword=",同人誌パロディ,"/>
<a zh_cn="二维动画" zh_tw="二維動畫" jp="2Dアニメーション" keyword=",2Dアニメーション,"/>
<a zh_cn="CG集" zh_tw="CG集" jp="CG集" keyword=",CG集,"/>
<a zh_cn="插图" zh_tw="插圖" jp="イラスト" keyword=",イラスト,"/>
<a zh_cn="游戏" zh_tw="遊戲" jp="ゲーム" keyword=",ゲーム,"/>
<a zh_cn="角色扮演软件" zh_tw="角色扮演軟件" jp="コスプレソフト" keyword=",コスプレソフト,"/>
<a zh_cn="角色扮演硬" zh_tw="角色扮演硬" jp="コスプレハード" keyword=",コスプレハード,"/>
<a zh_cn="一般的角色扮演" zh_tw="一般的角色扮演" jp="コスプレ一般" keyword=",コスプレ一般,"/>
<a zh_cn="智能手机应用程序" zh_tw="智能手機應用程序" jp="スマホアプリ" keyword=",スマホアプリ,"/>
<a zh_cn="兼容智能手机" zh_tw="兼容智能手機" jp="スマホ対応" keyword=",スマホ対応,"/>
<a zh_cn="有小说" zh_tw="有小說" jp="ノベル付き" keyword=",ノベル付き,"/>
<a zh_cn="漫画" zh_tw="漫畫" jp="マンガ" keyword=",マンガ,"/>
<a zh_cn="音楽作品" zh_tw="音楽作品" jp="音楽作品" keyword=",音楽作品,"/>
<a zh_cn="音声作品" zh_tw="音声作品" jp="音声作品" keyword=",音声作品,"/>
<a zh_cn="带音频" zh_tw="帶音頻" jp="音声付き" keyword=",音声付き,"/>
<a zh_cn="有歌" zh_tw="有歌" jp="歌付き" keyword=",歌付き,"/>
<a zh_cn="写真集" zh_tw="写真集" jp="写真集" keyword=",写真集,"/>
<a zh_cn="同人一般" zh_tw="同人一般" jp="同人一般" keyword=",同人一般,"/>
<a zh_cn="同人成人" zh_tw="同人成人" jp="同人成人" keyword=",同人成人,"/>
<a zh_cn="同人動画" zh_tw="同人動画" jp="同人動画" keyword=",同人動画,"/>
<a zh_cn="配饰" zh_tw="配飾" jp="アクセサリ" keyword=",アクセサリ,"/>
<a zh_cn="冒险" zh_tw="冒險" jp="アドベンチャー" keyword=",アドベンチャー,"/>
<a zh_cn="模拟" zh_tw="模擬" jp="シミュレーション" keyword=",シミュレーション,"/>
<a zh_cn="射击" zh_tw="射擊" jp="シューティング" keyword=",シューティング,"/>
<a zh_cn="运动的" zh_tw="運動的" jp="スポーツ" keyword=",スポーツ,"/>
<a zh_cn="工具" zh_tw="工具" jp="ツール" keyword=",ツール,"/>
<a zh_cn="桌子" zh_tw="桌子" jp="テーブル" keyword=",テーブル,"/>
<a zh_cn="数码漫画" zh_tw="數碼漫畫" jp="デジタルコミック" keyword=",デジタルコミック,"/>
<a zh_cn="数字小说" zh_tw="數字小說" jp="デジタルノベル" keyword=",デジタルノベル,"/>
<a zh_cn="益智测验" zh_tw="益智測驗" jp="パズル・クイズ" keyword=",パズル・クイズ,"/>
<a zh_cn="角色扮演" zh_tw="角色扮演" jp="ロールプレイング" keyword=",ロールプレイング,"/>
<a zh_cn="BGM" zh_tw="BGM" jp="BGM" keyword=",BGM,"/>
<a zh_cn="SF" zh_tw="SF" jp="SF" keyword=",SF,"/>
<a zh_cn="公平的" zh_tw="公平的" jp="あまあま" keyword=",あまあま,"/>
<a zh_cn="山雀" zh_tw="山雀" jp="おっぱい" keyword=",おっぱい,"/>
<a zh_cn="女士" zh_tw="女士" jp="お嬢様" keyword=",お嬢様,"/>
<a zh_cn="公主" zh_tw="公主" jp="お姫様" keyword=",お姫様,"/>
<a zh_cn="黑加仑" zh_tw="黑加侖" jp="黒ギャル" keyword=",黒ギャル,"/>
<a zh_cn="舞者" zh_tw="舞者" jp="ケモミミ" keyword=",ケモミミ,"/>
<a zh_cn="下着" zh_tw="下着" jp="下着" keyword=",下着,"/>
<a zh_cn="西装/Y 恤" zh_tw="西裝/Y 卹" jp="スーツ・Yシャツ" keyword=",スーツ・Yシャツ,"/>
<a zh_cn="浴室" zh_tw="浴室" jp="バスルーム" keyword=",バスルーム,"/>
<a zh_cn="双成" zh_tw="雙成" jp="ふたなり" keyword=",ふたなり,"/>
<a zh_cn="颜射" zh_tw="顏射" jp="ぶっかけ" keyword=",ぶっかけ,"/>
<a zh_cn="普尼" zh_tw="普尼" jp="ぷに" keyword=",ぷに,"/>
<a zh_cn="暖心的" zh_tw="暖心的" jp="ほのぼの" keyword=",ほのぼの,"/>
<a zh_cn="八井" zh_tw="八井" jp="やおい" keyword=",やおい,"/>
<a zh_cn="重播" zh_tw="重播" jp="やり込み" keyword=",やり込み,"/>
<a zh_cn="橡胶" zh_tw="橡膠" jp="ラバー" keyword=",ラバー,"/>
<a zh_cn="动画片" zh_tw="動畫片" jp="アニメーション" keyword=",アニメーション,"/>
<a zh_cn="动漫小说" zh_tw="動漫小說" jp="アニメーションノベル" keyword=",アニメーションノベル,"/>
<a zh_cn="安排" zh_tw="安排" jp="アレンジ" keyword=",アレンジ,"/>
<a zh_cn="插画小说" zh_tw="插畫小說" jp="イラスト+ノベル" keyword=",イラスト+ノベル,"/>
<a zh_cn="插图" zh_tw="插圖" jp="イラスト集" keyword=",イラスト集,"/>
<a zh_cn="不道德" zh_tw="不道德" jp="インモラル" keyword=",インモラル,"/>
<a zh_cn="综合" zh_tw="綜合" jp="オムニバス" keyword=",オムニバス,"/>
<a zh_cn="原来的" zh_tw="原來的" jp="オリジナル" keyword=",オリジナル,"/>
<a zh_cn="经典的" zh_tw="經典的" jp="クラシック" keyword=",クラシック,"/>
<a zh_cn="游戏音乐" zh_tw="遊戲音樂" jp="ゲーム音楽" keyword=",ゲーム音楽,"/>
<a zh_cn="同性恋" zh_tw="同性戀" jp="ゲイ" keyword=",ゲイ,"/>
<a zh_cn="喜剧" zh_tw="喜劇" jp="コメディ" keyword=",コメディ,"/>
<a zh_cn="哥特洛丽塔" zh_tw="哥特洛麗塔" jp="ゴスロリ" keyword=",ゴスロリ,"/>
<a zh_cn="半机械人" zh_tw="半機械人" jp="サイボーグ" keyword=",サイボーグ,"/>
<a zh_cn="悬念" zh_tw="懸念" jp="サスペンス" keyword=",サスペンス,"/>
<a zh_cn="姐姐" zh_tw="姐姐" jp="シスター" keyword=",シスター,"/>
<a zh_cn="翔太" zh_tw="翔太" jp="ショタ" keyword=",ショタ,"/>
<a zh_cn="散布" zh_tw="散佈" jp="スカトロ" keyword=",スカトロ,"/>
<a zh_cn="学校泳衣" zh_tw="學校泳衣" jp="スク水" keyword=",スク水,"/>
<a zh_cn="空姐 / CA" zh_tw="空姐 / CA" jp="スチュワーデス/CA" keyword=",スチュワーデス/CA,"/>
<a zh_cn="丝袜" zh_tw="絲襪" jp="ストッキング" keyword=",ストッキング,"/>
<a zh_cn="口角" zh_tw="口角" jp="スパッツ" keyword=",スパッツ,"/>
<a zh_cn="飞溅" zh_tw="飛濺" jp="スプラッター" keyword=",スプラッター,"/>
<a zh_cn="暗系" zh_tw="暗系" jp="ダーク系" keyword=",ダーク系,"/>
<a zh_cn="中国" zh_tw="中國" jp="チャイナ" keyword=",チャイナ,"/>
<a zh_cn="双尾" zh_tw="雙尾" jp="ツインテール" keyword=",ツインテール,"/>
<a zh_cn="鹤贝" zh_tw="鶴貝" jp="ツルペタ" keyword=",ツルペタ,"/>
<a zh_cn="傲娇" zh_tw="傲嬌" jp="ツンデレ" keyword=",ツンデレ,"/>
<a zh_cn="青少年漫画" zh_tw="青少年漫畫" jp="ティーンズコミック" keyword=",ティーンズコミック,"/>
<a zh_cn="及膝襪" zh_tw="及膝袜" jp="ニーソックス" keyword=",ニーソックス,"/>
<a zh_cn="乳头插入" zh_tw="乳頭插入" jp="ニプルファック" keyword=",ニプルファック,"/>
<a zh_cn="猫耳朵" zh_tw="貓耳朵" jp="ネコミミ" keyword=",ネコミミ,"/>
<a zh_cn="小说原创小说" zh_tw="小說原創小說" jp="ノベル・オリジナル小説" keyword=",ノベル・オリジナル小説,"/>
<a zh_cn="战斗" zh_tw="戰鬥" jp="バトル" keyword=",バトル,"/>
<a zh_cn="短裙" zh_tw="短裙" jp="パンチラ" keyword=",パンチラ,"/>
<a zh_cn="视觉小说" zh_tw="視覺小說" jp="ビジュアルノベル" keyword=",ビジュアルノベル,"/>
<a zh_cn="幻想" zh_tw="幻想" jp="ファンタジー" keyword=",ファンタジー,"/>
<a zh_cn="拳交" zh_tw="拳交" jp="フィストファック" keyword=",フィストファック,"/>
<a zh_cn="灯笼裤" zh_tw="燈籠褲" jp="ブルマ" keyword=",ブルマ,"/>
<a zh_cn="赫塔尔" zh_tw="赫塔爾" jp="ヘタレ" keyword=",ヘタレ,"/>
<a zh_cn="恐怖" zh_tw="恐怖" jp="ホラー" keyword=",ホラー,"/>
<a zh_cn="束缚" zh_tw="束縛" jp="ボンテージ" keyword=",ボンテージ,"/>
<a zh_cn="流行音乐" zh_tw="流行音樂" jp="ポップス" keyword=",ポップス,"/>
<a zh_cn="马尾辫" zh_tw="馬尾辮" jp="ポニーテール" keyword=",ポニーテール,"/>
<a zh_cn="受虐狂" zh_tw="受虐狂" jp="マゾ" keyword=",マゾ,"/>
<a zh_cn="神秘" zh_tw="神秘" jp="ミステリー" keyword=",ミステリー,"/>
<a zh_cn="最低限度" zh_tw="最低限度" jp="ミニマム" keyword=",ミニマム,"/>
<a zh_cn="军队" zh_tw="軍隊" jp="ミリタリー" keyword=",ミリタリー,"/>
<a zh_cn="木口" zh_tw="木口" jp="ムチムチ" keyword=",ムチムチ,"/>
<a zh_cn="动作漫画" zh_tw="動作漫畫" jp="モーションコミック" keyword=",モーションコミック,"/>
<a zh_cn="扬德雷" zh_tw="揚德雷" jp="ヤンデレ" keyword=",ヤンデレ,"/>
<a zh_cn="真实系统" zh_tw="真實係統" jp="リアル系" keyword=",リアル系,"/>
<a zh_cn="机器人" zh_tw="機器人" jp="ロボット" keyword=",ロボット,"/>
<a zh_cn="邪恶堕落" zh_tw="邪惡墮落" jp="悪堕ち" keyword=",悪堕ち,"/>
<a zh_cn="恶魔少女" zh_tw="惡魔少女" jp="悪魔ッ娘" keyword=",悪魔ッ娘,"/>
<a zh_cn="異種姦" zh_tw="異種姦" jp="異種姦" keyword=",異種姦,"/>
<a zh_cn="淫語朗読" zh_tw="淫語朗読" jp="淫語朗読" keyword=",淫語朗読,"/>
<a zh_cn="拡張" zh_tw="拡張" jp="拡張" keyword=",拡張,"/>
<a zh_cn="官能小説" zh_tw="官能小説" jp="官能小説" keyword=",官能小説,"/>
<a zh_cn="骑士" zh_tw="騎士" jp="騎士" keyword=",騎士,"/>
<a zh_cn="鬼畜" zh_tw="鬼畜" jp="鬼畜" keyword=",鬼畜,"/>
<a zh_cn="擬人化" zh_tw="擬人化" jp="擬人化" keyword=",擬人化,"/>
<a zh_cn="反向跳跃" zh_tw="反向跳躍" jp="逆レ○プ" keyword=",逆レ○プ,"/>
<a zh_cn="看涨和胜利" zh_tw="看漲和勝利" jp="強気・勝気" keyword=",強気・勝気,"/>
<a zh_cn="警官" zh_tw="警官" jp="警官" keyword=",警官,"/>
<a zh_cn="后輩" zh_tw="後輩" jp="後輩" keyword=",後輩,"/>
<a zh_cn="口内射精" zh_tw="口内射精" jp="口内射精" keyword=",口内射精,"/>
<a zh_cn="工場" zh_tw="工場" jp="工場" keyword=",工場,"/>
<a zh_cn="搾乳" zh_tw="搾乳" jp="搾乳" keyword=",搾乳,"/>
<a zh_cn="三国志" zh_tw="三国志" jp="三国志" keyword=",三国志,"/>
<a zh_cn="产卵" zh_tw="産卵" jp="産卵" keyword=",産卵,"/>
<a zh_cn="残酷" zh_tw="残酷" jp="残酷" keyword=",残酷,"/>
<a zh_cn="四格卡通" zh_tw="四格卡通" jp="四コマ漫画" keyword=",四コマ漫画,"/>
<a zh_cn="时间的事" zh_tw="時間的事" jp="時代モノ" keyword=",時代モノ,"/>
<a zh_cn="実母" zh_tw="実母" jp="実母" keyword=",実母,"/>
<a zh_cn="実娘" zh_tw="実娘" jp="実娘" keyword=",実娘,"/>
<a zh_cn="首輪" zh_tw="首輪" jp="首輪" keyword=",首輪,"/>
<a zh_cn="汁" zh_tw="汁" jp="汁" keyword=",汁,"/>
<a zh_cn="出产" zh_tw="出産" jp="出産" keyword=",出産,"/>
<a zh_cn="纯爱" zh_tw="純愛" jp="純愛" keyword=",純愛,"/>
<a zh_cn="処女喪失" zh_tw="処女喪失" jp="処女喪失" keyword=",処女喪失,"/>
<a zh_cn="NTR" zh_tw="NTR" jp="寝取り・寝取られ" keyword=",寝取り・寝取られ,"/>
<a zh_cn="人外" zh_tw="人外" jp="人外" keyword=",人外,"/>
<a zh_cn="先輩" zh_tw="先輩" jp="先輩" keyword=",先輩,"/>
<a zh_cn="戦国時代" zh_tw="戦国時代" jp="戦国時代" keyword=",戦国時代,"/>
<a zh_cn="战士" zh_tw="戦士" jp="戦士" keyword=",戦士,"/>
<a zh_cn="洗脑" zh_tw="洗脳" jp="洗脳" keyword=",洗脳,"/>
<a zh_cn="待受画像" zh_tw="待受画像" jp="待受画像" keyword=",待受画像,"/>
<a zh_cn="女装人妖" zh_tw="女裝人妖" jp="男の娘・女装" keyword=",男の娘・女装,"/>
<a zh_cn="装扮" zh_tw="裝扮" jp="着せ替え" keyword=",着せ替え,"/>
<a zh_cn="衣服蚀刻" zh_tw="衣服蝕刻" jp="着衣エッチ" keyword=",着衣エッチ,"/>
<a zh_cn="中世" zh_tw="中世" jp="中世" keyword=",中世,"/>
<a zh_cn="天使" zh_tw="天使" jp="天使" keyword=",天使,"/>
<a zh_cn="伝奇" zh_tw="伝奇" jp="伝奇" keyword=",伝奇,"/>
<a zh_cn="东方" zh_tw="東方" jp="東方" keyword=",東方,"/>
<a zh_cn="动物" zh_tw="動物" jp="動物" keyword=",動物,"/>
<a zh_cn="同级生" zh_tw="同級生" jp="同級生" keyword=",同級生,"/>
<a zh_cn="同人CG集" zh_tw="同人CG集" jp="同人CG集" keyword=",同人CG集,"/>
<a zh_cn="同人志漫画/漫画" zh_tw="同人誌漫畫/漫畫" jp="同人漫画・コミック" keyword=",同人漫画・コミック,"/>
<a zh_cn="同人漫画/漫画+同人CG合集" zh_tw="同人漫畫/漫畫+同人CG合集" jp="同人漫画・コミック同人CG集" keyword=",同人漫画・コミック同人CG集,"/>
<a zh_cn="肉欲" zh_tw="肉欲" jp="肉欲" keyword=",肉欲,"/>
<a zh_cn="妊婦" zh_tw="妊婦" jp="妊婦" keyword=",妊婦,"/>
<a zh_cn="年下" zh_tw="年下" jp="年下" keyword=",年下,"/>
<a zh_cn="年上" zh_tw="年上" jp="年上" keyword=",年上,"/>
<a zh_cn="废墟" zh_tw="廃墟" jp="廃墟" keyword=",廃墟,"/>
<a zh_cn="伯母・叔母" zh_tw="伯母・叔母" jp="伯母・叔母" keyword=",伯母・叔母,"/>
<a zh_cn="小胸/小胸" zh_tw="小胸/小胸" jp="微乳・貧乳" keyword=",微乳・貧乳,"/>
<a zh_cn="百合" zh_tw="百合" jp="百合" keyword=",百合,"/>
<a zh_cn="百人一首" zh_tw="百人一首" jp="百人一首" keyword=",百人一首,"/>
<a zh_cn="浮気" zh_tw="浮気" jp="浮気" keyword=",浮気,"/>
<a zh_cn="多人玩" zh_tw="多人玩" jp="複数プレイ" keyword=",複数プレイ,"/>
<a zh_cn="保健医" zh_tw="保健医" jp="保健医" keyword=",保健医,"/>
<a zh_cn="保母" zh_tw="保母" jp="保母" keyword=",保母,"/>
<a zh_cn="包茎" zh_tw="包茎" jp="包茎" keyword=",包茎,"/>
<a zh_cn="女巫" zh_tw="女巫" jp="魔女ッ娘" keyword=",魔女ッ娘,"/>
<a zh_cn="户外玩耍" zh_tw="戶外玩耍" jp="野外プレイ" keyword=",野外プレイ,"/>
<a zh_cn="药物" zh_tw="薬物" jp="薬物" keyword=",薬物,"/>
<a zh_cn="幼馴染" zh_tw="幼馴染" jp="幼馴染" keyword=",幼馴染,"/>
<a zh_cn="历史" zh_tw="歴史" jp="歴史" keyword=",歴史,"/>
<a zh_cn="和奸" zh_tw="和姦" jp="和姦" keyword=",和姦,"/>
<a zh_cn="和风" zh_tw="和風" jp="和風" keyword=",和風,"/>
<a zh_cn="和服・着物" zh_tw="和服・著物" jp="和服・着物" keyword=",和服・着物,"/>
<a zh_cn="巫女" zh_tw="巫女" jp="巫女" keyword=",巫女,"/>
<!-- ======================== -->
<!-- Studios -->
<!-- ======================== -->
<a zh_cn="kira☆kira" zh_tw="kira☆kira" jp="kira☆kira" keyword=",kira☆kira,"/>
<a zh_cn="S1 NO.1 STYLE" zh_tw="S1 NO.1 STYLE" jp="S1 NO.1 STYLE" keyword=",S1 Style,エスワン,エスワン ナンバーワンスタイル,エスワンナンバーワンスタイル,S1 NO.1 STYLE,S1NO.1STYLE,"/>
<a zh_cn="一本道" zh_tw="一本道" jp="一本道" keyword=",一本道,"/>
<a zh_cn="加勒比" zh_tw="加勒比" jp="加勒比" keyword=",加勒比,カリビアンコム,"/>
<a zh_cn="东京热" zh_tw="東京熱" jp="TOKYO-HOT" keyword=",东京热,東京熱,東熱,TOKYO-HOT,"/>
<a zh_cn="SOD" zh_tw="SOD" jp="SOD" keyword=",SOD,SODクリエイト,サディスティックヴィレッジ,"/>
<a zh_cn="PRESTIGE" zh_tw="PRESTIGE" jp="PRESTIGE" keyword=",PRESTIGE,プレステージ,"/>
<a zh_cn="MOODYZ" zh_tw="MOODYZ" jp="MOODYZ" keyword=",MOODYZ,ムーディーズ,"/>
<a zh_cn="ROCKET" zh_tw="ROCKET" jp="ROCKET" keyword=",ROCKET,"/>
<a zh_cn="S级素人" zh_tw="S級素人" jp="S級素人" keyword=",S級素人,アイデアポケット,"/>
<a zh_cn="HEYZO" zh_tw="HEYZO" jp="HEYZO" keyword=",HEYZO,"/>
<a zh_cn="玛丹娜" zh_tw="瑪丹娜" jp="Madonna" keyword=",玛丹娜,瑪丹娜,マドンナ,Madonna,"/>
<a zh_cn="MAXING" zh_tw="MAXING" jp="MAXING" keyword=",MAXING,マキシング,"/>
<a zh_cn="JAPANKET" zh_tw="ALICE JAPAN" jp="ALICE JAPAN" keyword=",ALICE JAPAN,アリスJAPAN,"/>
<a zh_cn="E-BODY" zh_tw="E-BODY" jp="E-BODY" keyword=",E-BODY,"/>
<a zh_cn="Natural High" zh_tw="Natural High" jp="Natural High" keyword=",Natural High,ナチュラルハイ,"/>
<a zh_cn="美" zh_tw="美" jp="美" keyword=",美,"/>
<a zh_cn="K.M.P" zh_tw="K.M.P" jp="K.M.P" keyword=",K.M.P,ケイ・エム・プロデュース,"/>
<a zh_cn="Hunter" zh_tw="Hunter" jp="Hunter" keyword=",Hunter,"/>
<a zh_cn="OPPAI" zh_tw="OPPAI" jp="OPPAI" keyword=",OPPAI,"/>
<a zh_cn="熘池五郎" zh_tw="溜池五郎" jp="溜池ゴロー" keyword=",熘池五郎,溜池五郎,溜池ゴロー,"/>
<a zh_cn="kawaii" zh_tw="kawaii" jp="kawaii" keyword=",kawaii,"/>
<a zh_cn="PREMIUM" zh_tw="PREMIUM" jp="PREMIUM" keyword=",PREMIUM,プレミアム,"/>
<a zh_cn="ヤル男" zh_tw="ヤル男" jp="ヤル男" keyword=",ヤル男,"/>
<a zh_cn="ラグジュTV" zh_tw="ラグジュTV" jp="ラグジュTV" keyword=",ラグジュTV,"/>
<a zh_cn="シロウトTV" zh_tw="シロウトTV" jp="シロウトTV" keyword=",シロウトTV,"/>
<a zh_cn="本中" zh_tw="本中" jp="本中" keyword=",本中,"/>
<a zh_cn="WANZ" zh_tw="WANZ" jp="WANZ" keyword=",WANZ,ワンズファクトリー,"/>
<a zh_cn="BeFree" zh_tw="BeFree" jp="BeFree" keyword=",BeFree,"/>
<a zh_cn="MAX-A" zh_tw="MAX-A" jp="MAX-A" keyword=",MAX-A,マックスエー,"/>
<!-- 2021-11-8 Update -->
<a zh_cn="Energy" zh_tw="Energy" jp="アイエナジー" keyword=",アイエナジー,"/>
<a zh_cn="Idea Pocket" zh_tw="Idea Pocket" jp="アイデアポケット" keyword=",アイデアポケット,"/>
<a zh_cn="AKNR" zh_tw="AKNR" jp="アキノリ" keyword=",アキノリ,"/>
<a zh_cn="Attackers" zh_tw="Attackers" jp="アタッカーズ" keyword=",アタッカーズ,"/>
<a zh_cn="Alice Japan" zh_tw="Alice Japan" jp="アリスJAPAN" keyword=",アリスJAPAN,"/>
<a zh_cn="Aurora Project Annex" zh_tw="Aurora Project Annex" jp="オーロラプロジェクト・アネックス" keyword=",オーロラプロジェクト・アネックス,"/>
<a zh_cn="Crystal 映像" zh_tw="Crystal 映像" jp="クリスタル映像" keyword=",クリスタル映像,"/>
<a zh_cn="Glory Quest" zh_tw="Glory Quest" jp="グローリークエスト" keyword=",グローリークエスト,"/>
<a zh_cn="DAS" zh_tw="DAS" jp="ダスッ!" keyword=",ダスッ!,"/>
<a zh_cn="DEEPs" zh_tw="DEEPs" jp="ディープス" keyword=",ディープス,"/>
<a zh_cn="Dogma" zh_tw="Dogma" jp="ドグマ" keyword=",ドグマ,"/>
<a zh_cn="宇宙企画" zh_tw="宇宙企画" jp="メディアステーション" keyword=",メディアステーション,"/>
<a zh_cn="WANZ FACTORY" zh_tw="WANZ FACTORY" jp="ワンズファクトリー" keyword=",ワンズファクトリー,"/>
<a zh_cn="VR PRODUCE" zh_tw="VR PRODUCE" jp="VRプロダクツ" keyword=",VRプロダクツ,VRPRODUCE,"/>
<a zh_cn="Real Works" zh_tw="Real Works" jp="レアルワークス" keyword=",レアルワークス,"/>
<a zh_cn="MAX-A" zh_tw="MAX-A" jp="マックスエー" keyword=",マックスエー,"/>
<a zh_cn="PETERS MAX" zh_tw="PETERS MAX" jp="ピーターズMAX" keyword=",ピーターズMAX,"/>
<a zh_cn="NATURAL HIGH" zh_tw="NATURAL HIGH" jp="ナチュラルハイ" keyword=",ナチュラルハイ,"/>
<a zh_cn="MAXING" zh_tw="MAXING" jp="マキシング" keyword=",マキシング,"/>
<a zh_cn="Ms Video Group" zh_tw="Ms Video Group" jp="エムズビデオグループ" keyword=",エムズビデオグループ,"/>
<a zh_cn="Minimum" zh_tw="Minimum" jp="ミニマム" keyword=",ミニマム,"/>
<a zh_cn="WAAP Entertainment" zh_tw="WAAP Entertainment" jp="ワープエンタテインメント" keyword=",ワープエンタテインメント,"/>
<a zh_cn="pacopacomama" zh_tw="pacopacomama" jp="パコパコママ" keyword=",pacopacomama,パコパコママ,"/>
</info>

View File

@@ -19,7 +19,7 @@ CLI 版本
#### An all-in-one solution for scraping and organizing local movies

# Documentation
* [Official tutorial WIKI](https://github.com/yoshiko2/AV_Data_Capture/wiki)
* [VergilGao's Docker deployment](https://github.com/VergilGao/docker-avdc)

# Download
@@ -29,7 +29,7 @@ CLI 版本
By viewing or downloading this project's source code or binary programs, you accept the following terms:
* This software is for technical and academic exchange only
* **Do not promote this project on social platforms inside the GFW**
* The author wrote this software to learn Python and improve programming skills
* This software provides no leads for downloading any films
* Before using this software, users must understand and comply with local laws and regulations; do not use this software if doing so would violate them

View File

@@ -1,8 +1,12 @@
import json
import re
from multiprocessing.pool import ThreadPool
import ADC_function
import config
from ADC_function import translate
from lxml import etree
from pathlib import Path
# =========website========
from . import airav
@@ -32,11 +36,14 @@ def get_data_state(data: dict) -> bool: # 元数据获取失败检测
return True
def get_data_from_json(file_number, oCC):  # return metadata from JSON
"""
iterate through all services and fetch the data
"""
actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_actor.xml'))
info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'avdc' / 'mapping_info.xml'))
func_mapping = {
"airav": airav.main,
"avsox": avsox.main,
@@ -53,6 +60,7 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
"fc2club": fc2club.main
}
conf = config.getInstance()
# default fetch order list, from the beginning to the end
sources = conf.sources().split(',')
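    # Illustrative example (not from the commit): a config value of 'airav,avsox,fc2club' yields
    # ['airav', 'avsox', 'fc2club']; each crawler is tried in that order until one returns valid metadata.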
if not len(conf.sources()) > 80:
@@ -114,6 +122,7 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
json_data = json.loads(pool.apply_async(func_mapping[source], (file_number,)).get())
# if any service returns a valid result, break
if get_data_state(json_data):
print(f"[+]Find movie [{file_number}] metadata on website '{source}'")
break
pool.close()
pool.terminate()
@@ -125,6 +134,7 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
json_data = json.loads(func_mapping[source](file_number))
# if any service returns a valid result, break
if get_data_state(json_data):
print(f"[+]Find movie [{file_number}] metadata on website '{source}'")
break
except:
break
@@ -134,6 +144,14 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
print('[-]Movie Number not found!')
return None
    # Strict number check: guards against broken sources that return the same mismatched
    # record (e.g. "本橋実来 ADZ335") no matter which number is submitted.
    # The numbering rules currently follow javdb.com (Domain Creation Date: 2013-06-19T18:34:27Z);
    # airav.wiki (Domain Creation Date: 2019-08-28T07:18:42.0Z) is another naming scheme worth tracking.
    # If javdb.com's scheme ever lets different studios collide on the same number, switch schemes
    # and update the number parsing and scraping code accordingly.
    if str(json_data.get('number')).upper() != file_number.upper():
        print('[-]Movie number has changed! [{}]->[{}]'.format(file_number, str(json_data.get('number'))))
        return None

    # ================================================ end of website rules ================================================
title = json_data.get('title')
@@ -167,6 +185,10 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
imagecut = json_data.get('imagecut')
    tag = str(json_data.get('tag')).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # convert the string to a list @
    while 'XXXX' in tag:
        tag.remove('XXXX')
    while 'xxx' in tag:
        tag.remove('xxx')
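    # Illustrative example (not from the commit): a scraped tag string "['XXXX', '中文字幕', 'xxx']"
    # becomes ['中文字幕'] once the placeholder entries 'XXXX' and 'xxx' are stripped above.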
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
if title == '' or number == '':
@@ -192,45 +214,9 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
cover_small = tmpArr[0].strip('\"').strip('\'')
    # ==================== handle special characters END ================== #\/:*?"<>|
    # Return the processed json_data
json_data['title'] = title
json_data['original_title'] = title
json_data['actor'] = actor
json_data['release'] = release
json_data['cover_small'] = cover_small
@@ -250,16 +236,14 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
    for translate_value in translate_values:
        if json_data[translate_value] == "":
            continue
        t = ""
        if translate_value == "title":
            title_dict = json.load(
                open(str(Path.home() / '.local' / 'share' / 'avdc' / 'c_number.json'), 'r', encoding="utf-8"))
            try:
                json_data[translate_value] = title_dict[number]
                continue
            except:
                pass
if conf.get_transalte_engine() == "azure":
t = translate(
json_data[translate_value],
@@ -272,6 +256,67 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
        if len(t):
            json_data[translate_value] = special_characters_replacement(t)
    if oCC:
        cc_vars = conf.cc_convert_vars().split(",")
        ccm = conf.cc_convert_mode()

        def convert_list(mapping_data, language, vars):
            total = []
            for i in vars:
                if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=i)) != 0:
                    i = mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=i)[0]
                total.append(i)
            return total

        def convert(mapping_data, language, vars):
            if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=vars)) != 0:
                return mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=vars)[0]
            else:
                return vars
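        # Illustrative example (not from the commit): convert(info_mapping_data, 'zh_cn', '中文字幕')
        # matches <a ... zh_cn="删除" keyword=",字幕,中文字幕,中文,"/> in mapping_info.xml and returns
        # '删除', which delete_all_elements_in_list() below then strips from the tag list.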
        for cc in cc_vars:
            if json_data[cc] == "" or len(json_data[cc]) == 0:
                continue
            if cc == "actor":
                try:
                    if ccm == 1:
                        json_data['actor_list'] = convert_list(actor_mapping_data, "zh_cn", json_data['actor_list'])
                        json_data['actor'] = convert(actor_mapping_data, "zh_cn", json_data['actor'])
                    elif ccm == 2:
                        json_data['actor_list'] = convert_list(actor_mapping_data, "zh_tw", json_data['actor_list'])
                        json_data['actor'] = convert(actor_mapping_data, "zh_tw", json_data['actor'])
                    elif ccm == 3:
                        json_data['actor_list'] = convert_list(actor_mapping_data, "jp", json_data['actor_list'])
                        json_data['actor'] = convert(actor_mapping_data, "jp", json_data['actor'])
                except:
                    json_data['actor_list'] = [oCC.convert(aa) for aa in json_data['actor_list']]
                    json_data['actor'] = oCC.convert(json_data['actor'])
            elif cc == "tag":
                try:
                    if ccm == 1:
                        json_data[cc] = convert_list(info_mapping_data, "zh_cn", json_data[cc])
                        json_data[cc] = ADC_function.delete_all_elements_in_list("删除", json_data[cc])
                    elif ccm == 2:
                        json_data[cc] = convert_list(info_mapping_data, "zh_tw", json_data[cc])
                        json_data[cc] = ADC_function.delete_all_elements_in_list("删除", json_data[cc])
                    elif ccm == 3:
                        json_data[cc] = convert_list(info_mapping_data, "jp", json_data[cc])
                        json_data[cc] = ADC_function.delete_list_all_elements("删除", json_data[cc])
                except:
                    json_data[cc] = [oCC.convert(t) for t in json_data[cc]]
            else:
                try:
                    if ccm == 1:
                        json_data[cc] = convert(info_mapping_data, "zh_cn", json_data[cc])
                        json_data[cc] = ADC_function.delete_list_all_elements("删除", json_data[cc])
                    elif ccm == 2:
                        json_data[cc] = convert(info_mapping_data, "zh_tw", json_data[cc])
                        json_data[cc] = ADC_function.delete_list_all_elements("删除", json_data[cc])
                    elif ccm == 3:
                        json_data[cc] = convert(info_mapping_data, "jp", json_data[cc])
                        json_data[cc] = ADC_function.delete_list_all_elements("删除", json_data[cc])
                except IndexError:
                    json_data[cc] = oCC.convert(json_data[cc])
                except:
                    pass
naming_rule=""
for i in conf.naming_rule().split("+"):
if i not in json_data:
@@ -293,4 +338,9 @@ def special_characters_replacement(text) -> str:
replace('"', ''). # U+FF02 FULLWIDTH QUOTATION MARK @ Basic Multilingual Plane
replace('<', ''). # U+1438 CANADIAN SYLLABICS PA @ Basic Multilingual Plane
replace('>', ''). # U+1433 CANADIAN SYLLABICS PO @ Basic Multilingual Plane
replace('|', 'ǀ')) # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane
replace('|', 'ǀ'). # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane
replace('&lsquo;', ''). # U+02018 LEFT SINGLE QUOTATION MARK
replace('&rsquo;', ''). # U+02019 RIGHT SINGLE QUOTATION MARK
replace('&hellip;','').
replace('&amp;', '')
)
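# For illustration: with the table above, filesystem-reserved characters are swapped
# for visually similar Unicode ones so a title stays usable as a file name, e.g.
# (using only the mappings shown above):
#   special_characters_replacement('A "B" <C> | D &amp; E')
#   -> 'A ＂B＂ ᐸCᐳ ǀ D & E'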

View File

@@ -6,6 +6,7 @@ from lxml import etree#need install
from bs4 import BeautifulSoup#need install
import json
from ADC_function import *
from WebCrawler import javbus
'''
API
@@ -17,95 +18,94 @@ API
host = 'https://www.airav.wiki'
# airav has no actor photos, so javbus images are used directly
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'star-name'})
d={}
for i in a:
l=i.a['href']
t=i.get_text()
html = etree.fromstring(get_html(l), etree.HTMLParser())
p=urljoin("https://www.javbus.com",
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
p2={t:p}
d.update(p2)
return d
def getActorPhoto(javbus_json):
result = javbus_json.get('actor_photo')
if isinstance(result, dict) and len(result):
return result
return ''
def getTitle(htmlcode): # get the title
doc = pq(htmlcode)
# h5:first-child selects the first h5 tag; this selector took quite a while to track down
title = str(doc('div.d-flex.videoDataBlock h5.d-none.d-md-block:nth-child(2)').text()).replace(' ', '-')
try:
title2 = re.sub('n\d+-','',title)
html = etree.fromstring(htmlcode, etree.HTMLParser())
title = str(html.xpath('/html/head/title/text()')[0])
result = str(re.findall('](.*?)- AIRAV-WIKI', title)[0]).strip()
return result
return title2
def getStudio(htmlcode, javbus_json): # get the studio (revised)
# prefer the javbus data when it exists
result = javbus_json.get('studio')
if isinstance(result, str) and len(result):
return result
html = etree.fromstring(htmlcode,etree.HTMLParser())
return str(html.xpath('//a[contains(@href,"?video_factory=")]/text()')).strip(" ['']")
def getYear(htmlcode, javbus_json): # get the year
result = javbus_json.get('year')
if isinstance(result, str) and len(result):
return result
release = getRelease(htmlcode, javbus_json)
if len(release) != len('2000-01-01'):
return ''
return release[:4]
def getCover(htmlcode, javbus_json): # get the cover image
result = javbus_json.get('cover')
if isinstance(result, str) and len(result):
return result
html = etree.fromstring(htmlcode, etree.HTMLParser())
return html.xpath('//img[contains(@src,"/storage/big_pic/")]/@src')[0]
def getRelease(htmlcode, javbus_json): # get the release date
result = javbus_json.get('release')
if isinstance(result, str) and len(result):
return result
html = etree.fromstring(htmlcode, etree.HTMLParser())
try:
result = re.search(r'\d{4}-\d{2}-\d{2}', str(html.xpath('//li[contains(text(),"發片日期")]/text()'))).group()
except:
return title
def getStudio(htmlcode): # get the studio (revised)
html = etree.fromstring(htmlcode,etree.HTMLParser())
# if the record has no director, the studio sits in the 4th field
if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
# if the record has a director, the studio sits in the 5th field
elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
else:
result = ''
return ''
return result
def getYear(htmlcode): # get the year
html = etree.fromstring(htmlcode,etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
def getRuntime(javbus_json): # get the runtime
result = javbus_json.get('runtime')
if isinstance(result, str) and len(result):
return result
def getCover(htmlcode): # get the cover URL
doc = pq(htmlcode)
image = doc('a.bigImage')
return urljoin("https://www.javbus.com", image.attr('href'))
def getRelease(htmlcode): # get the release date
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
return result
def getRuntime(htmlcode): # get the runtime in minutes (revised)
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')).strip(" ['']分鐘")
return result
def getActor(htmlcode): # get actresses
return ''
# airav's actress database mostly stores kanji names while javbus mostly uses kana, so airav takes priority
def getActor(htmlcode, javbus_json): # get actresses
b=[]
soup=BeautifulSoup(htmlcode,'lxml')
a=soup.find_all(attrs={'class':'star-name'})
for i in a:
b.append(i.get_text())
html = etree.fromstring(htmlcode, etree.HTMLParser())
a = html.xpath('//ul[@class="videoAvstarList"]/li/a[starts-with(@href,"/idol/")]/text()')
for v in a:
v = v.strip()
if len(v):
b.append(v)
if len(b):
return b
def getNum(htmlcode): # get the ID number
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
result = javbus_json.get('actor')
if isinstance(result, list) and len(result):
return result
def getDirector(htmlcode): # get the director (revised)
html = etree.fromstring(htmlcode, etree.HTMLParser())
if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
else:
result = '' # the record may contain no director data
return []
def getNum(htmlcode, javbus_json): # get the ID number
result = javbus_json.get('number')
if isinstance(result, str) and len(result):
return result
def getOutline(htmlcode): # get the outline
html = etree.fromstring(htmlcode, etree.HTMLParser())
title = str(html.xpath('/html/head/title/text()')[0])
result = str(re.findall('^\[(.*?)]', title)[0])
return result
def getDirector(javbus_json): # get the director (revised)
result = javbus_json.get('director')
if isinstance(result, str) and len(result):
return result
return ''
def getOutline(htmlcode): # get the outline
html = etree.fromstring(htmlcode, etree.HTMLParser())
try:
result = html.xpath("string(//div[@class='d-flex videoDataBlock']/div[@class='synopsis']/p)").replace('\n','')
result = html.xpath("string(//div[@class='d-flex videoDataBlock']/div[@class='synopsis']/p)").replace('\n','').strip()
return result
except:
return ''
def getSerise(htmlcode): # get the series (revised)
html = etree.fromstring(htmlcode, etree.HTMLParser())
# if the record has no director, the series sits in the 6th field
if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
# if the record has a director, the series sits in the 7th field
elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
else:
result = ''
def getSerise(javbus_json): # get the series (revised)
result = javbus_json.get('series')
if isinstance(result, str) and len(result):
return result
return ''
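# The accessors above all share one pattern: prefer the field from the javbus JSON,
# and only scrape the airav page when javbus lacks it. A generic sketch of that
# pattern; `prefer_javbus` and `scrape_studio_from_airav` are hypothetical names:
def prefer_javbus(javbus_json, key, fallback, *args):
    value = javbus_json.get(key)
    if isinstance(value, str) and len(value):
        return value  # javbus already has the field
    return fallback(*args)  # otherwise scrape it from the airav page
# e.g. prefer_javbus(javbus_json, 'studio', scrape_studio_from_airav, htmlcode)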
def getTag(htmlcode): # get tags
tag = []
soup = BeautifulSoup(htmlcode, 'lxml')
@@ -169,52 +169,50 @@ def main(number):
try:
try:
htmlcode = get_html('https://cn.airav.wiki/video/' + number)
javbus_htmlcode = get_html('https://www.javbus.com/ja/' + number)
javbus_json = json.loads(javbus.main(number))
except:
print(number)
dic = {
# the title can come from airav
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
# studio from javbus
'studio': getStudio(javbus_htmlcode),
# year also from javbus
'year': str(re.search('\d{4}', getYear(javbus_htmlcode)).group()),
'title': getTitle(htmlcode),
# studio: try javbus first, fall back to this site
'studio': getStudio(htmlcode, javbus_json),
# year: try javbus first, fall back to this site
'year': getYear(htmlcode, javbus_json),
# outline from airav
'outline': getOutline(htmlcode),
# from javbus
'runtime': getRuntime(javbus_htmlcode),
'runtime': getRuntime(javbus_json),
# director from javbus
'director': getDirector(javbus_htmlcode),
# actresses from airav
'actor': getActor(javbus_htmlcode),
# release date from javbus
'release': getRelease(javbus_htmlcode),
'director': getDirector(javbus_json),
# actresses: try airav first
'actor': getActor(htmlcode, javbus_json),
# release date: try javbus first
'release': getRelease(htmlcode, javbus_json),
# ID number from javbus
'number': getNum(javbus_htmlcode),
'number': getNum(htmlcode, javbus_json),
# cover URL from javbus
'cover': getCover(javbus_htmlcode),
'cover': getCover(htmlcode, javbus_json),
# fetch stills
'extrafanart': getExtrafanart(htmlcode),
'imagecut': 1,
# from airav
'tag': getTag(htmlcode),
# from javbus
'label': getSerise(javbus_htmlcode),
'label': getSerise(javbus_json),
# airav does not provide actor photos
'actor_photo': getActorPhoto(javbus_htmlcode),
# 'actor_photo': getActorPhoto(javbus_json),
'website': 'https://www.airav.wiki/video/' + number,
'source': 'airav.py',
# from javbus
'series': getSerise(javbus_htmlcode),
'series': getSerise(javbus_json)
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8')
return js
except Exception as e:
if config.Config().debug():
if config.getInstance().debug():
print(e)
data = {
"title": "",
@@ -226,6 +224,6 @@ def main(number):
if __name__ == '__main__':
#print(main('ADN-188'))
print(main('ADN-188'))
print(main('CJOD-278'))
print(main('ADV-R0624')) # javbus page returns 404; airav has the data
print(main('ADN-188')) # single actress
print(main('CJOD-278')) # multiple actresses; javbus uses kana for names, airav uses kanji

View File

@@ -3,50 +3,42 @@ sys.path.append('..')
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
from WebCrawler.storyline import getStoryline
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'avatar-box'})
def getActorPhoto(html):
a = html.xpath('//a[@class="avatar-box"]')
d = {}
for i in a:
l = i.img['src']
t = i.span.get_text()
l = i.find('.//img').attrib['src']
t = i.find('span').text
p2 = {t: l}
d.update(p2)
return d
def getTitle(a):
def getTitle(html):
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
return result.replace('/', '')
except:
return ''
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
soup = BeautifulSoup(a, 'lxml')
a = soup.find_all(attrs={'class': 'avatar-box'})
def getActor(html):
a = html.xpath('//a[@class="avatar-box"]')
d = []
for i in a:
d.append(i.span.get_text())
d.append(i.find('span').text)
return d
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getStudio(html):
result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
return result1
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getRuntime(html):
result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
return result1
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getLabel(html):
result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
return result1
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getNum(html):
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
return result1
def getYear(release):
@@ -55,28 +47,20 @@ def getYear(release):
return result
except:
return release
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getRelease(html):
result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
return result1
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
def getCover(html):
result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
return result
def getCover_small(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
def getCover_small(html):
result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
return result
def getTag(a): # get tags
soup = BeautifulSoup(a, 'lxml')
a = soup.find_all(attrs={'class': 'genre'})
d = []
for i in a:
d.append(i.get_text())
return d
def getSeries(htmlcode):
def getTag(html):
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
return [i.strip() for i in x[2:]] if len(x) > 2 else []
def getSeries(html):
try:
html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
return result1
except:
@@ -86,42 +70,45 @@ def main(number):
html = get_html('https://tellme.pw/avsox')
site = etree.HTML(html).xpath('//div[@class="container"]/div/a/@href')[0]
a = get_html(site + '/cn/search/' + number)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html(site + '/cn/search/' + number.replace('-', '_'))
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html(site + '/cn/search/' + number.replace('_', ''))
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
web = get_html("https:" + result1)
soup = BeautifulSoup(web, 'lxml')
info = str(soup.find(attrs={'class': 'row movie'}))
detail = get_html("https:" + result1)
lx = etree.fromstring(detail, etree.HTMLParser())
try:
new_number = getNum(lx)
if new_number.upper() != number.upper():
raise ValueError('number not found')
title = getTitle(lx).replace(new_number, '').strip()
dic = {
'actor': getActor(web),
'title': getTitle(web).strip(getNum(web)),
'studio': getStudio(info),
'outline': '', #
'runtime': getRuntime(info),
'actor': getActor(lx),
'title': title,
'studio': getStudio(lx),
'outline': getStoryline(number, title),
'runtime': getRuntime(lx),
'director': '', #
'release': getRelease(info),
'number': getNum(info),
'cover': getCover(web),
'cover_small': getCover_small(a),
'release': getRelease(lx),
'number': new_number,
'cover': getCover(lx),
'cover_small': getCover_small(html),
'imagecut': 3,
'tag': getTag(web),
'label': getLabel(info),
'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(web),
'tag': getTag(lx),
'label': getLabel(lx),
'year': getYear(getRelease(lx)),
'actor_photo': getActorPhoto(lx),
'website': "https:" + result1,
'source': 'avsox.py',
'series': getSeries(info),
'series': getSeries(lx),
}
except Exception as e:
if config.Config().debug():
if config.getInstance().debug():
print(e)
dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -129,3 +116,4 @@ def main(number):
if __name__ == "__main__":
print(main('012717_472'))
print(main('1')) # the fake result raises 'number not found'

View File

@@ -1,60 +1,68 @@
import sys
sys.path.append('../')
import json
from bs4 import BeautifulSoup
from lxml import html
import re
from ADC_function import *
from WebCrawler.storyline import getStoryline
G_SITE = 'https://www.caribbeancom.com'
def main(number: str) -> json:
try:
caribbytes, browser = get_html_by_browser(
'https://www.caribbeancom.com/moviepages/'+number+'/index.html',
return_type="browser")
if not caribbytes or not caribbytes.ok:
url = f'{G_SITE}/moviepages/{number}/index.html'
result, session = get_html_session(url, return_type='session')
htmlcode = result.content.decode('euc-jp')
if not result or not htmlcode or '<title>404' in htmlcode or 'class="movie-info section"' not in htmlcode:
raise ValueError("page not found")
lx = html.fromstring(str(browser.page))
lx = html.fromstring(htmlcode)
title = get_title(lx)
if not browser.page.select_one("#moviepages > div > div:nth-child(1) > div.movie-info.section"):
raise ValueError("page info not found")
except Exception as e:
if config.Config().debug():
print(e)
dic = {"title": ""}
return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
dic = {
'title': get_title(lx),
'title': title,
'studio': '加勒比',
'year': get_year(lx),
'outline': get_outline(lx),
'outline': get_outline(lx, number, title),
'runtime': get_runtime(lx),
'director': '',
'actor': get_actor(lx),
'release': get_release(lx),
'number': number,
'cover': 'https://www.caribbeancom.com/moviepages/' + number + '/images/l_l.jpg',
'cover': f'{G_SITE}/moviepages/{number}/images/l_l.jpg',
'tag': get_tag(lx),
'extrafanart': get_extrafanart(lx),
'label': get_series(lx),
'imagecut': 1,
# 'actor_photo': get_actor_photo(browser),
'website': 'https://www.caribbeancom.com/moviepages/' + number + '/index.html',
# 'actor_photo': get_actor_photo(lx, session),
'website': f'{G_SITE}/moviepages/{number}/index.html',
'source': 'carib.py',
'series': get_series(lx),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
return js
except Exception as e:
if config.getInstance().debug():
print(e)
dic = {"title": ""}
return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
def get_title(lx: html.HtmlElement) -> str:
return str(lx.xpath("//div[@class='movie-info section']/div[@class='heading']/h1[@itemprop='name']/text()")[0]).strip()
def get_year(lx: html.HtmlElement) -> str:
return lx.xpath("//li[2]/span[@class='spec-content']/text()")[0][:4]
def get_outline(lx: html.HtmlElement) -> str:
return lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip()
def get_outline(lx: html.HtmlElement, number: str, title: str) -> str:
o = lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip()
g = getStoryline(number, title)
if len(g):
return g
return o
def get_release(lx: html.HtmlElement) -> str:
return lx.xpath("//li[2]/span[@class='spec-content']/text()")[0].replace('/','-')
@@ -68,11 +76,8 @@ def get_actor(lx: html.HtmlElement):
return r
def get_tag(lx: html.HtmlElement) -> str:
r = []
genres = lx.xpath("//span[@class='spec-content']/a[@itemprop='genre']/text()")
for g in genres:
r.append(translateTag_to_sc(str(g)))
return r
return genres
def get_extrafanart(lx: html.HtmlElement) -> str:
r = []
@@ -94,31 +99,31 @@ def get_series(lx: html.HtmlElement) -> str:
def get_runtime(lx: html.HtmlElement) -> str:
return str(lx.xpath("//span[@class='spec-content']/span[@itemprop='duration']/text()")[0]).strip()
def get_actor_photo(browser):
htmla = browser.page.select('#moviepages > div > div:nth-child(1) > div.movie-info.section > ul > li:nth-child(1) > span.spec-content > a')
def get_actor_photo(lx, session):
htmla = lx.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']")
names = lx.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']/span[@itemprop='name']/text()")
t = {}
for a in htmla:
if a.text.strip() == '':
for name, a in zip(names, htmla):
if name.strip() == '':
continue
p = {a.text.strip(): a['href']}
p = {name.strip(): a.attrib['href']}
t.update(p)
o = {}
for k, v in t.items():
if '/search_act/' not in v:
continue
r = browser.open_relative(v)
r = session.get(urljoin(G_SITE, v))
if not r.ok:
continue
html = browser.page.prettify()
html = r.text
pos = html.find('.full-bg')
if pos<0:
continue
css = html[pos:pos+100]
p0 = css.find('background: url(')
p1 = css.find('.jpg)')
if p0<0 or p1<0:
cssBGjpgs = re.findall(r'background: url\((.+\.jpg)', css, re.I)
if not cssBGjpgs or not len(cssBGjpgs[0]):
continue
p = {k: urljoin(browser.url, css[p0+len('background: url('):p1+len('.jpg')])}
p = {k: urljoin(r.url, cssBGjpgs[0])}
o.update(p)
return o
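# A condensed sketch of the portrait extraction above, assuming the actress page
# embeds the photo as an inline CSS rule such as:
#   .full-bg { background: url(/images/profile.jpg) no-repeat; }
import re

def extract_bg_jpg(page_html: str) -> str:
    pos = page_html.find('.full-bg')
    if pos < 0:
        return ''
    css = page_html[pos:pos + 100]  # the url() follows right after the selector
    jpgs = re.findall(r'background: url\((.+\.jpg)', css, re.I)
    return jpgs[0] if jpgs else ''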

View File

@@ -153,7 +153,7 @@ def main(number):
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
except Exception as e:
if config.Config().debug():
if config.getInstance().debug():
print(e)
data = {
"title": "",

View File

@@ -123,24 +123,11 @@ def getTag(text):
result = html.xpath(
"//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
)
total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
return result
except:
result = html.xpath(
"//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
)
total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
return result

View File

@@ -48,10 +48,7 @@ def getCover_fc2com(htmlcode2): #获取厂商 #
# return result
def getTag_fc2com(lx):
result = lx.xpath("//a[@class='tag tagTag']/text()")
tag = []
for i in result:
tag.append(ADC_function.translateTag_to_sc(i))
return tag
return result
def getYear_fc2com(release):
try:
result = re.search('\d{4}',release).group()
@@ -93,6 +90,7 @@ def main(number):
actor = '素人'
lx = etree.fromstring(htmlcode2, etree.HTMLParser())
cover = str(lx.xpath("//div[@class='items_article_MainitemThumb']/span/img/@src")).strip(" ['']")
cover = ADC_function.urljoin('https://adult.contents.fc2.com', cover)
dic = {
'title': lx.xpath('/html/head/title/text()')[0],
'studio': getStudio_fc2com(htmlcode2),
@@ -116,7 +114,7 @@ def main(number):
'series': '',
}
except Exception as e:
if ADC_function.config.Config().debug():
if ADC_function.config.getInstance().debug():
print(e)
dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -124,4 +122,5 @@ def main(number):
if __name__ == '__main__':
print(main('FC2-1787685'))
print(main('FC2-2086710'))

View File

@@ -103,7 +103,7 @@ def main(number):
'series': '',
}
except Exception as e:
if ADC_function.config.Config().debug():
if ADC_function.config.getInstance().debug():
print(e)
dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')

View File

@@ -1,114 +1,78 @@
import sys
sys.path.append('../')
import re
from pyquery import PyQuery as pq#need install
from lxml import etree#need install
from bs4 import BeautifulSoup#need install
import json
from ADC_function import *
from WebCrawler import fanza
from WebCrawler import airav
from WebCrawler.storyline import getStoryline
import inspect
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'star-name'})
def getActorPhoto(html):
actors = html.xpath('//div[@class="star-name"]/a')
d={}
for i in a:
l=i.a['href']
t=i.get_text()
html = etree.fromstring(get_html(l), etree.HTMLParser())
for i in actors:
url=i.attrib['href']
t=i.attrib['title']
html = etree.fromstring(get_html(url), etree.HTMLParser())
p=urljoin("https://www.javbus.com",
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
p2={t:p}
d.update(p2)
return d
def getTitle(htmlcode): # get the title
doc = pq(htmlcode)
title=str(doc('div.container h3').text()).replace(' ','-')
try:
title2 = re.sub('n\d+-','',title)
return title2
except:
def getTitle(html): # get the title
title = str(html.xpath('/html/head/title/text()')[0])
title = str(re.findall('^.+?\s+(.*) - JavBus$', title)[0]).strip()
return title
def getStudio(htmlcode): # get the studio (revised)
html = etree.fromstring(htmlcode,etree.HTMLParser())
# if the record has no director, the studio sits in the 4th field
if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
# if the record has a director, the studio sits in the 5th field
elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
else:
result = ''
return result
def getYear(htmlcode): # get the year
html = etree.fromstring(htmlcode,etree.HTMLParser())
def getStudioJa(html):
x = html.xpath('//span[contains(text(),"メーカー:")]/../a/text()')
return str(x[0]) if len(x) else ''
def getStudio(html): # get the studio
x = html.xpath('//span[contains(text(),"製作商:")]/../a/text()')
return str(x[0]) if len(x) else ''
def getYear(html): # get the year
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']").strip()
return result[:4] if len(result)>=len('2000-01-01') else ''
def getCover(html): # get the cover URL
image = str(html.xpath('//a[@class="bigImage"]/@href')[0])
return urljoin("https://www.javbus.com", image)
def getRelease(html): # get the release date
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
return result
def getCover(htmlcode): # get the cover URL
doc = pq(htmlcode)
image = doc('a.bigImage')
return urljoin("https://www.javbus.com", image.attr('href'))
def getRelease(htmlcode): # get the release date
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
return result
def getRuntime(htmlcode): # get the runtime in minutes (revised)
html = etree.fromstring(htmlcode, etree.HTMLParser())
def getRuntime(html): # get the runtime in minutes (revised)
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')).strip(" ['']分鐘")
return result
def getActor(htmlcode): # get actresses
def getActor(html): # get actresses
b=[]
soup=BeautifulSoup(htmlcode,'lxml')
a=soup.find_all(attrs={'class':'star-name'})
for i in a:
b.append(i.get_text())
actors = html.xpath('//div[@class="star-name"]/a')
for i in actors:
b.append(i.attrib['title'])
return b
def getNum(htmlcode): # get the ID number
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
return result
def getDirector(htmlcode): # get the director (revised)
html = etree.fromstring(htmlcode, etree.HTMLParser())
if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
else:
result = '' # the record may contain no director data
return result
def getCID(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
#print(htmlcode)
def getNum(html): # get the ID number
kwdlist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
return kwdlist[0]
def getDirectorJa(html):
x = html.xpath('//span[contains(text(),"監督:")]/../a/text()')
return str(x[0]) if len(x) else ''
def getDirector(html): # get the director
x = html.xpath('//span[contains(text(),"導演:")]/../a/text()')
return str(x[0]) if len(x) else ''
def getCID(html):
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
result = re.sub('/.*?.jpg','',string)
return result
def getOutline(number): # get the outline
try:
response = json.loads(airav.main(number))
result = response['outline']
return result
except:
return ''
def getSerise(htmlcode): # get the series (revised)
html = etree.fromstring(htmlcode, etree.HTMLParser())
# if the record has no director, the series sits in the 6th field
if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
# if the record has a director, the series sits in the 7th field
elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
else:
result = ''
return result
def getTag(htmlcode): # get tags
tag = []
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'genre'})
for i in a:
if 'onmouseout' in str(i) or '多選提交' in str(i):
continue
tag.append(translateTag_to_sc(i.get_text()))
return tag
def getOutline(number, title): # get the outline; sites are queried concurrently across processes
if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
return '' # calls coming from airav.py skip the outline and return at once, avoiding duplicate scraping that would slow processing
return getStoryline(number,title)
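# A stripped-down sketch of the caller check above: inspect.stack() walks the call
# stack, so a function can tell which module initiated the call (the file name here
# is illustrative).
import inspect, os.path

def called_from(module_filename: str) -> bool:
    return any(os.path.basename(frame.filename) == module_filename
               for frame in inspect.stack())
# getOutline() returns early when called_from('airav.py') is True, because airav.py
# already fetches the storyline itself and a second lookup would duplicate the work.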
def getSeriseJa(html):
x = html.xpath('//span[contains(text(),"シリーズ:")]/../a/text()')
return str(x[0]) if len(x) else ''
def getSerise(html): # get the series
x = html.xpath('//span[contains(text(),"系列:")]/../a/text()')
return str(x[0]) if len(x) else ''
def getTag(html): # get tags
klist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
return klist[1:]
def getExtrafanart(htmlcode): # get stills
html_pather = re.compile(r'<div id=\"sample-waterfall\">[\s\S]*?</div></a>\s*?</div>')
html = html_pather.search(htmlcode)
@@ -117,32 +81,34 @@ def getExtrafanart(htmlcode): # 获取剧照
extrafanart_pather = re.compile(r'<a class=\"sample-box\" href=\"(.*?)\"')
extrafanart_imgs = extrafanart_pather.findall(html)
if extrafanart_imgs:
return extrafanart_imgs
return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs]
return ''
def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/ja/' + number)
if getTitle(htmlcode) == '':
htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_'))
if "<title>404 Page Not Found" in htmlcode:
raise Exception('404 page not found')
lx = etree.fromstring(htmlcode, etree.HTMLParser())
title = getTitle(lx)
dic = {
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
'studio': getStudio(htmlcode),
'year': getYear(htmlcode),
'outline': getOutline(number),
'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode),
'actor': getActor(htmlcode),
'release': getRelease(htmlcode),
'number': getNum(htmlcode),
'cover': getCover(htmlcode),
'tag': getTag(htmlcode),
'title': title,
'studio': getStudioJa(lx),
'year': getYear(lx),
'outline': getOutline(number, title),
'runtime': getRuntime(lx),
'director': getDirectorJa(lx),
'actor': getActor(lx),
'release': getRelease(lx),
'number': getNum(lx),
'cover': getCover(lx),
'tag': getTag(lx),
'extrafanart': getExtrafanart(htmlcode),
'label': getSerise(htmlcode),
'label': getSeriseJa(lx),
'imagecut': 0,
'actor_photo': '',
# 'actor_photo': '',
'website': 'https://www.javbus.com/ja/' + number,
'source': 'javbus.py',
'series': getSerise(htmlcode),
'series': getSeriseJa(lx),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
@@ -151,36 +117,47 @@ def main_uncensored(number):
def main(number):
try:
try:
url = "https://www." + secrets.choice([
'buscdn.fun', 'busdmm.fun', 'busfan.fun', 'busjav.fun',
'cdnbus.fun',
'dmmbus.fun', 'dmmsee.fun',
'fanbus.us',
'seedmm.fun',
]) + "/"
try:
htmlcode = get_html('https://www.fanbus.us/' + number)
htmlcode = get_html(url + number)
except:
htmlcode = get_html('https://www.javbus.com/' + number)
if "<title>404 Page Not Found" in htmlcode:
raise Exception('404 page not found')
lx = etree.fromstring(htmlcode,etree.HTMLParser())
title = getTitle(lx)
dic = {
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
'studio': getStudio(htmlcode),
'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
'outline': getOutline(number),
'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode),
'actor': getActor(htmlcode),
'release': getRelease(htmlcode),
'number': getNum(htmlcode),
'cover': getCover(htmlcode),
'title': title,
'studio': getStudio(lx),
'year': getYear(lx),
'outline': getOutline(number, title),
'runtime': getRuntime(lx),
'director': getDirector(lx),
'actor': getActor(lx),
'release': getRelease(lx),
'number': getNum(lx),
'cover': getCover(lx),
'imagecut': 1,
'tag': getTag(htmlcode),
'tag': getTag(lx),
'extrafanart': getExtrafanart(htmlcode),
'label': getSerise(htmlcode),
'actor_photo': getActorPhoto(htmlcode),
'label': getSerise(lx),
# 'actor_photo': getActorPhoto(lx),
'website': 'https://www.javbus.com/' + number,
'source': 'javbus.py',
'series': getSerise(htmlcode),
'series': getSerise(lx),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8')
return js
except:
return main_uncensored(number)
except Exception as e:
if config.Config().debug():
if config.getInstance().debug():
print(e)
data = {
"title": "",
@@ -191,5 +168,13 @@ def main(number):
return js
if __name__ == "__main__" :
config.G_conf_override['debug_mode:switch'] = True
print(main('ABP-888'))
print(main('ABP-960'))
print(main('ADV-R0624')) # 404
print(main('MMNT-010'))
print(main('ipx-292'))
print(main('CEMD-011'))
print(main('CJOD-278'))
print(main('100221_001'))
print(main('AVSW-061'))

View File

@@ -3,25 +3,21 @@ sys.path.append('../')
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
from WebCrawler import airav
# import sys
from WebCrawler.storyline import getStoryline
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
html = etree.fromstring(a, etree.HTMLParser())
def getTitle(html):
browser_title = str(html.xpath("/html/head/title/text()")[0])
return browser_title[:browser_title.find(' | JavDB')].strip()
def getActor(a):
html = etree.fromstring(a, etree.HTMLParser())
def getActor(html):
actors = html.xpath('//span[@class="value"]/a[contains(@href,"/actors/")]/text()')
genders = html.xpath('//span[@class="value"]/a[contains(@href,"/actors/")]/../strong/@class')
r = []
idx = 0
actor_gendor = config.Config().actor_gender()
actor_gendor = config.getInstance().actor_gender()
if not actor_gendor in ['female','male','both','all']:
actor_gendor = 'female'
for act in actors:
@@ -33,8 +29,8 @@ def getActor(a):
idx = idx + 1
return r
def getaphoto(url):
html_page = get_html(url)
def getaphoto(url, session):
html_page = session.get(url).text if session is not None else get_html(url)
img_prether = re.compile(r'<span class\=\"avatar\" style\=\"background\-image\: url\((.*?)\)')
img_url = img_prether.findall(html_page)
if img_url:
@@ -42,24 +38,18 @@ def getaphoto(url):
else:
return ''
def getActorPhoto(html): #//*[@id="star_qdt"]/li/a/img
actorall_prether = re.compile(r'<strong>演員\:</strong>\s*?.*?<span class=\"value\">(.*)\s*?</div>')
actorall = actorall_prether.findall(html)
if actorall:
actoralls = actorall[0]
actor_prether = re.compile(r'<a href\=\"(.*?)\">(.*?)</a>')
actor = actor_prether.findall(actoralls)
def getActorPhoto(html, javdb_site, session):
actorall = html.xpath('//strong[contains(text(),"演員:")]/../span/a[starts-with(@href,"/actors/")]')
if not actorall:
return {}
a = getActor(html)
actor_photo = {}
for i in actor:
actor_photo[i[1]] = getaphoto('https://' + javdb_site + '.com'+i[0])
for i in actorall:
if i.text in a:
actor_photo[i.text] = getaphoto(urljoin(f'https://{javdb_site}.com', i.attrib['href']), session)
return actor_photo
else:
return {}
def getStudio(a):
def getStudio(a, html):
# html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
# result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
# result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
@@ -67,23 +57,25 @@ def getStudio(a):
patherr = re.compile(r'<strong>片商\:</strong>[\s\S]*?<a href=\".*?>(.*?)</a></span>')
pianshang = patherr.findall(a)
if pianshang:
result = pianshang[0]
else:
result = ""
result = pianshang[0].strip()
if len(result):
return result
# 以卖家作为工作室
try:
result = str(html.xpath('//strong[contains(text(),"賣家:")]/../span/a/text()')).strip(" ['']")
except:
result = ''
return result
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getRuntime(html):
result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getLabel(html):
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser())
def getNum(html):
result1 = str(html.xpath('//strong[contains(text(),"番號")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
return str(result2 + result1).strip('+')
@@ -113,33 +105,19 @@ def getRelease(a):
else:
result = ''
return result
def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getTag(html):
try:
result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
return result
except:
result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
return result
def getCover_small(a, index=0):
def getCover_small(html, index=0):
# same issue mentioned below,
# javdb sometimes returns multiple results
# DO NOT just get the first one; get the one with the correct index number
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
if not 'https' in result:
@@ -170,68 +148,76 @@ def getTrailer(htmlcode): # 获取预告片
video_url = ''
return video_url
def getExtrafanart(htmlcode): # 获取剧照
html_pather = re.compile(r'<div class=\"tile\-images preview\-images\">[\s\S]*?</a>\s+?</div>\s+?</div>')
html = html_pather.search(htmlcode)
if html:
html = html.group()
extrafanart_pather = re.compile(r'<a class="tile-item" href=\"(.*?)\"')
extrafanart_imgs = extrafanart_pather.findall(html)
if extrafanart_imgs:
return extrafanart_imgs
return ''
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
def getExtrafanart(html): # 获取剧照
result = []
try:
result = html.xpath("//article[@class='message video-panel']/div[@class='message-body']/div[@class='tile-images preview-images']/a[contains(@href,'/samples/')]/@href")
except:
pass
return result
def getCover(html):
try:
result = html.xpath("//div[contains(@class, 'column-video-cover')]/a/img/@src")[0]
except: # 2020.7.17 Repair Cover Url crawl
result = html.xpath("//div[contains(@class, 'column-video-cover')]/img/@src")[0]
return result
def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getDirector(html):
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getOutline(number): #获取剧情介绍
try:
response = json.loads(airav.main(number))
result = response['outline']
return result
except:
return ''
def getSeries(a):
#/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getOutline(number, title): #获取剧情介绍 多进程并发查询
return getStoryline(number,title)
def getSeries(html):
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def main(number):
javdb_site = random.choice(["javdb9", "javdb30"])
# after a javdb update, only one numbered mirror can be logged in at a time (the newest login kicks out the old one), so the site is chosen from the first javdb*.json cookie file found
# if no .json file exists, or it has expired, a site is picked at random.
javdb_sites = ["javdb31", "javdb32"]
debug = config.getInstance().debug()
try:
# if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number).group():
# pass
# else:
# number = number.upper()
number = number.upper()
cookie_json = './' + javdb_site + '.json'
javdb_cookies = {'over18':'1', 'theme':'auto', 'locale':'zh'}
# do not load expired cookies; the javdb login page advertises a 7-day login-free session, so cookies are assumed valid for 7 days
has_json = False
for cj in javdb_sites:
javdb_site = cj
cookie_json = javdb_site + '.json'
cookies_dict, cookies_filepath = load_cookies(cookie_json)
if isinstance(cookies_dict, dict) and isinstance(cookies_filepath, str):
cdays = file_modification_days(cookies_filepath)
if cdays < 7:
javdb_cookies = cookies_dict
has_json = True
break
elif cdays != 9999:
print(
f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not be used for HTTP requests.')
try:
print(f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not be used for HTTP requests.')
if not has_json:
javdb_site = secrets.choice(javdb_sites)
if debug:
print(f'[!]javdb:select site {javdb_site}')
session = None
javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all'
query_result = get_html(javdb_url, cookies=javdb_cookies)
try:
if debug:
raise # try get_html_by_scraper() branch
res, session = get_html_session(javdb_url, cookies=javdb_cookies, return_type='session')
if not res:
raise
query_result = res.text
except:
query_result = get_html('https://javdb.com/search?q=' + number + '&f=all', cookies=javdb_cookies)
res, session = get_html_by_scraper(javdb_url, cookies=javdb_cookies, return_type='scraper')
if not res:
raise ValueError('page not found')
query_result = res.text
if session is None:
raise ValueError('page not found')
html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
# javdb sometimes returns multiple results,
# and the first element may not be the one we are looking for
@@ -250,61 +236,73 @@ f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not b
raise ValueError("number not found")
correct_url = urls[0]
try:
javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
detail_page = get_html(javdb_detail_url, cookies=javdb_cookies)
# get faster benefit from http keep-alive
javdb_detail_url = urljoin(res.url, correct_url)
detail_page = session.get(javdb_detail_url).text
except:
detail_page = get_html('https://javdb.com' + correct_url, cookies=javdb_cookies)
session = None
# etree.fromstring is expensive, so call it only once; its xpath is fast (faster than bs4 find/select) and can be used freely
lx = etree.fromstring(detail_page, etree.HTMLParser())
# no cut image by default
imagecut = 3
# If a gray placeholder image exists, replace it with the normal cover
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
cover_small = getCover_small(query_result)
cover_small = getCover_small(html)
else:
try:
cover_small = getCover_small(query_result, index=ids.index(number))
cover_small = getCover_small(html, index=ids.index(number))
except:
# if input number is "STAR438" not "STAR-438", use first search result.
cover_small = getCover_small(query_result)
cover_small = getCover_small(html)
if 'placeholder' in cover_small:
# replace with the normal cover and crop it
imagecut = 1
cover_small = getCover(detail_page)
cover_small = getCover(lx)
dp_number = getNum(detail_page)
dp_number = getNum(lx)
if dp_number.upper() != number:
raise ValueError("number not found")
title = getTitle(detail_page)
title = getTitle(lx)
if title and dp_number:
number = dp_number
# remove duplicate title
title = title.replace(number, '').strip()
dic = {
'actor': getActor(detail_page),
'actor': getActor(lx),
'title': title,
'studio': getStudio(detail_page),
'outline': getOutline(number),
'runtime': getRuntime(detail_page),
'director': getDirector(detail_page),
'studio': getStudio(detail_page, lx),
'outline': getOutline(number, title),
'runtime': getRuntime(lx),
'director': getDirector(lx),
'release': getRelease(detail_page),
'number': number,
'cover': getCover(detail_page),
'cover': getCover(lx),
'cover_small': cover_small,
'trailer': getTrailer(detail_page),
'extrafanart': getExtrafanart(detail_page),
'extrafanart': getExtrafanart(lx),
'imagecut': imagecut,
'tag': getTag(detail_page),
'label': getLabel(detail_page),
'tag': getTag(lx),
'label': getLabel(lx),
'year': getYear(detail_page), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(detail_page),
'website': 'https://javdb.com' + correct_url,
# 'actor_photo': getActorPhoto(lx, javdb_site, session),
'website': urljoin('https://javdb.com', correct_url),
'source': 'javdb.py',
'series': getSeries(detail_page),
'series': getSeries(lx),
}
if not dic['actor'] and re.match(r'FC2-[\d]+', number, re.A):
dic['actor'].append('素人')
if not dic['series']:
dic['series'] = dic['studio']
if not dic['label']:
dic['label'] = dic['studio']
except Exception as e:
if config.Config().debug():
if debug:
print(e)
dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -313,10 +311,18 @@ f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not b
# main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
if __name__ == "__main__":
config.G_conf_override['debug_mode:switch'] = True
# print(main('blacked.20.05.30'))
# print(main('AGAV-042'))
# print(main('BANK-022'))
print(main('FC2-735670'))
print(main('FC2-1174949')) # not found
# print(main('070116-197'))
# print(main('093021_539')) # no stills; studio pacopacomama
print(main('FC2-2278260'))
# print(main('FC2-735670'))
# print(main('FC2-1174949')) # not found
print(main('MVSD-439'))
print(main('EHM0001')) # not found
# print(main('EHM0001')) # not found
print(main('FC2-2314275'))
# print(main('EBOD-646'))
# print(main('LOVE-262'))
print(main('ABP-890'))

View File

@@ -65,13 +65,7 @@ def getTag(a):
result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
return result
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="EnlargeImage"]/@href')).strip(" ['']")
@@ -137,7 +131,7 @@ def main(number2):
'series': getSeries(a),
}
except Exception as e:
if config.Config().debug():
if config.getInstance().debug():
print(e)
dic = {"title": ""}

415
WebCrawler/storyline.py Normal file
View File

@@ -0,0 +1,415 @@
import sys
sys.path.append('../')
import re
import json
import builtins
from ADC_function import *
from lxml.html import fromstring
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
from difflib import SequenceMatcher
from unicodedata import category
from number_parser import is_uncensored
G_registered_storyline_site = {"airavwiki", "airav", "avno1", "xcity", "amazon", "58avgo"}
G_mode_txt = ('sequential','thread pool','process pool')
class noThread(object):
def map(self, fn, param):
return list(builtins.map(fn, param))
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
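# noThread mimics just enough of the multiprocessing.Pool API (map plus the context
# manager protocol) that the dispatch in getStoryline() below can treat sequential,
# thread-pool and process-pool execution uniformly:
#   with ThreadPool(n) if mode == 1 else Pool(n) if mode == 2 else noThread() as pool:
#       results = pool.map(worker, args)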
# fetch the storyline: query the listed sites simultaneously; earlier entries take priority
def getStoryline(number, title, sites: list=None):
start_time = time.time()
conf = config.getInstance()
if not conf.is_storyline():
return ''
debug = conf.debug() or conf.storyline_show() == 2
storyine_sites = conf.storyline_site().split(',') if sites is None else sites
if is_uncensored(number):
storyine_sites += conf.storyline_uncensored_site().split(',')
else:
storyine_sites += conf.storyline_censored_site().split(',')
r_dup = set()
sort_sites = []
for s in storyine_sites:
ns = re.sub(r'.*?:', '', s, flags=re.A)
if ns in G_registered_storyline_site and ns not in r_dup:
sort_sites.append(s)
r_dup.add(ns)
sort_sites.sort()
apply_sites = [re.sub(r'.*?:', '', s, flags=re.A) for s in sort_sites]
mp_args = ((site, number, title, debug) for site in apply_sites)
cores = min(len(apply_sites), os.cpu_count())
if cores == 0:
return ''
run_mode = conf.storyline_mode()
assert run_mode in (0,1,2)
with ThreadPool(cores) if run_mode == 1 else Pool(cores) if run_mode == 2 else noThread() as pool:
results = pool.map(getStoryline_mp, mp_args)
sel = ''
if not debug and conf.storyline_show() == 0:
for value in results:
if isinstance(value, str) and len(value):
if not is_japanese(value):
return value
if not len(sel):
sel = value
return sel
# the debug output below is written to the log; output produced inside the process pool is not, and only shows on stdout
s = f'[!]Storyline {G_mode_txt[run_mode]} mode ran {len(apply_sites)} tasks in {time.time() - start_time:.3f}s total (including startup overhead), finished at {time.strftime("%H:%M:%S")}'
sel_site = ''
for site, desc in zip(apply_sites, results):
if isinstance(desc, str) and len(desc):
if not is_japanese(desc):
sel_site, sel = site, desc
break
if not len(sel_site):
sel_site, sel = site, desc
for site, desc in zip(apply_sites, results):
sl = len(desc) if isinstance(desc, str) else 0
s += f'[selected {site} length:{sl}]' if site == sel_site else f'{site} length:{sl}' if sl else f'{site}: empty'
print(s)
return sel
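# The selection rule above, isolated for clarity: take the first translated
# (non-Japanese) result in site order, else fall back to the first non-empty one.
# is_japanese() is the helper already imported from ADC_function.
def select_result(results):
    first_nonempty = ''
    for value in results:
        if isinstance(value, str) and len(value):
            if not is_japanese(value):
                return value  # translated text wins immediately
            if not first_nonempty:
                first_nonempty = value  # remember the best fallback
    return first_nonempty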
def getStoryline_mp(args):
def _inner(site, number, title, debug):
start_time = time.time()
storyline = None
if not isinstance(site, str):
return storyline
elif site == "airavwiki":
storyline = getStoryline_airavwiki(number, debug)
elif site == "airav":
storyline = getStoryline_airav(number, debug)
elif site == "avno1":
storyline = getStoryline_avno1(number, debug)
elif site == "xcity":
storyline = getStoryline_xcity(number, debug)
elif site == "amazon":
storyline = getStoryline_amazon(title, number, debug)
elif site == "58avgo":
storyline = getStoryline_58avgo(number, debug)
if not debug:
return storyline
# print() from getStoryline_*() running in process-pool children is not written to the log; thread-pool and sequential modes are unaffected
print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
site,
time.time() - start_time,
time.strftime("%H:%M:%S"),
storyline if isinstance(storyline, str) and len(storyline) else '[空]')
)
return storyline
return _inner(*args)
def getStoryline_airav(number, debug):
try:
site = secrets.choice(('airav.cc','airav4.club'))
url = f'https://{site}/searchresults.aspx?Search={number}&Type=0'
res, session = get_html_session(url, return_type='session')
if not res:
raise ValueError(f"get_html_by_session('{url}') failed")
lx = fromstring(res.text)
urls = lx.xpath('//div[@class="resultcontent"]/ul/li/div/a[@class="ga_click"]/@href')
txts = lx.xpath('//div[@class="resultcontent"]/ul/li/div/a[@class="ga_click"]/h3[@class="one_name ga_name"]/text()')
detail_url = None
for txt, url in zip(txts, urls):
if re.search(number, txt, re.I):
detail_url = urljoin(res.url, url)
break
if detail_url is None:
raise ValueError("number not found")
res = session.get(detail_url)
if not res.ok:
raise ValueError(f"session.get('{detail_url}') failed")
lx = fromstring(res.text)
t = str(lx.xpath('/html/head/title/text()')[0]).strip()
airav_number = str(re.findall(r'^\s*\[(.*?)]', t)[0])
if not re.search(number, airav_number, re.I):
raise ValueError(f"page number ->[{airav_number}] not match")
desc = str(lx.xpath('//span[@id="ContentPlaceHolder1_Label2"]/text()')[0]).strip()
return desc
except Exception as e:
if debug:
print(f"[-]MP getStoryline_airav Error: {e},number [{number}].")
pass
return None
def getStoryline_airavwiki(number, debug):
try:
kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
url = f'https://cn.airav.wiki/?search={kwd}'
result, browser = get_html_by_browser(url, return_type='browser', use_scraper=True)
if not result.ok:
raise ValueError(f"get_html_by_browser('{url}','{number}') failed")
s = browser.page.select('div.row > div > div.videoList.row > div > a.d-block')
link = None
for a in s:
title = a.img['title']
list_number = re.findall('^(.*?)\s+', title, re.A)[0].strip()
if kwd == number: # IDs like PRED-164 and RED-164 must be kept distinct
if re.match(f'^{number}$', list_number, re.I):
link = a
break
elif re.search(number, list_number, re.I):
link = a
break
if link is None:
raise ValueError("number not found")
result = browser.follow_link(link)
if not result.ok or not re.search(number, browser.url, re.I):
raise ValueError("detail page not found")
title = browser.page.select('head > title')[0].text.strip()
detail_number = str(re.findall('\[(.*?)]', title)[0])
if not re.search(number, detail_number, re.I):
raise ValueError("detail page number not match, got ->[{detail_number}]")
desc = browser.page.select_one('div.d-flex.videoDataBlock > div.synopsis > p').text.strip()
return desc
except Exception as e:
if debug:
print(f"[-]MP def getStoryline_airavwiki Error: {e}, number [{number}].")
pass
return ''
def getStoryline_58avgo(number, debug):
try:
url = 'http://58avgo.com/cn/index.aspx' + secrets.choice([
'', '?status=3', '?status=4', '?status=7', '?status=9', '?status=10', '?status=11', '?status=12',
'?status=1&Sort=Playon', '?status=1&Sort=dateupload', 'status=1&Sort=dateproduce'
]) # pick one at random so a single IP's requests don't look too uniform in the site's httpd logs
kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
result, browser = get_html_by_form(url,
fields = {'ctl00$TextBox_SearchKeyWord' : kwd},
return_type = 'browser')
if not result:
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
if f'searchresults.aspx?Search={kwd}' not in browser.url:
raise ValueError("number not found")
s = browser.page.select('div.resultcontent > ul > li.listItem > div.one-info-panel.one > a.ga_click')
link = None
for a in s:
title = a.h3.text.strip()
list_number = title[title.rfind(' ')+1:].strip()
if re.search(number, list_number, re.I):
link = a
break
if link is None:
raise ValueError("number not found")
result = browser.follow_link(link)
if not result.ok or 'playon.aspx' not in browser.url:
raise ValueError("detail page not found")
title = browser.page.select_one('head > title').text.strip()
detail_number = str(re.findall('\[(.*?)]', title)[0])
if not re.search(number, detail_number, re.I):
raise ValueError("detail page number not match, got ->[{detail_number}]")
return browser.page.select_one('#ContentPlaceHolder1_Label2').text.strip()
except Exception as e:
if debug:
print(f"[-]MP getOutline_58avgo Error: {e}, number [{number}].")
pass
return ''
def getStoryline_avno1(number, debug): # get the storyline from avno1.cc
try:
site = secrets.choice(['1768av.club','2nine.net','av999.tv','avno1.cc',
'hotav.biz','iqq2.xyz','javhq.tv',
'www.hdsex.cc','www.porn18.cc','www.xxx18.cc',])
url = f'http://{site}/cn/search.php?kw_type=key&kw={number}'
lx = fromstring(get_html_by_scraper(url))
descs = lx.xpath('//div[@class="type_movie"]/div/ul/li/div/@data-description')
titles = lx.xpath('//div[@class="type_movie"]/div/ul/li/div/a/h3/text()')
if not descs or not len(descs):
raise ValueError(f"number not found")
partial_num = bool(re.match(r'\d{6}[\-_]\d{2,3}', number))
for title, desc in zip(titles, descs):
page_number = title[title.rfind(' ')+1:].strip()
if not partial_num:
if re.match(f'^{number}$', page_number, re.I):
return desc.strip()
elif re.search(number, page_number, re.I):
return desc.strip()
raise ValueError(f"page number ->[{page_number}] not match")
except Exception as e:
if debug:
print(f"[-]MP getOutline_avno1 Error: {e}, number [{number}].")
pass
return ''
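# Illustration of the number-matching rule used above: date-style IDs such as
# '123456-789' are matched as substrings (listing pages often show a longer form),
# while dashed retail IDs must match exactly so that e.g. RED-164 is not confused
# with PRED-164. A minimal sketch:
import re

def number_matches(query: str, page_number: str) -> bool:
    partial = bool(re.match(r'\d{6}[\-_]\d{2,3}', query))
    if partial:
        return bool(re.search(query, page_number, re.I))
    return bool(re.match(f'^{query}$', page_number, re.I))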
def getStoryline_avno1OLD(number, debug): # get the storyline from avno1.cc
try:
url = 'http://www.avno1.cc/cn/' + secrets.choice(['usercenter.php?item=' +
secrets.choice(['pay_support', 'qa', 'contact', 'guide-vpn']),
'?top=1&cat=hd', '?top=1', '?cat=hd', 'porn', '?cat=jp', '?cat=us', 'recommend_category.php'
]) # pick one at random so a single IP's requests don't look too uniform in the site's httpd logs
result, browser = get_html_by_form(url,
form_select='div.wrapper > div.header > div.search > form',
fields = {'kw' : number},
return_type = 'browser')
if not result:
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
s = browser.page.select('div.type_movie > div > ul > li > div')
for div in s:
title = div.a.h3.text.strip()
page_number = title[title.rfind(' ')+1:].strip()
if re.search(number, page_number, re.I):
return div['data-description'].strip()
raise ValueError(f"page number ->[{page_number}] not match")
except Exception as e:
if debug:
print(f"[-]MP getOutline_avno1 Error: {e}, number [{number}].")
pass
return ''
def getStoryline_xcity(number, debug): # get the storyline from xcity
try:
xcity_number = number.replace('-','')
query_result, browser = get_html_by_form(
'https://xcity.jp/' + secrets.choice(['about/','sitemap/','policy/','law/','help/','main/']),
fields = {'q' : xcity_number.lower()},
return_type = 'browser')
if not query_result or not query_result.ok:
raise ValueError("page not found")
result = browser.follow_link(browser.links(r'avod/detail')[0])
if not result.ok:
raise ValueError("detail page not found")
return browser.page.select_one('h2.title-detail + p.lead').text.strip()
except Exception as e:
if debug:
print(f"[-]MP getOutline_xcity Error: {e}, number [{number}].")
pass
return ''
def getStoryline_amazon(q_title, number, debug):
    if not isinstance(q_title, str) or not len(q_title):
        return None
    try:
        cookie, cookies_filepath = load_cookies('amazon.json')
        url = "https://www.amazon.co.jp/s?k=" + q_title
        res, session = get_html_session(url, cookies=cookie, return_type='session')
        if not res:
            raise ValueError("get_html_session() failed")
        lx = fromstring(res.text)
        lks = lx.xpath('//a[contains(@href, "/black-curtain/save-eligibility/black-curtain")]/@href')
        if len(lks) and lks[0].startswith('/'):
            res = session.get(urljoin(res.url, lks[0]))
            cookie = None
        lx = fromstring(res.text)
        titles = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/text()")
        urls = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/../@href")
        if not len(urls) or len(urls) != len(titles):
            raise ValueError("titles not found")
        idx = amazon_select_one(titles, q_title, number, debug)
        if not isinstance(idx, int) or idx < 0:
            raise ValueError("title and number not found")
        furl = urljoin(res.url, urls[idx])
        res = session.get(furl)
        if not res.ok:
            raise ValueError("session.get() for the product page failed.")
        lx = fromstring(res.text)
        lks = lx.xpath('//a[contains(@href, "/black-curtain/save-eligibility/black-curtain")]/@href')
        if len(lks) and lks[0].startswith('/'):
            res = session.get(urljoin(res.url, lks[0]))
            cookie = None
        lx = fromstring(res.text)
        div = lx.xpath('//*[@id="productDescription"]')[0]
        ama_t = ' '.join([e.text.strip() for e in div if not re.search('Comment|h3', str(e.tag), re.I) and isinstance(e.text, str)])
        ama_t = re.sub(r'審査番号:\d+', '', ama_t).strip()
        if cookie is None:
            # drop the stale cookies file (whether user-created or auto-created) to avoid persistent failures
            if cookies_filepath and Path(cookies_filepath).is_file():
                Path(cookies_filepath).unlink(missing_ok=True)
            # the auto-created cookies file sits at the end of the search path list (lowest priority); users with
            # an amazon.co.jp account can export browser cookies to an earlier search path instead
            ama_save = Path.home() / ".local/share/avdc/amazon.json"
            ama_save.parent.mkdir(parents=True, exist_ok=True)
            ama_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
        return ama_t
    except Exception as e:
        if debug:
            print(f'[-]MP getStoryline_amazon Error: {e}, number [{number}], title: {q_title}')
    return None
# pick from the shelf the DVD/Blu-ray listing whose title is most similar to the query title
def amazon_select_one(a_titles, q_title, number, debug):
    sel = -1
    ratio = 0
    que_t = ''.join(c for c in q_title if not re.match(r'(P|S|Z).*', category(c), re.A))
    for tloc, title in enumerate(a_titles):
        if re.search(number, title, re.I):  # listings rarely carry the ID, but a few do; a matching ID passes immediately
            return tloc
        if not re.search('DVD|Blu-ray', title, re.I):
            continue
        ama_t = re.sub('DVD|Blu-ray', '', title, flags=re.I)
        ama_t = ''.join(c for c in ama_t if not re.match(r'(P|S|Z).*', category(c), re.A))
        findlen = 0
        lastpos = -1
        for cloc, char in reversed(tuple(enumerate(ama_t))):
            pos = que_t.rfind(char)
            if lastpos >= 0:
                pos_near = que_t[:lastpos].rfind(char)
                if pos_near < 0:
                    findlen = 0
                    lastpos = -1
                    ama_t = ama_t[:cloc+1]
                else:
                    pos = pos_near
            if pos < 0:
                if category(char) == 'Nd':
                    return -1
                # Chinese numerals 一 through 十
                if re.match(r'[\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341]', char, re.U):
                    return -1
                ama_t = ama_t[:cloc]
                findlen = 0
                lastpos = -1
                continue
            if findlen > 0 and len(que_t) > 1 and lastpos == pos+1:
                findlen += 1
                lastpos = pos
                if findlen >= 4:
                    break
                continue
            findlen = 1
            lastpos = pos
        if findlen == 0:
            return -1
        r = SequenceMatcher(None, ama_t, que_t).ratio()
        if r > ratio:
            sel = tloc
            ratio = r
            save_t_ = ama_t
            if ratio > 0.999:
                break
    if ratio < 0.5:
        return -1
    if not debug:
        # only results with similarity >= 0.9 are currently trusted
        return sel if ratio >= 0.9 else -1
    # in debug mode, log data for measuring matching accuracy
    if ratio < 0.9:
        # rejected results with similarity in [0.5, 0.9) are logged separately
        (Path.home() / '.avlogs/ratio0.5.txt').open('a', encoding='utf-8').write(
            f'   [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
        return -1
    # log for accepted results
    (Path.home() / '.avlogs/ratio.txt').open('a', encoding='utf-8').write(
        f'   [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
    return sel
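
# Minimal standalone sketch (illustrative only, not used by this module) of the
# normalize-then-compare idea above, assuming the module's existing
# unicodedata.category and difflib.SequenceMatcher imports: drop every character
# whose Unicode category starts with P (punctuation), S (symbol) or Z
# (separator), then take the similarity ratio of what remains.
def _sketch_normalized_ratio(a: str, b: str) -> float:
    strip = lambda s: ''.join(c for c in s if category(c)[0] not in 'PSZ')
    return SequenceMatcher(None, strip(a), strip(b)).ratio()
# e.g. _sketch_normalized_ratio('ABC-123 [DVD]', 'ABC-123') compares 'ABC123DVD' with 'ABC123'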

View File

@@ -3,16 +3,12 @@ sys.path.append('../')
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
from WebCrawler.storyline import getStoryline
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
html = etree.fromstring(a, etree.HTMLParser())
def getTitle(html):
result = html.xpath('//*[@id="program_detail_title"]/text()')[0]
return result
@@ -43,8 +39,7 @@ def getActorPhoto(browser):
return o
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getStudio(html):
try:
result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']")
except:
@@ -52,20 +47,14 @@ def getStudio(a):
return result.strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getRuntime(html):
try:
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')[0]
except:
return ''
try:
return re.findall('\d+',result1)[0]
x = html.xpath('//span[@class="koumoku" and text()="収録時間"]/../text()')[1].strip()
return x
except:
return ''
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getLabel(html):
try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')[0]
return result
@@ -73,8 +62,7 @@ def getLabel(a):
return ''
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser())
def getNum(html):
try:
result = html.xpath('//*[@id="hinban"]/text()')[0]
return result
@@ -90,8 +78,7 @@ def getYear(getRelease):
return getRelease
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getRelease(html):
try:
result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[2]/text()')[1])
except:
@@ -102,31 +89,25 @@ def getRelease(a):
return ''
def getTag(a):
result2=[]
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[6]/a/text()')
for i in result1:
i=i.replace(u'\n','')
i=i.replace(u'\t','')
if len(i):
result2.append(i)
return result2
def getTag(html):
result = html.xpath('//span[@class="koumoku" and text()="ジャンル"]/../a[starts-with(@href,"/avod/genre/")]/text()')
total = []
for i in result:
total.append(i.replace("\n","").replace("\t",""))
return total
def getCover_small(a, index=0):
def getCover_small(html, index=0):
# same issue mentioned below,
    # javdb sometimes returns multiple results;
    # DO NOT just get the first one, get the one with the correct index number
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
if not 'https' in result:
result = 'https:' + result
return result
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
def getCover(html):
try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')[0]
return 'https:' + result
@@ -134,8 +115,7 @@ def getCover(htmlcode):
return ''
def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getDirector(html):
try:
result = html.xpath('//*[@id="program_detail_director"]/text()')[0].replace(u'\n','').replace(u'\t', '')
return result
@@ -143,19 +123,21 @@ def getDirector(a):
return ''
def getOutline(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
def getOutline(html, number, title):
storyline_site = config.getInstance().storyline_site().split(',')
    a = set(storyline_site) & {'airav', 'avno1'}  # only sites that return Chinese synopsis text
if len(a):
site = [n for n in storyline_site if n in a]
g = getStoryline(number, title, site)
if len(g):
return g
try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')[0]
x = html.xpath('//h2[@class="title-detail"]/../p[@class="lead"]/text()')[0]
return x.replace(getNum(html), '')
except:
return ''
try:
return re.sub('\\\\\w*\d+','',result)
except:
return result
def getSeries(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
def getSeries(html):
try:
try:
result = html.xpath("//span[contains(text(),'シリーズ')]/../a/span/text()")[0]
@@ -181,11 +163,10 @@ def getExtrafanart(htmlcode): # fetch movie stills
return s
return ''
def main(number):
try:
def open_by_browser(number):
xcity_number = number.replace('-','')
query_result, browser = get_html_by_form(
'https://xcity.jp/about/',
'https://xcity.jp/' + secrets.choice(['about/','sitemap/','policy/','law/','help/','main/']),
fields = {'q' : xcity_number.lower()},
return_type = 'browser')
if not query_result or not query_result.ok:
@@ -193,38 +174,44 @@ def main(number):
result = browser.follow_link(browser.links('avod\/detail')[0])
if not result.ok:
raise ValueError("xcity.py: detail page not found")
detail_page = str(browser.page)
return str(browser.page), browser
def main(number):
try:
detail_page, browser = open_by_browser(number)
url = browser.url
newnum = getNum(detail_page).upper()
lx = etree.fromstring(detail_page, etree.HTMLParser())
newnum = getNum(lx).upper()
number_up = number.upper()
if newnum != number_up:
if newnum == xcity_number.upper():
if newnum == number.replace('-','').upper():
newnum = number_up
else:
raise ValueError("xcity.py: number not found")
title = getTitle(lx)
dic = {
'actor': getActor(browser),
'title': getTitle(detail_page),
'studio': getStudio(detail_page),
'outline': getOutline(detail_page),
'runtime': getRuntime(detail_page),
'director': getDirector(detail_page),
'release': getRelease(detail_page),
'title': title,
'studio': getStudio(lx),
'outline': getOutline(lx, number, title),
'runtime': getRuntime(lx),
'director': getDirector(lx),
'release': getRelease(lx),
'number': newnum,
'cover': getCover(detail_page),
'cover': getCover(lx),
'cover_small': '',
'extrafanart': getExtrafanart(detail_page),
'imagecut': 1,
'tag': getTag(detail_page),
'label': getLabel(detail_page),
'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
'tag': getTag(lx),
'label': getLabel(lx),
'year': getYear(getRelease(lx)), # str(re.search('\d{4}',getRelease(a)).group()),
# 'actor_photo': getActorPhoto(browser),
'website': url,
'source': 'xcity.py',
'series': getSeries(detail_page),
'series': getSeries(lx),
}
except Exception as e:
if config.Config().debug():
if config.getInstance().debug():
print(e)
dic = {"title": ""}

View File

@@ -1,12 +1,15 @@
# For the detailed configuration guide, see
# - https://github.com/yoshiko2/AV_Data_Capture/wiki#%E9%85%8D%E7%BD%AEconfigini
[common]
main_mode=1
source_folder=./
failed_output_folder=failed
success_output_folder=JAV_output
soft_link=0
failed_move=1
auto_exit=0
transalte_to_sc=0
multi_threading=1
multi_threading=0
;actor_gender value: female(♀) or male(♂) or both(♀ ♂) or all(♂ ♀ ⚧)
actor_gender=female
del_empty_folder=1
@@ -16,6 +19,8 @@ nfo_skip_days=30
; stop after processing this many video files; 0 means process all of them
stop_counter=0
; used together, the two settings above let you scrape or organize thousands of files in several small batches without triggering bans from translation or metadata sites
ignore_failed_list=0
download_only_missing_images=1
[proxy]
;proxytype: http or socks5 or socks5h switch: 0 1
@@ -62,8 +67,7 @@ switch=0
; used to decide whether a title is uncensored
[uncensored]
uncensored_prefix=S2M,BT,LAF,SMD
uncensored_prefix=S2M,BT,LAF,SMD,SMBD,SM3D2DBD,SKY-,SKYHD,CWP,CWDV,CWBD,CW3D2DBD,MKD,MKBD,MXBD,MK3D2DBD,MCB3DBD,MCBD,RHJ,MMDV
[media]
; video file extensions
@@ -79,6 +83,29 @@ water=2
; extra fanart (movie stills)
[extrafanart]
switch=0
switch=1
parallel_download=5
extrafanart_folder=extrafanart
; storyline synopsis
[storyline]
switch=1
; When website is javbus/javdb/avsox/xcity/carib, the lists site, censored_site and uncensored_site name the
; optional data sources for the storyline synopsis. Sites within a list are queried concurrently; priority is the
; number before the colon, ascending, and data from a later site is used only when every lower-numbered site has none.
; airavwiki, airav, avno1 and 58avgo return Chinese synopses: airav covers censored titles only, avno1 and
; airavwiki cover both censored and uncensored, and 58avgo only uncensored or leaked/decensored titles (unused).
; xcity and amazon are Japanese; since the amazon store carries no ID field, picking the matching DVD is only
; about 99.6% accurate. If all three lists are empty nothing is queried, which speeds up scraping considerably.
; site=
site=1:avno1,4:airavwiki
censored_site=2:airav,5:xcity,6:amazon
uncensored_site=3:58avgo
; run mode  0: sequential (slowest)  1: thread pool (default)  2: process pool (more startup overhead than a thread pool; the more concurrent sites, the faster)
run_mode=1
; show_result: storyline debug info  0: off  1: brief  2: verbose (the verbose part is not written to the log); if synopses stop working, set it to 2 to see why
show_result=0
; Traditional/Simplified Chinese conversion  mode=0: no conversion  1: Traditional -> Simplified  2: Simplified -> Traditional
[cc_convert]
mode=1
vars=outline,series,studio,tag,title
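
A minimal sketch (not the project's parser, which lives in WebCrawler/storyline.py) of the "N:site" priority format documented above — lower numbers win, and a higher-numbered site's result is used only when every lower-numbered site returned nothing:

    def parse_site_priority(value: str) -> list:
        # "2:airav,5:xcity,6:amazon" -> ['airav', 'xcity', 'amazon']
        pairs = []
        for item in value.split(','):
            prio, _, name = item.strip().partition(':')
            pairs.append((int(prio), name.strip()))
        return [name for _, name in sorted(pairs)]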

225
config.py
View File

@@ -1,33 +1,82 @@
import os
import re
import sys
import configparser
import codecs
from pathlib import Path
G_conf_override = {
    # index 0 saves the first Config() instance for quick access via getInstance()
0 : None,
# register override config items
"common:main_mode" : None,
"common:source_folder" : None,
"common:auto_exit" : None,
"common:nfo_skip_days" : None,
"common:stop_counter" : None,
"common:ignore_failed_list" : None,
"debug_mode:switch" : None
}
def getInstance():
if isinstance(G_conf_override[0], Config):
return G_conf_override[0]
return Config()
class Config:
def __init__(self, path: str = "config.ini"):
path_search_order = [
path,
"./config.ini",
os.path.join(Path.home(), "avdc.ini"),
os.path.join(Path.home(), ".avdc.ini"),
os.path.join(Path.home(), ".avdc/config.ini"),
os.path.join(Path.home(), ".config/avdc/config.ini")
]
path_search_order = (
Path(path),
Path.cwd() / "config.ini",
Path.home() / "avdc.ini",
Path.home() / ".avdc.ini",
Path.home() / ".avdc/config.ini",
Path.home() / ".config/avdc/config.ini"
)
ini_path = None
for p in path_search_order:
if os.path.isfile(p):
ini_path = p
if p.is_file():
ini_path = p.resolve()
break
if ini_path:
self.conf = configparser.ConfigParser()
self.ini_path = ini_path
try:
self.conf.read(ini_path, encoding="utf-8-sig")
if self.conf.read(ini_path, encoding="utf-8-sig"):
if G_conf_override[0] is None:
G_conf_override[0] = self
except:
self.conf.read(ini_path, encoding="utf-8")
if self.conf.read(ini_path, encoding="utf-8"):
if G_conf_override[0] is None:
G_conf_override[0] = self
else:
print("[-]Config file not found!")
print("ERROR: Config file not found!")
print("Please put config file into one of the following path:")
print('\n'.join([str(p.resolve()) for p in path_search_order[2:]]))
            # When no config file can be found, shipping the matching default config inside the bundle and
            # generating it on a search path when needed is more reliable than letting the user hunt down
            # a config file for some mismatched version. The single executable is then fully functional
            # and can safely be run from any working directory.
res_path = None
            # for a pyinstaller bundle, look for config.ini inside the bundle
if hasattr(sys, '_MEIPASS') and (Path(getattr(sys, '_MEIPASS')) / 'config.ini').is_file():
res_path = Path(getattr(sys, '_MEIPASS')) / 'config.ini'
            # otherwise look in the directory the script runs from
elif (Path(__file__).resolve().parent / 'config.ini').is_file():
res_path = Path(__file__).resolve().parent / 'config.ini'
if res_path is None:
sys.exit(2)
            ins = input("Or, do you want me to create a config file for you? (Yes/No)[Y]:")
if re.search('n', ins, re.I):
sys.exit(2)
            # Only the user's home directory is certain to be writable, so ~/avdc.ini is chosen as the
            # generation path rather than the current directory, which may not be. Keeping a config in the
            # current directory is no longer encouraged; it survives only as a trick for switching between
            # multiple config files.
write_path = path_search_order[2] # Path.home() / "avdc.ini"
write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
print("Config file '{}' created.".format(write_path.resolve()))
input("Press Enter key exit...")
sys.exit(0)
# self.conf = self._default_config()
# try:
# self.conf = configparser.ConfigParser()
@@ -40,13 +89,24 @@ class Config:
# print("[-]",e)
# sys.exit(3)
# #self.conf = self._default_config()
def getboolean_override(self, section, item) -> bool:
return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(G_conf_override[f"{section}:{item}"])
def main_mode(self) -> str:
def getint_override(self, section, item) -> int:
return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(G_conf_override[f"{section}:{item}"])
def get_override(self, section, item) -> str:
return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(G_conf_override[f"{section}:{item}"])
def main_mode(self) -> int:
try:
return self.conf.getint("common", "main_mode")
return self.getint_override("common", "main_mode")
except ValueError:
self._exit("common:main_mode")
def source_folder(self) -> str:
return self.get_override("common", "source_folder")
def failed_folder(self) -> str:
return self.conf.get("common", "failed_output_folder")
@@ -61,7 +121,7 @@ class Config:
def failed_move(self) -> bool:
return self.conf.getboolean("common", "failed_move")
def auto_exit(self) -> bool:
return self.conf.getboolean("common", "auto_exit")
return self.getboolean_override("common", "auto_exit")
def transalte_to_sc(self) -> bool:
return self.conf.getboolean("common", "transalte_to_sc")
def multi_threading(self) -> bool:
@@ -70,14 +130,18 @@ class Config:
return self.conf.getboolean("common", "del_empty_folder")
def nfo_skip_days(self) -> int:
try:
return self.conf.getint("common", "nfo_skip_days")
return self.getint_override("common", "nfo_skip_days")
except:
return 30
def stop_counter(self) -> int:
try:
return self.conf.getint("common", "stop_counter")
return self.getint_override("common", "stop_counter")
except:
return 0
def ignore_failed_list(self) -> bool:
return self.getboolean_override("common", "ignore_failed_list")
def download_only_missing_images(self) -> bool:
return self.conf.getboolean("common", "download_only_missing_images")
def is_transalte(self) -> bool:
return self.conf.getboolean("transalte", "switch")
def is_trailer(self) -> bool:
@@ -89,6 +153,13 @@ class Config:
def is_extrafanart(self) -> bool:
return self.conf.getboolean("extrafanart", "switch")
def extrafanart_thread_pool_download(self) -> int:
try:
v = self.conf.getint("extrafanart", "parallel_download")
return v if v >= 0 else 5
except:
return 5
def watermark_type(self) -> int:
return int(self.conf.get("watermark", "water"))
@@ -173,7 +244,58 @@ class Config:
return self.conf.get("escape", "folders")
def debug(self) -> bool:
return self.conf.getboolean("debug_mode", "switch")
return self.getboolean_override("debug_mode", "switch")
def is_storyline(self) -> bool:
try:
return self.conf.getboolean("storyline", "switch")
except:
return True
def storyline_site(self) -> str:
try:
return self.conf.get("storyline", "site")
except:
return "1:avno1,4:airavwiki"
def storyline_censored_site(self) -> str:
try:
return self.conf.get("storyline", "censored_site")
except:
return "2:airav,5:xcity,6:amazon"
def storyline_uncensored_site(self) -> str:
try:
return self.conf.get("storyline", "uncensored_site")
except:
return "3:58avgo"
def storyline_show(self) -> int:
try:
v = self.conf.getint("storyline", "show_result")
return v if v in (0,1,2) else 2 if v > 2 else 0
except:
return 0
def storyline_mode(self) -> int:
try:
v = self.conf.getint("storyline", "run_mode")
return v if v in (0,1,2) else 2 if v > 2 else 0
except:
return 1
def cc_convert_mode(self) -> int:
try:
v = self.conf.getint("cc_convert", "mode")
return v if v in (0,1,2) else 2 if v > 2 else 0
except:
return 1
def cc_convert_vars(self) -> str:
try:
return self.conf.get("cc_convert", "vars")
except:
return "actor,director,label,outline,series,studio,tag,title"
@staticmethod
def _exit(sec: str) -> None:
@@ -188,6 +310,7 @@ class Config:
sec1 = "common"
conf.add_section(sec1)
conf.set(sec1, "main_mode", "1")
conf.set(sec1, "source_folder", "./")
conf.set(sec1, "failed_output_folder", "failed")
conf.set(sec1, "success_output_folder", "JAV_output")
conf.set(sec1, "soft_link", "0")
@@ -199,6 +322,8 @@ class Config:
conf.set(sec1, "del_empty_folder", "1")
conf.set(sec1, "nfo_skip_days", 30)
conf.set(sec1, "stop_counter", 0)
conf.set(sec1, "ignore_failed_list", 0)
conf.set(sec1, "download_only_missing_images", 1)
sec2 = "proxy"
conf.add_section(sec2)
@@ -264,6 +389,22 @@ class Config:
conf.add_section(sec13)
conf.set(sec13, "switch", 1)
conf.set(sec13, "extrafanart_folder", "extrafanart")
conf.set(sec13, "parallel_download", 1)
sec14 = "storyline"
conf.add_section(sec14)
conf.set(sec14, "switch", 1)
conf.set(sec14, "site", "1:avno1,4:airavwiki")
conf.set(sec14, "censored_site", "2:airav,5:xcity,6:amazon")
conf.set(sec14, "uncensored_site", "3:58avgo")
conf.set(sec14, "show_result", 0)
conf.set(sec14, "run_mode", 1)
conf.set(sec14, "cc_convert", 1)
sec15 = "cc_convert"
conf.add_section(sec15)
conf.set(sec15, "mode", 1)
conf.set(sec15, "vars", "actor,director,label,outline,series,studio,tag,title")
return conf
@@ -308,9 +449,45 @@ if __name__ == "__main__":
code = compile(evstr, "<string>", "eval")
print('{}: "{}"'.format(evstr, eval(code)))
config = Config()
mfilter = ('conf', 'proxy', '_exit', '_default_config')
mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override', 'ini_path'}
for _m in [m for m in dir(config) if not m.startswith('__') and m not in mfilter]:
evprint(f'config.{_m}()')
pfilter = ('proxies', 'SUPPORT_PROXY_TYPE')
for _p in [p for p in dir(config.proxy()) if not p.startswith('__') and p not in pfilter]:
evprint(f'config.proxy().{_p}')
pfilter = {'proxies', 'SUPPORT_PROXY_TYPE'}
# test getInstance()
assert(getInstance() == config)
for _p in [p for p in dir(getInstance().proxy()) if not p.startswith('__') and p not in pfilter]:
evprint(f'getInstance().proxy().{_p}')
# Override Test
G_conf_override["common:nfo_skip_days"] = 4321
G_conf_override["common:stop_counter"] = 1234
assert config.nfo_skip_days() == 4321
assert getInstance().stop_counter() == 1234
# remove override
G_conf_override["common:stop_counter"] = None
G_conf_override["common:nfo_skip_days"] = None
assert config.nfo_skip_days() != 4321
assert config.stop_counter() != 1234
# Create new instance
conf2 = Config()
assert getInstance() != conf2
assert getInstance() == config
G_conf_override["common:main_mode"] = 9
G_conf_override["common:source_folder"] = "A:/b/c"
    # Overrides affect all instances
assert config.main_mode() == 9
assert conf2.main_mode() == 9
assert getInstance().main_mode() == 9
assert conf2.source_folder() == "A:/b/c"
print("### Override Test ###".center(36))
evprint('getInstance().main_mode()')
evprint('config.source_folder()')
G_conf_override["common:main_mode"] = None
evprint('conf2.main_mode()')
evprint('config.main_mode()')
    # accessing an unregistered key raises KeyError
try:
print(G_conf_override["common:actor_gender"])
except KeyError as ke:
        print(f'Caught KeyError: {ke} is not a registered key of the G_conf_override dict.', file=sys.stderr)
print(f"Load Config file '{conf2.ini_path}'.")

388
core.py Executable file → Normal file
View File

@@ -3,8 +3,6 @@ import os.path
import pathlib
import re
import shutil
import platform
import errno
import sys
from PIL import Image
@@ -14,7 +12,7 @@ from datetime import datetime
from ADC_function import *
from WebCrawler import get_data_from_json
from number_parser import is_uncensored
def escape_path(path, escape_literals: str): # Remove escape literals
backslash = '\\'
@@ -23,7 +21,8 @@ def escape_path(path, escape_literals: str): # Remove escape literals
return path
def moveFailedFolder(filepath, conf):
def moveFailedFolder(filepath):
conf = config.getInstance()
failed_folder = conf.failed_folder()
soft_link = conf.soft_link()
    # In mode 3, or when using soft links, a failed-list file is maintained instead; it is loaded at scan start so listed paths are excluded from repeat processing
@@ -33,7 +32,6 @@ def moveFailedFolder(filepath, conf):
print("[-]Add to Failed List file, see '%s'" % ftxt)
with open(ftxt, 'a', encoding='utf-8') as flt:
flt.write(f'{filepath}\n')
flt.close()
elif conf.failed_move() and not soft_link:
failed_name = os.path.join(failed_folder, os.path.basename(filepath))
mtxt = os.path.abspath(os.path.join(failed_folder, 'where_was_i_before_being_moved.txt'))
@@ -41,8 +39,13 @@ def moveFailedFolder(filepath, conf):
with open(mtxt, 'a', encoding='utf-8') as wwibbmt:
tmstr = datetime.now().strftime("%Y-%m-%d %H:%M")
wwibbmt.write(f'{tmstr} FROM[{filepath}]TO[{failed_name}]\n')
wwibbmt.close()
try:
if os.path.exists(failed_name):
print('[-]File Exists while moving to FailedFolder')
return
shutil.move(filepath, failed_name)
except:
print('[-]File Moving to FailedFolder unsuccessful!')
def get_info(json_data):  # return the fields stored in the json
@@ -63,14 +66,15 @@ def get_info(json_data): # 返回json里的数据
return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label
def small_cover_check(path, number, cover_small, leak_word, c_word, conf: config.Config, filepath):
filename = f"{number}{leak_word}{c_word}-poster.jpg"
download_file_with_filename(cover_small, filename, path, conf, filepath)
def small_cover_check(path, number, cover_small, leak_word, c_word, hack_word, filepath):
filename = f"{number}{leak_word}{c_word}{hack_word}-poster.jpg"
download_file_with_filename(cover_small, filename, path, filepath)
print('[+]Image Downloaded! ' + os.path.join(path, filename))
def create_folder(json_data, conf: config.Config): # create the folder
def create_folder(json_data): # create the folder
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
conf = config.getInstance()
success_folder = conf.success_folder()
actor = json_data.get('actor')
location_rule = eval(conf.location_rule(), json_data)
@@ -81,35 +85,40 @@ def create_folder(json_data, conf: config.Config): # 创建文件夹
if 'title' in conf.location_rule() and len(title) > maxlen:
shorttitle = title[0:maxlen]
location_rule = location_rule.replace(title, shorttitle)
path = os.path.join(success_folder, location_rule).strip()
if not os.path.isdir(path):
    # when actor is empty, location_rule evaluates to the absolute path '/number', which makes os.path.join discard the first argument; prefixing './' keeps the path relative
path = os.path.join(success_folder, f'./{location_rule.strip()}')
if not os.path.exists(path):
path = escape_path(path, conf.escape_literals())
try:
os.makedirs(path)
if not os.path.isdir(path):
raise
except:
path = success_folder + '/' + location_rule.replace('/[' + number + ')-' + title, "/number")
path = escape_path(path, conf.escape_literals())
try:
os.makedirs(path)
return path
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
return os.path.normpath(path)
# ===================== resource download section ===========================
# path example: photo, video in the project folder
def download_file_with_filename(url, filename, path, conf: config.Config, filepath):
def download_file_with_filename(url, filename, path, filepath):
conf = config.getInstance()
configProxy = conf.proxy()
for i in range(configProxy.retry):
try:
if configProxy.enable:
if not os.path.isdir(path):
if not os.path.exists(path):
try:
os.makedirs(path)
if not os.path.isdir(path):
raise IOError
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
proxies = configProxy.proxies()
headers = {
'User-Agent': G_USER_AGENT}
@@ -121,10 +130,12 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa
code.write(r.content)
return
else:
if not os.path.isdir(path):
if not os.path.exists(path):
try:
os.makedirs(path)
if not os.path.isdir(path):
raise IOError
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
headers = {
'User-Agent': G_USER_AGENT}
r = requests.get(url, timeout=configProxy.timeout, headers=headers)
@@ -148,90 +159,142 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa
print('[-]Image Download : Connect retry ' + str(i) + '/' + str(configProxy.retry))
except IOError:
print(f"[-]Create Directory '{path}' failed!")
moveFailedFolder(filepath, conf)
moveFailedFolder(filepath)
return
print('[-]Connect Failed! Please check your Proxy or Network!')
moveFailedFolder(filepath, conf)
moveFailedFolder(filepath)
return
def trailer_download(trailer, leak_word, c_word, number, path, filepath, conf: config.Config):
if download_file_with_filename(trailer, number + leak_word + c_word + '-trailer.mp4', path, conf, filepath) == 'failed':
def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepath):
if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath) == 'failed':
return
configProxy = conf.proxy()
configProxy = config.getInstance().proxy()
for i in range(configProxy.retry):
if os.path.getsize(path+'/' + number + leak_word + c_word + '-trailer.mp4') == 0:
if file_not_exist_or_empty(path+'/' + number + leak_word + c_word + hack_word + '-trailer.mp4'):
            print('[!]Video Download Failed! Trying again. [{}/3]'.format(i + 1))
download_file_with_filename(trailer, number + leak_word + c_word + '-trailer.mp4', path, conf, filepath)
download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath)
continue
else:
break
if os.path.getsize(path + '/' + number + leak_word + c_word + '-trailer.mp4') == 0:
if file_not_exist_or_empty(path + '/' + number + leak_word + c_word + hack_word + '-trailer.mp4'):
return
print('[+]Video Downloaded!', path + '/' + number + leak_word + c_word + '-trailer.mp4')
print('[+]Video Downloaded!', path + '/' + number + leak_word + c_word + hack_word + '-trailer.mp4')
# download the stills; on failure the movie is moved to failed
def extrafanart_download(data, path, conf: config.Config, filepath):
def extrafanart_download(data, path, number, filepath):
if config.getInstance().extrafanart_thread_pool_download():
return extrafanart_download_threadpool(data, path, number)
extrafanart_download_one_by_one(data, path, filepath)
def extrafanart_download_one_by_one(data, path, filepath):
tm_start = time.perf_counter()
j = 1
conf = config.getInstance()
path = os.path.join(path, conf.get_extrafanart())
configProxy = conf.proxy()
download_only_missing_images = conf.download_only_missing_images()
for url in data:
jpg_filename = f'extrafanart-{j}.jpg'
jpg_fullpath = os.path.join(path, jpg_filename)
if download_file_with_filename(url, jpg_filename, path, conf, filepath) == 'failed':
moveFailedFolder(filepath, conf)
if download_only_missing_images and not file_not_exist_or_empty(jpg_fullpath):
continue
if download_file_with_filename(url, jpg_filename, path, filepath) == 'failed':
moveFailedFolder(filepath)
return
configProxy = conf.proxy()
for i in range(configProxy.retry):
if os.path.getsize(jpg_fullpath) == 0:
if file_not_exist_or_empty(jpg_fullpath):
                print('[!]Image Download Failed! Trying again. [{}/3]'.format(i + 1))
download_file_with_filename(url, jpg_filename, path, conf, filepath)
download_file_with_filename(url, jpg_filename, path, filepath)
continue
else:
break
if os.path.getsize(jpg_fullpath) == 0:
if file_not_exist_or_empty(jpg_fullpath):
return
print('[+]Image Downloaded!', jpg_fullpath)
j += 1
if conf.debug():
print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')
def extrafanart_download_threadpool(url_list, save_dir, number):
tm_start = time.perf_counter()
conf = config.getInstance()
extrafanart_dir = Path(save_dir) / conf.get_extrafanart()
download_only_missing_images = conf.download_only_missing_images()
dn_list = []
for i, url in enumerate(url_list, start=1):
jpg_fullpath = extrafanart_dir / f'extrafanart-{i}.jpg'
if download_only_missing_images and not file_not_exist_or_empty(jpg_fullpath):
continue
dn_list.append((url, jpg_fullpath))
if not len(dn_list):
return
parallel = min(len(dn_list), conf.extrafanart_thread_pool_download())
if parallel > 100:
        print('[!]Warning: too many parallel download threads may get your IP banned by the website!')
result = parallel_download_files(dn_list, parallel)
failed = 0
for i, r in enumerate(result, start=1):
if not r:
failed += 1
print(f'[-]Extrafanart {i} for [{number}] download failed!')
    if failed:  # non-fatal: the movie is not moved to the failed folder; missing images can be filled in later with mode 3
        print(f"[-]Failed to download {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}'; you may retry with mode 3 later.")
else:
print(f"[+]Successfully downloaded {len(result)} extrafanart to '{extrafanart_dir}'")
if conf.debug():
print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s')
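
# Hypothetical sketch of the parallel_download_files helper called above (the
# real one is provided by ADC_function.py and is not shown in this hunk),
# assuming it accepts (url, save_path) pairs and returns one success flag per
# input, preserving order:
def _sketch_parallel_download_files(dn_list, parallel):
    from concurrent.futures import ThreadPoolExecutor
    def fetch(task):
        url, save_path = task
        try:
            r = requests.get(url, timeout=30, headers={'User-Agent': G_USER_AGENT})
            r.raise_for_status()
            Path(save_path).parent.mkdir(parents=True, exist_ok=True)
            Path(save_path).write_bytes(r.content)
            return True
        except Exception:
            return False
    with ThreadPoolExecutor(max_workers=parallel) as pool:
        return list(pool.map(fetch, dn_list))  # map preserves input order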
# download the cover; on failure the movie is moved to failed
def image_download(cover, number, leak_word, c_word, path, conf: config.Config, filepath):
filename = f"{number}{leak_word}{c_word}-fanart.jpg"
def image_download(cover, number, leak_word, c_word, hack_word, path, filepath):
filename = f"{number}{leak_word}{c_word}{hack_word}-fanart.jpg"
full_filepath = os.path.join(path, filename)
if download_file_with_filename(cover, filename, path, conf, filepath) == 'failed':
moveFailedFolder(filepath, conf)
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
return
if download_file_with_filename(cover, filename, path, filepath) == 'failed':
moveFailedFolder(filepath)
return
configProxy = conf.proxy()
configProxy = config.getInstance().proxy()
for i in range(configProxy.retry):
if os.path.getsize(full_filepath) == 0:
if file_not_exist_or_empty(full_filepath):
print('[!]Image Download Failed! Trying again. [{}/3]', i + 1)
download_file_with_filename(cover, filename, path, conf, filepath)
download_file_with_filename(cover, filename, path, filepath)
continue
else:
break
if os.path.getsize(full_filepath) == 0:
if file_not_exist_or_empty(full_filepath):
return
print('[+]Image Downloaded!', full_filepath)
shutil.copyfile(full_filepath, os.path.join(path, f"{number}{leak_word}{c_word}-thumb.jpg"))
shutil.copyfile(full_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-thumb.jpg"))
def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored, conf):
def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored, hack_word):
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
failed_folder = conf.failed_folder()
    if conf.main_mode() == 3: # in mode 3 the video file is untouched, so the .nfo must match the video filename exactly (apart from the extension) for KODI and similar software to find it
    if config.getInstance().main_mode() == 3: # in mode 3 the video file is untouched, so the .nfo must match the video filename exactly (apart from the extension) for KODI and similar software to find it
nfo_path = str(Path(filepath).with_suffix('.nfo'))
else:
nfo_path = os.path.join(path,f"{number}{part}{leak_word}{c_word}.nfo")
nfo_path = os.path.join(path,f"{number}{part}{leak_word}{c_word}{hack_word}.nfo")
try:
if not os.path.exists(path):
try:
if not os.path.isdir(path):
os.makedirs(path)
if not os.path.isdir(path):
raise IOError
except:
print(f"[-]Fatal error! can not make folder '{path}'")
sys.exit(0)
        # KODI's movie-info view has nowhere to show the ID. Setting naming_rule=number+'#'+title would fix
        # that but makes titles too long; putting the ID into the usually-empty outline fits better, and players give the outline a larger display area anyway
outline = f"{number}#{outline}"
with open(nfo_path, "wt", encoding='UTF-8') as code:
print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
print("<movie>", file=code)
print(" <title>" + naming_rule + "</title>", file=code)
print(" <originaltitle>" + naming_rule + "</originaltitle>", file=code)
print(" <sorttitle>" + naming_rule + "</sorttitle>", file=code)
print(" <customrating>JP-18+</customrating>", file=code)
print(" <mpaa>JP-18+</mpaa>", file=code)
print(" <set>", file=code)
print(" </set>", file=code)
print(" <studio>" + studio + "</studio>", file=code)
@@ -240,9 +303,9 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
print(" <plot>" + outline + "</plot>", file=code)
print(" <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
print(" <director>" + director + "</director>", file=code)
print(" <poster>" + number + leak_word + c_word + "-poster.jpg</poster>", file=code)
print(" <thumb>" + number + leak_word + c_word + "-thumb.jpg</thumb>", file=code)
print(" <fanart>" + number + leak_word + c_word + '-fanart.jpg' + "</fanart>", file=code)
print(" <poster>" + number + leak_word + c_word + hack_word + "-poster.jpg</poster>", file=code)
print(" <thumb>" + number + leak_word + c_word + hack_word + "-thumb.jpg</thumb>", file=code)
print(" <fanart>" + number + leak_word + c_word + hack_word + '-fanart.jpg' + "</fanart>", file=code)
try:
for key in actor_list:
print(" <actor>", file=code)
@@ -258,6 +321,8 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
print(" <tag>流出</tag>", file=code)
if uncensored == 1:
print(" <tag>无码</tag>", file=code)
if hack_word != '':
print(" <tag>破解</tag>", file=code)
try:
for i in tag:
print(" <tag>" + i + "</tag>", file=code)
@@ -270,6 +335,8 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
print(" <genre>流出</genre>", file=code)
if uncensored == 1:
print(" <genre>无码</genre>", file=code)
if hack_word != '':
print(" <genre>破解</genre>", file=code)
try:
for i in tag:
print(" <genre>" + i + "</genre>", file=code)
@@ -278,8 +345,10 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
aaaaaaaa = ''
print(" <num>" + number + "</num>", file=code)
print(" <premiered>" + release + "</premiered>", file=code)
print(" <releasedate>" + release + "</releasedate>", file=code)
print(" <release>" + release + "</release>", file=code)
print(" <cover>" + cover + "</cover>", file=code)
if config.Config().is_trailer():
if config.getInstance().is_trailer():
print(" <trailer>" + trailer + "</trailer>", file=code)
print(" <website>" + website + "</website>", file=code)
print("</movie>", file=code)
@@ -287,17 +356,17 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
except IOError as e:
print("[-]Write Failed!")
print("[-]", e)
moveFailedFolder(filepath, conf)
moveFailedFolder(filepath)
return
except Exception as e1:
print("[-]Write Failed!")
print("[-]", e1)
moveFailedFolder(filepath, conf)
moveFailedFolder(filepath)
return
def cutImage(imagecut, path, number, leak_word, c_word):
fullpath_noext = os.path.join(path, f"{number}{leak_word}{c_word}")
def cutImage(imagecut, path, number, leak_word, c_word, hack_word):
fullpath_noext = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}")
    if imagecut == 1: # crop the large cover
try:
img = Image.open(fullpath_noext + '-fanart.jpg')
@@ -307,7 +376,8 @@ def cutImage(imagecut, path, number, leak_word, c_word):
img2 = img.crop((w / 1.9, 0, w, h))
img2.save(fullpath_noext + '-poster.jpg')
print('[+]Image Cutted! ' + fullpath_noext + '-poster.jpg')
except:
except Exception as e:
print(e)
print('[-]Cover cut failed!')
    elif imagecut == 0: # copy the cover
shutil.copyfile(fullpath_noext + '-fanart.jpg', fullpath_noext + '-poster.jpg')
@@ -321,7 +391,7 @@ def cutImage(imagecut, path, number, leak_word, c_word):
# leak: leaked, value 1 or 0
# uncensored: value 1 or 0
# ======================================================================== add watermark
def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf:config.Config):
def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack):
mark_type = ''
if cn_sub:
mark_type += ',字幕'
@@ -329,19 +399,21 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf:config.Conf
mark_type += ',流出'
if uncensored:
mark_type += ',无码'
if hack:
mark_type += ',破解'
if mark_type == '':
return
add_mark_thread(thumb_path, cn_sub, leak, uncensored, conf)
add_mark_thread(thumb_path, cn_sub, leak, uncensored, hack)
print('[+]Thumb Add Mark: ' + mark_type.strip(','))
add_mark_thread(poster_path, cn_sub, leak, uncensored, conf)
add_mark_thread(poster_path, cn_sub, leak, uncensored, hack)
print('[+]Poster Add Mark: ' + mark_type.strip(','))
def add_mark_thread(pic_path, cn_sub, leak, uncensored, conf):
size = 14
def add_mark_thread(pic_path, cn_sub, leak, uncensored, hack):
size = 9
img_pic = Image.open(pic_path)
    # read the configured starting corner; the modulo step below walks subsequent marks clockwise
    # top-left 0, top-right 1, bottom-right 2, bottom-left 3
count = conf.watermark_type()
count = config.getInstance().watermark_type()
if cn_sub == 1 or cn_sub == '1':
        add_to_pic(pic_path, img_pic, size, count, 1) # add the mark
count = (count + 1) % 4
@@ -350,6 +422,8 @@ def add_mark_thread(pic_path, cn_sub, leak, uncensored, conf):
count = (count + 1) % 4
if uncensored == 1 or uncensored == '1':
add_to_pic(pic_path, img_pic, size, count, 3)
if hack == 1 or hack == '1':
add_to_pic(pic_path, img_pic, size, count, 4)
img_pic.close()
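
# Corner-cycling note (illustration): watermark_type() selects the start corner
# (0 top-left, 1 top-right, 2 bottom-right, 3 bottom-left); each
# count = (count + 1) % 4 step advances one corner clockwise, wrapping from the
# bottom-left back to the top-left, so stacked marks occupy successive corners.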
def add_to_pic(pic_path, img_pic, size, count, mode):
@@ -361,6 +435,8 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
pngpath = "Img/LEAK.png"
elif mode == 3:
pngpath = "Img/UNCENSORED.png"
elif mode == 4:
pngpath = "Img/HACK.png"
else:
print('[-]Error: watermark image param mode invalid!')
return
@@ -391,40 +467,55 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
img_pic.save(pic_path, quality=95)
# ======================== end =================================
def paste_file_to_folder(filepath, path, number, leak_word, c_word, conf: config.Config): # file path, ID, extension, destination to move to
def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): # file path, ID, extension, destination to move to
filepath_obj = pathlib.Path(filepath)
houzhui = filepath_obj.suffix
file_parent_origin_path = str(filepath_obj.parent)
try:
targetpath = os.path.join(path, f"{number}{leak_word}{c_word}{houzhui}")
targetpath = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{houzhui}")
        # never overwrite under any circumstances: if a data-source or engine bug ever assigned every file
        # the same number, overwriting them one by one would destroy all of the files beyond recovery
if os.path.exists(targetpath):
raise FileExistsError('File Exists on destination path, we will never overwriting.')
soft_link = config.getInstance().soft_link()
        # if soft_link=1, use a symlink
if conf.soft_link() == 0:
if soft_link == 0:
shutil.move(filepath, targetpath)
elif conf.soft_link() == 1:
            # use a relative path so the video opens correctly when accessed over the network
        elif soft_link == 1:
            # try a relative path first so the video opens correctly over network access; if that fails
            # (e.g. across drive letters, where relative links are unsupported), fall back to an absolute-path symlink
try:
filerelpath = os.path.relpath(filepath, path)
os.symlink(filerelpath, targetpath)
elif conf.soft_link() == 2:
except:
os.symlink(filepath_obj.resolve(), targetpath)
elif soft_link == 2:
shutil.move(filepath, targetpath)
            # after moving the file, leave a traceable symlink at the original location pointing to the new
            # one, so a file renamed and moved under a wrong number does not go missing and can be traced
            # and recovered by hand. The symlink's suffix used to be changed to avoid it being scraped again
            # and recovered by hand. As symlinks are no longer scraped, the suffix needs no change anymore
targetabspath = os.path.abspath(targetpath)
if targetabspath != os.path.abspath(filepath):
targetrelpath = os.path.relpath(targetabspath, file_parent_origin_path)
os.symlink(targetrelpath, filepath + '#sym')
sub_res = conf.sub_rule()
os.symlink(targetrelpath, filepath)
sub_res = config.getInstance().sub_rule()
for subname in sub_res:
sub_filepath = str(filepath_obj.with_suffix(subname))
            if os.path.isfile(sub_filepath): # move subtitles
shutil.move(sub_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{subname}"))
if os.path.isfile(sub_filepath.replace(subname,".chs" + subname)):
sub_filepath = sub_filepath.replace(subname,".chs" + subname)
subname = ".chs" + subname
elif os.path.isfile(sub_filepath.replace(subname,".cht" + subname)):
sub_filepath = sub_filepath.replace(subname, ".cht" + subname)
subname = ".cht" + subname
if os.path.isfile(sub_filepath):
shutil.move(sub_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{subname}"))
print('[+]Sub moved!')
return True
except FileExistsError:
print('[-]File Exists! Please check your movie!')
print('[-]move to the root folder of the program.')
except FileExistsError as fee:
print(f'[-]FileExistsError: {fee}')
moveFailedFolder(filepath)
return
except PermissionError:
print('[-]Error! Please run as administrator!')
@@ -434,29 +525,31 @@ def paste_file_to_folder(filepath, path, number, leak_word, c_word, conf: config
return
def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, conf): # file path, ID, extension, destination to move to
def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word): # file path, ID, extension, destination to move to
if multi_part == 1:
        number += part # number gets its CD1-style suffix appended here
filepath_obj = pathlib.Path(filepath)
houzhui = filepath_obj.suffix
file_parent_origin_path = str(filepath_obj.parent)
targetpath = os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{houzhui}")
if os.path.exists(targetpath):
raise FileExistsError('File Exists on destination path, we will never overwriting.')
try:
if conf.soft_link():
os.symlink(filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{houzhui}"))
if config.getInstance().soft_link():
os.symlink(filepath, targetpath)
else:
shutil.move(filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{houzhui}"))
shutil.move(filepath, targetpath)
sub_res = conf.sub_rule()
sub_res = config.getInstance().sub_rule()
for subname in sub_res:
sub_filepath = str(filepath_obj.with_suffix(subname))
            if os.path.isfile(sub_filepath): # move subtitles
shutil.move(sub_filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{subname}"))
shutil.move(sub_filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{subname}"))
print('[+]Sub moved!')
print('[!]Success')
return True
except FileExistsError:
print('[-]File Exists! Please check your movie!')
print('[-]move to the root folder of the program.')
except FileExistsError as fee:
print(f'[-]FileExistsError: {fee}')
return
except PermissionError:
print('[-]Error! Please run as administrator!')
@@ -465,7 +558,7 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
print(f'[-]OS Error errno {oserr.errno}')
return
def get_part(filepath, conf):
def get_part(filepath):
try:
if re.search('-CD\d+', filepath):
return re.findall('-CD\d+', filepath)[0]
@@ -473,27 +566,31 @@ def get_part(filepath, conf):
return re.findall('-cd\d+', filepath)[0]
except:
print("[-]failed!Please rename the filename again!")
moveFailedFolder(filepath, conf)
moveFailedFolder(filepath)
return
def debug_print(data: json):
try:
print("[+] ---Debug info---")
print("[+] ------- DEBUG INFO -------")
for i, v in data.items():
if i == 'outline':
print('[+] -', i, ' :', len(v), 'characters')
print('[+] -', "%-14s" % i, ':', len(v), 'characters')
continue
if i == 'actor_photo' or i == 'year':
continue
print('[+] -', "%-11s" % i, ':', v)
if i == 'extrafanart':
print('[+] -', "%-14s" % i, ':', len(v), 'links')
continue
print('[+] -', "%-14s" % i, ':', v)
print("[+] ---Debug info---")
print("[+] ------- DEBUG INFO -------")
except:
pass
def core_main(file_path, number_th, conf: config.Config):
def core_main(file_path, number_th, oCC):
conf = config.getInstance()
# =======================================================================初始化所需变量
multi_part = 0
part = ''
@@ -501,17 +598,19 @@ def core_main(file_path, number_th, conf: config.Config):
c_word = ''
cn_sub = ''
liuchu = ''
hack = ''
hack_word = ''
filepath = file_path # 影片的路径 绝对路径
    # the commented-out variable below is not needed
#rootpath= os.getcwd
number = number_th
    json_data = get_data_from_json(number, conf) # fetch scraped metadata for this ID
    json_data = get_data_from_json(number, oCC) # fetch scraped metadata for this ID
# Return if blank dict returned (data not found)
if not json_data:
moveFailedFolder(filepath, conf)
moveFailedFolder(filepath)
return
if json_data["number"] != number:
@@ -526,16 +625,13 @@ def core_main(file_path, number_th, conf: config.Config):
    # ======================================================================= detect -C / -CD suffixes
if '-CD' in filepath or '-cd' in filepath:
multi_part = 1
part = get_part(filepath, conf)
part = get_part(filepath)
if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
cn_sub = '1'
        c_word = '-C'  # suffix for movies with Chinese subtitles
    # decide whether the title is uncensored
if is_uncensored(number):
uncensored = 1
else:
uncensored = 0
uncensored = 1 if is_uncensored(number) else 0
if '流出' in filepath or 'uncensored' in filepath:
@@ -545,12 +641,16 @@ def core_main(file_path, number_th, conf: config.Config):
else:
leak = 0
if 'hack'.upper() in str(filepath).upper() or '破解' in filepath:
hack = 1
hack_word = "-hack"
    # debug mode check
if conf.debug():
debug_print(json_data)
    # create the folder
#path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data, conf)
#path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data)
# main_mode
    # 1: Scraping mode
@@ -558,54 +658,55 @@ def core_main(file_path, number_th, conf: config.Config):
    # 3: scrape in place without changing paths
if conf.main_mode() == 1:
        # create the folder
path = create_folder(json_data, conf)
path = create_folder(json_data)
if multi_part == 1:
            number += part # number gets its CD1-style suffix appended here
        # check the small cover; when imagecut is 3, download it
if imagecut == 3:
small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, conf, filepath)
small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
        # create_folder returns the path for this ID
image_download( json_data.get('cover'), number, leak_word, c_word, path, conf, filepath)
image_download( json_data.get('cover'), number, leak_word, c_word, hack_word, path, filepath)
if not multi_part or part.lower() == '-cd1':
try:
                # download the trailer
if conf.is_trailer() and json_data.get('trailer'):
trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath, conf)
trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath)
except:
pass
try:
            # download stills  data, path, conf: config.Config, filepath
            # download stills  data, path, filepath
if conf.is_extrafanart() and json_data.get('extrafanart'):
extrafanart_download(json_data.get('extrafanart'), path, conf, filepath)
extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
except:
pass
        # crop the cover image
cutImage(imagecut, path, number, leak_word, c_word)
cutImage(imagecut, path, number, leak_word, c_word, hack_word)
        # write the info files
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, conf)
        # move the file
paste_file_to_folder(filepath, path, number, leak_word, c_word, conf)
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}-thumb.jpg")
        # add watermarks
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-thumb.jpg")
if conf.is_watermark():
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack)
        # move the movie
paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word)
        # write the .nfo metadata file last; its successful creation marks the task as done
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word)
elif conf.main_mode() == 2:
        # create the folder
path = create_folder(json_data, conf)
path = create_folder(json_data)
        # move the file
paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, conf)
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}-thumb.jpg")
paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word)
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-thumb.jpg")
if conf.is_watermark():
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack)
elif conf.main_mode() == 3:
path = str(Path(file_path).parent)
@@ -614,28 +715,29 @@ def core_main(file_path, number_th, conf: config.Config):
        # check the small cover; when imagecut is 3, download it
if imagecut == 3:
small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, conf, filepath)
small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
        # create_folder returns the path for this ID
image_download(json_data.get('cover'), number, leak_word, c_word, path, conf, filepath)
image_download(json_data.get('cover'), number, leak_word, c_word, hack_word, path, filepath)
if not multi_part or part.lower() == '-cd1':
            # download the trailer
if conf.is_trailer() and json_data.get('trailer'):
trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath, conf)
trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath)
            # download stills  data, path, conf: config.Config, filepath
            # download stills  data, path, filepath
if conf.is_extrafanart() and json_data.get('extrafanart'):
extrafanart_download(json_data.get('extrafanart'), path, conf, filepath)
extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
        # crop the cover image
cutImage(imagecut, path, number, leak_word, c_word)
cutImage(imagecut, path, number, leak_word, c_word, hack_word)
        # write the info files
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath,
tag, json_data.get('actor_list'), liuchu, uncensored, conf)
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}-thumb.jpg")
        # add watermarks
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-thumb.jpg")
if conf.is_watermark():
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack)
        # write the .nfo metadata file last; its successful creation marks the task as done
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath,
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word)

View File

@@ -1,14 +1,14 @@
import os
import re
from core import *
import sys
import config
G_spat = re.compile(
"^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|^hhd800\.com@",
"^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|^hhd800\.com@|-uncensored|_uncensored|-leak|_leak",
re.IGNORECASE)
def get_number(debug,filepath: str) -> str:
def get_number(debug,file_path: str) -> str:
# """
# >>> from number_parser import get_number
# >>> get_number("/Users/Guest/AV_Data_Capture/snis-829.mp4")
@@ -32,77 +32,188 @@ def get_number(debug,filepath: str) -> str:
# >>> get_number("snis-829-C.mp4")
# 'snis-829'
# """
filepath = os.path.basename(filepath)
if debug == False:
filepath = os.path.basename(file_path)
    # The debug True/False code blocks were merged: this module does pure string computation with no I/O, so when debug is on, printing the exception info is enough
try:
        if '-' in filepath or '_' in filepath: # normal ID extraction, mainly for IDs containing '-' or '_'
#filepath = filepath.replace("_", "-")
file_number = get_number_by_dict(filepath)
if file_number:
return file_number
        elif '-' in filepath or '_' in filepath: # normal ID extraction, mainly for IDs containing '-' or '_'
filepath = G_spat.sub("", filepath)
            filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # strip the bracketed date from the filename
lower_check = filename.lower()
if 'fc2' in lower_check:
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
file_number = get_number_by_dict(lower_check)
if file_number:
return file_number
return str(re.search(r'\w+(-|_)\w+', filename, re.A).group())
filename = re.sub("(-|_)cd\d{1,2}", "", filename, flags=re.IGNORECASE)
file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group())
file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE)
return file_number.upper()
        else: # extract IDs without a '-' (FANZA CID)
            # western ID matching rule
oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath)
if oumei:
return oumei.group()
try:
return str(
re.findall(r'(.+?)\.',
str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip(
"['']").replace('_', '-')
except:
return re.search(r'(.+?)\.', filepath)[0]
return str(re.search(r'(.+?)\.', filepath)[0])
except Exception as e:
print('[-]' + str(e))
return
elif debug == True:
        if '-' in filepath or '_' in filepath: # normal ID extraction, mainly for IDs containing '-' or '_'
#filepath = filepath.replace("_", "-")
filepath = G_spat.sub("", filepath)
            filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # strip the bracketed date from the filename
lower_check = filename.lower()
if 'fc2' in lower_check:
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
file_number = get_number_by_dict(lower_check)
if file_number:
return file_number
return str(re.search(r'\w+(-|_)\w+', filename, re.A).group())
        else: # extract IDs without a '-' (FANZA CID)
            # western ID matching rule
oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath)
if oumei:
return oumei.group()
try:
return str(
re.findall(r'(.+?)\.',
str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip(
"['']").replace('_', '-')
except:
return re.search(r'(.+?)\.', filepath)[0]
G_TAKE_NUM_RULES = {
'tokyo' : lambda x:str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.A).group()),
'carib' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.A).group()).replace('_', '-'),
'1pon' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.A).group()).replace('-', '_'),
'10mu' : lambda x:str(re.search(r'\d{6}(-|_)\d{2}', x, re.A).group()).replace('-', '_'),
'x-art' : lambda x:str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.A).group())
}
def get_number_by_dict(lower_filename: str) -> str:
for k,v in G_TAKE_NUM_RULES.items():
if k in lower_filename:
return v(lower_filename)
if debug:
print(f'[-]Number Parser exception: {e} [{file_path}]')
return None
# if __name__ == "__main__":
# extract numbers following the javdb data source's naming conventions
G_TAKE_NUM_RULES = {
'tokyo.*hot' : lambda x:str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.I).group()),
'carib' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('_', '-'),
'1pon|mura|paco' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('-', '_'),
'10mu' : lambda x:str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'),
'x-art' : lambda x:str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()),
'xxx-av': lambda x:''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
'heydouga': lambda x:'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]),
'heyzo' : lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0]
}
def get_number_by_dict(filename: str) -> str:
try:
for k,v in G_TAKE_NUM_RULES.items():
if re.search(k, filename, re.I):
return v(filename)
except:
pass
return None
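
# Table-driven extraction examples (illustrative filenames):
#   get_number_by_dict("caribean-020317_001.nfo")          -> '020317-001'
#   get_number_by_dict("HeyDOuGa4236-1048 Ai Qiu - .mp4")  -> 'heydouga-4236-1048'
#   get_number_by_dict("sbw99.cc@heyzo_hd_2636_full.mp4")  -> 'HEYZO-2636'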
class Cache_uncensored_conf:
prefix = None
def is_empty(self):
return bool(self.prefix is None)
def set(self, v: list):
if not v or not len(v) or not len(v[0]):
raise ValueError('input prefix list empty or None')
s = v[0]
if len(v) > 1:
for i in v[1:]:
s += f"|{i}.+"
self.prefix = re.compile(s, re.I)
def check(self, number):
if self.prefix is None:
raise ValueError('No init re compile')
return self.prefix.match(number)
G_cache_uncensored_conf = Cache_uncensored_conf()
# ======================================================================== uncensored check
def is_uncensored(number):
if re.match(
r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}',
number,
re.I
):
return True
if G_cache_uncensored_conf.is_empty():
G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(','))
return G_cache_uncensored_conf.check(number)
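# Decision-order sketch (illustration only): the hard-coded pattern catches well-known
# uncensored ID shapes first; only numbers it misses fall through to the user-configured
# prefix list cached in G_cache_uncensored_conf:
#
#   is_uncensored('010115_001')  # \d{6}_\d{2,3} shape -> True
#   is_uncensored('HEYZO-2636')  # heyzo.+ shape       -> True
#   is_uncensored('SSIS-001')    # decided by the configured prefixes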
if __name__ == "__main__":
# import doctest
# doctest.testmod(raise_on_error=True)
test_use_cases = (
"MEYD-594-C.mp4",
"SSIS-001_C.mp4",
"SSIS100-C.mp4",
"SSIS101_C.mp4",
"ssni984.mp4",
"ssni666.mp4",
"SDDE-625_uncensored_C.mp4",
"SDDE-625_uncensored_leak_C.mp4",
"SDDE-625_uncensored_leak_C_cd1.mp4",
"Tokyo Hot n9001 FHD.mp4", # 无-号,以前无法正确提取
"TokyoHot-n1287-HD SP2006 .mp4",
"caribean-020317_001.nfo", # -号误命名为_号的
"257138_3xplanet_1Pondo_080521_001.mp4",
"ADV-R0624-CD3.wmv", # 多碟影片
"XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源
"xxx-av 20589.mp4",
"Muramura-102114_145-HD.wmv", # 新支持片商格式 102114_145 命名规则来自javdb数据源
"heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
"HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源
"pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源
"sbw99.cc@heyzo_hd_2636_full.mp4"
)
    def evprint(evstr):
        # Evaluate the prepared expression and print:  <parsed number>  # '<input filename>'.
        # evstr[18:-2] slices the filename back out of 'get_number(True, "<filename>")'
        # (len('get_number(True, "') == 18; [-2] drops the trailing '")').
        code = compile(evstr, "<string>", "eval")
        print("{1:>20} # '{0}'".format(evstr[18:-2], eval(code)))
for t in test_use_cases:
evprint(f'get_number(True, "{t}")')
if len(sys.argv)<=1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE):
sys.exit(0)
    # Use Everything's ES command-line tool to collect video filenames from all disks
    # as number-parser test cases. The argument is a drive letter A..Z or a path that
    # starts with one.
    # https://www.voidtools.com/support/everything/command_line_interface/
    # The ES CLI requires the Everything search engine to be running, and the single
    # executable es.exe must be somewhere on PATH. Everything is freeware.
    # Examples:
    #   python.exe .\number_parser.py ALL              # search every disk for videos
    #   python.exe .\number_parser.py D                # search drive D
    #   python.exe .\number_parser.py D:               # same as above
    #   python.exe .\number_parser.py D:\download\JAVs # search D:\download\JAVs (the path must include the drive letter)
    # ==================
    # Linux/WSL1|2: use mlocate (Ubuntu/Debian) or plocate (Debian sid) to collect the
    # filenames. Install via 'sudo apt install mlocate' (or plocate) and run
    # 'sudo updatedb' once to build the full-disk index.
    # macOS: use glocate from findutils; install via 'brew install findutils' and run
    # 'sudo gupdatedb' once to build the index.
    # Example:
    #   python3 ./number_parser.py ALL
import subprocess
ES_search_path = "ALL disks"
if sys.argv[1] == "ALL":
if sys.platform == "win32":
# ES_prog_path = 'C:/greensoft/es/es.exe'
            ES_prog_path = 'es.exe'  # es.exe must be somewhere on PATH
ES_cmdline = f'{ES_prog_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;flv;ts;webm;iso;mpg;m4v'
out_bytes = subprocess.check_output(ES_cmdline.split(' '))
            out_text = out_bytes.decode('gb18030')  # Chinese Windows 10 x64 outputs GB18030 by default; GB18030 covers all of Unicode, so decoding is lossless
out_list = out_text.splitlines()
elif sys.platform in ("linux", "darwin"):
ES_prog_path = 'locate' if sys.platform == 'linux' else 'glocate'
ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(ES_prog_path)
out_bytes = subprocess.check_output(ES_cmdline.split(' '))
out_text = out_bytes.decode('utf-8')
out_list = [ os.path.basename(line) for line in out_text.splitlines()]
else:
print('[-]Unsupported platform! Please run on OS Windows/Linux/MacOSX. Exit.')
sys.exit(1)
else: # Windows single disk
if sys.platform != "win32":
print('[!]Usage: python3 ./number_parser.py ALL')
sys.exit(0)
# ES_prog_path = 'C:/greensoft/es/es.exe'
        ES_prog_path = 'es.exe'  # es.exe must be somewhere on PATH
if os.path.isdir(sys.argv[1]):
ES_search_path = sys.argv[1]
else:
ES_search_path = sys.argv[1][0] + ':/'
if not os.path.isdir(ES_search_path):
ES_search_path = 'C:/'
ES_search_path = os.path.normcase(ES_search_path)
ES_cmdline = f'{ES_prog_path} -path {ES_search_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;webm;iso;mpg;m4v'
out_bytes = subprocess.check_output(ES_cmdline.split(' '))
        out_text = out_bytes.decode('gb18030')  # Chinese Windows 10 x64 outputs GB18030 by default; GB18030 covers all of Unicode, so decoding is lossless
out_list = out_text.splitlines()
print(f'\n[!]{ES_prog_path} is searching {ES_search_path} for movies as number parser test cases...')
    print(f'[+]Found {len(out_list)} movies.')
for filename in out_list:
try:
n = get_number(True, filename)
if n:
                print(' [{0}] {2}# {1}'.format(n, filename, '#uncensored' if is_uncensored(n) else ''))
else:
print(f'[-]Number return None. # {filename}')
except Exception as e:
print(f'[-]Number Parser exception: {e} [{filename}]')
sys.exit(0)

View File

@@ -2,6 +2,7 @@
# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser -Force
$CLOUDSCRAPER_PATH=$(python -c 'import cloudscraper as _; print(_.__path__[0])' | select -Last 1)
$OPENCC_PATH=$(python -c 'import opencc as _; print(_.__path__[0])' | select -Last 1)
mkdir build
mkdir __pycache__
@@ -10,7 +11,9 @@ pyinstaller --onefile AV_Data_Capture.py `
--hidden-import ADC_function.py `
--hidden-import core.py `
--add-data "$CLOUDSCRAPER_PATH;cloudscraper" `
--add-data "Img;Img"
--add-data "$OPENCC_PATH;opencc" `
--add-data "Img;Img" `
--add-data "config.ini;." `
rmdir -Recurse -Force build
rmdir -Recurse -Force __pycache__
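# Note on the inline python -c trick above (illustration only): PyInstaller's
# --add-data needs the on-disk location of cloudscraper and opencc so that their
# bundled data files (e.g. cloudscraper's browsers.json, OpenCC's dictionaries)
# end up inside the one-file executable. The command simply resolves each
# package directory, equivalent to:
#
#   import cloudscraper
#   print(cloudscraper.__path__[0])  # e.g. <site-packages>/cloudscraper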

View File

@@ -8,3 +8,4 @@ pysocks==1.7.1
urllib3==1.24.3
certifi==2020.12.5
MechanicalSoup==1.1.0
opencc-python-reimplemented
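# The new opencc-python-reimplemented dependency is presumably for Traditional/Simplified
# Chinese conversion of scraped text. A minimal sketch of its API (illustration only):
#
#   from opencc import OpenCC
#   cc = OpenCC('t2s')       # Traditional -> Simplified; use 's2t' for the reverse
#   cc.convert('漢字')        # -> '汉字'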

View File

@@ -1,4 +1,9 @@
pkg install python38 py38-requests py38-pip py38-lxml py38-pillow py38-cloudscraper py38-pysocks git zip py38-beautifulsoup448
pkg install python38 py38-requests py38-pip py38-lxml py38-pillow py38-cloudscraper py38-pysocks git zip py38-beautifulsoup448 py38-mechanicalsoup
pip install pyquery pyinstaller
pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py --add-data "$(python3.8 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" --add-data "Img:Img"
pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
--add-data "$(python3.8 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
--add-data "$(python3.8 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
--add-data "Img:Img" \
--add-data "config.ini:." \
cp config.ini ./dist

View File

@@ -12,5 +12,10 @@
#fi
pip3 install -r requirements.txt
pip3 install cloudscraper==1.2.52
pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py --add-data "$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" --add-data "Img:Img"
pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
--add-data "$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
--add-data "$(python3 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
--add-data "Img:Img" \
--add-data "config.ini:." \
cp config.ini ./dist