WebCrawler:全面换装getInstance(),厘清airav.py与javbus.py及javdb.py的相爱相杀

This commit is contained in:
lededev
2021-10-08 11:46:35 +08:00
parent cf072e79d1
commit a405c5c41b
11 changed files with 206 additions and 167 deletions

View File

@@ -134,6 +134,14 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
print('[-]Movie Number not found!')
return None
# 增加number严格判断,避免提交任何number总是返回"本橋実来 ADZ335"这种返回number不一致的数据源故障
# 目前选用number命名规则是javdb.com Domain Creation Date: 2013-06-19T18:34:27Z
# 然而也可以跟进关注其它命名规则例如airav.wiki Domain Creation Date: 2019-08-28T07:18:42.0Z
# 如果将来javdb.com命名规则下不同Studio出现同名碰撞导致无法区分,可考虑更换规则,更新相应的number分析和抓取代码。
if str(json_data.get('number')).upper() != file_number.upper():
print('[-]Movie number has changed! [{}]->[{}]'.format(file_number, str(json_data.get('number'))))
return None
# ================================================网站规则添加结束================================================
title = json_data.get('title')
@@ -225,6 +233,8 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数
studio = studio.replace('エムズビデオグループ','Ms Video Group')
studio = studio.replace('ミニマム','Minimum')
studio = studio.replace('ワープエンタテインメント','WAAP Entertainment')
studio = studio.replace('pacopacomama,パコパコママ','pacopacomama')
studio = studio.replace('パコパコママ','pacopacomama')
studio = re.sub('.*/妄想族','妄想族',studio)
studio = studio.replace('/',' ')
# === 替换Studio片假名 END