Add source getchu

This commit is contained in:
yoshiko2
2022-05-08 03:25:53 +08:00
parent 89e651b279
commit e3029ef8cd
3 changed files with 51 additions and 1 deletions

View File

@@ -25,6 +25,7 @@ from . import fc2club
from . import mv91 from . import mv91
from . import madou from . import madou
from . import gcolle from . import gcolle
from . import getchu
def get_data_state(data: dict) -> bool: # 元数据获取失败检测 def get_data_state(data: dict) -> bool: # 元数据获取失败检测
@@ -65,6 +66,7 @@ def get_data_from_json(file_number, oCC):
"mv91": mv91.main, "mv91": mv91.main,
"madou": madou.main, "madou": madou.main,
"gcolle": gcolle.main, "gcolle": gcolle.main,
"getchu": getchu.main,
} }
conf = config.getInstance() conf = config.getInstance()

48
WebCrawler/getchu.py Normal file
View File

@@ -0,0 +1,48 @@
import sys
sys.path.append('../')
from ADC_function import *
from WebCrawler.crawler import *
import re
def _first_match(pattern, text):
    """Return the first substring of ``text`` matching ``pattern``, or "" if none."""
    found = re.search(pattern, str(text))
    return found.group(0) if found else ""


def main(number):
    """Scrape a dl.getchu.com item page and return its metadata as JSON.

    Args:
        number: Item identifier appended to ``https://dl.getchu.com/i/``,
            e.g. ``'item4040774'``.

    Returns:
        A JSON string (keys sorted, 4-space indent, non-ASCII kept as-is)
        holding the scraped fields together with ``number``, ``website``
        and ``source``.
    """
    base_url = "https://dl.getchu.com"
    item_url = base_url + "/i/" + number
    # NOTE(review): Crawler.getString / getStrings are assumed to return a
    # str / list of str for an XPath query and to be side-effect free, so
    # each distinct query is evaluated only once below -- confirm.
    page = Crawler(get_html(item_url))

    # The circle name is reused for studio, actor, label and series.
    circle = page.getString("//td[contains(text(),'サークル')]/following-sibling::td/a/text()")
    release = page.getString("//td[contains(text(),'配信開始日')]/following-sibling::td/text()").replace("/", "-")
    page_count_text = page.getString("//td[contains(text(),'画像数&ページ数')]/following-sibling::td/text()")

    # Only build an absolute cover URL when an image path was actually found;
    # otherwise the bare host would be reported as the cover.
    cover_path = page.getString("//td[contains(@bgcolor,'#ffffff')]/img/@src")
    cover = base_url + cover_path if cover_path else ""

    # Gallery links are site-relative, so prefix each with the host.
    extrafanart = [
        base_url + href
        for href in page.getStrings("//td[contains(@style,'background-color: #444444;')]/a/@href")
    ]
    # The description arrives as several text nodes; merge them into one string.
    outline = "".join(page.getStrings("//*[contains(text(),'作品内容')]/following-sibling::td/text()"))

    dic = {
        "title": page.getString("//div[contains(@style,'color: #333333; padding: 3px 0px 0px 5px;')]/text()"),
        "cover": cover,
        "director": page.getString("//td[contains(text(),'作者')]/following-sibling::td/text()"),
        "studio": circle,
        "actor": circle,
        "label": circle,
        # First run of digits only: stringifying the whole findall() list
        # leaked quote/comma characters whenever several numbers were present.
        "runtime": _first_match(r'\d+', page_count_text),
        "release": release,
        "tag": page.getStrings("//td[contains(text(),'趣向')]/following-sibling::td/a/text()"),
        "outline": outline,
        "extrafanart": extrafanart,
        "series": circle,
        "number": number,
        "imagecut": 4,
        "year": _first_match(r'\d{4}', release),
        "actor_photo": "",
        "website": item_url,
        "source": "getchu.py",
    }
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
if __name__ == '__main__':
    # Manual smoke test: scrape two sample items and print the resulting JSON.
    for sample_item in ('item4040774', 'item4039026'):
        print(main(sample_item))

View File

@@ -46,7 +46,7 @@ max_title_len=50
update_check=1 update_check=1
[priority] [priority]
website=javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,fc2club,madou,mv91,javdb,gcolle website=javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,fc2club,madou,mv91,getchu,javdb,gcolle
[escape] [escape]
literals=\()/ literals=\()/