"""Metadata crawler for item pages on dl.getchu.com (WebCrawler/getchu.py).

This module was recovered from a git patch whose line breaks had been
collapsed.  The same patch also registers the crawler elsewhere:

* ``WebCrawler/__init__.py`` gains ``from . import getchu`` (after the
  ``gcolle`` import) and the entry ``"getchu": getchu.main`` (after
  ``"gcolle": gcolle.main``) in the mapping inside ``get_data_from_json``.
* ``config.ini`` inserts ``getchu`` into ``[priority] website`` between
  ``mv91`` and ``javdb``.
"""
import sys
sys.path.append('../')
from ADC_function import *
from WebCrawler.crawler import *
import json
import re

# Host used both for the item page and to absolutise relative image paths.
_SITE_ROOT = "https://dl.getchu.com"


def _first_match(pattern, text):
    """Return the first match of *pattern* in ``str(text)``, or ``""`` if none."""
    found = re.search(pattern, str(text))
    return found.group(0) if found else ""


def main(number):
    """Scrape one dl.getchu.com item page and return its metadata as JSON.

    Args:
        number: Item identifier appended to ``https://dl.getchu.com/i/``
            (the self-test below uses values such as ``'item4040774'``).

    Returns:
        A JSON string (keys sorted, 4-space indent, non-ASCII kept verbatim)
        with the keys ``title``, ``cover``, ``director``, ``studio``,
        ``actor``, ``label``, ``runtime``, ``release``, ``tag``, ``outline``,
        ``extrafanart``, ``series``, ``number``, ``imagecut``, ``year``,
        ``actor_photo``, ``website`` and ``source``.
    """
    page_url = _SITE_ROOT + "/i/" + number
    page = Crawler(get_html(page_url))

    # The circle name is the only credit on the page, so it feeds
    # studio/actor/label/series alike; query it once instead of four times.
    circle = page.getString("//td[contains(text(),'サークル')]/following-sibling::td/a/text()")

    # Dates are shown with slashes on the site; normalise to dashes.
    release = page.getString("//td[contains(text(),'配信開始日')]/following-sibling::td/text()").replace("/", "-")

    # NOTE(review): assumes getString() yields an empty string when nothing
    # matches - confirm against WebCrawler.crawler.  An absent image must not
    # turn into the bare host URL, which would look like a valid cover.
    cover_path = page.getString("//td[contains(@bgcolor,'#ffffff')]/img/@src")
    cover = _SITE_ROOT + cover_path if cover_path else ""

    counts_text = page.getString("//td[contains(text(),'画像数&ページ数')]/following-sibling::td/text()")

    # Gallery links are site-relative; make them absolute.
    gallery = [
        _SITE_ROOT + href
        for href in page.getStrings("//td[contains(@style,'background-color: #444444;')]/a/@href")
    ]

    # The description arrives as several text nodes; glue them together.
    outline = "".join(page.getStrings("//*[contains(text(),'作品内容')]/following-sibling::td/text()"))

    dic = {
        "title": page.getString("//div[contains(@style,'color: #333333; padding: 3px 0px 0px 5px;')]/text()"),
        "cover": cover,
        "director": page.getString("//td[contains(text(),'作者')]/following-sibling::td/text()"),
        "studio": circle,
        "actor": circle,
        "label": circle,
        # First run of digits in the "image count & page count" cell.
        "runtime": _first_match(r'\d+', counts_text),
        "release": release,
        "tag": page.getStrings("//td[contains(text(),'趣向')]/following-sibling::td/a/text()"),
        "outline": outline,
        "extrafanart": gallery,
        "series": circle,
        "number": number,
        "imagecut": 4,
        # First four-digit run of the release date.
        "year": _first_match(r'\d{4}', release),
        "actor_photo": "",
        "website": page_url,
        "source": "getchu.py",
    }

    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))


if __name__ == '__main__':
    # Manual smoke test against two live item pages.
    for item_id in ['item4040774', 'item4039026']:
        print(main(item_id))