From 9ed239105ac85e84902424dd9a2590ba89cfe845 Mon Sep 17 00:00:00 2001 From: tojito Date: Mon, 19 Jul 2021 01:04:50 +0800 Subject: [PATCH] add fc2club --- ADC_function.py | 2 + WebCrawler/fc2club.py | 114 ++++++++++++++++++++++++++++++++++++++++++ config.ini | 4 +- config.py | 2 +- core.py | 3 ++ 5 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 WebCrawler/fc2club.py diff --git a/ADC_function.py b/ADC_function.py index 6fe0370..93fca51 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -1,3 +1,4 @@ +from os import replace import requests import hashlib import pathlib @@ -583,3 +584,4 @@ def abs_url(base_url: str, href: str) -> str: if href.startswith('http'): return href return urljoin(base_url, href) + diff --git a/WebCrawler/fc2club.py b/WebCrawler/fc2club.py new file mode 100644 index 0000000..7d0fac6 --- /dev/null +++ b/WebCrawler/fc2club.py @@ -0,0 +1,114 @@ +import sys +sys.path.append('../') +import re +from lxml import etree#need install +import json +import ADC_function +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) + +def getTitle_fc2com(htmlcode): #获取标题 + html = etree.fromstring(htmlcode,etree.HTMLParser()) + result = str(html.xpath('//*[@class="show-top-grids"]/div[1]/h3/text()')).strip(" ['']") + print(result) + return result +def getActor_fc2com(htmlcode): + try: + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('//*[@class="show-top-grids"]/div[1]/h5[5]/a/text()')).strip(" ['']") + print(result) + return result + except: + return '' +def getStudio_fc2com(htmlcode): #获取厂商 + try: + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('//*[@class="show-top-grids"]/div[1]/h5[3]/a[1]/text()')).strip(" ['']") + print(result) + return result + except: + return '' +def getNum_fc2com(htmlcode): #获取番号 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + title = str(html.xpath('//*[@class="show-top-grids"]/div[1]/h3/text()')).strip(" ['']") + num = title.split(' ')[0] + if num.startswith('FC2') != True: + num = '' + return num +def getRelease_fc2com(htmlcode2): # + return '' +def getCover_fc2com(htmlcode2): #获取img # + html = etree.fromstring(htmlcode2, etree.HTMLParser()) + imgUrl = str(html.xpath('//*[@class="slides"]/li[1]/img/@src')).strip(" ['']") + imgUrl = imgUrl.replace('../','https://fc2club.net/') + print(imgUrl) + return imgUrl +# def getOutline_fc2com(htmlcode2): #获取番号 # +# xpath_html = etree.fromstring(htmlcode2, etree.HTMLParser()) +# path = str(xpath_html.xpath('//*[@id="top"]/div[1]/section[4]/iframe/@src')).strip(" ['']") +# html = etree.fromstring(ADC_function.get_html('https://adult.contents.fc2.com/'+path), etree.HTMLParser()) +# print('https://adult.contents.fc2.com'+path) +# print(ADC_function.get_html('https://adult.contents.fc2.com'+path,cookies={'wei6H':'1'})) +# result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',') +# return result +def getTag_fc2com(htmlcode): #获取tag + html = etree.fromstring(htmlcode,etree.HTMLParser()) + a = html.xpath('//*[@class="show-top-grids"]/div[1]/h5[4]/a') + tag = [] + for i in range(len(a)): + tag.append(str(a[i].xpath('text()')).strip("['']")) + return tag +def getYear_fc2com(release): + return '' + +def getExtrafanart(htmlcode): # 获取剧照 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + imgUrl = str(html.xpath('//*[@class="slides"]/li[1]/img/@src')).strip(" ['']") + imgUrl = imgUrl.replace('../','https://fc2club.net/') + return imgUrl + +def getTrailer(htmlcode): + return '' + +def main(number): + try: + number = number.replace('FC2-', '').replace('fc2-', '') + webUrl = 'https://fc2club.net/html/FC2-' + number + '.html' + #print(webUrl) + htmlcode2 = ADC_function.get_html(webUrl) + #print(htmlcode2) + actor = getActor_fc2com(htmlcode2) + if getActor_fc2com(htmlcode2) == '': + actor = 'FC2系列' + dic = { + 'title': getTitle_fc2com(htmlcode2), + 'studio': getStudio_fc2com(htmlcode2), + 'year': getYear_fc2com(getRelease_fc2com(htmlcode2)), + 'outline': '', # getOutline_fc2com(htmlcode2), + 'runtime': '', + 'director': getStudio_fc2com(htmlcode2), + 'actor': actor, + 'release': getRelease_fc2com(htmlcode2), + 'number': 'FC2-' + number, + 'label': '', + 'cover': getCover_fc2com(htmlcode2), + 'extrafanart': getExtrafanart(htmlcode2), + "trailer": getTrailer(htmlcode2), + 'imagecut': 0, + 'tag': getTag_fc2com(htmlcode2), + 'actor_photo': '', + 'website': 'https://fc2club.net/html/FC2-' + number + '.html/', + 'source': 'https://fc2club.net/html/FC2-' + number + '.html/', + 'series': '', + } + except Exception as e: + if ADC_function.config.Config().debug(): + print(e) + dic = {"title": ""} + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') + return js + +if __name__ == '__main__': + print(main('FC2-402422')) + diff --git a/config.ini b/config.ini index d3a03d9..cb490cd 100644 --- a/config.ini +++ b/config.ini @@ -3,7 +3,7 @@ main_mode=1 failed_output_folder=failed success_output_folder=JAV_output soft_link=0 -failed_move=0 +failed_move=1 auto_exit=0 transalte_to_sc=0 multi_threading=1 @@ -28,7 +28,7 @@ max_title_len=50 update_check=1 [priority] -website=javbus,javdb,airav,fanza,xcity,mgstage,fc2,avsox,dlsite,carib +website=fc2club,javbus,javdb,airav,fanza,xcity,mgstage,fc2,avsox,dlsite,carib [escape] literals=\()/ diff --git a/config.py b/config.py index 72c9417..84c07fb 100644 --- a/config.py +++ b/config.py @@ -190,7 +190,7 @@ class Config: sec5 = "priority" conf.add_section(sec5) - conf.set(sec5, "website", "airav,javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,xcity") + conf.set(sec5, "website", "airav,javbus,javdb,fanza,xcity,mgstage,fc2,fc2club,avsox,jav321,xcity") sec6 = "escape" conf.add_section(sec6) diff --git a/core.py b/core.py index f0c39c1..2062e82 100755 --- a/core.py +++ b/core.py @@ -26,6 +26,7 @@ from WebCrawler import xcity # from WebCrawler import javlib from WebCrawler import dlsite from WebCrawler import carib +from WebCrawler import fc2club def escape_path(path, escape_literals: str): # Remove escape literals @@ -67,6 +68,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON # "javlib": javlib.main, "dlsite": dlsite.main, "carib": carib.main, + "fc2club": fc2club.main } # default fetch order list, from the beginning to the end @@ -91,6 +93,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON ): sources.insert(0, sources.pop(sources.index("javdb"))) sources.insert(1, sources.pop(sources.index("fc2"))) + sources.insert(2, sources.pop(sources.index("fc2club"))) elif "dlsite" in sources and ( "rj" in lo_file_number or "vj" in lo_file_number ):