add fc2club
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from os import replace
|
||||
import requests
|
||||
import hashlib
|
||||
import pathlib
|
||||
@@ -583,3 +584,4 @@ def abs_url(base_url: str, href: str) -> str:
|
||||
if href.startswith('http'):
|
||||
return href
|
||||
return urljoin(base_url, href)
|
||||
|
||||
|
||||
114
WebCrawler/fc2club.py
Normal file
114
WebCrawler/fc2club.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import sys
|
||||
sys.path.append('../')
|
||||
import re
|
||||
from lxml import etree#need install
|
||||
import json
|
||||
import ADC_function
|
||||
# import sys
|
||||
# import io
|
||||
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
|
||||
|
||||
def getTitle_fc2com(htmlcode):  # Get the title
    """Return the title text of an fc2club detail page.

    Args:
        htmlcode: HTML source of the page.

    Returns:
        The text found at ``//*[@class="show-top-grids"]/div[1]/h3`` with
        surrounding whitespace removed, or ``''`` when nothing matches.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    texts = html.xpath('//*[@class="show-top-grids"]/div[1]/h3/text()')
    # Join the text nodes directly instead of stringifying the list:
    # ``str(list)`` goes through ``repr``, which turns characters such as
    # U+3000 or newlines into literal escape sequences, switches to double
    # quotes when the text contains an apostrophe, and the follow-up
    # ``strip(" ['']")`` would also eat genuine leading/trailing brackets.
    result = ''.join(texts).strip()
    print(result)
    return result
|
||||
def getActor_fc2com(htmlcode):
    """Return the actor name(s) of an fc2club detail page.

    Args:
        htmlcode: HTML source of the page.

    Returns:
        The link texts found at
        ``//*[@class="show-top-grids"]/div[1]/h5[5]/a``, joined with ``,``
        when there are several, or ``''`` when nothing matches or the page
        cannot be parsed.
    """
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        names = html.xpath('//*[@class="show-top-grids"]/div[1]/h5[5]/a/text()')
        # Use the text nodes themselves rather than ``str(list)``, whose
        # ``repr`` output escapes non-printable characters and leaves
        # quote/bracket residue between multiple matches.
        cleaned = [name.strip() for name in names]
        result = ','.join(name for name in cleaned if name)
        print(result)
        return result
    except Exception:
        # Best effort: a page that cannot be parsed simply has no actor.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return ''
|
||||
def getStudio_fc2com(htmlcode):  # Get the studio
    """Return the studio name of an fc2club detail page.

    Args:
        htmlcode: HTML source of the page.

    Returns:
        The text of the first link at
        ``//*[@class="show-top-grids"]/div[1]/h5[3]/a[1]`` with surrounding
        whitespace removed, or ``''`` when nothing matches or the page
        cannot be parsed.
    """
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        texts = html.xpath('//*[@class="show-top-grids"]/div[1]/h5[3]/a[1]/text()')
        # Join the text nodes directly; ``str(list)`` would run the text
        # through ``repr`` and corrupt non-printable or quoted characters.
        result = ''.join(texts).strip()
        print(result)
        return result
    except Exception:
        # Best effort: treat an unparsable page as "no studio" while still
        # letting KeyboardInterrupt/SystemExit propagate.
        return ''
|
||||
def getNum_fc2com(htmlcode):  # Get the number (ID)
    """Return the FC2 number that leads the page title.

    Args:
        htmlcode: HTML source of the page.

    Returns:
        The first space-separated token of the title found at
        ``//*[@class="show-top-grids"]/div[1]/h3`` when that token starts
        with ``FC2``; otherwise ``''``.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    texts = html.xpath('//*[@class="show-top-grids"]/div[1]/h3/text()')
    # Read the text nodes directly; ``str(list)`` would pass the title
    # through ``repr`` and could leave escape sequences or quote residue.
    title = ''.join(texts).strip()
    num = title.split(' ')[0]
    return num if num.startswith('FC2') else ''
|
||||
def getRelease_fc2com(htmlcode2):
    """Return the release date for the page.

    No release date is extracted by this scraper: the argument is ignored
    and the result is always the empty string.
    """
    release = ''
    return release
|
||||
def getCover_fc2com(htmlcode2):  # Get the cover image
    """Return the cover image URL of an fc2club detail page.

    Args:
        htmlcode2: HTML source of the page.

    Returns:
        The ``src`` of the first image matched by
        ``//*[@class="slides"]/li[1]/img`` with every ``../`` replaced by
        ``https://fc2club.net/``, or ``''`` when no image matches.
    """
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    sources = html.xpath('//*[@class="slides"]/li[1]/img/@src')
    # Take the first match explicitly; stringifying the whole result list
    # would glue several matches together into one unusable "URL".
    imgUrl = sources[0].strip() if sources else ''
    imgUrl = imgUrl.replace('../', 'https://fc2club.net/')
    print(imgUrl)
    return imgUrl
|
||||
# def getOutline_fc2com(htmlcode2): #获取番号 #
|
||||
# xpath_html = etree.fromstring(htmlcode2, etree.HTMLParser())
|
||||
# path = str(xpath_html.xpath('//*[@id="top"]/div[1]/section[4]/iframe/@src')).strip(" ['']")
|
||||
# html = etree.fromstring(ADC_function.get_html('https://adult.contents.fc2.com/'+path), etree.HTMLParser())
|
||||
# print('https://adult.contents.fc2.com'+path)
|
||||
# print(ADC_function.get_html('https://adult.contents.fc2.com'+path,cookies={'wei6H':'1'}))
|
||||
# result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
|
||||
# return result
|
||||
def getTag_fc2com(htmlcode):  # Get the tags
    """Return the tag list of an fc2club detail page.

    Args:
        htmlcode: HTML source of the page.

    Returns:
        A list with one string per link matched by
        ``//*[@class="show-top-grids"]/div[1]/h5[4]/a``; a link without
        text contributes ``''``. The list is empty when nothing matches.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    anchors = html.xpath('//*[@class="show-top-grids"]/div[1]/h5[4]/a')
    # One entry per anchor, built from its text nodes directly rather than
    # from ``str(list)``, whose ``repr`` output escapes non-printable
    # characters and leaves quote residue in the tag.
    return [''.join(anchor.xpath('text()')) for anchor in anchors]
|
||||
def getYear_fc2com(release):
    """Return the release year.

    The ``release`` argument is ignored; the result is always the empty
    string.
    """
    year = ''
    return year
|
||||
|
||||
def getExtrafanart(htmlcode):  # Get the stills
    """Return the still-image URL of an fc2club detail page.

    Args:
        htmlcode: HTML source of the page.

    Returns:
        The ``src`` of the first image matched by
        ``//*[@class="slides"]/li[1]/img`` with every ``../`` replaced by
        ``https://fc2club.net/``, or ``''`` when no image matches.
    """
    # NOTE(review): this reads the same node as getCover_fc2com and yields
    # a single URL string - confirm that is what consumers of
    # 'extrafanart' expect.
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    sources = html.xpath('//*[@class="slides"]/li[1]/img/@src')
    # Take the first match explicitly; stringifying the whole result list
    # would glue several matches together into one unusable "URL".
    imgUrl = sources[0].strip() if sources else ''
    return imgUrl.replace('../', 'https://fc2club.net/')
|
||||
|
||||
def getTrailer(htmlcode):
    """Return the trailer URL.

    No trailer is extracted by this scraper: the argument is ignored and
    the result is always the empty string.
    """
    trailer = ''
    return trailer
|
||||
|
||||
def main(number):
    """Scrape fc2club.net for one FC2 number and return the metadata as JSON.

    Args:
        number: The FC2 number; a leading ``FC2-`` or ``fc2-`` is removed
            before the page URL is built.

    Returns:
        A JSON string. On success it encodes the full metadata dict; if
        anything raises while fetching or parsing, it encodes
        ``{"title": ""}`` instead (the exception is printed only when the
        project config reports debug mode).
    """
    try:
        number = number.replace('FC2-', '').replace('fc2-', '')
        webUrl = 'https://fc2club.net/html/FC2-' + number + '.html'
        htmlcode2 = ADC_function.get_html(webUrl)

        # Each getter re-parses the whole page, so call every one exactly
        # once and reuse the value where it feeds more than one field.
        actor = getActor_fc2com(htmlcode2)
        if actor == '':
            # Fallback label used when the page lists no actor.
            actor = 'FC2系列'
        title = getTitle_fc2com(htmlcode2)
        studio = getStudio_fc2com(htmlcode2)
        release = getRelease_fc2com(htmlcode2)

        # NOTE(review): unlike webUrl, the stored links end in '.html/' -
        # kept as-is; confirm whether the trailing slash is intended.
        page_link = 'https://fc2club.net/html/FC2-' + number + '.html/'

        dic = {
            'title': title,
            'studio': studio,
            'year': getYear_fc2com(release),
            'outline': '',  # getOutline_fc2com(htmlcode2),
            'runtime': '',
            'director': studio,  # the studio value doubles as director
            'actor': actor,
            'release': release,
            'number': 'FC2-' + number,
            'label': '',
            'cover': getCover_fc2com(htmlcode2),
            'extrafanart': getExtrafanart(htmlcode2),
            "trailer": getTrailer(htmlcode2),
            'imagecut': 0,
            'tag': getTag_fc2com(htmlcode2),
            'actor_photo': '',
            'website': page_link,
            'source': page_link,
            'series': '',
        }
    except Exception as e:
        if ADC_function.config.Config().debug():
            print(e)
        # An empty title is the failure marker handed back to the caller.
        dic = {"title": ""}
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: scrape one sample number and dump the JSON result.
    sample_json = main('FC2-402422')
    print(sample_json)
|
||||
|
||||
@@ -3,7 +3,7 @@ main_mode=1
|
||||
failed_output_folder=failed
|
||||
success_output_folder=JAV_output
|
||||
soft_link=0
|
||||
failed_move=0
|
||||
failed_move=1
|
||||
auto_exit=0
|
||||
transalte_to_sc=0
|
||||
multi_threading=1
|
||||
@@ -28,7 +28,7 @@ max_title_len=50
|
||||
update_check=1
|
||||
|
||||
[priority]
|
||||
website=javbus,javdb,airav,fanza,xcity,mgstage,fc2,avsox,dlsite,carib
|
||||
website=fc2club,javbus,javdb,airav,fanza,xcity,mgstage,fc2,avsox,dlsite,carib
|
||||
|
||||
[escape]
|
||||
literals=\()/
|
||||
|
||||
@@ -190,7 +190,7 @@ class Config:
|
||||
|
||||
sec5 = "priority"
|
||||
conf.add_section(sec5)
|
||||
conf.set(sec5, "website", "airav,javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,xcity")
|
||||
conf.set(sec5, "website", "airav,javbus,javdb,fanza,xcity,mgstage,fc2,fc2club,avsox,jav321,xcity")
|
||||
|
||||
sec6 = "escape"
|
||||
conf.add_section(sec6)
|
||||
|
||||
3
core.py
3
core.py
@@ -26,6 +26,7 @@ from WebCrawler import xcity
|
||||
# from WebCrawler import javlib
|
||||
from WebCrawler import dlsite
|
||||
from WebCrawler import carib
|
||||
from WebCrawler import fc2club
|
||||
|
||||
|
||||
def escape_path(path, escape_literals: str): # Remove escape literals
|
||||
@@ -67,6 +68,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON
|
||||
# "javlib": javlib.main,
|
||||
"dlsite": dlsite.main,
|
||||
"carib": carib.main,
|
||||
"fc2club": fc2club.main
|
||||
}
|
||||
|
||||
# default fetch order list, from the beginning to the end
|
||||
@@ -91,6 +93,7 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON
|
||||
):
|
||||
sources.insert(0, sources.pop(sources.index("javdb")))
|
||||
sources.insert(1, sources.pop(sources.index("fc2")))
|
||||
sources.insert(2, sources.pop(sources.index("fc2club")))
|
||||
elif "dlsite" in sources and (
|
||||
"rj" in lo_file_number or "vj" in lo_file_number
|
||||
):
|
||||
|
||||
Reference in New Issue
Block a user