@@ -4,26 +4,33 @@ sys.path.append('../')
|
|||||||
from WebCrawler.crawler import *
|
from WebCrawler.crawler import *
|
||||||
from ADC_function import *
|
from ADC_function import *
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from requests_html import HTMLSession
|
|
||||||
|
|
||||||
def main(number):
|
def main(number):
|
||||||
config_file = config.getInstance()
|
save_cookies = False
|
||||||
|
cookie_filename = 'gcolle.json'
|
||||||
|
try:
|
||||||
|
gcolle_cooikes, cookies_filepath = load_cookies(cookie_filename)
|
||||||
|
session = get_html_session(cookies=gcolle_cooikes)
|
||||||
number = number.upper().replace('GCOLLE-','')
|
number = number.upper().replace('GCOLLE-','')
|
||||||
session = get_html_session()
|
|
||||||
|
|
||||||
htmlcode = get_html_session('https://gcolle.net/product_info.php/products_id/' + number)
|
|
||||||
htmlcode = session.get('https://gcolle.net/product_info.php/products_id/' + number).text
|
htmlcode = session.get('https://gcolle.net/product_info.php/products_id/' + number).text
|
||||||
html = etree.HTML(htmlcode)
|
|
||||||
# R18 countinue
|
|
||||||
htmlcode = session.get(html.xpath('//*[@id="main_content"]/table[1]/tbody/tr/td[2]/table/tbody/tr/td/h4/a[2]/@href')[0]).text
|
|
||||||
gcolle_crawler = Crawler(htmlcode)
|
gcolle_crawler = Crawler(htmlcode)
|
||||||
|
r18_continue = gcolle_crawler.getString('//*[@id="main_content"]/table[1]/tbody/tr/td[2]/table/tbody/tr/td/h4/a[2]/@href')
|
||||||
|
if r18_continue and r18_continue.startswith('http'):
|
||||||
|
htmlcode = session.get(r18_continue).text
|
||||||
|
gcolle_crawler = Crawler(htmlcode)
|
||||||
|
save_cookies = True
|
||||||
|
cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
|
||||||
|
|
||||||
number_html = gcolle_crawler.getString('//td[contains(text(),"商品番号")]/../td[2]/text()')
|
number_html = gcolle_crawler.getString('//td[contains(text(),"商品番号")]/../td[2]/text()')
|
||||||
if number != number_html:
|
if number != number_html:
|
||||||
if config_file.debug():
|
raise Exception('[-]gcolle.py: number not match')
|
||||||
print('[!]gcolle.py: number not match')
|
|
||||||
return {'title':''}
|
if save_cookies:
|
||||||
|
cookies_save = Path.home() / f".local/share/mdc/{cookie_filename}"
|
||||||
|
cookies_save.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
cookies_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
|
||||||
|
|
||||||
# get extrafanart url
|
# get extrafanart url
|
||||||
if len(gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src')) == 0:
|
if len(gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src')) == 0:
|
||||||
@@ -63,12 +70,19 @@ def main(number):
|
|||||||
# else:
|
# else:
|
||||||
# print(k,v)
|
# print(k,v)
|
||||||
# print('===============================================================')
|
# print('===============================================================')
|
||||||
|
except Exception as e:
|
||||||
|
dic = {'title':''}
|
||||||
|
if config.getInstance().debug():
|
||||||
|
print(e)
|
||||||
|
|
||||||
return dic
|
return dic
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main('840724')
|
from pprint import pprint
|
||||||
main('840386')
|
config.getInstance().set_override("debug_mode:switch=1")
|
||||||
main('838671')
|
pprint(main('840724'))
|
||||||
main('814179')
|
pprint(main('840386'))
|
||||||
main('834255')
|
pprint(main('838671'))
|
||||||
main('814179')
|
pprint(main('814179'))
|
||||||
|
pprint(main('834255'))
|
||||||
|
pprint(main('814179'))
|
||||||
|
|||||||
@@ -10,4 +10,3 @@ certifi==2020.12.5
|
|||||||
MechanicalSoup==1.1.0
|
MechanicalSoup==1.1.0
|
||||||
opencc-python-reimplemented
|
opencc-python-reimplemented
|
||||||
face_recognition
|
face_recognition
|
||||||
requests_html
|
|
||||||
Reference in New Issue
Block a user