From 0dff1a72c00ae75cdec7c39ebeee5b49e0a520d5 Mon Sep 17 00:00:00 2001 From: lededev Date: Wed, 20 Apr 2022 12:48:38 +0800 Subject: [PATCH] clean up --- WebCrawler/gcolle.py | 15 +++++++-------- requirements.txt | 1 - 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/WebCrawler/gcolle.py b/WebCrawler/gcolle.py index 7330cdd..9a7dbd3 100644 --- a/WebCrawler/gcolle.py +++ b/WebCrawler/gcolle.py @@ -4,7 +4,6 @@ sys.path.append('../') from WebCrawler.crawler import * from ADC_function import * from lxml import etree -from requests_html import HTMLSession def main(number): config_file = config.getInstance() @@ -12,7 +11,6 @@ def main(number): number = number.upper().replace('GCOLLE-','') session = get_html_session() - htmlcode = get_html_session('https://gcolle.net/product_info.php/products_id/' + number) htmlcode = session.get('https://gcolle.net/product_info.php/products_id/' + number).text html = etree.HTML(htmlcode) # R18 countinue @@ -66,9 +64,10 @@ def main(number): return dic if __name__ == '__main__': - main('840724') - main('840386') - main('838671') - main('814179') - main('834255') - main('814179') + from pprint import pprint + pprint(main('840724')) + pprint(main('840386')) + pprint(main('838671')) + pprint(main('814179')) + pprint(main('834255')) + pprint(main('814179')) diff --git a/requirements.txt b/requirements.txt index 333a750..7b63d8e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,3 @@ certifi==2020.12.5 MechanicalSoup==1.1.0 opencc-python-reimplemented face_recognition -requests_html \ No newline at end of file