From fceed1d04e1c4f97b685c553da207b8a7cda5fce Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Mon, 3 May 2021 22:14:22 +0800 Subject: [PATCH] Fix mgstage source error Exception handling --- WebCrawler/mgstage.py | 45 +++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/WebCrawler/mgstage.py b/WebCrawler/mgstage.py index e98eb5a..b29f2fd 100644 --- a/WebCrawler/mgstage.py +++ b/WebCrawler/mgstage.py @@ -114,29 +114,32 @@ def main(number2): a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','') b = str(soup.find(attrs={'id': 'introduction'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','') #print(b) - dic = { - 'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''), - 'studio': getStudio(a), - 'outline': getOutline(b), - 'runtime': getRuntime(a), - 'director': getDirector(a), - 'actor': getActor(a), - 'release': getRelease(a), - 'number': getNum(a), - 'cover': getCover(htmlcode), - 'imagecut': 0, - 'tag': getTag(a), - 'label':getLabel(a), - 'extrafanart': getExtrafanart(htmlcode), - 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': '', - 'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/', - 'source': 'mgstage.py', - 'series': getSeries(a), - } + try: + dic = { + 'title': getTitle(htmlcode).replace("\\n", '').replace(' ', ''), + 'studio': getStudio(a), + 'outline': getOutline(b), + 'runtime': getRuntime(a), + 'director': getDirector(a), + 'actor': getActor(a), + 'release': getRelease(a), + 'number': getNum(a), + 'cover': getCover(htmlcode), + 'imagecut': 0, + 'tag': getTag(a), + 'label': getLabel(a), + 'extrafanart': getExtrafanart(htmlcode), + 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()), + 'actor_photo': '', + 'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/', + 'source': 'mgstage.py', + 'series': getSeries(a), + } + except: + dic = {"title": ""} + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js - #print(htmlcode) if __name__ == '__main__': print(main('SIRO-4149'))