diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py
index c15bb48..231d683 100755
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -474,18 +474,16 @@ def main():
     check_update(version)
 
     # Download Mapping Table, parallel version
-    down_map_tab = []
-    actor_xml = Path.home() / '.local' / 'share' / 'avdc' / 'mapping_actor.xml'
-    if not actor_xml.exists():
-        down_map_tab.append((
-            "https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/mapping_actor.xml",
-            actor_xml))
-    info_xml = Path.home() / '.local' / 'share' / 'avdc' / 'mapping_info.xml'
-    if not info_xml.exists():
-        down_map_tab.append((
-            "https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/mapping_info.xml",
-            info_xml))
-    res = parallel_download_files(down_map_tab)
+    user_data_home = Path.home() / '.local' / 'share' / 'avdc'
+    map_tab = (
+        ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/mapping_actor.xml',
+         user_data_home / 'mapping_actor.xml'),
+        ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/mapping_info.xml',
+         user_data_home / 'mapping_info.xml'),
+        ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/c_number.json',
+         user_data_home / 'c_number.json')
+    )
+    res = parallel_download_files(((k, v) for k, v in map_tab if not v.exists()))
     for i, fp in enumerate(res, start=1):
         if fp and len(fp):
             print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
diff --git a/WebCrawler/avsox.py b/WebCrawler/avsox.py
index 8b73b83..3699aa4 100644
--- a/WebCrawler/avsox.py
+++ b/WebCrawler/avsox.py
@@ -57,8 +57,8 @@ def getCover_small(html):
     result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
     return result
 def getTag(html):
-    result = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
-    return result
+    x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
+    return [i.strip() for i in x[2:]] if len(x) > 2 else []
 def getSeries(html):
     try:
         result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
diff --git a/WebCrawler/fc2.py b/WebCrawler/fc2.py
index 27bc1a0..c559c8d 100644
--- a/WebCrawler/fc2.py
+++ b/WebCrawler/fc2.py
@@ -14,7 +14,7 @@ def getTitle_fc2com(htmlcode): #获取厂商
     return result
 def getActor_fc2com(htmlcode):
     try:
-        htmtml = etree.fromstring(htmlcode, etree.HTMLParser())
+        html = etree.fromstring(htmlcode, etree.HTMLParser())
         result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
         return result
     except:
diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py
index 09dc045..413107d 100644
--- a/WebCrawler/javbus.py
+++ b/WebCrawler/javbus.py
@@ -72,7 +72,7 @@ def getSerise(html): #获取系列
     return str(x[0]) if len(x) else ''
 def getTag(html): # 获取标签
     klist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
-    return klist
+    return [v for v in klist[1:]]
 def getExtrafanart(htmlcode): # 获取剧照
     html_pather = re.compile(r'