This commit is contained in:
lededev
2021-11-14 08:52:45 +08:00
parent 4d7aad19d0
commit 701cc954cb
5 changed files with 16 additions and 18 deletions

View File

@@ -474,18 +474,16 @@ def main():
     check_update(version)
     # Download Mapping Table, parallel version
-    down_map_tab = []
-    actor_xml = Path.home() / '.local' / 'share' / 'avdc' / 'mapping_actor.xml'
-    if not actor_xml.exists():
-        down_map_tab.append((
-            "https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/mapping_actor.xml",
-            actor_xml))
-    info_xml = Path.home() / '.local' / 'share' / 'avdc' / 'mapping_info.xml'
-    if not info_xml.exists():
-        down_map_tab.append((
-            "https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/mapping_info.xml",
-            info_xml))
-    res = parallel_download_files(down_map_tab)
+    user_data_home = Path.home() / '.local' / 'share' / 'avdc'
+    map_tab = (
+        ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/mapping_actor.xml',
+         user_data_home / 'mapping_actor.xml'),
+        ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/mapping_info.xml',
+         user_data_home / 'mapping_info.xml'),
+        ('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/MappingTable/c_number.json',
+         user_data_home / 'c_number.json')
+    )
+    res = parallel_download_files(((k, v) for k, v in map_tab if not v.exists()))
     for i, fp in enumerate(res, start=1):
         if fp and len(fp):
             print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")

View File

@@ -57,8 +57,8 @@ def getCover_small(html):
     result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
     return result
 def getTag(html):
-    result = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
-    return result
+    x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
+    return [i.strip() for i in x[2:]] if len(x) > 2 else []
 def getSeries(html):
     try:
         result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")

View File

@@ -14,7 +14,7 @@ def getTitle_fc2com(htmlcode): #获取厂商
     return result
 def getActor_fc2com(htmlcode):
     try:
-        htmtml = etree.fromstring(htmlcode, etree.HTMLParser())
+        html = etree.fromstring(htmlcode, etree.HTMLParser())
         result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
         return result
     except:

View File

@@ -72,7 +72,7 @@ def getSerise(html): #获取系列
     return str(x[0]) if len(x) else ''
 def getTag(html): # 获取标签
     klist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
-    return klist
+    return [v for v in klist[1:]]
 def getExtrafanart(htmlcode): # 获取剧照
     html_pather = re.compile(r'<div id=\"sample-waterfall\">[\s\S]*?</div></a>\s*?</div>')
     html = html_pather.search(htmlcode)