Update 3.5.1

2020-07-17 15:06:42 +08:00
parent df3a959852
commit 82315472f6
4 changed files with 25 additions and 7 deletions
@@ -1,12 +1,16 @@
 import os
 import configparser
 import codecs
 class Config:
    def __init__(self, path: str = "config.ini"):
        if os.path.exists(path):
            self.conf = configparser.ConfigParser()
-            self.conf.read(path, encoding="utf-8")
+            try:
                self.conf.read(path, encoding="utf-8-sig")
            except:
                self.conf.read(path, encoding="utf-8")
        else:
            print("[-] Config file not found! Use the default settings")
            self.conf = self._default_config()
@@ -68,13 +68,22 @@ def getCover_small(a, index=0):
    # javdb sometimes returns multiple results
    # DO NOT just get the first one, get the one with correct index number
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
+    try:
-    if not 'https' in result:
+        result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
-        result = 'https:' + result
+        if not 'https' in result:
-    return result
+            result = 'https:' + result
        return result
    except: # 2020.7.17 Repair Cover Url crawl
        result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
        if not 'https' in result:
            result = 'https:' + result
        return result
 def getCover(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath("//div[contains(@class, 'column-video-cover')]/a/img/@src")).strip(" ['']")
+    try:
        result = html.xpath("//div[contains(@class, 'column-video-cover')]/a/img/@src")[0]
    except: # 2020.7.17 Repair Cover Url crawl
        result = html.xpath("//div[contains(@class, 'column-video-cover')]/img/@src")[0]
    return result
 def getDirector(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
@@ -45,4 +45,4 @@ def get_number(filepath: str) -> str:
 if __name__ == "__main__":
    import doctest
-    doctest.testmod(raise_on_error=True)
+    doctest.testmod(raise_on_error=True)
@@ -0,0 +1,5 @@
 ## 2020.6.22 更新
 * 改进：网站爬虫子程序参数混乱
 * 修复：命名规则release参数带```/```的问题
 * 新增：socks5本地代理连接
 * 新增：命名规则series参数