update scrapinglib

- 优化提取 extrafanart、trailer 等：直接使用 xpath expr，不需要正则匹配
- 优化 getchu 获取 cover 的方法：直接使用 og 标签信息
- 优化 www.getchu 对 getchu-id 资源的识别
- 统一获取 tag 的方法，返回值为 list
2022-06-15 14:23:49 +08:00
parent eed33408a8
commit 0dda035057
16 changed files with 107 additions and 218 deletions
--- a/scrapinglib/fc2.py
+++ b/scrapinglib/fc2.py
@@ -18,6 +18,7 @@ class Fc2(Parser):
    expr_director = '//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'
    expr_actor = '//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'
    expr_cover = "//div[@class='items_article_MainitemThumb']/span/img/@src"
+    expr_extrafanart = '//ul[@class="items_article_SampleImagesArea"]/li/a/@href'
    expr_tags = "//a[@class='tag tagTag']/text()"

    def search(self, number):
@@ -45,17 +46,6 @@ class Fc2(Parser):
    def getCover(self, htmltree):
        """Return the cover URL joined onto the FC2 adult-contents site root.

        Delegates to the parent class's ``getCover(htmltree)`` and passes the
        result through ``urljoin`` with ``https://adult.contents.fc2.com`` as
        the base.

        NOTE(review): ``urljoin`` is presumably ``urllib.parse.urljoin`` (the
        import is not visible in this hunk) -- confirm. If so, a relative
        cover path is resolved against the site root, while an already
        absolute URL is returned unchanged.
        """
        return urljoin('https://adult.contents.fc2.com', super().getCover(htmltree)) 

-    def getExtrafanart(self, htmltree):
-        html_pather = re.compile(r'<ul class=\"items_article_SampleImagesArea\"[\s\S]*?</ul>')
-        html = html_pather.search(self.htmlcode)
-        if html:
-            html = html.group()
-            extrafanart_pather = re.compile(r'<a href=\"(.*?)\"')
-            extrafanart_imgs = extrafanart_pather.findall(html)
-            if extrafanart_imgs:
-                return extrafanart_imgs
-        return ''
-
    def getTrailer(self, htmltree):
        video_pather = re.compile(r'\'[a-zA-Z0-9]{32}\'')
        video = video_pather.findall(self.htmlcode)