update lib
@@ -57,10 +57,10 @@ class Scraping():
     """

-    full_sources = ['avsox', 'javbus', 'xcity', 'mgstage', 'madou', 'fc2',
+    adult_full_sources = ['avsox', 'javbus', 'xcity', 'mgstage', 'madou', 'fc2',
                     'dlsite', 'jav321', 'fanza', 'airav', 'carib', 'mv91',
                     'gcolle', 'javdb', 'getchu']
-    func_mapping = {
+    adult_func_mapping = {
         'avsox': Avsox().scrape,
         'javbus': Javbus().scrape,
         'xcity': Xcity().scrape,
@@ -78,6 +78,11 @@ class Scraping():
         'getchu': Getchu().scrape,
     }

+    general_full_sources = ['tmdb']
+    general_func_mapping = {
+        'tmdb': Tmdb().scrape,
+    }
+
     proxies = None
     verify = None

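With this change the class keeps two parallel tables: adult sources in adult_full_sources/adult_func_mapping and general movie/TV sources in general_full_sources/general_func_mapping, the latter mapping only 'tmdb' for now. A new general scraper would presumably be registered the same way; a hypothetical sketch (Imdb is an invented name, not part of this commit):

    # hypothetical: wiring a second general source into the new tables
    general_full_sources = ['tmdb', 'imdb']
    general_func_mapping = {
        'tmdb': Tmdb().scrape,
        'imdb': Imdb().scrape,  # assumed parser exposing the same scrape(name, config) signature
    }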
@@ -98,22 +103,45 @@ class Scraping():
         else:
             return self.searchGeneral(number, sources)

-    def searchGeneral(self, number, sources):
+    def searchGeneral(self, name, sources):
         """ Search movies and TV series
         imdb,tmdb
         """
-        data = Tmdb().scrape(number, self)
-        json_data = json.loads(data)
-        return json_data
-
-    def searchAdult(self, number, sources):
-        sources = self.checkSources(sources, number)
+        sources = self.checkGeneralSources(sources, name)
         json_data = {}
         for source in sources:
             try:
                 print('[+]select', source)
                 try:
-                    data = self.func_mapping[source](number, self)
+                    data = self.general_func_mapping[source](name, self)
+                    if data == 404:
+                        continue
+                    json_data = json.loads(data)
+                except Exception as e:
+                    print('[!] 出错啦')
+                    print(e)
+                # if any service returns a valid result, break
+                if self.get_data_state(json_data):
+                    print(f"[+]Find movie [{name}] metadata on website '{source}'")
+                    break
+            except:
+                continue
+
+        # Return if data not found in all sources
+        if not json_data:
+            print(f'[-]Movie Number [{name}] not found!')
+            return None
+
+        return json_data
+
+    def searchAdult(self, number, sources):
+        sources = self.checkAdultSources(sources, number)
+        json_data = {}
+        for source in sources:
+            try:
+                print('[+]select', source)
+                try:
+                    data = self.adult_func_mapping[source](number, self)
                     if data == 404:
                         continue
                     json_data = json.loads(data)
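For anyone adding a source, the loop above implies a small contract: a scrape function takes the query and the Scraping instance, returns the integer 404 when nothing matches, and otherwise returns a JSON string that json.loads can parse and get_data_state will accept. A minimal conforming sketch (MySource and its stand-in data are illustrative only):

    import json

    class MySource:
        """Illustrative scraper honoring the search loop's contract (not part of this commit)."""
        _db = {'ABC-123': {'number': 'ABC-123', 'title': 'example title'}}

        def scrape(self, number, config):
            record = self._db.get(number)
            if record is None:
                return 404              # sentinel: the loop continues to the next source
            return json.dumps(record)   # the loop json.loads() this and checks get_data_state()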
@@ -135,10 +163,26 @@ class Scraping():

         return json_data

-    def checkSources(self, c_sources, file_number):
+    def checkGeneralSources(self, c_sources, name):
         if not c_sources:
-            sources = self.full_sources
+            sources = self.general_full_sources
+        else:
+            sources = c_sources.split(',')
+
+        # check sources in func_mapping
+        todel = []
+        for s in sources:
+            if not s in self.general_func_mapping:
+                print('[!] Source Not Exist : ' + s)
+                todel.append(s)
+        for d in todel:
+            print('[!] Remove Source : ' + s)
+            sources.remove(d)
+        return sources
+
+    def checkAdultSources(self, c_sources, file_number):
+        if not c_sources:
+            sources = self.adult_full_sources
         else:
             sources = c_sources.split(',')
         def insert(sources,source):
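One nit in the new checkGeneralSources: the removal message prints the stale loop variable s left over from the preceding loop instead of the source d actually being removed. Presumably intended as:

        for d in todel:
            print('[!] Remove Source : ' + d)  # d, not the leftover s
            sources.remove(d)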
@@ -146,7 +190,7 @@ class Scraping():
                 sources.insert(0, sources.pop(sources.index(source)))
             return sources

-        if len(sources) <= len(self.func_mapping):
+        if len(sources) <= len(self.adult_func_mapping):
             # if the input file name matches certain rules,
             # move some web services to the beginning of the list
             lo_file_number = file_number.lower()
@@ -182,7 +226,7 @@ class Scraping():
         # check sources in func_mapping
         todel = []
         for s in sources:
-            if not s in self.func_mapping:
+            if not s in self.adult_func_mapping:
                 print('[!] Source Not Exist : ' + s)
                 todel.append(s)
         for d in todel:
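Net effect on the Scraping class: one source table and one check/search pair per scraper family, with searchGeneral(name, ...) keyed by title and searchAdult(number, ...) keyed by video number, both sharing the same try-each-source loop. A rough caller's-eye sketch, assuming Scraping is instantiated directly as the context lines suggest (argument values are illustrative):

    scraper = Scraping()
    # general lookup by name, currently backed only by tmdb
    movie = scraper.searchGeneral('Inception', None)
    # adult lookup by number, falling through the listed sources until one matches
    video = scraper.searchAdult('ABC-123', 'javbus,javdb')
    if movie:
        print(movie.get('title'))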
@@ -10,6 +10,7 @@ class Fanza(Parser):
     source = 'fanza'

     expr_title = '//*[starts-with(@id, "title")]/text()'
+    expr_actor = "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
     expr_outline = "//div[@class='mg-b20 lh4']/text()"
     expr_outline2 = "//div[@class='mg-b20 lh4']//p/text()"
     expr_runtime = "//td[contains(text(),'収録時間')]/following-sibling::td/text()"
@@ -132,10 +133,16 @@ class Fanza(Parser):
             return ''

     def getLabel(self, htmltree):
-        return self.getFanzaStrings('レーベル')
+        ret = self.getFanzaStrings('レーベル')
+        if ret == "----":
+            return ''
+        return ret

     def getSeries(self, htmltree):
-        return self.getFanzaStrings('シリーズ:')
+        ret = self.getFanzaStrings('シリーズ:')
+        if ret == "----":
+            return ''
+        return ret

     def getFanzaString(self, expr):
         result1 = str(self.htmltree.xpath("//td[contains(text(),'"+expr+"')]/following-sibling::td/a/text()")).strip(" ['']")
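Both getters now treat Fanza's "----" placeholder (its rendering of an empty label/series field) as no value at all rather than passing it through as literal metadata. Since the guard appears twice, a shared helper would be a natural follow-up; a hypothetical refactor sketch:

    def _fanza_string_or_empty(self, label):
        # hypothetical helper, not in this commit: fold the '----' placeholder to ''
        ret = self.getFanzaStrings(label)
        return '' if ret == '----' else ret

    def getLabel(self, htmltree):
        return self._fanza_string_or_empty('レーベル')

    def getSeries(self, htmltree):
        return self._fanza_string_or_empty('シリーズ:')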
@@ -23,8 +23,7 @@ def get(url: str, cookies=None, ua: str = None, extra_headers=None, return_type:
     for i in range(retry):
         try:
             result = requests.get(url, headers=headers, timeout=timeout, proxies=proxies,
-                                  verify=verify,
-                                  cookies=cookies)
+                                  verify=verify, cookies=cookies)
             if return_type == "object":
                 return result
             elif return_type == "content":
@@ -44,7 +43,7 @@ def get(url: str, cookies=None, ua: str = None, extra_headers=None, return_type:
     raise Exception('Connect Failed')


-def post(url: str, data: dict, cookies = None, ua: str = None, return_type: str = None, encoding: str = None,
+def post(url: str, data: dict, files=None, cookies=None, ua: str = None, return_type: str = None, encoding: str = None,
          retry: int = 3, timeout: int = G_DEFAULT_TIMEOUT, proxies=None, verify=None):
     """
     Whether to use a proxy should be decided by the caller
@@ -54,9 +53,8 @@ def post(url: str, data: dict, cookies = None, ua: str = None, return_type: str

     for i in range(retry):
         try:
-            result = requests.post(url, data=data, headers=headers, timeout=timeout, proxies=proxies,
-                                   verify=verify,
-                                   cookies=cookies)
+            result = requests.post(url, data=data, files=files, headers=headers, timeout=timeout, proxies=proxies,
+                                   verify=verify, cookies=cookies)
             if return_type == "object":
                 return result
             elif return_type == "content":
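Aside from folding the verify/cookies arguments onto one line, the substantive httprequest change is the new files parameter on post(), passed straight through to requests.post, so callers get standard requests multipart uploads. Example usage under that assumption (package import path inferred from the relative imports elsewhere in the diff; URL and field names are illustrative):

    from scrapinglib import httprequest

    # upload a file alongside form fields via the new pass-through parameter
    with open('cover.jpg', 'rb') as f:
        resp = httprequest.post('https://example.com/upload',
                                data={'title': 'test'},
                                files={'file': f},
                                return_type='object')
    print(resp.status_code)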
@@ -4,7 +4,6 @@
 import re
 from urllib.parse import urljoin
 from lxml import etree
-from requests import session
 from .httprequest import get_html_session
 from .parser import Parser

@@ -13,22 +12,26 @@ class Javdb(Parser):
     source = 'javdb'

     fixstudio = False
+    noauth = False

     expr_number = '//strong[contains(text(),"番號")]/../span/text()'
     expr_number2 = '//strong[contains(text(),"番號")]/../span/a/text()'
     expr_title = "/html/head/title/text()"
+    expr_title_no = '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "video-title")]/text()'
     expr_runtime = '//strong[contains(text(),"時長")]/../span/text()'
     expr_runtime2 = '//strong[contains(text(),"時長")]/../span/a/text()'
     expr_uncensored = '//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?") or contains(@href,"/tags/western?")]'
     expr_actor = '//span[@class="value"]/a[contains(@href,"/actors/")]/text()'
     expr_actor2 = '//span[@class="value"]/a[contains(@href,"/actors/")]/../strong/@class'
     expr_release = '//strong[contains(text(),"日期")]/../span/text()'
+    expr_release_no = '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "meta")]/text()'
     expr_studio = '//strong[contains(text(),"片商")]/../span/a/text()'
     expr_studio2 = '//strong[contains(text(),"賣家:")]/../span/a/text()'
     expr_director = '//strong[contains(text(),"導演")]/../span/text()'
     expr_director2 = '//strong[contains(text(),"導演")]/../span/a/text()'
     expr_cover = "//div[contains(@class, 'column-video-cover')]/a/img/@src"
     expr_cover2 = "//div[contains(@class, 'column-video-cover')]/img/@src"
+    expr_cover_no = '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "cover")]/img/@src'
     expr_extrafanart = "//article[@class='message video-panel']/div[@class='message-body']/div[@class='tile-images preview-images']/a[contains(@href,'/samples/')]/@href"
     expr_tags = '//strong[contains(text(),"類別")]/../span/a/text()'
     expr_tags2 = '//strong[contains(text(),"類別")]/../span/text()'
@@ -57,12 +60,16 @@ class Javdb(Parser):
         else:
             self.dbsite = 'javdb'

-    def search(self, number):
+    def search(self, number: str):
         self.number = number
         self.session = get_html_session(cookies=self.cookies, proxies=self.proxies, verify=self.verify)
         self.detailurl = self.queryNumberUrl(number)

         self.deatilpage = self.session.get(self.detailurl).text
+        if '此內容需要登入才能查看或操作' in self.deatilpage or '需要VIP權限才能訪問此內容' in self.deatilpage:
+            self.noauth = True
+            self.imagecut = 0
+            result = self.dictformat(self.querytree)
+        else:
             htmltree = etree.fromstring(self.deatilpage, etree.HTMLParser())
             result = self.dictformat(htmltree)
         return result
@@ -75,18 +82,19 @@ class Javdb(Parser):
             print(e)
             raise Exception(f'[!] {self.number}: page not found in javdb')

-        htmltree = etree.fromstring(resp.text, etree.HTMLParser())
+        self.querytree = etree.fromstring(resp.text, etree.HTMLParser())
         # javdb sometimes returns multiple results,
         # and the first element may not be the one we are looking for:
         # iterate all candidates and find the matching one
-        urls = self.getAll(htmltree, '//*[contains(@class,"movie-list")]/div/a/@href')
+        urls = self.getAll(self.querytree, '//*[contains(@class,"movie-list")]/div/a/@href')
         # note the Western-style ids, e.g. ['Blacked','Blacked']
         if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
             correct_url = urls[0]
         else:
-            ids = self.getAll(htmltree, '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "video-title")]/strong/text()')
+            ids = self.getAll(self.querytree, '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "video-title")]/strong/text()')
             try:
-                correct_url = urls[ids.index(number)]
+                self.queryid = ids.index(number)
+                correct_url = urls[self.queryid]
             except:
                 # to avoid picking up a wrong number, accept only an exact match
                 if ids[0].upper() != number:
@@ -95,6 +103,8 @@ class Javdb(Parser):
         return urljoin(resp.url, correct_url)

     def getNum(self, htmltree):
+        if self.noauth:
+            return self.number
         result1 = str(self.getAll(htmltree, self.expr_number)).strip(" ['']")
         result2 = str(self.getAll(htmltree, self.expr_number2)).strip(" ['']")
         dp_number = str(result2 + result1).strip('+')
@@ -105,10 +115,22 @@ class Javdb(Parser):
         return self.number

     def getTitle(self, htmltree):
+        if self.noauth:
+            return self.getTreeIndex(htmltree, self.expr_title_no, self.queryid)
         browser_title = super().getTitle(htmltree)
         title = browser_title[:browser_title.find(' | JavDB')].strip()
         return title.replace(self.number, '').strip()

+    def getCover(self, htmltree):
+        if self.noauth:
+            return self.getTreeIndex(htmltree, self.expr_cover_no, self.queryid)
+        return super().getCover(htmltree)
+
+    def getRelease(self, htmltree):
+        if self.noauth:
+            return self.getTreeIndex(htmltree, self.expr_release_no, self.queryid).strip()
+        return super().getRelease(htmltree)
+
     def getRuntime(self, htmltree):
         result1 = str(self.getAll(htmltree, self.expr_runtime)).strip(" ['']")
         result2 = str(self.getAll(htmltree, self.expr_runtime2)).strip(" ['']")
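The javdb changes add a degraded mode: when the detail page answers with a login/VIP wall, the parser sets noauth, keeps the already-fetched search-results tree (self.querytree) and the index of the matched entry (self.queryid), and serves getNum/getTitle/getCover/getRelease from the listing via the new *_no expressions instead of failing outright. One thing a reviewer might flag: self.queryid is only assigned on the ids.index(number) path, so a login-walled title reached through the Western-id branch (correct_url = urls[0]) would touch an unset attribute. A class-level default in the spirit of noauth = False would cover that; hypothetical one-line guard:

    class Javdb(Parser):
        fixstudio = False
        noauth = False
        queryid = 0  # hypothetical default so the noauth getters never see an unset attribute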
@@ -3,12 +3,14 @@
 import json
 import re
 from lxml import etree, html
 from . import httprequest
+from .utils import getTreeElement, getTreeAll


 class Parser:

     source = 'base'
+    # poster: `0` copy, `1` crop
     imagecut = 1
     uncensored = False
     allow_number_change = False
@@ -249,21 +251,9 @@ class Parser:
     def getTreeIndex(self, tree: html.HtmlElement, expr, index=0):
         """ Fetch the matching value from `xmltree` for the expression; index defaults to 0
         """
-        if expr == '':
-            return ''
-        result = tree.xpath(expr)
-        try:
-            return result[index]
-        except:
-            return ''
+        return getTreeElement(tree, expr, index)

     def getAll(self, tree: html.HtmlElement, expr):
         """ Fetch all matching values from `xmltree` for the expression
         """
-        if expr == '':
-            return ''
-        result = tree.xpath(expr)
-        try:
-            return result
-        except:
-            return ''
+        return getTreeAll(tree, expr)
scrapinglib/utils.py (new file, +31 lines)
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+from lxml.html import HtmlElement
+
+def getTreeElement(tree: HtmlElement, expr, index=0):
+    """ Fetch the matching value from the tree for the expression; index defaults to 0
+    :param tree (html.HtmlElement)
+    :param expr
+    :param index
+    """
+    if expr == '':
+        return ''
+    result = tree.xpath(expr)
+    try:
+        return result[index]
+    except:
+        return ''
+
+def getTreeAll(tree: HtmlElement, expr):
+    """ Fetch all matching values from the tree for the expression
+    :param tree (html.HtmlElement)
+    :param expr
+    """
+    if expr == '':
+        return ''
+    result = tree.xpath(expr)
+    try:
+        return result
+    except:
+        return ''
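With Parser.getTreeIndex and Parser.getAll now thin wrappers over these helpers, the tolerant xpath behavior (empty string on an empty expression, no match, or an out-of-range index, rather than an exception) is reusable outside Parser subclasses. A quick standalone sketch:

    from lxml import etree
    from scrapinglib.utils import getTreeElement, getTreeAll

    tree = etree.fromstring('<div><a href="/x">first</a><a href="/y">second</a></div>',
                            etree.HTMLParser())
    print(getTreeElement(tree, '//a/text()'))     # 'first' (index defaults to 0)
    print(getTreeElement(tree, '//a/text()', 5))  # ''      (out-of-range index is swallowed)
    print(getTreeAll(tree, '//a/@href'))          # ['/x', '/y']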