javdb: speed up page fetches by reusing the connection via HTTP keep-alive

2021-10-15 21:16:48 +08:00
parent 7f8d500b13
commit 189f4db616
1 changed files with 13 additions and 5 deletions
@@ -1,4 +1,6 @@
 import sys
+
+from mechanicalsoup.stateful_browser import StatefulBrowser
 sys.path.append('../')
 import re
 from lxml import etree
@@ -246,7 +248,10 @@ def main(number):
            print(f'[!]javdb:select site {javdb_site}')
        try:
            javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all'
-            query_result = get_html(javdb_url, cookies=javdb_cookies)
+            res, browser = get_html_by_browser(javdb_url, cookies=javdb_cookies, return_type='browser')
+            if not res.ok:
+                raise
+            query_result = res.text
        except:
            query_result = get_html('https://javdb.com/search?q=' + number + '&f=all', cookies=javdb_cookies)
        html = etree.fromstring(query_result, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
@@ -267,8 +272,11 @@ def main(number):
                    raise ValueError("number not found")
                correct_url = urls[0]
        try:
-            javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
-            detail_page = get_html(javdb_detail_url, cookies=javdb_cookies)
+            if isinstance(browser, StatefulBrowser):  # get faster benefit from http keep-alive
+                detail_page = browser.open_relative(correct_url).text
+            else:
+                javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
+                detail_page = get_html(javdb_detail_url, cookies=javdb_cookies)
        except:
            detail_page = get_html('https://javdb.com' + correct_url, cookies=javdb_cookies)

@@ -344,8 +352,8 @@ if __name__ == "__main__":
    # print(main('BANK-022'))
    print(main('070116-197'))
    print(main('093021_539'))  # 没有剧照 片商pacopacomama
-    # print(main('FC2-2278260'))
-    # print(main('FC2-735670'))
+    print(main('FC2-2278260'))
+    print(main('FC2-735670'))
    # print(main('FC2-1174949')) # not found
    print(main('MVSD-439'))
    # print(main('EHM0001')) # not found