From 83e8e8cb44523eea4795fac612125bad3c7c3d38 Mon Sep 17 00:00:00 2001
From: Feng4 <wuxiang198@gmail.com>
Date: Sat, 26 Dec 2020 23:57:34 +0800
Subject: [PATCH] Update jav321.py

---
 WebCrawler/jav321.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)
diff --git a/WebCrawler/jav321.py b/WebCrawler/jav321.py
index 2d0b0b0..b7fd8b4 100644
--- a/WebCrawler/jav321.py
+++ b/WebCrawler/jav321.py
@@ -4,15 +4,18 @@ import json
 from bs4 import BeautifulSoup
 from lxml import html
 from ADC_function import post_html
+import re
 
 
 def main(number: str) -> json:
     result = post_html(url="https://www.jav321.com/search", query={"sn": number})
+
     soup = BeautifulSoup(result.text, "html.parser")
     lx = html.fromstring(str(soup))
 
     if "/video/" in result.url:
         data = parse_info(soup)
+
         dic = {
             "title": get_title(lx),
             "year": get_year(data),
@@ -20,6 +23,8 @@ def main(number: str) -> json:
             "director": "",
             "cover": get_cover(lx),
             "imagecut": 1,
+            "trailer": get_trailer(result.text),
+            "extrafanart": get_extrafanart(result.text),
             "actor_photo": "",
             "website": result.url,
             "source": "jav321.py",
@@ -30,7 +35,6 @@ def main(number: str) -> json:
 
     return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
 
-
 def get_title(lx: html.HtmlElement) -> str:
     return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()")[0].strip()
 
@@ -79,6 +83,24 @@ def get_anchor_info(h: str) -> str:
 def get_text_info(h: str) -> str:
     return h.split(": ")[1]
 
+def get_trailer(html) -> str:
+    """Return the src of the first <source> tag in the page markup, or '' when there is none."""
+    # The parameter name hides the lxml `html` import, but only inside this function.
+    first_source = re.search(r'<source src=\"(.*?)\"', html)
+    if first_source is None:
+        return ''
+    return first_source.group(1)
+
+def get_extrafanart(htmlcode):
+    """Return the still-image URLs found in the page's image section, or '' when there are none."""
+    section = re.search(r'<div class=\"col\-md\-3\"><div class=\"col\-xs\-12 col\-md\-12\">[\s\S]*?</script><script async src=\"\/\/adserver\.juicyads\.com/js/jads\.js\">', htmlcode)
+    if not section:
+        return ''
+    # Restrict the <img> scan to the matched section so images elsewhere on the page are ignored.
+    image_urls = re.findall(r'<img.*?src=\"(.*?)\"', section.group())
+    if not image_urls:
+        return ''
+    return image_urls
 
 def get_cover(lx: html.HtmlElement) -> str:
     return lx.xpath("/html/body/div[2]/div[2]/div[1]/p/a/img/@src")[0]
@@ -112,6 +134,7 @@ def get_tag(data: hash) -> str:
         return ""
 
 
+
 def get_studio(data: hash) -> str:
     if "片商" in data:
         return get_anchor_info(data["片商"])
@@ -155,4 +178,4 @@ def get_series(data: hash) -> str:
 
 
 if __name__ == "__main__":
-    print(main("soe-259"))
+    print(main("jul-404"))