From 764fba74ec9c40e8d24b91c9bfc5908246f0e346 Mon Sep 17 00:00:00 2001
From: wenead99 <42309414+wenead99@users.noreply.github.com>
Date: Wed, 19 Jun 2019 18:19:34 +0800
Subject: [PATCH] Beta 10.2 Update

---
 ADC_function.py    | 17 +++++++----
 AV_Data_Capture.py |  6 ++--
 core.py            |  2 +-
 fc2fans_club.py    |  4 +--
 javbus.py          | 35 +++++++++++------------
 siro.py            | 71 +++++++++++++++++++++++++---------------------
 6 files changed, 75 insertions(+), 60 deletions(-)

diff --git a/ADC_function.py b/ADC_function.py
index e356b5b..0a3b4ed 100644
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -1,8 +1,15 @@
 import requests
-from configparser import ConfigParser
+from configparser import RawConfigParser
 import os
+import re
 
-config = ConfigParser()
+# content = open('proxy.ini').read()
+# content = re.sub(r"\xfe\xff","", content)
+# content = re.sub(r"\xff\xfe","", content)
+# content = re.sub(r"\xef\xbb\xbf","", content)
+# open('BaseConfig.cfg', 'w').write(content)
+
+config = RawConfigParser()
 if os.path.exists('proxy.ini'):
     config.read('proxy.ini', encoding='UTF-8')
 else:
@@ -10,14 +17,14 @@ else:
         print("[proxy]",file=code)
         print("proxy=127.0.0.1:1080",file=code)
 
-def get_html(url):#网页请求核心
+def get_html(url,cookies = None):#网页请求核心
     if not str(config['proxy']['proxy']) == '':
         proxies = {
             "http" : "http://"  + str(config['proxy']['proxy']),
             "https": "https://" + str(config['proxy']['proxy'])
         }
         headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
-        getweb = requests.get(str(url), headers=headers, proxies=proxies)
+        getweb = requests.get(str(url), headers=headers, proxies=proxies,cookies=cookies)
         getweb.encoding = 'utf-8'
         # print(getweb.text)
         try:
@@ -27,7 +34,7 @@ def get_html(url):#网页请求核心
     else:
         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
-        getweb = requests.get(str(url),  headers=headers)
+        getweb = requests.get(str(url),  headers=headers,cookies=cookies)
         getweb.encoding = 'utf-8'
         try:
             return getweb.text
diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py
index 9782d27..3e4d7b3 100644
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -19,8 +19,10 @@ def movie_lists():
     f2 = glob.glob(os.getcwd() + r"\*.mkv")
     # FLV
     g2 = glob.glob(os.getcwd() + r"\*.flv")
+    # TS
+    h2 = glob.glob(os.getcwd() + r"\*.ts")
 
-    total = a2+b2+c2+d2+e2+f2+g2
+    total = a2+b2+c2+d2+e2+f2+g2+h2
     return total
 
 def lists_from_test(custom_nuber): #电影列表
@@ -58,4 +60,4 @@ if __name__ =='__main__':
     print("[!]Cleaning empty folders")
     CEF('JAV_output')
     print("[+]All finished!!!")
-    input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束，你可以在结束之前查看错误信息。")
+    input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束，你可以在结束之前查看错误信息。")
\ No newline at end of file
diff --git a/core.py b/core.py
index 0f20df3..ed5e9a2 100644
--- a/core.py
+++ b/core.py
@@ -299,7 +299,7 @@ def cutImage():
         h = img.height
         img.save(path + '/' + naming_rule + '.png')
 def pasteFileToFolder(filepath, path): #文件路径，番号，后缀，要移动至的位置
-    houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|avi|rmvb|wmv|mov|mp4|mkv|flv)$', filepath).group())
+    houzhui = str(re.search(r'[.](avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath, re.IGNORECASE).group())  # extension as spelled in filepath; IGNORECASE also accepts mixed case such as ".Mp4"
     os.rename(filepath, naming_rule + houzhui)
     shutil.move(naming_rule + houzhui, path)
 
diff --git a/fc2fans_club.py b/fc2fans_club.py
index a74ecb9..11272e3 100644
--- a/fc2fans_club.py
+++ b/fc2fans_club.py
@@ -38,8 +38,8 @@ def getOutline(htmlcode,number):     #获取番号
 #     result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).replace("\\n",'',10000).strip(" ['']").replace("'",'',10000)
 #     return result
 
-def main(number):
-    str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")
+def main(number2):
+    number=number2.replace('PPV','').replace('ppv','')
     htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html')
     dic = {
         'title': getTitle(htmlcode),
diff --git a/javbus.py b/javbus.py
index da62764..e5e1712 100644
--- a/javbus.py
+++ b/javbus.py
@@ -9,14 +9,7 @@ from bs4 import BeautifulSoup#need install
 from PIL import Image#need install
 import time
 import json
-
-def get_html(url):#网页请求核心
-    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
-    getweb = requests.get(str(url),timeout=10,headers=headers).text
-    try:
-        return getweb
-    except:
-        print("[-]Connect Failed! Please check your Proxy.")
+from ADC_function import *
 
 def getTitle(htmlcode):  #获取标题
     doc = pq(htmlcode)
@@ -34,7 +27,6 @@ def getCover(htmlcode):  #获取封面链接
     doc = pq(htmlcode)
     image = doc('a.bigImage')
     return image.attr('href')
-    print(image.attr('href'))
 def getRelease(htmlcode): #获取出版日期
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
@@ -62,8 +54,10 @@ def getOutline(htmlcode):  #获取演员
     doc = pq(htmlcode)
     result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
     return result
-
-
+def getSerise(htmlcode):  # Return the first link text found at .../div[2]/p[7]/a, or '' when there is none.
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')  # list of matching text nodes (may be empty)
+    return str(result[0]).strip() if result else ''  # index the list instead of str(list), which leaks repr quotes/escapes into the value
 def getTag(htmlcode):  # 获取演员
     tag = []
     soup = BeautifulSoup(htmlcode, 'lxml')
@@ -79,7 +73,7 @@ def main(number):
     htmlcode=get_html('https://www.javbus.com/'+number)
     dww_htmlcode=get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
     dic = {
-        'title':    getTitle(htmlcode),
+        'title':    str(re.sub('\w+-\d+-','',getTitle(htmlcode))),
         'studio':   getStudio(htmlcode),
         'year':     str(re.search('\d{4}',getYear(htmlcode)).group()),
         'outline':  getOutline(dww_htmlcode),
@@ -90,7 +84,8 @@ def main(number):
         'number':   getNum(htmlcode),
         'cover':    getCover(htmlcode),
         'imagecut': 1,
-        'tag':      getTag(htmlcode)
+        'tag':      getTag(htmlcode),
+        'label':   getSerise(htmlcode),
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
 
@@ -98,7 +93,7 @@ def main(number):
         htmlcode = get_html('https://www.javbus.com/' + number)
         dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
         dic = {
-            'title': getTitle(htmlcode),
+            'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))),
             'studio': getStudio(htmlcode),
             'year': getYear(htmlcode),
             'outline': getOutline(dww_htmlcode),
@@ -109,7 +104,8 @@ def main(number):
             'number': getNum(htmlcode),
             'cover': getCover(htmlcode),
             'imagecut': 1,
-            'tag': getTag(htmlcode)
+            'tag': getTag(htmlcode),
+            'label':   getSerise(htmlcode),
         }
         js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
         return js2
@@ -118,11 +114,12 @@ def main(number):
 
 def main_uncensored(number):
     htmlcode = get_html('https://www.javbus.com/' + number)
+    dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
     dic = {
-        'title': getTitle(htmlcode),
+        'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))),
         'studio': getStudio(htmlcode),
         'year': getYear(htmlcode),
-        'outline': getOutline(htmlcode),
+        'outline': getOutline(dww_htmlcode),
         'runtime': getRuntime(htmlcode),
         'director': getDirector(htmlcode),
         'actor': getActor(htmlcode),
@@ -130,6 +127,7 @@ def main_uncensored(number):
         'number': getNum(htmlcode),
         'cover': getCover(htmlcode),
         'tag': getTag(htmlcode),
+        'label': getSerise(htmlcode),
         'imagecut': 0,
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
@@ -138,7 +136,7 @@ def main_uncensored(number):
         number2 = number.replace('-', '_')
         htmlcode = get_html('https://www.javbus.com/' + number2)
         dic2 = {
-            'title': getTitle(htmlcode),
+            'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))),
             'studio': getStudio(htmlcode),
             'year': getYear(htmlcode),
             'outline': '',
@@ -149,6 +147,7 @@ def main_uncensored(number):
             'number': getNum(htmlcode),
             'cover': getCover(htmlcode),
             'tag': getTag(htmlcode),
+            'label':getSerise(htmlcode),
             'imagecut': 0,
         }
         js2 = json.dumps(dic2, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
diff --git a/siro.py b/siro.py
index f7359ee..aad875e 100644
--- a/siro.py
+++ b/siro.py
@@ -3,70 +3,74 @@ from lxml import etree
 import json
 import requests
 from bs4 import BeautifulSoup
-
-def get_html(url):#网页请求核心
-    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
-    cookies = {'adc':'1'}
-    getweb = requests.get(str(url),timeout=10,cookies=cookies,headers=headers).text
-    try:
-        return getweb
-    except:
-        print("[-]Connect Failed! Please check your Proxy.")
+from ADC_function import *
 
 def getTitle(a):
     html = etree.fromstring(a, etree.HTMLParser())
     result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']")
     return result
 def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
-    html = etree.fromstring(a, etree.HTMLParser())
-    result=str(html.xpath('//table[2]/tr[1]/td/a/text()')).strip(" ['\\n                                        ']")
-    return result
+    html = etree.fromstring(a, etree.HTMLParser())  # Parse the HTML fragment; the value is read from row 1, first cell of any table.
+    result2=str(html.xpath('//table/tr[1]/td[1]/text()')).strip(" ['\\n                                        ']")  # plain text directly in the cell; str(list) is trimmed of spaces, brackets, quotes, backslashes and the letter 'n' at both ends
+    result1 = str(html.xpath('//table/tr[1]/td[1]/a/text()')).strip(" ['\\n                                        ']")  # text of <a> links inside the same cell, trimmed the same way
+    return str(result1+result2).strip('+')  # link text first, then plain text; strip('+') only removes literal '+' characters at the ends
 def getStudio(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    result=str(html.xpath('//table[2]/tr[2]/td/a/text()')).strip(" ['\\n                                        ']")
-    return result
+    result2=str(html.xpath('//table[2]/tr[2]/td/text()')).strip(" ['\\n                                        ']")  # plain text of row 2 cells. NOTE(review): this query still uses //table[2] while the next one uses //table - confirm which is intended
+    result1 = str(html.xpath('//table/tr[2]/td[1]/a/text()')).strip(" ['\\n                                        ']")  # text of <a> links in row 2, first cell
+    return str(result1+result2).strip('+')  # link text first, then plain text; strip('+') only removes literal '+' characters at the ends
 def getRuntime(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    result=str(html.xpath('//table[2]/tr[3]/td/text()')).strip(" ['\\n                                        ']")
-    return result
+    result2=str(html.xpath('//table/tr[3]/td[1]/text()')).strip(" ['\\n                                        ']")  # plain text of row 3, first cell; the strip set contains 'n', so a trailing 'n' is removed here
+    result1 = str(html.xpath('//table/tr[3]/td[1]/a/text()')).strip(" ['\\n                                        ']")  # text of <a> links in the same cell
+    return str(result1 + result2).strip('+').strip('mi')  # strip('mi') trims leading/trailing 'm' and 'i' characters left after the 'n' was removed above (e.g. "65mi" -> "65")
+def getLabel(a):  # Return the text of row 6, first cell of the tables in HTML fragment `a` (link text followed by plain text).
+    html = etree.fromstring(a, etree.HTMLParser())
+    result2=str(html.xpath('//table/tr[6]/td[1]/text()')).strip(" ['\\n                                        ']")  # plain text in the cell; str(list) is trimmed of spaces, brackets, quotes, backslashes and the letter 'n' at both ends
+    result1 = str(html.xpath('//table/tr[6]/td[1]/a/text()')).strip(" ['\\n                                        ']")  # text of <a> links in the same cell
+    return str(result1 + result2).strip('+')  # strip('+') only removes literal '+' characters at the ends
 def getNum(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    result=str(html.xpath('//table[2]/tr[4]/td/text()')).strip(" ['\\n                                        ']")
-    return result
+    result2=str(html.xpath('//table/tr[2]/td[4]/a/text()')).strip(" ['\\n                                        ']")  # text of <a> links in row 2, fourth cell
+    result1 = str(html.xpath('//table/tr[2]/td[4]/text()')).strip(" ['\\n                                        ']")  # plain text of the same cell
+    return str(result1 + result2).strip('+')  # plain text first, then link text; strip('+') only removes literal '+' characters at the ends
 def getYear(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    #result=str(html.xpath('//table[2]/tr[5]/td/text()')).strip(" ['\\n                                        ']")
-    result=str(html.xpath('//table[2]/tr[5]/td/text()')).strip(" ['\\n                                        ']")
-    return result
+    result2=str(html.xpath('//table/tr[2]/td[5]/a/text()')).strip(" ['\\n                                        ']")  # text of <a> links in row 2, fifth cell
+    result1=str(html.xpath('//table/tr[2]/td[5]/text()')).strip(" ['\\n                                        ']")  # plain text of the same cell
+    return result2+result1  # link text first, then plain text
 def getRelease(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    result=str(html.xpath('//table[2]/tr[5]/td/text()')).strip(" ['\\n                                        ']")
-    return result
+    result2=str(html.xpath('//table/tr[5]/td[1]/text()')).strip(" ['\\n                                        ']")  # plain text of row 5, first cell
+    result1 = str(html.xpath('//table/tr[5]/td[1]/a/text()')).strip(" ['\\n                                        ']")  # link text in the same cell; path is td[1]/a like the sibling getters (was a/td[1], which cannot match)
+    return str(result1 + result2).strip('+')  # link text first, then plain text; strip('+') only removes literal '+' characters at the ends
 def getTag(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    result=str(html.xpath('//table[2]/tr[9]/td/text()')).strip(" ['\\n                                        ']")
-    return result
+    result2=str(html.xpath('//table/tr[8]/td[1]/a/text()')).strip(" ['\\n                                        ']")  # text of <a> links in row 8, first cell; several links stay in str(list) form ("a', 'b")
+    result1=str(html.xpath('//table/tr[8]/td[1]/text()')).strip(" ['\\n                                        ']")  # plain text of the same cell
+    return str(result1 + result2).strip('+')  # plain text first, then link text; strip('+') only removes literal '+' characters at the ends
 def getCover(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//*[@id="center_column"]/div[2]/div[1]/div/div/h2/img/@src')).strip(" ['']")
     return result
 def getDirector(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    result = str(html.xpath('//table[2]/tr[7]/td/a/text()')).strip(" ['\\n                                        ']")
-    return result
+    result1 = str(html.xpath('//table/tr[2]/td[1]/text()')).strip(" ['\\n                                        ']")  # plain text of row 2, first cell
+    result2 = str(html.xpath('//table/tr[2]/td[1]/a/text()')).strip(" ['\\n                                        ']")  # link text of the same cell. NOTE(review): same row/cell that getStudio queries - confirm this is intended
+    return str(result1 + result2).strip('+')  # plain text first, then link text; strip('+') only removes literal '+' characters at the ends
 def getOutline(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
     return result
-
 def main(number):
-    htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number))
+    htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number),cookies={'adc':'1'})
     soup = BeautifulSoup(htmlcode, 'lxml')
     a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n                                        ','')
+    #print(a)
     dic = {
         'title': getTitle(htmlcode).replace("\\n",'').replace('        ',''),
         'studio': getStudio(a),
-        'year': getYear(a),
+        'year': str(re.search('\d{4}',getRelease(a)).group()),
         'outline': getOutline(htmlcode),
         'runtime': getRuntime(a),
         'director': getDirector(a),
@@ -75,7 +79,10 @@ def main(number):
         'number': number,
         'cover': getCover(htmlcode),
         'imagecut': 0,
-        'tag':' ',
+        'tag': getTag(a).replace("'\\n',",'').replace(' ', '').replace("\\n','\\n",','),
+        'label':getLabel(a)
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
-    return js
\ No newline at end of file
+    #print('https://www.mgstage.com/product/product_detail/'+str(number))
+    return js
+#print(main('SIRO-3552'))
\ No newline at end of file