Update 3.5

2020-06-22 15:59:24 +08:00
parent 67353cde87
commit 0d435d5568
6 changed files with 79 additions and 38 deletions
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -66,23 +66,27 @@ def create_data_and_move(file_path: str, c: config.Config):
    # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
    n_number = get_number(file_path)

-    try:
-        print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number))
-        core_main(file_path, n_number, c)
-        print("[*]======================================================")
-    except Exception as err:
-        print("[-] [{}] ERROR:".format(file_path))
-        print('[-]', err)
+    print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number))
+    core_main(file_path, n_number, c)
+    print("[*]======================================================")

-        if c.soft_link():
-            print("[-]Link {} to failed folder".format(file_path))
-            os.symlink(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/")
-        else:
-            try:
-                print("[-]Move [{}] to failed folder".format(file_path))
-                shutil.move(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/")
-            except Exception as err:
-                print('[!]', err)
+    # try:
+    #     print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number))
+    #     core_main(file_path, n_number, c)
+    #     print("[*]======================================================")
+    # except Exception as err:
+    #     print("[-] [{}] ERROR:".format(file_path))
+    #     print('[-]', err)
+    #
+    #     if c.soft_link():
+    #         print("[-]Link {} to failed folder".format(file_path))
+    #         os.symlink(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/")
+    #     else:
+    #         try:
+    #             print("[-]Move [{}] to failed folder".format(file_path))
+    #             shutil.move(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/")
+    #         except Exception as err:
+    #             print('[!]', err)


 if __name__ == '__main__':
--- a/config.ini
+++ b/config.ini
@@ -19,7 +19,7 @@ naming_rule=number+'-'+title
 update_check=1

 [priority]
-website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321
+website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,javlib

 [escape]
 literals=\()/
--- a/fc2fans_club.py
+++ b/fc2fans_club.py
@@ -162,4 +162,4 @@ def main(number):
    return js

 if __name__ == '__main__':
-    print(main('1252953'))
+    print(main('1252953'))
--- a/javlib.py
+++ b/javlib.py
@@ -49,6 +49,7 @@ def main(number: str):
            "number": get_table_el_td(soup, "video_id"),
            "release": get_table_el_td(soup, "video_date"),
            "runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
+            "series":'',
        }
    else:
        dic = {}
@@ -103,7 +104,7 @@ def get_cover(lx: html.HtmlComment) -> str:


 if __name__ == "__main__":
-    # lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"]
-    lists = ["DVMC-003"]
+    # Manual check when the module is run directly: call main() for each
+    # sample number and print whatever it returns.
+    lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"]
    for num in lists:
        print(main(num))
--- a/update_check.json
+++ b/update_check.json
@@ -1,5 +1,5 @@
 {
-	"version": "3.4.3",
-	"version_show": "3.4.3",
+	"version": "3.5",
+	"version_show": "3.5",
 	"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
 }
--- a/xcity.py
+++ b/xcity.py
@@ -32,14 +32,19 @@ def getActorPhoto(actor):  # //*[@id="star_qdt"]/li/a/img

 def getStudio(a):
+    """Return the studio name parsed from detail-page HTML `a`, or '' when neither XPath matches."""
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
-    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+    # str() of an XPath result list never raises: an empty match becomes '[]',
+    # which strip() reduces to ''. A try/except therefore can never select the
+    # fallback, so pick it explicitly when the primary lookup comes back empty.
+    result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']")
+    if not result:
+        result = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
+    return result.strip('+').replace("', '", '').replace('"', '')


 def getRuntime(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')).strip(" ['']")
+    try:
+        result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')[0]
+    except:
+        return ''
    try:
        return re.findall('\d+',result1)[0]
    except:
@@ -48,14 +53,20 @@ def getRuntime(a):

 def getLabel(a):
+    """Return the first label text node from detail-page HTML `a`, or '' when the XPath matches nothing."""
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')).strip(" ['']")
-    return result1
+    # xpath() returns a list; test for emptiness instead of catching every
+    # exception around the [0] lookup.
+    matches = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')
+    return matches[0] if matches else ''


 def getNum(a):
+    """Return the first text node of the element with id "hinban" in HTML `a`, or '' when it is absent."""
    html = etree.fromstring(a, etree.HTMLParser())
-    result1 = str(html.xpath('//*[@id="hinban"]/text()')).strip(" ['']")
-    return result1
+    # xpath() returns a list; test for emptiness instead of catching every
+    # exception around the [0] lookup.
+    matches = html.xpath('//*[@id="hinban"]/text()')
+    return matches[0] if matches else ''


 def getYear(getRelease):
@@ -68,9 +79,12 @@ def getYear(getRelease):

 def getRelease(a):
+    """Return the release date found in detail-page HTML `a` as 'YYYY-MM-DD', or '' when no date is found."""
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')).strip(" ['']")
    try:
-        return re.findall('\d{4}/\d{2}/\d{2}', result1)[0].replace('/','-')
+        result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')[0]
+        # Raw string: '\d' in a plain literal is an invalid escape sequence.
+        return re.findall(r'\d{4}/\d{2}/\d{2}', result)[0].replace('/', '-')
-    except:
+    except IndexError:
+        # Either the XPath or the date pattern matched nothing.
        return ''

@@ -99,24 +113,45 @@ def getCover_small(a, index=0):

 def getCover(htmlcode):
+    """Return the cover URL ('https:' + the first matching href) from `htmlcode`, or '' when no link is found."""
    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')).strip(" ['']")
-    return 'https:'+result
+    hrefs = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')
+    # Return '' rather than a bare 'https:' prefix when the link is missing.
+    return 'https:' + hrefs[0] if hrefs else ''


 def getDirector(a):
+    """Return the director text from HTML `a` with newlines and tabs removed, or '' when the node is absent."""
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//*[@id="program_detail_director"]/text()')).strip(" ['']").replace(u'\\n','').replace(u'\\t','')
-    return result1
+    matches = html.xpath('//*[@id="program_detail_director"]/text()')
+    if not matches:
+        return ''
+    # The text node is a real string now, so strip actual control characters
+    # (not the escaped '\\n' / '\\t' that appeared in the old str(list) form).
+    return matches[0].replace('\n', '').replace('\t', '')


 def getOutline(htmlcode):
+    """Return the first outline text node from `htmlcode` with backslash-escape residue removed, or '' when absent."""
    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')).strip(" ['']")
-    try:
-        return re.sub('\\\\\w*\d+','',result)
-    except:
-        return result
+    matches = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')
+    if not matches:
+        return ''
+    # Same pattern as before, spelled as a raw string: a literal backslash
+    # followed by word characters and digits. re.sub() on a str cannot raise
+    # here, so no try/except is needed.
+    # NOTE(review): the pattern targeted escapes visible in the old str(list)
+    # form; it probably rarely matches real text nodes - confirm before removing.
+    return re.sub(r'\\\w*\d+', '', matches[0])

+def getSeries(htmlcode):
+    """Return the series name from `htmlcode`, or '' when neither XPath matches."""
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    # Tried in order: the linked series name first, then the plain-span variant.
+    # NOTE(review): the second path selects every <span> child of the label's
+    # parent, which includes the label span itself - confirm against a live
+    # page that [0] is the series name and not the 'シリーズ' label.
+    candidates = (
+        "//span[contains(text(),'シリーズ')]/../a/span/text()",
+        "//span[contains(text(),'シリーズ')]/../span/text()",
+    )
+    for path in candidates:
+        matches = html.xpath(path)
+        if matches:
+            return matches[0]
+    return ''
+

 def main(number):
    try:
@@ -142,8 +177,9 @@ def main(number):
            'label': getLabel(detail_page),
            'year': getYear(getRelease(detail_page)),  # str(re.search('\d{4}',getRelease(a)).group()),
            'actor_photo': getActorPhoto(getActor(detail_page)),
-            'website': 'https://javdb.com' + urls,
+            'website': 'https://xcity.jp' + urls,
            'source': 'xcity.py',
+            'series': getSeries(detail_page),
        }
    except Exception as e:
        # print(e)