From 0d435d55686547ab60b1bff6308660acaf2cf4de Mon Sep 17 00:00:00 2001 From: Yoshiko2 <42309414+yoshiko2@users.noreply.github.com> Date: Mon, 22 Jun 2020 15:59:24 +0800 Subject: [PATCH] Update 3.5 --- AV_Data_Capture.py | 36 +++++++++++++----------- config.ini | 2 +- fc2fans_club.py | 2 +- javlib.py | 5 ++-- update_check.json | 4 +-- xcity.py | 68 +++++++++++++++++++++++++++++++++++----------- 6 files changed, 79 insertions(+), 38 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 8ee81c2..4c84ce6 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -66,23 +66,27 @@ def create_data_and_move(file_path: str, c: config.Config): # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4 n_number = get_number(file_path) - try: - print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number)) - core_main(file_path, n_number, c) - print("[*]======================================================") - except Exception as err: - print("[-] [{}] ERROR:".format(file_path)) - print('[-]', err) + print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number)) + core_main(file_path, n_number, c) + print("[*]======================================================") - if c.soft_link(): - print("[-]Link {} to failed folder".format(file_path)) - os.symlink(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/") - else: - try: - print("[-]Move [{}] to failed folder".format(file_path)) - shutil.move(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/") - except Exception as err: - print('[!]', err) + # try: + # print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number)) + # core_main(file_path, n_number, c) + # print("[*]======================================================") + # except Exception as err: + # print("[-] [{}] ERROR:".format(file_path)) + # print('[-]', err) + # + # if c.soft_link(): + # print("[-]Link {} to failed folder".format(file_path)) + # os.symlink(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/") + # else: + # try: + # print("[-]Move [{}] to failed folder".format(file_path)) + # shutil.move(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/") + # except Exception as err: + # print('[!]', err) if __name__ == '__main__': diff --git a/config.ini b/config.ini index 2a6703c..3d908d3 100644 --- a/config.ini +++ b/config.ini @@ -19,7 +19,7 @@ naming_rule=number+'-'+title update_check=1 [priority] -website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321 +website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,javlib [escape] literals=\()/ diff --git a/fc2fans_club.py b/fc2fans_club.py index cb410cd..2c31a51 100755 --- a/fc2fans_club.py +++ b/fc2fans_club.py @@ -162,4 +162,4 @@ def main(number): return js if __name__ == '__main__': - print(main('1252953')) + print(main('1252953')) \ No newline at end of file diff --git a/javlib.py b/javlib.py index bdd7d10..cd85c8d 100644 --- a/javlib.py +++ b/javlib.py @@ -49,6 +49,7 @@ def main(number: str): "number": get_table_el_td(soup, "video_id"), "release": get_table_el_td(soup, "video_date"), "runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'), + "series":'', } else: dic = {} @@ -103,7 +104,7 @@ def get_cover(lx: html.HtmlComment) -> str: if __name__ == "__main__": - # lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"] - lists = ["DVMC-003"] + lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"] + #lists = ["DVMC-003"] for num in lists: print(main(num)) diff --git a/update_check.json b/update_check.json index ce1c381..2f4b8bc 100644 --- a/update_check.json +++ b/update_check.json @@ -1,5 +1,5 @@ { - "version": "3.4.3", - "version_show": "3.4.3", + "version": "3.5", + "version_show": "3.5", "download": "https://github.com/yoshiko2/AV_Data_Capture/releases" } diff --git a/xcity.py b/xcity.py index 0a12ad8..e3f04cb 100644 --- a/xcity.py +++ b/xcity.py @@ -32,14 +32,19 @@ def getActorPhoto(actor): # //*[@id="star_qdt"]/li/a/img def getStudio(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') + try: + result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']") + except: + result = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']") + return result.strip('+').replace("', '", '').replace('"', '') def getRuntime(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')).strip(" ['']") + try: + result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')[0] + except: + return '' try: return re.findall('\d+',result1)[0] except: @@ -48,14 +53,20 @@ def getRuntime(a): def getLabel(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')).strip(" ['']") - return result1 + try: + result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')[0] + return result + except: + return '' def getNum(a): html = etree.fromstring(a, etree.HTMLParser()) - result1 = str(html.xpath('//*[@id="hinban"]/text()')).strip(" ['']") - return result1 + try: + result = html.xpath('//*[@id="hinban"]/text()')[0] + return result + except: + return '' def getYear(getRelease): @@ -68,9 +79,12 @@ def getYear(getRelease): def getRelease(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')).strip(" ['']") try: - return re.findall('\d{4}/\d{2}/\d{2}', result1)[0].replace('/','-') + result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')[0] + except: + return '' + try: + return re.findall('\d{4}/\d{2}/\d{2}', result)[0].replace('/','-') except: return '' @@ -99,24 +113,45 @@ def getCover_small(a, index=0): def getCover(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')).strip(" ['']") - return 'https:'+result + try: + result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')[0] + return 'https:' + result + except: + return '' def getDirector(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="program_detail_director"]/text()')).strip(" ['']").replace(u'\\n','').replace(u'\\t','') - return result1 + try: + result = html.xpath('//*[@id="program_detail_director"]/text()')[0].replace(u'\n','').replace(u'\t', '') + return result + except: + return '' def getOutline(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')).strip(" ['']") + try: + result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')[0] + except: + return '' try: return re.sub('\\\\\w*\d+','',result) except: return result +def getSeries(htmlcode): + html = etree.fromstring(htmlcode, etree.HTMLParser()) + try: + try: + result = html.xpath("//span[contains(text(),'シリーズ')]/../a/span/text()")[0] + return result + except: + result = html.xpath("//span[contains(text(),'シリーズ')]/../span/text()")[0] + return result + except: + return '' + def main(number): try: @@ -142,8 +177,9 @@ def main(number): 'label': getLabel(detail_page), 'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()), 'actor_photo': getActorPhoto(getActor(detail_page)), - 'website': 'https://javdb.com' + urls, + 'website': 'https://xcity.jp' + urls, 'source': 'xcity.py', + 'series': getSeries(detail_page), } except Exception as e: # print(e)