Update 3.5
This commit is contained in:
@@ -66,23 +66,27 @@ def create_data_and_move(file_path: str, c: config.Config):
|
|||||||
# Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
|
# Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
|
||||||
n_number = get_number(file_path)
|
n_number = get_number(file_path)
|
||||||
|
|
||||||
try:
|
|
||||||
print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number))
|
print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number))
|
||||||
core_main(file_path, n_number, c)
|
core_main(file_path, n_number, c)
|
||||||
print("[*]======================================================")
|
print("[*]======================================================")
|
||||||
except Exception as err:
|
|
||||||
print("[-] [{}] ERROR:".format(file_path))
|
|
||||||
print('[-]', err)
|
|
||||||
|
|
||||||
if c.soft_link():
|
# try:
|
||||||
print("[-]Link {} to failed folder".format(file_path))
|
# print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number))
|
||||||
os.symlink(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/")
|
# core_main(file_path, n_number, c)
|
||||||
else:
|
# print("[*]======================================================")
|
||||||
try:
|
# except Exception as err:
|
||||||
print("[-]Move [{}] to failed folder".format(file_path))
|
# print("[-] [{}] ERROR:".format(file_path))
|
||||||
shutil.move(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/")
|
# print('[-]', err)
|
||||||
except Exception as err:
|
#
|
||||||
print('[!]', err)
|
# if c.soft_link():
|
||||||
|
# print("[-]Link {} to failed folder".format(file_path))
|
||||||
|
# os.symlink(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/")
|
||||||
|
# else:
|
||||||
|
# try:
|
||||||
|
# print("[-]Move [{}] to failed folder".format(file_path))
|
||||||
|
# shutil.move(file_path, str(os.getcwd()) + "/" + conf.failed_folder() + "/")
|
||||||
|
# except Exception as err:
|
||||||
|
# print('[!]', err)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ naming_rule=number+'-'+title
|
|||||||
update_check=1
|
update_check=1
|
||||||
|
|
||||||
[priority]
|
[priority]
|
||||||
website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321
|
website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321,javlib
|
||||||
|
|
||||||
[escape]
|
[escape]
|
||||||
literals=\()/
|
literals=\()/
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ def main(number: str):
|
|||||||
"number": get_table_el_td(soup, "video_id"),
|
"number": get_table_el_td(soup, "video_id"),
|
||||||
"release": get_table_el_td(soup, "video_date"),
|
"release": get_table_el_td(soup, "video_date"),
|
||||||
"runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
|
"runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
|
||||||
|
"series":'',
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
dic = {}
|
dic = {}
|
||||||
@@ -103,7 +104,7 @@ def get_cover(lx: html.HtmlComment) -> str:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"]
|
lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"]
|
||||||
lists = ["DVMC-003"]
|
#lists = ["DVMC-003"]
|
||||||
for num in lists:
|
for num in lists:
|
||||||
print(main(num))
|
print(main(num))
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"version": "3.4.3",
|
"version": "3.5",
|
||||||
"version_show": "3.4.3",
|
"version_show": "3.5",
|
||||||
"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
|
"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
|
||||||
}
|
}
|
||||||
|
|||||||
66
xcity.py
66
xcity.py
@@ -32,14 +32,19 @@ def getActorPhoto(actor): # //*[@id="star_qdt"]/li/a/img
|
|||||||
|
|
||||||
def getStudio(a):
|
def getStudio(a):
|
||||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||||
result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']")
|
try:
|
||||||
result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
|
result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']")
|
||||||
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
except:
|
||||||
|
result = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
|
||||||
|
return result.strip('+').replace("', '", '').replace('"', '')
|
||||||
|
|
||||||
|
|
||||||
def getRuntime(a):
|
def getRuntime(a):
|
||||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||||
result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')).strip(" ['']")
|
try:
|
||||||
|
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')[0]
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
try:
|
try:
|
||||||
return re.findall('\d+',result1)[0]
|
return re.findall('\d+',result1)[0]
|
||||||
except:
|
except:
|
||||||
@@ -48,14 +53,20 @@ def getRuntime(a):
|
|||||||
|
|
||||||
def getLabel(a):
|
def getLabel(a):
|
||||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||||
result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')).strip(" ['']")
|
try:
|
||||||
return result1
|
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')[0]
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getNum(a):
|
def getNum(a):
|
||||||
html = etree.fromstring(a, etree.HTMLParser())
|
html = etree.fromstring(a, etree.HTMLParser())
|
||||||
result1 = str(html.xpath('//*[@id="hinban"]/text()')).strip(" ['']")
|
try:
|
||||||
return result1
|
result = html.xpath('//*[@id="hinban"]/text()')[0]
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getYear(getRelease):
|
def getYear(getRelease):
|
||||||
@@ -68,9 +79,12 @@ def getYear(getRelease):
|
|||||||
|
|
||||||
def getRelease(a):
|
def getRelease(a):
|
||||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||||
result1 = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')).strip(" ['']")
|
|
||||||
try:
|
try:
|
||||||
return re.findall('\d{4}/\d{2}/\d{2}', result1)[0].replace('/','-')
|
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')[0]
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
try:
|
||||||
|
return re.findall('\d{4}/\d{2}/\d{2}', result)[0].replace('/','-')
|
||||||
except:
|
except:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
@@ -99,24 +113,45 @@ def getCover_small(a, index=0):
|
|||||||
|
|
||||||
def getCover(htmlcode):
|
def getCover(htmlcode):
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')).strip(" ['']")
|
try:
|
||||||
|
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')[0]
|
||||||
return 'https:' + result
|
return 'https:' + result
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getDirector(a):
|
def getDirector(a):
|
||||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||||
result1 = str(html.xpath('//*[@id="program_detail_director"]/text()')).strip(" ['']").replace(u'\\n','').replace(u'\\t','')
|
try:
|
||||||
return result1
|
result = html.xpath('//*[@id="program_detail_director"]/text()')[0].replace(u'\n','').replace(u'\t', '')
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getOutline(htmlcode):
|
def getOutline(htmlcode):
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')).strip(" ['']")
|
try:
|
||||||
|
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')[0]
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
try:
|
try:
|
||||||
return re.sub('\\\\\w*\d+','',result)
|
return re.sub('\\\\\w*\d+','',result)
|
||||||
except:
|
except:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def getSeries(htmlcode):
|
||||||
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
result = html.xpath("//span[contains(text(),'シリーズ')]/../a/span/text()")[0]
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
result = html.xpath("//span[contains(text(),'シリーズ')]/../span/text()")[0]
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def main(number):
|
def main(number):
|
||||||
try:
|
try:
|
||||||
@@ -142,8 +177,9 @@ def main(number):
|
|||||||
'label': getLabel(detail_page),
|
'label': getLabel(detail_page),
|
||||||
'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
|
'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
|
||||||
'actor_photo': getActorPhoto(getActor(detail_page)),
|
'actor_photo': getActorPhoto(getActor(detail_page)),
|
||||||
'website': 'https://javdb.com' + urls,
|
'website': 'https://xcity.jp' + urls,
|
||||||
'source': 'xcity.py',
|
'source': 'xcity.py',
|
||||||
|
'series': getSeries(detail_page),
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# print(e)
|
# print(e)
|
||||||
|
|||||||
Reference in New Issue
Block a user