Merge pull request #226 from 68cdrBxM8YdoJ/fix-number-parsing
Fix get_number func
This commit is contained in:
4
.github/workflows/main.yml
vendored
4
.github/workflows/main.yml
vendored
@@ -28,6 +28,10 @@ jobs:
|
|||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
- name: Test number_parser.get_number
|
||||||
|
run: |
|
||||||
|
python number_parser.py -v
|
||||||
|
|
||||||
# - name: Show cloudscraper package location
|
# - name: Show cloudscraper package location
|
||||||
# run: |
|
# run: |
|
||||||
# python -c 'import cloudscraper as _; print(_.__path__)'
|
# python -c 'import cloudscraper as _; print(_.__path__)'
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import argparse
|
import argparse
|
||||||
from core import *
|
from core import *
|
||||||
|
from number_parser import get_number
|
||||||
|
|
||||||
|
|
||||||
def check_update(local_version):
|
def check_update(local_version):
|
||||||
@@ -66,30 +67,9 @@ def CEF(path):
|
|||||||
a = ''
|
a = ''
|
||||||
|
|
||||||
|
|
||||||
def getNumber(filepath,absolute_path = False):
|
|
||||||
if absolute_path == True:
|
|
||||||
filepath=filepath.replace('\\','/')
|
|
||||||
file_number = str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
|
|
||||||
return file_number
|
|
||||||
if '-' in filepath or '_' in filepath: # 普通提取番号 主要处理包含减号-和_的番号
|
|
||||||
filepath = filepath.replace("_", "-")
|
|
||||||
filepath.strip('22-sht.me').strip('-HD').strip('-hd')
|
|
||||||
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
|
|
||||||
if 'FC2' or 'fc2' in filename:
|
|
||||||
filename = filename.replace('-PPV', '').replace('PPV-', '').replace('FC2PPV-', 'FC2-').replace('FC2PPV_', 'FC2-')
|
|
||||||
filename = filename.replace('-ppv', '').replace('ppv-', '').replace('fc2ppv-', 'FC2-').replace('fc2ppv_', 'FC2-')
|
|
||||||
file_number = re.search(r'\w+-\w+', filename, re.A).group()
|
|
||||||
return file_number
|
|
||||||
else: # 提取不含减号-的番号,FANZA CID
|
|
||||||
try:
|
|
||||||
return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
|
|
||||||
except:
|
|
||||||
return re.search(r'(.+?)\.', filepath)[0]
|
|
||||||
|
|
||||||
|
|
||||||
def create_data_and_move(file_path: str, c: config.Config):
|
def create_data_and_move(file_path: str, c: config.Config):
|
||||||
# Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
|
# Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
|
||||||
n_number = getNumber(file_path, absolute_path=True)
|
n_number = get_number(file_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number))
|
print("[!]Making Data for [{}], the number is [{}]".format(file_path, n_number))
|
||||||
|
|||||||
49
number_parser.py
Normal file
49
number_parser.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def get_number(filepath: str) -> str:
    """
    Extract the release number (e.g. ``snis-829``) from a video file path.

    Only the last path component is inspected. Forward slashes and
    backslashes are both treated as path separators, so a Windows-style
    path gives the same result on every platform.

    Names containing ``-`` or ``_`` are normalised (underscores become
    hyphens, the ``22-sht.me`` tag, a trailing ``-HD``/``-hd`` and a leading
    ``[YYYY-MM-DD] - `` date are removed, FC2 ``PPV`` markers are collapsed)
    and the first ASCII ``word-word`` token is returned. Every other name,
    and any name in which no such token is found, yields the file name
    without its extension.

    :param filepath: file name or path of the video file.
    :return: the extracted number.

    >>> from number_parser import get_number
    >>> get_number("/Users/Guest/AV_Data_Capture/snis-829.mp4")
    'snis-829'
    >>> get_number("/Users/Guest/AV_Data_Capture/snis-829-C.mp4")
    'snis-829'
    >>> get_number("C:¥Users¥Guest¥snis-829.mp4")
    'snis-829'
    >>> get_number("C:¥Users¥Guest¥snis-829-C.mp4")
    'snis-829'
    >>> get_number("./snis-829.mp4")
    'snis-829'
    >>> get_number("./snis-829-C.mp4")
    'snis-829'
    >>> get_number(".¥snis-829.mp4")
    'snis-829'
    >>> get_number(".¥snis-829-C.mp4")
    'snis-829'
    >>> get_number("snis-829.mp4")
    'snis-829'
    >>> get_number("snis-829-C.mp4")
    'snis-829'
    >>> get_number("FC2PPV-123456.mp4")
    'FC2-123456'
    >>> get_number("abc123.mp4")
    'abc123'
    """
    # os.path.basename only splits on the host platform's separators, so
    # normalise backslashes first to handle Windows paths on POSIX as well.
    name = os.path.basename(filepath.replace('\\', '/'))

    if '-' in name or '_' in name:  # regular numbers written with '-' or '_'
        name = name.replace('_', '-')

        # The original chained str.strip() calls here discarded their result
        # (and str.strip() removes character sets, not substrings), so this
        # cleanup never ran. Remove the tags explicitly instead.
        name = name.replace('22-sht.me', '')
        stem, ext = os.path.splitext(name)
        for tag in ('-HD', '-hd'):
            if stem.endswith(tag):
                stem = stem[:-len(tag)]
        name = stem + ext

        # Drop a leading "[YYYY-MM-DD] - " date from the file name.
        name = re.sub(r"\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", name)

        if 'fc2' in name.lower():
            # Collapse the fused "FC2PPV-" form first; removing "PPV-" before
            # it would also eat the hyphen and leave nothing to match below.
            name = name.replace('FC2PPV-', 'FC2-').replace('fc2ppv-', 'FC2-')
            name = name.replace('-PPV', '').replace('PPV-', '')
            name = name.replace('-ppv', '').replace('ppv-', '')

        match = re.search(r'\w+-\w+', name, re.A)
        if match is not None:
            return match.group()

    # Numbers without a hyphen (e.g. FANZA CID), or names where no
    # "word-word" token was found: return the name minus its extension.
    return os.path.splitext(name)[0]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    # raise_on_error=True makes the first failing example raise.
    from doctest import testmod

    testmod(raise_on_error=True)
|
||||||
Reference in New Issue
Block a user