From 54d8f3af87bea96e1aa260c1af507f54217fe44e Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 6 Mar 2022 01:31:43 +0800 Subject: [PATCH] madou priority against javdb --- WebCrawler/__init__.py | 4 ++++ WebCrawler/madou.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index d467eab..f5ce63c 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -95,6 +95,10 @@ def get_data_from_json(file_number, oCC): "rj" in lo_file_number or "vj" in lo_file_number ): sources.insert(0, sources.pop(sources.index("dlsite"))) + elif "madou" in sources and ( + re.match(r"^md[0-9]{4}$", lo_file_number) + ): + sources.insert(0, sources.pop(sources.index("madou"))) elif re.match(r"^[a-z0-9]{3,}$", lo_file_number): if "javdb" in sources: sources.insert(0, sources.pop(sources.index("javdb"))) diff --git a/WebCrawler/madou.py b/WebCrawler/madou.py index 01fc19c..9dc7773 100644 --- a/WebCrawler/madou.py +++ b/WebCrawler/madou.py @@ -1,3 +1,5 @@ +import sys +sys.path.append('../') from bs4 import BeautifulSoup # need install from lxml import etree # need install from pyquery import PyQuery as pq # need install @@ -5,10 +7,8 @@ from ADC_function import * import json import re from lib2to3.pgen2 import parse -import sys from urllib.parse import urlparse, unquote -sys.path.append('../') def getActorPhoto(html): @@ -162,3 +162,4 @@ def main(number): if __name__ == '__main__': print(main('MD0094')) + print(main('MD0222'))