#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: yiliyas
# date: 2022.4.24
"""Print Markdown tables for the Shanghai supply-guarantee enterprise directory.

The script queries the JSON endpoints of the Shanghai Federation of Commerce
"anti-epidemic supply guarantee" production/sales matching platform and prints:

1. one Markdown row per category with its enterprise count, and
2. a numbered Markdown table of every enterprise with its category name.

The responses are scanned with regular expressions rather than parsed as
JSON, exactly as the original script did.
"""

from urllib import request
import re

CATEGORIES_URL = 'https://c.kdcer.com/sh_keep_supply/home/categories'
ENTERPRISES_URL = ('https://c.kdcer.com/sh_keep_supply/home/enterprises'
                   '?categoryId=-1&keyword=&page={page}&limit={limit}')

# Number of enterprises requested per page (the `limit` query parameter).
PAGE_SIZE = 20
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Category name that carries the overall enterprise count ("all enterprises").
ALL_ENTERPRISES_LABEL = "全部企业"

# Captures (category name, count) from the categories response.
CATEGORY_PATTERN = re.compile('"name":"(.*?)",.*?,"count":(.*?)}', re.S)
# Captures (enterprise name, category name) from an enterprises page.
ENTERPRISE_PATTERN = re.compile(
    '"name":"(.*?)",".*?,"categoryName":"(.*?)"', re.S)

# Markdown header of the enterprise table (index | category | enterprise).
ENTERPRISE_TABLE_HEADER = ('\n|序号|类型|点击(详细工商信息)|\n'
                           + '|:----|:----|:----|')


def fetch_text(url):
    """Download *url* and return the body decoded as UTF-8 (BOM stripped).

    The response is closed as soon as it has been read.
    """
    req = request.Request(url)
    with request.urlopen(req, timeout=REQUEST_TIMEOUT) as response:
        return response.read().decode('utf-8-sig')


def parse_categories(content):
    """Return a list of ``(name, count)`` string pairs found in *content*."""
    return CATEGORY_PATTERN.findall(content)


def parse_enterprises(content):
    """Return a list of ``(enterprise_name, category_name)`` pairs."""
    return ENTERPRISE_PATTERN.findall(content)


def find_total(categories):
    """Return the count of the "all enterprises" category, or ``None``.

    *categories* is an iterable of ``(name, count)`` pairs as produced by
    :func:`parse_categories`.  If several names contain the label, the last
    one wins, matching the original loop's overwrite behaviour.
    """
    total = None
    for name, count in categories:
        if ALL_ENTERPRISES_LABEL in name:
            total = int(count)
    return total


def page_count(total, per_page=PAGE_SIZE):
    """Return how many pages of *per_page* items are needed for *total* items.

    Uses ceiling division so an exact multiple of *per_page* does not yield
    a trailing empty page.
    """
    if total <= 0:
        return 0
    return (total + per_page - 1) // per_page


def main():
    """Fetch the directory and print both Markdown listings to stdout."""
    categories = parse_categories(fetch_text(CATEGORIES_URL))

    # One Markdown row per category: |name|count
    for name, count in categories:
        print('|' + name + '|' + str(count))

    total = find_total(categories)
    if total is None:
        # Without the overall count we cannot know how many pages to fetch.
        raise SystemExit(
            'category "%s" not found in %s' % (ALL_ENTERPRISES_LABEL,
                                               CATEGORIES_URL))

    print(ENTERPRISE_TABLE_HEADER)

    row_number = 0
    # Pages are 1-based; iterate over pages, not over the enterprise count.
    for page in range(1, page_count(total) + 1):
        content = fetch_text(
            ENTERPRISES_URL.format(page=page, limit=PAGE_SIZE))
        for enterprise_name, category_name in parse_enterprises(content):
            row_number += 1
            print('|' + str(row_number) + '|' + category_name
                  + '|' + enterprise_name)


if __name__ == "__main__":
    main()