Files
authority-data/code/shyq.py
2022-05-22 17:53:23 +08:00

51 lines
1.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: yiliyas
# date: 2022.4.24
from urllib import request
import re
# 上海市商业联合会抗疫保供产销对接信息平台生活物资供应名录json格式数据类别统计
url = 'https://c.kdcer.com/sh_keep_supply/home/categories'
req = request.Request(url)
response=request.urlopen(req)
content=response.read().decode('utf-8-sig')
# 获得类型id,类别,企业名
pattern = re.compile('"name":"(.*?)",.*?,"count":(.*?)}',re.S)
items = re.findall(pattern,content)
m=0
for item in items:
m = m+1
cname=item[0]
cnum=item[1]
# 获得全部企业——总数
if str(cname).find("全部企业")>-1:
nums = int(cnum)
# 输入markdown格式
strs='|'+cname+'|'+str(cnum)
print(strs)
# 打印markdown表头
title2='\n|序号|类型|点击(详细工商信息)|\n'+'|:----|:----|:----|'
print(title2)
n=0
# 获得总页数每页20个
m = int(nums/20)+1
for i in range(1,nums):
url = 'https://c.kdcer.com/sh_keep_supply/home/enterprises?categoryId=-1&keyword=&page='+str(i)+'&limit=20'
req = request.Request(url)
response=request.urlopen(req)
content=response.read().decode('utf-8-sig')
# 获得类型id,类别,企业名
pattern = re.compile('"name":"(.*?)",".*?,"categoryName":"(.*?)"',re.S)
items = re.findall(pattern,content)
for item in items:
n=n+1
cname=item[0]
cgname=item[1]
strs='|'+str(n)+'|'+cgname+'|'+cname
print(strs)