Add files via upload
Version 2.0:Add shell&python code
This commit is contained in:
108
code/internet.md
Normal file
108
code/internet.md
Normal file
@@ -0,0 +1,108 @@
|
||||
- [**返回**](../README.md) | [**下载**](../data/data.md)
|
||||
> **shell命令一句话爬虫**
|
||||
|
||||
- 下载第39~49次《中国互联网络发展状况统计报告》
|
||||
```
|
||||
wget http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/index.htm|
|
||||
cat index.htm |
|
||||
grep "次"|
|
||||
grep pdf|
|
||||
awk -F "</a>" '{print $1}'|
|
||||
awk -F "=" '{print " http://www.cnnic.net.cn/hlwfzyj/hlwxzbg "$3}'|
|
||||
sed 's/target//g'|
|
||||
sed 's/"_blank">//g'|
|
||||
awk -F "'" '{print $1$2}'|
|
||||
sed 's/ //g'|
|
||||
sed 's/hlwxzbg./hlwxzbg/g'|
|
||||
xargs wget -c
|
||||
```
|
||||
- 下载第34~37次《中国互联网络发展状况统计报告》
|
||||
```
|
||||
wget http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/index_1.htm|
|
||||
cat index_1.htm |
|
||||
grep "次"|
|
||||
grep pdf|
|
||||
awk -F "</a>" '{print $1}'|
|
||||
awk -F "=" '{print " http://www.cnnic.net.cn/hlwfzyj/hlwxzbg "$3}'|
|
||||
sed 's/target//g'|
|
||||
sed 's/"_blank">//g'|
|
||||
awk -F "'" '{print $1$2}'|
|
||||
sed 's/ //g'|
|
||||
sed 's/hlwxzbg./hlwxzbg/g'|
|
||||
xargs wget -c
|
||||
```
|
||||
- 下载第33次《中国互联网络发展状况统计报告》
|
||||
```
|
||||
wget http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/index_2.htm|
|
||||
cat index_2.htm |
|
||||
grep "次"|
|
||||
grep pdf|
|
||||
awk -F "</a>" '{print $1}'|
|
||||
awk -F "=" '{print " http://www.cnnic.net.cn/hlwfzyj/hlwxzbg "$3}'|
|
||||
sed 's/target//g'|sed 's/"_blank">//g'|
|
||||
awk -F "'" '{print $1$2}'|sed 's/ //g'|
|
||||
sed 's/hlwxzbg./hlwxzbg/g'|
|
||||
xargs wget -c
|
||||
```
|
||||
- 下载第26~29次《中国互联网络发展状况统计报告》
|
||||
```
|
||||
wget http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/index_3.htm|
|
||||
cat index_3.htm |
|
||||
grep "次"|
|
||||
grep pdf|
|
||||
awk -F "</a>" '{print $1}'|
|
||||
awk -F "=" '{print " http://www.cnnic.net.cn/hlwfzyj/hlwxzbg "$3}'|
|
||||
sed 's/target//g'|
|
||||
sed 's/"_blank">//g'|
|
||||
awk -F "'" '{print $1$2}'|
|
||||
sed 's/ //g'|
|
||||
sed 's/hlwxzbg./hlwxzbg/g'|
|
||||
xargs wget -c
|
||||
```
|
||||
- 下载第22~25次《中国互联网络发展状况统计报告》
|
||||
```
|
||||
wget http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/index_4.htm|
|
||||
cat index_4.htm |
|
||||
grep "次"|
|
||||
grep -E "pdf|rar|doc"|
|
||||
awk -F "</a>" '{print $1}'|
|
||||
awk -F "=" '{print " http://www.cnnic.net.cn/hlwfzyj/hlwxzbg "$3}'|
|
||||
sed 's/target//g'|
|
||||
sed 's/"_blank">//g'|
|
||||
awk -F "'" '{print $1$2}'|
|
||||
sed 's/ //g'|
|
||||
sed 's/hlwxzbg./hlwxzbg/g'|
|
||||
xargs wget -c
|
||||
```
|
||||
- 下载第8~21次《中国互联网络发展状况统计报告》
|
||||
```
|
||||
wget http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/index_5.htm|
|
||||
cat index_5.htm |
|
||||
grep "次"|
|
||||
grep -E "pdf|rar|doc"|
|
||||
awk -F "</a>" '{print $1}'|
|
||||
awk -F "=" '{print " http://www.cnnic.net.cn/hlwfzyj/hlwxzbg "$3}'|
|
||||
sed 's/target//g'|
|
||||
sed 's/"_blank">//g'|
|
||||
awk -F "'" '{print $1$2}'|
|
||||
sed 's/ //g'|
|
||||
sed 's/hlwxzbg./hlwxzbg/g'|
|
||||
xargs wget -c
|
||||
```
|
||||
- 下载第1~7次《中国互联网络发展状况统计报告》
|
||||
- 多行
|
||||
```
|
||||
wget http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/index_6.htm|
|
||||
cat index_6.htm |
|
||||
grep "次"|
|
||||
grep pdf|
|
||||
awk -F "</a>" '{print $1}'|
|
||||
awk -F "=" '{print " http://www.cnnic.net.cn/hlwfzyj/hlwxzbg "$3}'|
|
||||
sed 's/target//g'|sed 's/"_blank">//g'|
|
||||
awk -F "'" '{print $1$2}'|sed 's/ //g'|
|
||||
sed 's/hlwxzbg./hlwxzbg/g'|
|
||||
xargs wget -c
|
||||
```
|
||||
- 单行
|
||||
>wget http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/index_6.htm|cat index_6.htm |grep "次"|grep pdf|awk -F "</a>" '{print $1}'|awk -F "=" '{print " http://www.cnnic.net.cn/hlwfzyj/hlwxzbg "$3}'|sed 's/target//g'|sed 's/"_blank">//g'|awk -F "'" '{print $1$2}'|sed 's/ //g'|sed 's/hlwxzbg./hlwxzbg/g'|xargs wget -c
|
||||
2
code/test.md
Normal file
2
code/test.md
Normal file
@@ -0,0 +1,2 @@
|
||||
- ^_^ 调测中,请稍候 ^_^
|
||||
|
||||
9
code/test.py
Normal file
9
code/test.py
Normal file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding:utf-8 -*-
|
||||
# Author: yiliyas
|
||||
# Date: 2022.5
|
||||
|
||||
sout=' ^_^ 调测中,请稍候 ^_^ '
|
||||
print(sout)
|
||||
|
||||
|
||||
40
code/tjgb.md
Normal file
40
code/tjgb.md
Normal file
@@ -0,0 +1,40 @@
|
||||
- [**返回**](../README.md) | [**下载**](../data/data.md)
|
||||
> **shell命令一句话爬虫**
|
||||
|
||||
### 下载统计公报 :+1::+1::+1:
|
||||
#### 2013~2021年
|
||||
```
|
||||
wget http://www.stats.gov.cn/tjsj/tjgb/ndtjgb/index.html|
|
||||
xargs cat index.html |
|
||||
grep -E "t202|t201[4-9]"|
|
||||
grep "cont_tit"|
|
||||
awk -F '"' '{print "http://www.stats.gov.cn/"$2}'|
|
||||
xargs wget
|
||||
```
|
||||
#### 2001~2012年
|
||||
```
|
||||
wget http://www.stats.gov.cn/tjsj/tjgb/ndtjgb/index.html|
|
||||
xargs cat index.html |
|
||||
grep -E "t201[0-3]|t200[0-9]"|
|
||||
grep "cont_tit"|
|
||||
awk -F '"' '{print "http://www.stats.gov.cn/tjsj/tjgb/ndtjgb"$2}'|
|
||||
xargs wget
|
||||
```
|
||||
#### 1982~2000年
|
||||
```
|
||||
wget http://www.stats.gov.cn/tjsj/tjgb/ndtjgb/index_1.html|
|
||||
xargs cat index_1.html |
|
||||
grep "t2002"|
|
||||
grep "cont_tit"|
|
||||
awk -F '"' '{print "http://www.stats.gov.cn/tjsj/tjgb/ndtjgb"$2}'|
|
||||
xargs wget
|
||||
```
|
||||
#### 1978~1981年
|
||||
```
|
||||
wget http://www.stats.gov.cn/tjsj/tjgb/ndtjgb/index_2.html|
|
||||
xargs cat index_2.html|
|
||||
grep "t2002"|
|
||||
grep "cont_tit"|
|
||||
awk -F '"' '{print "http://www.stats.gov.cn/tjsj/tjgb/ndtjgb"$2}'|
|
||||
xargs wget
|
||||
```
|
||||
85
code/tjnq.md
Normal file
85
code/tjnq.md
Normal file
@@ -0,0 +1,85 @@
|
||||
- [**返回**](../README.md) | [**下载**](../data/data.md)
|
||||
> **shell命令一句话爬虫**
|
||||
### 下载中文版统计年鉴 :+1::+1::+1:
|
||||
### 2005~2021
|
||||
>wget http://www.stats.gov.cn/tjsj/ndsj/2021/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2021/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2020/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2020/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2019/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2019/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2018/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2018/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2017/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2017/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2016/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2016/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2015/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2015/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2014/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2014/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2013/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2013/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2012/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2012/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2011/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2011/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2010/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2010/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2009/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2009/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2008/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2008/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2007/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2007/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2006/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2006/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2005/left.htm|xargs cat left.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2005/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
### 2004
|
||||
>wget http://www.stats.gov.cn/tjsj/ndsj/yb2004-c/left.htm|
|
||||
xargs cat left.htm|
|
||||
grep href|grep html|
|
||||
awk -F '"' '{print " http://www.stats.gov.cn/tjsj/ndsj/yb2004-c/"$2}'|
|
||||
grep -E '.jpg|.htm'|
|
||||
xargs wget
|
||||
>
|
||||
>
|
||||
- [2003](http://www.stats.gov.cn/tjsj/ndsj/yearbook2003_c.pdf)
|
||||
- [2002](http://www.stats.gov.cn/yearbook2001/indexC.htm)
|
||||
- [2001](http://www.stats.gov.cn/tjsj/ndsj/2001c/mulu.htm)
|
||||
- [2000](http://www.stats.gov.cn/tjsj/ndsj/zgnj/mulu.html)
|
||||
- [1999](http://www.stats.gov.cn/yearbook/indexC.htm)
|
||||
|
||||
### 下载英文版统计年鉴 :+1::+1::+1:
|
||||
### 2007~2021
|
||||
>wget http://www.stats.gov.cn/tjsj/ndsj/2021/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2021/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2020/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2020/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2019/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2019/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2018/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2018/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2017/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2017/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2016/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2016/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2015/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2015/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2014/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2014/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2013/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2013/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2012/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2012/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2011/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2011/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2010/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2010/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2009/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2009/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2008/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2008/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
> wget http://www.stats.gov.cn/tjsj/ndsj/2007/left_.htm|xargs cat left_.htm|grep href|grep html|awk -F "'" '{print " http://www.stats.gov.cn/tjsj/ndsj/2007/"$2}'|grep -E '.jpg|.htm'|xargs wget
|
||||
>
|
||||
|
||||
Reference in New Issue
Block a user