【原】python爬虫实例 | 自定义爬取百度美女图片并保存到本地，可自定义爬取图片类型、图片数量、保存路径等(超详细直接使用)

Python集中营 2022-10-10 发布于甘肃

展开全文

代码运行

输入爬取页数：
1
输入搜索关键字：
小猪佩奇
输入图片保存路径(例如：/usr/load/)：
C:/imgs/

Process finished with exit code 0

基本属性

 1# 创建UA对象用于生成随机UA
 2
 3user_agent = UserAgent()
 4
 5# 设置爬取多少页
 6
 7page_num = input('输入爬取页数：\n')
 8
 9# 输入搜索关键词
10
11current = input('输入搜索关键字：\n')
12
13# 定义图片保存路径
14
15file_path = input('输入图片保存路径(例如：/usr/load/)：\n')

构造网络请求

 1# 设置请求头
 2
 3headers = {
 4
 5    'User-Agent': user_agent.random
 6
 7}
 8
 9# 设置从某一页的第几张图片开始爬取
10
11current_page_nums = 0
12
13for m in range(1, int(page_num) + 1):
14
15    # 百度搜索URL地址
16
17    url = 'https://image.baidu.com/search/acjson?'
18
19    # 搜索参数设置
20
21    params = {
22
23        'tn': 'resultjson_com',
24
25        'logid': '',
26
27        'ipn': 'rj',
28
29        'ct': '201326592',
30
31        'is': '',
32
33        'fp': 'result',
34
35        'queryWord': str(current),
36
37        'cl': '2',
38
39        'lm': '-1',
40
41        'ie': 'utf-8',
42
43        'oe': 'utf-8',
44
45        'adpicid': '',
46
47        'st': '-1',
48
49        'z': '',
50
51        'ic': '',
52
53        'hd': '',
54
55        'latest': '',
56
57        'copyright': '',
58
59        'word': str(current),
60
61        's': '',
62
63        'se': '',
64
65        'tab': '',
66
67        'width': '',
68
69        'height': '',
70
71        'face': '0',
72
73        'istype': '2',
74
75        'qc': '',
76
77        'nc': '1',
78
79        'fr': '',
80
81        'expermode': '',
82
83        'force': '',
84
85        'cg': '',
86
87        'pn': current_page_nums,
88
89        'rn': '30',
90
91        'gsm': '1e',
92
93    }
94
95    # 执行请求
96
97    response = requests.get(url=url, headers=headers, params=params)

处理响应结果

 1    # 定义结果编码
 2
 3    response.encoding = 'utf-8'
 4
 5    # 获取响应的json数据
 6
 7    response = response.json()
 8
 9    # 获取data键数据列表
10
11    result_list = response['data']
12
13    del result_list[-1]
14
15    # 定义图片路径列表
16
17    img_paths = []
18
19    # 遍历获取图片路径到img_paths
20
21    for i in result_list:
22
23        # 提取图片地址
24
25        img_paths.append(i['thumbURL'])

下载保存图片

 1    # 定义图片保存序号
 2
 3    pic_ser = 0
 4
 5    for img_path in img_paths:
 6
 7        # 执行图片下载
 8
 9        img = requests.get(url=img_path, headers=headers).content
10
11        # 设置图片保存路径
12
13        img_path = file_path + str(pic_ser) + '.jpg'
14
15        with open(img_path, 'wb') as fp:
16
17            # 保存图片
18
19            fp.write(img)
20
21        pic_ser = pic_ser + 1
22
23    current_page_nums = current_page_nums + 29