采集农产品每日价格数据
一、爬取网站
http://www./priceDetail.html
二、爬取目标
采集2022年12月份猪肉的价格数据。
目标地址
一级分类:
LOne = {"蔬菜": 1186, "水果": 1187, "肉禽蛋": 1189,
"水产": 1190, "粮油": 1188, "豆制品": 1203,
"调料": 1204}
二级分类:
LTwo = {"水菜": 1199, "特菜": 1200,
"进口果": 1201, "干果": 1202,
"猪肉类": 1205, "牛肉类": 1206, "羊肉类": 1207, "禽蛋类": 1208,
"淡水鱼": 1209, "海水鱼": 1210, "虾蟹类": 1217, "贝壳类": 1218, "其他类": 1211,
"米面类": 1212, "杂粮类": 1213, "食用油": 1214}
目标数据
- 接口返回 JSON 格式的数据(响应头 Content-Type: application/json;charset=UTF-8)
三、爬取代码
import requests
import csv
import time
# First-level category name -> category id expected by the price endpoint.
LOne = {
    "蔬菜": 1186, "水果": 1187, "肉禽蛋": 1189,
    "水产": 1190, "粮油": 1188, "豆制品": 1203,
    "调料": 1204,
}

# Second-level category name -> category id expected by the price endpoint.
LTwo = {
    "水菜": 1199, "特菜": 1200,
    "进口果": 1201, "干果": 1202,
    "猪肉类": 1205, "牛肉类": 1206, "羊肉类": 1207, "禽蛋类": 1208,
    "淡水鱼": 1209, "海水鱼": 1210, "虾蟹类": 1217, "贝壳类": 1218, "其他类": 1211,
    "米面类": 1212, "杂粮类": 1213, "食用油": 1214,
}

# NOTE(review): the host part of this URL looks like it was stripped from the
# notes; fill in the real domain before running.
url = "http://www./getPriceData.html"

# Form fields posted to the endpoint: pork prices for December 2022.
data = {
    "limit": 200,
    "current": 1,
    "pubDateStartTime": "2022/12/01",
    "pubDateEndTime": "2022/12/31",
    "prodPcatid": LOne["肉禽蛋"],
    "prodCatid": LTwo["猪肉类"],
    "prodName": "",
}

# Plain relative path: the original r'.\...' form only means "current
# directory" on Windows; elsewhere it creates a file literally named ".\...".
OUTPUT_PATH = '猪肉报价.csv'

CSV_HEADER = ["一级分类", "二级分类", "品名", "最低价", "平均价", "最高价",
              "规格", "产地", "单位", "发布日期"]

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30

# Pause between consecutive page requests, to stay polite to the server.
REQUEST_INTERVAL = 1


def page_count(count, limit):
    """Return the number of pages needed to hold ``count`` records.

    Args:
        count: Total number of records reported by the server.
        limit: Page size reported by the server.

    Returns:
        ``ceil(count / limit)``, or 0 when there are no records.

    Raises:
        ValueError: If ``limit`` is not positive.
    """
    if limit <= 0:
        raise ValueError(f"page size must be positive, got {limit!r}")
    if count <= 0:
        return 0
    # Ceiling division: an exact multiple of `limit` must not add an extra,
    # empty page (the former `count // limit + 1` did).
    return -(-count // limit)


def build_row(entry):
    """Convert one record of the response ``list`` into a CSV row.

    The returned values follow the order of ``CSV_HEADER``.

    Args:
        entry: Mapping for a single price record.

    Returns:
        A list of ten values, one per CSV column.

    Raises:
        KeyError: If an expected field is missing from ``entry``.
    """
    # The request sends the first-level id as `prodPcatid` and the second-level
    # id as `prodCatid`, so `prodPcat` is taken to be the first-level (parent)
    # category and `prodCat` the second-level one.
    # NOTE(review): confirm against a live response.
    # `pubDate` is split on the first space and only the leading part is kept;
    # a null value (if the API ever returns one) becomes an empty string.
    publish_date = (entry['pubDate'] or '').split(' ')[0]
    return [
        entry['prodPcat'],   # "一级分类"
        entry['prodCat'],    # "二级分类"
        entry['prodName'],   # "品名"
        entry['lowPrice'],   # "最低价"
        entry['avgPrice'],   # "平均价"
        entry['highPrice'],  # "最高价"
        entry['specInfo'],   # "规格"
        entry['place'],      # "产地"
        entry['unitInfo'],   # "单位"
        publish_date,        # "发布日期"
    ]


def fetch_page(page):
    """POST the query for page number ``page`` and return the decoded JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Copy the shared form fields instead of mutating the module-level dict.
    payload = {**data, "current": page}
    response = requests.post(url, data=payload, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def main():
    """Download every page of the query and write the records to a CSV file."""
    with open(OUTPUT_PATH, mode='w', newline='', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(CSV_HEADER)

        # The first response carries both the paging info and page 1 itself,
        # so it is reused rather than requested a second time.
        json_data = fetch_page(1)
        total_pages = page_count(json_data['count'], json_data['limit'])

        for page in range(1, total_pages + 1):
            if page > 1:
                time.sleep(REQUEST_INTERVAL)
                json_data = fetch_page(page)
            for entry in json_data.get('list') or []:
                row = build_row(entry)
                print(row)
                csv_writer.writerow(row)


if __name__ == "__main__":
    main()
四、爬取结果