# Crawl Meituan search results using an object-oriented approach.
import json
import urllib.parse
import urllib.request


class MeiTuan(object):
    """Fetch Meituan search-result pages and collect a few fields per result.

    Results from every requested page are accumulated, flattened, in
    ``self.temp`` as ``[title, image_url, address, lowest_price, avg_score,
    deal_list, title, image_url, ...]``.
    """

    # Keys copied from each entry of a result's 'deals' list, in output order:
    # package name, package price, original price, number sold.
    _DEAL_FIELDS = ('title', 'price', 'value', 'sales')

    def __init__(self, start_page, end_page, q):
        """Store the inclusive page range and the search keyword ``q``."""
        self.start_page = start_page
        self.end_page = end_page
        self.q = q
        self.url = 'http://apimobile.meituan.com/group/v4/poi/pcsearch/1?'
        self.temp = []

    def get_request(self, page):
        """Build the ``urllib.request.Request`` for the given 1-based page.

        The request asks for 32 results per page, so the offset sent to the
        server is ``(page - 1) * 32``. The full URL is printed before the
        request object is returned.
        """
        offset = (page - 1) * 32
        params = {
            'uuid': '9b200d0955c947758b40.1537682343.1.0.0',
            'userid': '-1',
            'limit': '32',
            'offset': str(offset),
            'cateId': '-1',
            'q': self.q,
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
        }
        url = self.url + urllib.parse.urlencode(params)
        print(url)
        return urllib.request.Request(url=url, headers=headers)

    def get_response(self, req):
        """Open ``req`` and return the response object.

        The caller is responsible for closing the returned response.
        """
        return urllib.request.urlopen(req)

    def get_content(self, res):
        """Read the JSON body from ``res`` and append its results to ``self.temp``.

        ``res`` only needs a ``read()`` method returning UTF-8 encoded JSON
        with the results under ``['data']['searchResult']``.

        For each result six values are appended to ``self.temp``: title,
        image URL, address, lowest price, average score, and a list holding
        the title/price/value/sales of every deal. If the result has no
        usable 'deals' entry (missing, null, or a deal lacking one of those
        keys), the marker string "没有优惠套餐" is appended to the deal list
        instead of aborting the whole page.
        """
        content = res.read().decode("utf8")
        results = json.loads(content)['data']['searchResult']
        for item in results:
            title = item['title']
            image_url = item['imageUrl']
            address = item['address']
            lowest_price = item['lowestprice']
            avg_score = item['avgscore']

            deal_list = []
            try:
                for deal in item['deals']:
                    for field in self._DEAL_FIELDS:
                        deal_list.append(deal[field])
            except (KeyError, TypeError):
                # Only "data is absent/malformed" errors are treated as
                # "no deals"; anything else (e.g. KeyboardInterrupt) must
                # propagate instead of being silently swallowed.
                deal_list.append("没有优惠套餐")

            self.temp.extend(
                [title, image_url, address, lowest_price, avg_score, deal_list]
            )

    def run(self):
        """Fetch every page in the configured range, then print all results."""
        for page in range(self.start_page, self.end_page + 1):
            request = self.get_request(page)
            # Close each HTTP response as soon as its body has been parsed so
            # connections are not leaked across pages.
            with self.get_response(request) as response:
                self.get_content(response)
        string = str(self.temp)
        print(string)


def main():
    """Prompt for the keyword and page range, then run the crawler."""
    q = input("输入搜索的关键字:")
    start_page = int(input("请输入起始页码:"))
    end_page = int(input("请输入终止页码:"))
    meituan = MeiTuan(start_page, end_page, q)
    meituan.run()


if __name__ == '__main__':
    main()
|
|