import requests
import re
import json
import os
'''
作者:小松叔
操作系统:win10专业版
编程语言:python3.5.2
'''
# Create the root output directory (and any missing parents) if it does not
# exist yet. exist_ok=True replaces the separate exists() check, so there is
# no window between checking and creating.
# NOTE(review): `root` is not defined anywhere in the visible source; it must
# be assigned a directory path before this line runs.
os.makedirs(root, exist_ok=True)
number_list = []    # numeric user id of every model, as digit strings (unique)
user_id_list = []   # raw "user_id=<digits>" matches found on the listing pages
album_id_list = []  # raw "album_id=<digits>" matches, filled in further below

# Numbers already stored in number_list, for O(1) duplicate checks.
_seen_numbers = set()

# Collect the user id of every model from the listing pages.
# NOTE(review): `user_url` is not defined in the visible source; it must be
# assigned before this loop runs.
# NOTE(review): range(1, 10) visits pages 1-9, while the original comment said
# "first 10 pages" -- use range(1, 11) if ten pages are really intended.
for page in range(1, 10):
    url1 = user_url + str(page)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        user_r = requests.get(url1, timeout=10)
        user_r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are reported here; programming errors
        # are no longer swallowed by a bare `except:`.
        print("gain web error!")
        continue
    user_r.encoding = user_r.apparent_encoding
    user_html = user_r.text

    # Each match is handled exactly once, on the page where it was found.
    # (Previously the whole accumulated user_id_list was re-scanned on every
    # page, so earlier ids were appended to number_list again and again.)
    for user_id in re.findall(r'user_id=[0-9]{0,9}', user_html):
        user_id_list.append(user_id)
        digits = re.search('[0-9]{1,10}', user_id)
        if digits is None:
            # The pattern above also matches a bare "user_id=" with no digits.
            continue
        number = digits.group(0)
        if number not in _seen_numbers:
            _seen_numbers.add(number)
            number_list.append(number)
# Collect the album ids of every model, then download the pictures of every
# album into its own folder below `root`.
# NOTE(review): the original indentation of this script was lost; the nesting
# below is reconstructed from the statement order and the original comments.
# In particular the download loop is placed AFTER the collection loop --
# confirm that it was not meant to run inside the per-model loop.
# NOTE(review): `first_url`, `root` and `URL` are not defined in the visible
# source and must be assigned before this section runs.


def _fetch(url):
    """Return the response for *url*, or None when the request fails.

    Connection errors, timeouts and non-success HTTP status codes are
    reported with the same message used by the user-id section and turned
    into None, so the caller can skip that item and keep going.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        print("gain web error!")
        return None
    return response


# Phase 1: gather album ids.
for number in number_list:
    url = first_url + str(number)
    for page in range(1, 6):  # each model has 5 album pages: 1-5
        # NOTE(review): "&page" is followed directly by the number, with no
        # "=" in between -- verify against the real endpoint that this is the
        # intended query string (kept unchanged here).
        album_id_link = url + "&page" + str(page)
        album_id_link_r = _fetch(album_id_link)
        if album_id_link_r is None:
            continue
        album_id_link_r.encoding = album_id_link_r.apparent_encoding
        album_id_link_html = album_id_link_r.text
        # Keep each album id once, in first-seen order.
        for album_id in re.findall('album_id=[0-9]*', album_id_link_html):
            if album_id not in album_id_list:
                album_id_list.append(album_id)

# Phase 2: download the pictures of every album.
for album_id in album_id_list:
    # The folder is named after the raw match, e.g. "album_id=123".
    album_folder = root + album_id + "//"
    os.makedirs(album_folder, exist_ok=True)

    # NOTE(review): range(1, 3) visits pages 1 and 2, while the original
    # comment said "first 3 pages" -- use range(1, 4) if three are intended.
    for k in range(1, 3):
        # NOTE(review): the statement that built `URL` (presumably from the
        # album id and page number `k`) is missing from the visible source;
        # as written, every iteration requests the same URL.
        picture_link_r = _fetch(URL)
        if picture_link_r is None:
            continue
        picture_link_r.encoding = picture_link_r.apparent_encoding
        picture_link_html = picture_link_r.text
        # The endpoint answers with JSON; decode it into a dict.
        picture_link_data = json.loads(picture_link_html)
        print(picture_link_data['isError'])

        # isError is compared as the string "0"; per the original comment a
        # value of 1 means the page has no pictures.
        if picture_link_data['isError'] != "0":
            continue

        for picture in picture_link_data['picList']:
            # Use the last path segment of the picture URL as the file name.
            picture_name = picture['picUrl'].split('/')[-1]
            picture_path = album_folder + picture_name
            if os.path.exists(picture_path):
                continue  # already downloaded on a previous run
            # picUrl has no scheme prefix of its own, so "https:" is added.
            picture_r = _fetch("https:" + picture['picUrl'])
            if picture_r is None:
                # Do not write an error response to disk as an image file.
                continue
            with open(picture_path, 'wb') as f:
                f.write(picture_r.content)