# 爬取妹子图
import requests
import os
from bs4 import BeautifulSoup
# Base URL of the target site.
MZITU = 'https://www.mzitu.com'
# Browser-like User-Agent so the site does not reject the scraper outright.
USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36')
HEADERS = {
    'User-Agent': USER_AGENT,
    # The site uses Referer-based hotlink protection; a same-site Referer is required.
    'Referer': MZITU,
}
# Local root directory for downloaded images (Windows path; backslash must be escaped).
SAVE_PATH = 'D:\\妹子图\\'
def get_request(url):
    """Issue a GET request for *url* with the scraper's standard headers.

    Returns the raw ``requests.Response`` so callers can pick ``.text``
    or ``.content`` as needed.
    """
    res = requests.get(url, headers=HEADERS)
    print("正在请求 " + url)  # "requesting <url>"
    return res
def get_request_soup(url):
    """Fetch *url* and return its HTML parsed into a BeautifulSoup tree (lxml parser)."""
    html = get_request(url).text
    print("正在解析 ")  # "parsing"
    return BeautifulSoup(html, 'lxml')
def get_girl_list(url):
    """Process one listing page: download every gallery on it, then recurse
    into the next listing page until no "next" link remains.
    """
    soup = get_request_soup(url)
    # The "next" pagination link is absent on the final listing page;
    # guard against None so the recursion terminates instead of raising.
    next_link = soup.find(class_='next page-numbers')
    next_url = next_link.get('href') if next_link is not None else ""
    girls = soup.find(class_='postlist').select('li')
    for girl in girls:
        # Each <li> holds the gallery link and, inside a <span>, its title.
        girl_url = girl.find('a').get('href')
        name = girl.find('span').find('a').string
        get_girl_group(girl_url, 1, name)
    if next_url:
        get_girl_list(next_url)
def get_girl_group(url, page, name):
    """Download page *page* of the gallery at *url*, saving it under *name*,
    then recurse through the remaining pages of the gallery.
    """
    soup = get_request_soup(url + "/" + str(page))
    # The second-to-last <span> in the pagination bar holds the last page number.
    max_page = int(soup.find(class_='pagenavi').select('span')[-2].string)
    image = soup.find(class_='main-image').find('img').get('src')
    save_img(image, name, page)
    page += 1
    # Original line was truncated ("if page"); max_page is computed and
    # otherwise unused, so the intended bound is clearly page <= max_page.
    if page <= max_page:
        get_girl_group(url, page, name)
# Store one gallery image on the local disk.
def save_img(url, file_name, page):
    """Download the image at *url* and save it as
    ``<SAVE_PATH><file_name>\\<page>.jpg``.

    Skips the download entirely when the target file already exists;
    creates the per-gallery directory on first use.

    NOTE(review): the original source was truncated after ``open(image_path`` —
    the write/close tail below is a reconstruction of the obvious intent.
    """
    file_path = SAVE_PATH + file_name
    image_path = file_path + "\\" + str(page) + ".jpg"
    print('检查文件是否存在 ' + image_path)  # "checking whether file exists"
    if not os.path.exists(image_path):
        print("文件正在下载...")  # "downloading..."
        html = get_request(url).content
        print("下载成功")  # "download succeeded"
        print("检查文件夹是否存在")  # "checking whether folder exists"
        if not os.path.exists(file_path):
            print("创建文件夹 " + file_name)  # "creating folder"
            os.makedirs(file_path)
        else:
            print("文件夹已存在 " + file_name)  # "folder already exists"
        print("存入磁盘中...")  # "writing to disk..."
        # Binary mode for image bytes; the context manager guarantees close.
        with open(image_path, 'wb') as f:
            f.write(html)
# (scraper artifact, not code) 声明:本站部分文章及图片源自用户投稿,如本站任何资料有侵权请您尽早请联系jinwei@zod.com.cn进行处理,非常感谢!