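# Change the corresponding values below to choose which pages and how many images are scraped. Written by a beginner; suggestions for improvement are welcome.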
import requests
from bs4 import BeautifulSoup
import os
import time
# Listing-page URL templates; "%d" is replaced with the page number.
url = 'https://www.4kbizhi.com/index_%d.html'
url1 = 'https://pic.netbian.com/index_%d.html'

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}

# fun1 writes into picture/ and fun4 into picture1/, so both output
# directories must exist. exist_ok avoids the check-then-create race.
os.makedirs('./picture', exist_ok=True)
os.makedirs('./picture1', exist_ok=True)
def fun1(soup, start=1, stop=5):
    """Download images referenced on a parsed 4kbizhi listing page.

    Selects every ``.col li a img`` element in *soup*, takes the matches at
    positions ``start`` .. ``stop - 1`` and saves each one under
    ``picture/`` using its ``alt`` text plus ``.jpg`` as the file name.

    Args:
        soup: Parsed listing page; only its ``select`` method is used.
        start: Index of the first match to download. Defaults to 1, which
            skips the first match exactly as the original loop did.
        stop: Index one past the last match to download. Defaults to 5.
    """
    # Query the document once; slicing (unlike indexing) tolerates pages
    # that contain fewer images than requested.
    images = soup.select(".col li a img")[start:stop]
    if not images:
        return

    # Make sure the output folder exists before writing into it.
    os.makedirs('picture', exist_ok=True)

    for tag in images:
        img_Url = 'https://www.4kbizhi.com' + tag['src']
        # The alt text is free-form; replace characters that are not
        # allowed in file names so open() cannot fail on them.
        imgName = ''.join(
            '_' if ch in '\\/:*?"<>|' else ch for ch in tag['alt']
        ) + '.jpg'
        # A timeout keeps one stalled connection from hanging the crawl.
        response = requests.get(url=img_Url, headers=headers, timeout=10)
        if not response.ok:
            # Skip rather than save an HTTP error page as a .jpg file.
            print(imgName, 'failed', response.status_code)
            continue
        img_path = 'picture/' + imgName
        with open(img_path, 'wb') as fp:
            fp.write(response.content)
        print(imgName, 'over')
def fun2(pages=range(14, 15)):
    """Fetch 4kbizhi listing pages and download the images on each one.

    Args:
        pages: Iterable of page numbers substituted into the ``url``
            template. Defaults to page 14 only, matching the original
            hard-coded range.
    """
    for page in pages:
        newUrl = url % page
        # A timeout keeps one stalled connection from hanging the crawl.
        response = requests.get(newUrl, headers=headers, timeout=10)
        if not response.ok:
            # Do not parse an HTTP error page as if it were a listing.
            print(newUrl, 'failed', response.status_code)
            continue
        # Force GBK decoding of the body (presumably what the site serves).
        response.encoding = 'gbk'
        soup = BeautifulSoup(response.text, 'lxml')
        fun1(soup)
def fun4(soup, start=1, stop=5):
    """Download images referenced on a parsed pic.netbian.com listing page.

    Selects every ``.slist li a img`` element in *soup*, takes the matches
    at positions ``start`` .. ``stop - 1`` and saves each one under
    ``picture1/`` using its ``alt`` text plus ``.jpg`` as the file name.

    Args:
        soup: Parsed listing page; only its ``select`` method is used.
        start: Index of the first match to download. Defaults to 1, which
            skips the first match exactly as the original loop did.
        stop: Index one past the last match to download. Defaults to 5.
    """
    # Query the document once; slicing (unlike indexing) tolerates pages
    # that contain fewer images than requested.
    images = soup.select(".slist li a img")[start:stop]
    if not images:
        return

    # Make sure the output folder exists before writing into it.
    os.makedirs('picture1', exist_ok=True)

    for tag in images:
        img_Url = 'https://pic.netbian.com' + tag['src']
        # The alt text is free-form; replace characters that are not
        # allowed in file names so open() cannot fail on them.
        imgName = ''.join(
            '_' if ch in '\\/:*?"<>|' else ch for ch in tag['alt']
        ) + '.jpg'
        # A timeout keeps one stalled connection from hanging the crawl.
        response = requests.get(url=img_Url, headers=headers, timeout=10)
        if not response.ok:
            # Skip rather than save an HTTP error page as a .jpg file.
            print(imgName, 'failed', response.status_code)
            continue
        img_path = 'picture1/' + imgName
        with open(img_path, 'wb') as fp:
            fp.write(response.content)
        print(imgName, 'over')
def fun3(pages=range(14, 15)):
    """Fetch pic.netbian.com listing pages and download the images on each.

    Args:
        pages: Iterable of page numbers substituted into the ``url1``
            template. Defaults to page 14 only, matching the original
            hard-coded range.
    """
    for page in pages:
        newUrl = url1 % page
        # A timeout keeps one stalled connection from hanging the crawl.
        response = requests.get(newUrl, headers=headers, timeout=10)
        if not response.ok:
            # Do not parse an HTTP error page as if it were a listing.
            print(newUrl, 'failed', response.status_code)
            continue
        # Force GBK decoding of the body (presumably what the site serves).
        response.encoding = 'gbk'
        soup = BeautifulSoup(response.text, 'lxml')
        fun4(soup)
if __name__ == '__main__':
    # Each job is (start banner, crawler); the jobs run in order with a
    # three-second pause between consecutive sites.
    jobs = (
        ("开始爬取最新高清壁纸", fun2),
        ("开始爬取彼岸图网", fun3),
    )
    for position, (banner, crawl) in enumerate(jobs):
        if position:
            time.sleep(3)
        print(banner)
        crawl()
        print("爬取完成")