[python]代码库
import requests
import re
import os
import time
# Browser-like User-Agent sent with every request (page and images).
headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# Gallery page scraped when the file is run as a script.
PAGE_URL = "https://www.vmgirls.com/15270.html"
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Pause (in seconds) before each image download.
DOWNLOAD_DELAY = 2


def extract_title(html):
    """Return the text of the last post-title ``<h1>`` found in *html*.

    Raises:
        ValueError: if the page contains no matching ``<h1>`` element.
    """
    titles = re.findall(r'<h1 class="post-title mb-3">(.*?)</h1>', html)
    if not titles:
        raise ValueError("post title not found in page HTML")
    return titles[-1]


def extract_image_urls(html):
    """Return the ``href`` of every ``rel="nofollow"`` image link in *html*.

    The list is in document order and empty when nothing matches.
    """
    return re.findall(
        r'<a rel="nofollow" href="(.*?)" alt=".*?" title=".*?">', html
    )


def safe_dir_name(title):
    """Turn *title* into a string usable as a directory name.

    Characters that are path separators or illegal in Windows file names
    are replaced by ``_``; a title that ends up empty becomes ``download``.
    """
    cleaned = re.sub(r'[\\/:*?"<>|]', '_', title).strip()
    return cleaned or "download"


def download_gallery(page_url=PAGE_URL, delay=DOWNLOAD_DELAY):
    """Download every image linked from *page_url* into a title-named folder.

    Args:
        page_url: URL of the gallery page to scrape.
        delay: seconds to sleep before each image request.

    Returns:
        The name of the directory the images were written to.

    Raises:
        requests.RequestException: if the gallery page cannot be fetched.
        ValueError: if the page title cannot be located.
    """
    from urllib.parse import urljoin

    # Fetch the page.
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    html = response.text

    # Parse the page.
    dir_name = safe_dir_name(extract_title(html))
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dir_name, exist_ok=True)

    # Save the images.
    for url in extract_image_urls(html):
        time.sleep(delay)
        # hrefs may be relative; resolve them against the page URL.
        image_url = urljoin(page_url, url)
        # File name = last path segment, without any query string.
        file_name = image_url.split('/')[-1].split('?')[0]
        if not file_name:
            continue
        print(image_url)
        try:
            image = requests.get(
                image_url, headers=headers, timeout=REQUEST_TIMEOUT
            )
            image.raise_for_status()
        except requests.RequestException as err:
            # One broken image should not abort the rest of the gallery.
            print("skipped %s: %s" % (image_url, err))
            continue
        with open(os.path.join(dir_name, file_name), 'wb') as f:
            f.write(image.content)
    return dir_name


if __name__ == "__main__":
    download_gallery()
[代码运行效果截图]
高级设计师
by: Python自学 发表于:2022-08-16 02:15:37 顶(9) | 踩(1) 回复
自己动手封装成函数,爬取目录循环调用
回复评论