# Baidu Tieba: prompt for a search keyword, then download the first 5 result pages and save each one to disk.
from urllib.request import urlopen |
from urllib.parse import urlencode |
# Keyword typed by the user; it becomes the ``kw`` query parameter.
key = input('请输入一个查询关键字')

# Query-string parameters shared by every page request.
args = {'kw': key, 'ie': 'utf-8'}

# Listing URL without the page offset; the ``pn`` parameter is appended
# per request by get_one_page().
url1 = ''.join(['http://tieba.baidu.com/f?', urlencode(args)])
def get_one_page(index):
    """Download one listing page and return its body as text.

    Args:
        index: Zero-based page number. The ``pn`` query parameter sent to
            the server is ``index * 50`` (0, 50, 100, ...).

    Returns:
        The response body decoded with the default codec (UTF-8).

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    url = url1 + '&pn={}'.format(index * 50)
    # The context manager closes the HTTP response (and its socket) even
    # when read() or decode() raises, instead of leaking the connection.
    with urlopen(url) as response:
        return response.read().decode()
def save_one_page(index, html):
    """Write one downloaded page to ``tieba/tieba_<index + 1>.html``.

    The path is relative to the current working directory. The ``tieba``
    directory is created when it does not exist yet.

    Args:
        index: Zero-based page number; the file name uses ``index + 1``.
        html: Page content to write, as text.
    """
    import os  # local import keeps this function self-contained

    # Build the path with the platform separator: a hard-coded backslash
    # only forms a directory path on Windows.
    directory = 'tieba'
    filename = os.path.join(directory, 'tieba_{}.html'.format(index + 1))
    # open() does not create missing parent directories.
    os.makedirs(directory, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(html)
if __name__ == '__main__':
    # Fetch and store the first five listing pages (page indices 0-4).
    for page_no in range(5):
        save_one_page(page_no, get_one_page(page_no))
''' |
http://tieba.baidu.com/f?kw=%E8%80%83%E7%A0%94&ie=utf-8&pn=0 |
http://tieba.baidu.com/f?kw=%E8%80%83%E7%A0%94&ie=utf-8&pn=50 |
http://tieba.baidu.com/f?kw=%E8%80%83%E7%A0%94&ie=utf-8&pn=100 |
http://tieba.baidu.com/f?kw=%E8%80%83%E7%A0%94&ie=utf-8&pn=150 |
http://tieba.baidu.com/f?kw=%E8%80%83%E7%A0%94&ie=utf-8&pn=200 |
''' |