from urllib.request import urlopen |
# url = 'http://tieba.baidu.com/f?kw=%E4%BE%AF%E6%98%8E%E6%98%8A&ie=utf-8&pn=100' |
# response = urlopen(url) |
# print(response.read().decode()) |
def get_one_page(index):
    """Download one listing page of the forum and return its body as text.

    Args:
        index: Zero-based page number. It is multiplied by 50 to produce
            the ``pn`` query parameter of the request URL (presumably the
            site lists 50 entries per page -- confirm against the site).

    Returns:
        The response body decoded with ``bytes.decode()``'s default
        codec (UTF-8).

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Errors raised by ``urlopen`` (for example on network failure)
        are not caught here and propagate to the caller.
    """
    url = 'http://tieba.baidu.com/f?kw=%E4%BE%AF%E6%98%8E%E6%98%8A&ie=utf-8&pn={}'.format(index * 50)
    # Use the response as a context manager so the underlying connection is
    # closed even when reading or decoding raises.
    with urlopen(url) as response:
        return response.read().decode()
def save_one_page(index, html):
    """Write one downloaded page to ``tieba/houminghao_page_<n>.html``.

    Args:
        index: Zero-based page number; the file name uses ``index + 1`` so
            the saved files are numbered starting from 1.
        html: Page content (text) to write, encoded as UTF-8 on disk.

    The ``tieba`` directory is resolved relative to the current working
    directory and is created when it does not exist yet. An existing file
    with the same name is overwritten.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block.
    import os

    directory = 'tieba'
    # Opening the file would fail with FileNotFoundError if the target
    # directory were missing, so make sure it exists first.
    os.makedirs(directory, exist_ok=True)
    # os.path.join picks the platform's separator; a hard-coded backslash
    # only forms a directory path on Windows.
    file_name = os.path.join(directory, 'houminghao_page_{}.html'.format(index + 1))
    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(html)
if __name__ == '__main__':
    # Script entry point: fetch the first five listing pages (indices 0-4)
    # one after another and store each of them on disk.
    for page_index in range(5):
        save_one_page(page_index, get_one_page(page_index))