import pandas as pd
import requests
from bs4 import BeautifulSoup
# Download one XML document from comment.bilibili.com, pull the text out of
# every <d> element, and write those strings to "bili.csv" as one column.
url = "http://comment.bilibili.com/186803402.xml"
# A timeout keeps the script from hanging forever on a stalled connection.
req = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page
# and writing an empty CSV.
req.raise_for_status()
html = req.content
html_doc = str(html, "utf-8")  # decode the raw response bytes as UTF-8

# Parse the document and collect the text of each <d> element.
soup = BeautifulSoup(html_doc, "lxml")
results = soup.find_all("d")
contents = [x.text for x in results]

# Save the results: a single "contents" column, no index column.
dic = {"contents": contents}
df = pd.DataFrame(dic)
df["contents"].to_csv("bili.csv", encoding="utf-8", index=False)
__init__.py
资深程序员
by: no_no_no 发表于:2022-03-06 17:01:20 顶(0) | 踩(0) 回复
666
回复评论