
import requests, pandas as pd
from bs4 import BeautifulSoup
# Download an XML document from Bilibili's comment host, collect the text of
# every <d> element in it, and write those strings to "bili.csv".
# NOTE(review): the <d> elements are presumably the video's bullet comments
# (danmaku) -- confirm against the feed format.
url = "http://comment.bilibili.com/186803402.xml"

# Without a timeout, requests waits indefinitely on a stalled connection.
req = requests.get(url, timeout=10)
# Stop on 4xx/5xx responses instead of parsing an error page into the CSV.
req.raise_for_status()

html = req.content
# Decode the raw bytes explicitly as UTF-8 rather than relying on the
# encoding requests would guess for `req.text`.
html_doc = html.decode("utf-8")

# Parse the document and pull out the text of each <d> element.
soup = BeautifulSoup(html_doc, "lxml")
results = soup.find_all("d")
contents = [x.text for x in results]

# Save the results as a single "contents" column, without the row index.
dic = {"contents": contents}
df = pd.DataFrame(dic)
df["contents"].to_csv("bili.csv", encoding="utf-8", index=False)
_init_.py



资深程序员
by: no_no_no 发表于:2022-03-06 17:01:20 顶(0) | 踩(0) 回复
666
回复评论