# [python] 代码库
from urllib.request import urlopen
import json
### 用json形式从豆瓣抓取电影的排行,通过审查元素找到看起来是json格式的链接
def get20Movie(url):
    """Fetch one page of movie data and return "title + rate" strings.

    The response body is expected to be a JSON object whose ``subjects``
    key holds a list of objects with ``title`` and ``rate`` string fields.

    Args:
        url: Address of the JSON endpoint to read.

    Returns:
        A list with one string per subject, formed by concatenating its
        title and its rate. The list is empty when the response has no
        ``subjects`` entry (or it is empty/null).

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # The context manager closes the connection even if reading fails.
    with urlopen(url) as response:
        raw_body = response.read()

    # The body arrives as bytes; decode it explicitly before parsing.
    payload = json.loads(raw_body.decode('utf-8'))

    # A missing or null "subjects" means there is nothing to report;
    # treat it as an empty page instead of failing on iteration.
    subjects = payload.get('subjects') or []

    # Missing or null fields contribute an empty string so that a single
    # incomplete record cannot abort the whole page.
    return [
        (subject.get('title') or '') + (subject.get('rate') or '')
        for subject in subjects
    ]
# Walk through the listing 20 movies at a time, printing each entry,
# and stop as soon as a page comes back empty (no more results).
j = 0
while True:
    movieNameAndScores = get20Movie('https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start='+str(j))
    if not movieNameAndScores:
        # Without this exit the loop would keep requesting pages forever.
        break
    for each in movieNameAndScores:
        print(each)
    # Advance the offset by the page size used in the URL (page_limit=20).
    j += 20
# [代码运行效果截图]
# by: 发表于:2017-09-19 09:41:51 顶(1) | 踩(1) 回复
# ??
# 回复评论