from urllib.request import urlopen |
import json |
### 用json形式从豆瓣抓取电影的排行,通过审查元素找到看起来是json格式的链接
def get20Movie(url):
    """Fetch one page of movie search results and format each entry.

    Args:
        url: Address of a JSON document whose top-level object holds a
            ``subjects`` list; each item is expected to carry ``title``
            and ``rate`` string fields.

    Returns:
        A list of strings, one per subject, each being the title
        immediately followed by the rating. The list is empty when the
        document has no subjects.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        ValueError: If the response body is not valid JSON.
    """
    # Close the response deterministically instead of leaking the connection.
    with urlopen(url) as response:
        raw = response.read()

    # read() returns bytes, so decode before handing the text to json.loads.
    payload = json.loads(raw.decode('utf-8'))

    # A missing or null 'subjects' key is treated as "no results" rather than
    # raising TypeError while iterating over None.
    subjects = payload.get('subjects') or []

    # Missing fields fall back to '' so one incomplete entry does not abort
    # the whole page with a None + str TypeError.
    return [
        (subject.get('title') or '') + (subject.get('rate') or '')
        for subject in subjects
    ]
# Page through the search endpoint 20 entries at a time, printing every
# "title + rating" string. The tag value %E7%83%AD%E9%97%A8 is the
# URL-encoded form of "热门".
j = 0
while True:
    movieNameAndScores = get20Movie(
        'https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=' + str(j)
    )
    # Stop once a page comes back empty; without this check the loop would
    # keep requesting ever-higher offsets forever.
    if not movieNameAndScores:
        break
    for each in movieNameAndScores:
        print(each)
    # Advance the offset by one page (page_limit=20 in the URL).
    j += 20
by: 发表于:2017-09-19 09:41:51 顶(1) | 踩(1) 回复
??
回复评论