视频爬取，下载

编程随想

AI时代，找源码已成为过去式，纪念我过去的十年 by Python自学0(回) 116天前

还有人吗 by 高嘉易2(回) 134天前

会python真的可以为所欲为 by Python自学0(回) 614天前

这里还有人吗 by mikeKil2(回) 799天前

这里还有人吗 by mikeKil0(回) 799天前

每天面对着电脑屏幕，敲打键盘。我所面对的并不只是代码，而是一种生活方式。 by js特效0(回) 1024天前

[python]代码库

"""Two-stage video scraper.

Stage 1 (crawl): walk the paginated listing, collect title -> detail-page URL,
write them to the workbook, then visit every detail page to extract the
download address and write that to column D.

Stage 2 (download): re-open the workbook and download every row's address to
an ``.mp4`` file.

Both stages run at import time, five worker threads per batch.
"""
import re
import threading
import time

import bs4
import openpyxl
import requests
from lxml import etree

header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.70'}

# i: next listing page to fetch (1-based); n: next index into mz/dz to resolve.
i, n = 1, 0
Name = []
web_url = {}   # title -> detail-page URL, filled by getUrl()
addr = {}      # title -> download address, filled by gerAddr()
lock = threading.RLock()

# NOTE(review): the listing host (c345y) and the host prefixed to detail links
# (c234y) differ. Kept exactly as in the original; confirm which is intended.
LIST_URL = 'https://www.c345y.com/vmovie/sm/'
DETAIL_HOST = 'https://www.c234y.com'
# The original pairs title[j] with href[j + 4]; presumably the first four
# matched anchors carry no title attribute. TODO confirm against the page.
HREF_OFFSET = 4


def _run_batch(target, size=5, stagger=0.0):
    """Start ``size`` threads running ``target`` and wait for all of them.

    ``stagger`` is the pause in seconds between consecutive thread starts.
    """
    workers = []
    for idx in range(size):
        if idx and stagger:
            time.sleep(stagger)
        worker = threading.Thread(target=target)
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()


def getUrl():
    """Fetch the next listing page and merge its entries into ``web_url``.

    The page number is claimed from the shared counter ``i`` under ``lock``.
    Any failure (network error, unexpected page layout) is reported on stdout
    and leaves ``web_url`` untouched for that page.
    """
    global i
    with lock:
        page = i
        i += 1
        print('----------------' + str(page) + '----------')

    if page == 1:
        page_url = LIST_URL
    else:
        page_url = LIST_URL + 'index_' + str(page) + '.html'

    try:
        res = requests.get(url=page_url, headers=header, timeout=30)
        res.close()
        time.sleep(0.5)
        res.encoding = 'utf8'
        html = etree.HTML(res.text)
        # Indexing [0] doubles as a layout check: IndexError if the list
        # container is missing from the page.
        itms = html.xpath('/html/body/div[6]/div/div[2]//dl')[0]
        # The leading '//' makes these queries document-wide, as in the original.
        hrefs = itms.xpath('//dl/dt/a/@href')
        titles = itms.xpath('//dl/dt/a/@title')
        # Build the page's mapping first so a short href list cannot leave a
        # half-written page in web_url.
        found = {
            title: DETAIL_HOST + hrefs[j + HREF_OFFSET]
            for j, title in enumerate(titles)
        }
    except Exception as exc:  # thread boundary: report and give up on this page
        print('访问' + page_url + '失败！', exc)
        return

    with lock:
        web_url.update(found)


def gerAddr():
    """Resolve the download address for the next unprocessed title.

    Claims the next index into the module-level lists ``mz`` (titles) and
    ``dz`` (detail-page URLs) under ``lock``, fetches the detail page outside
    the lock, and stores the ``value`` attribute of the element with
    ``id="url"`` in ``addr``. Returns silently when every index is taken.
    """
    global n
    with lock:
        idx = n
        n += 1
        if idx >= len(mz):
            return
        mz1 = mz[idx]
        dz1 = dz[idx]
        print('----------------' + str(idx + 1) + '----------')
        print(mz1, dz1)

    try:
        res2 = requests.get(url=dz1, headers=header, timeout=30)
        res2.close()
        html2 = etree.HTML(res2.text)
        value = html2.xpath('//*[@id="url"]/@value')[0]
        time.sleep(0.5)
    except Exception as exc:  # thread boundary: report and leave addr unset
        print('访问失败', exc)
        return

    with lock:
        addr[mz1] = value


# ---------------------------- crawl stage ----------------------------------
print(time.asctime())
# 16 batches of 5 workers -> listing pages 1..80.
for k in range(0, 16):
    _run_batch(getUrl)
print(time.asctime())

mz = []   # titles, in web_url insertion order
dz = []   # detail-page URLs, parallel to mz
file = '影片列表.xlsx'
print(web_url.items())
for a, b in web_url.items():
    mz.append(a)
    dz.append(b)
print(mz, dz)

# The workbook must already exist. The active sheet is used; the original's
# extra wb['Sheet1'] lookup was immediately overwritten by wb.active.
wb = openpyxl.load_workbook(file)
sheet = wb.active
sheet['A1'] = '序号'
sheet['B1'] = '影片名'
sheet['C1'] = '播放网址'
sheet['D1'] = '下载地址'
for nu, (title, link) in enumerate(zip(mz, dz)):
    print(title, link)
    row = str(nu + 2)          # row 1 is the header
    sheet['A' + row] = str(nu + 1)
    sheet['B' + row] = title
    sheet['C' + row] = link
wb.save(file)
print(len(mz))

flag = len(mz)
print(time.asctime())
while flag > 0:
    _run_batch(gerAddr)
    flag -= 5
print(addr.items())
print('----------------')

sheet = wb.active
# Walk every title, not just len(addr) rows, so a failed lookup early in the
# list cannot drop the addresses of later rows.
for nu2, title in enumerate(mz):
    sheet['D' + str(nu2 + 2)] = addr.get(title)
time.sleep(2)
wb.save(file)
print(time.asctime())


# --------------------------- download stage --------------------------------
import requests
import openpyxl
import threading
import time
import re

header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.70'}
lock = threading.RLock()

## Failure-retry mode (disabled) ##
# Rebuilds name/url from the row numbers found in a saved failure log.
# NOTE: in this mode name/url hold plain values, not cells, so download()
# would need to drop its ``.value`` accesses.
# file = '影片列表.xlsx'
# path = 'D:\\AV\\'
# n = 0
# name=[]
# url=[]
# wb = openpyxl.load_workbook(file)
# sheet = wb.active
# sheet = wb['Sheet']
# num=[]
# with open('失败统计.txt','r',encoding='utf-8') as f:
#     lines=f.readlines()
#     for line in lines:
#         nu = re.findall(r'------ (.*?)----',line)[0]
#         num.append(nu)
#         name.append(sheet['B' + str(int(nu) + 1)].value)
#         url.append(sheet['D' + str(int(nu) + 1)].value)

file = '影片列表.xlsx'
path = 'D:\\AV\\'   # same value as the original 'D:\AV\\', minus the invalid \A escape
n = 1               # index 0 of each column is the header row
wb = openpyxl.load_workbook(file)
sheet = wb.active
name = sheet['B']   # tuple of Cell objects: titles
url = sheet['D']    # tuple of Cell objects: download addresses


def download():
    """Download the next unclaimed row's address to ``path`` as an ``.mp4``.

    The row index is claimed from the shared counter ``n`` under ``lock``.
    Returns silently once the rows are exhausted. A row without a title or
    address, an HTTP error status, or any I/O failure is reported on stdout
    as a failed download.
    """
    global n
    with lock:
        n1 = n
        n += 1

    ## Failure-retry mode (disabled) ##
    # if n1>=len(num):
    #     print('----已完成----')
    #     return None
    # else:
    if n1 >= len(name):
        return

    # sheet['B'] / sheet['D'] yield Cell objects; the text lives in .value.
    name1 = name[n1].value
    url1 = url[n1].value
    time1 = time.time()
    if not name1 or not url1:
        print('------', str(n1) + '----' + str(name1) + '下载----失败-----用时', time.time() - time1)
        return
    name1 = str(name1)

    print('---', str(n1), '---', name1, '----开始下载', time.asctime())
    try:
        res = requests.get(url=url1, headers=header, timeout=30)
        # Do not save an HTTP error page as a video.
        res.raise_for_status()
        with open(path + str(n1) + '、 ' + name1 + '.mp4', 'wb') as f:
            f.write(res.content)
        print('------', str(n1) + '----' + name1 + '下载----成功-----用时', time.time() - time1)
    except Exception as exc:  # thread boundary: report and move on
        print('------', str(n1) + '----' + name1 + '下载----失败-----用时', time.time() - time1, exc)

    ## Failure-retry mode (disabled) ##
    #----------------------------------------------------------------------------#
    # print('---', num[n1], '---', name1, '----开始下载', time.asctime())
    # try:
    #     res = requests.get(url=url1, headers=header, timeout=300)
    #     with open(path +num[n1] + '、 ' + name1 + '.mp4', 'wb') as f:
    #         f.write(res.content)
    #     print('------', num[n1] + '----' + name1 + '下载----成功-----用时', time.time() - time1)
    # except:
    #     print('------', num[n1] + '----' + name1 + '下载----失败-----用时', time.time() - time1)


# At most 50 batches of 5 (the original cap), stopping early once every row
# has been claimed.
for k in range(0, 50):
    if n >= len(name):
        break
    _run_batch(download, stagger=0.2)

用户注册

用户登录

发表随想

该用户最新代码

编程随想

[python]代码库

网友评论 (发表评论)

发表评论：

评论须知：

扫码下载

输入口令后可复制整站源码