python爬虫——爬取网络各种资源

太阳照着月亮 2024-08-20 10:07:03 阅读 84

python爬虫——爬取网络各种资源
1.某讯视频

import requests
import re
from tqdm import tqdm

# m3u8 playlist URL copied from the browser's network panel.
# NOTE(review): these CDN URLs are signed and expire — replace with a fresh one before running.
url = "https://apd-vlive.apdcdn.tc.qq.com/defaultts.tc.qq.com/B_JxNyiJmktHRgresXhfyMem1E4_DPhVbhxv28spVNp5Dj6vs6uhjyh7JsYzrUOQcL/svp_50112/ZV6e2op5S_S1AyUVjIbzXsJek1I7zANtM2Tv2peQ2YVY3YFimvlfjsXz1DQmrgxOvXrMl6Vs6HiozYNZAtgUo-JKZKtrgs6Vnubhh-IFRlbEUIcUZOu39XJX7hJt5uDrq9jZ-uScgH0wZi5gJSD03ZA0p0pU32ocepjRtSdPw3Zw-tx5nWAPXVGQZgfcOS3TTPtCNs0qoCwEgtP3z-i0YoIZT-MACU25AB2ILMv_z8HX2bCMw-pYKQ/gzc_1000102_0b53zuabqaaahiae4ebljvrmbtodddfqahca.f322062.ts.m3u8?ver=4"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# Fetch the playlist; fail fast on HTTP errors instead of parsing an error page.
response = requests.get(url=url, headers=headers, timeout=30)
response.raise_for_status()

# An m3u8 playlist is plain text: '#'-prefixed metadata lines plus one
# .ts segment name per line. Drop the metadata, keep the segment names.
m3u8_data = re.sub(r'#.*', '', response.text).split()

# CDN base the segment names are relative to (also copied from the network panel).
ts_base = 'https://ltscsy.qq.com/B_JxNyiJmktHRgresXhfyMeulWsW_l0JzF9NWhW-VqfOrj6vs6uhjyh7JsYzrUOQcL/svp_50112/vDKS4TspZpx8uhYKG9EVBe5I0alPqhW0tx6JBvJ2aS25FDZoNU5KZ6zqkZHI0oluZXeMLWOdHJVJkwU7hTESavdDeIvxTvVGzzDbdV2aXouqP0rqMwh7iS-HBpSSyoJ7-2trKnnldoZQZ49UsJ97yCUsFgW4sYeCBUsR2eKR2-HnO6bayh1rWhDvF63Nr5aLs8_zJIy0ARYOUMGtem6NWCkxgFVaQdLf2-dyEgVe40V1g7FupCtIRw/'

# Open the output once in 'wb' (the original reopened it in 'ab' per segment,
# so a rerun appended a second copy of the whole movie) and append each
# segment's bytes in playlist order.
with open('葫芦娃.mp4', 'wb') as f:
    for ts in tqdm(m3u8_data):
        segment = requests.get(ts_base + ts, headers=headers, timeout=30)
        segment.raise_for_status()
        f.write(segment.content)

2.某音视频

import requests

# Direct video URL copied from the browser's network panel.
# NOTE(review): douyinvod URLs are signed and expire — replace with a fresh one before running.
url = "https://v3-web.douyinvod.com/f32cffe441fd98a917184c59b4c4e876/65f8333a/video/tos/cn/tos-cn-ve-15/oYBBlyMPtABcQwziAatpj9EgFfecM9iB8DhIAw/?a=6383&ch=5&cr=3&dr=0&lr=all&cd=0%7C0%7C0%7C3&cv=1&br=1024&bt=1024&cs=0&ds=4&ft=LjhJEL998xI7uEPmH0P5H4eaciDXt0YbZ_QEe09-mB~D1Inz&mime_type=video_mp4&qs=0&rc=aTs4aWY8aGk2ODZmNWU1ZkBpMzRxdjQ6ZmtzcTMzNGkzM0AxLmJhLjEtNWMxY15fM2MuYSNsbC1ycjRfMGhgLS1kLTBzcw%3D%3D&btag=e00008000&cquery=100a&dy_q=1710761249&feature_id=46a7bb47b4fd1280f3d3825bf2b29388&l=20240318192728651A9F612C610A0B9193"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# Single request returns the whole MP4 body; fail fast on HTTP errors.
response = requests.get(url=url, headers=headers, timeout=60)
response.raise_for_status()
video_data = response.content

# 'wb', not 'ab': the original appended, so rerunning the script produced
# a file with two copies of the video concatenated.
with open('aa.mp4', 'wb') as f:
    f.write(video_data)

3.某站视频

import json
import re
import requests
import os
import subprocess

# Video page URL; title and stream URLs are scraped from its HTML.
url = "https://www.bilibili.com/video/BV1kC411a7cn/?spm_id_from=333.1007.tianma.2-2-4.click&vd_source=f4be0a001848558927c3212d18de2626"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    # Logged-in session cookie — required for higher-quality DASH streams.
    # NOTE(review): contains personal SESSDATA/bili_jct tokens; rotate before sharing.
    "Cookie":"CURRENT_FNVAL=4048; DedeUserID=539138603; DedeUserID__ckMd5=671cb5f4af58c4e5; enable_web_push=DISABLE; buvid3=A9DB3A97-A849-C537-2902-0CA83CA2F72A78091infoc; b_nut=1706950178; i-wanna-go-back=-1; b_ut=5; _uuid=112B10126-29107-64D10-4BF1-610B365B5847376377infoc; buvid_fp_plain=undefined; buvid4=98D90CF7-909F-21A5-857B-E2A5352CB3A970761-024031409-FeaNf0N026PHa1xHiEcvIw%3D%3D; FEED_LIVE_VERSION=V8; header_theme_version=CLOSE; is-2022-channel=1; rpdid=|(RlRRR)lRR0J'u~u|R|mY)J; CURRENT_QUALITY=80; fingerprint=d35797e9afc601e4d1f9c94226939e95; buvid_fp=d35797e9afc601e4d1f9c94226939e95; bp_video_offset_539138603=909006115716464659; b_lsid=1A79ABE1_18E5076F703; bmg_af_switch=1; bmg_src_def_domain=i1.hdslb.com; SESSDATA=23faecec%2C1726298960%2C1a4a3%2A31CjBgBTtVdw4XOBT-_73RNdtnfi3F-w5kEs7_tl50_QHZHhu9sQ025YYZXx4OVxkF7GASVmttUFUxdGdCcnZZT2p0Y0VTRmMzVzhKXzRybzhGYkp4ZTQwQWNIQ256MWNwQjRYRnRLaC0wMlE3eVZ4S294Z3NYWVFjam9zMG5sNGJfVnlkYWVMWF9RIIEC; bili_jct=a32fd0781da03d9df2a4e7c79b3bc9ad; sid=75goizpx; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MTEwMDYxNjcsImlhdCI6MTcxMDc0NjkwNywicGx0IjotMX0.jOzBgRd1WLdIwFX7y_xFu1h20PjzadWHz5538s0qCOs; bili_ticket_expires=1711006107; home_feed_column=4; browser_resolution=778-730; PVID=2"
}

response = requests.get(url=url, headers=headers, timeout=30)
response.raise_for_status()

# Video title from the page's <h1>; strip characters that are illegal in
# Windows filenames (the original passed the raw title into a shell string,
# which broke on quotes and was a command-injection hazard).
title = re.findall(r'<h1 title="(.*?)"', response.text)[0]
safe_title = re.sub(r'[\\/:*?"<>|]', '_', title).strip()

# Bilibili embeds the DASH manifest as JSON in an inline <script>.
playinfo = re.findall(r'<script>window.__playinfo__=(.*?)</script>', response.text)[0]
playinfo = json.loads(playinfo)

# Streams are listed best-quality-first; take the first audio and video entries.
audio_url = playinfo['data']['dash']['audio'][0]['baseUrl']
video_url = playinfo['data']['dash']['video'][0]['baseUrl']

# The CDN requires the same headers (Referer via Cookie/UA) as the page fetch.
audio_data = requests.get(audio_url, headers=headers, timeout=60).content
video_data = requests.get(video_url, headers=headers, timeout=60).content

with open('audio.mp3','wb') as f:
    f.write(audio_data)

with open('video.mp4','wb') as f:
    f.write(video_data)

# Mux audio + video with ffmpeg. subprocess.run with an argument list (no
# shell) replaces the original os.system f-string, so a title containing
# quotes or metacharacters cannot break or hijack the command.
ffmpeg = r'D:\PackageDown\ffmpeg-6.0-full_build\bin\ffmpeg.exe'
subprocess.run(
    [ffmpeg, '-i', 'audio.mp3', '-i', 'video.mp4',
     '-acodec', 'copy', '-vcodec', 'copy', f'{safe_title}.mp4'],
    check=True,
)

# Remove the intermediate streams only after a successful mux.
os.remove('video.mp4')
os.remove('audio.mp3')

4.音乐

import json
import re
import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# songinfo API URL copied from the browser's network panel.
# NOTE(review): signature/clienttime params expire — copy a fresh URL before running.
url = "https://wwwapi.kugou.com/play/songinfo?srcappid=2919&clientver=20000&clienttime=1710753441574&mid=94dda63306ec019da57becefaf677248&uuid=94dda63306ec019da57becefaf677248&dfid=4FHz9d0RpBdS3oyFkd3iivge&appid=1014&platid=4&encode_album_audio_id=6ts59xd9&token=&userid=0&signature=f9070fa15e1408f6c86a667aecfc7b5a"

response = requests.get(url=url, headers=headers, timeout=30)
response.raise_for_status()

# The API returns JSON with the song title and a direct mp3 URL.
data = json.loads(response.text)
song_name = data['data']['song_name']
play_url = data['data']['play_url']

music = requests.get(play_url, headers=headers, timeout=60)
music.raise_for_status()

# Song names may contain characters that are illegal in Windows filenames —
# replace them before using the name as the output path.
safe_name = re.sub(r'[\\/:*?"<>|]', '_', song_name).strip()
with open(f"{safe_name}.mp3", "wb") as f:
    f.write(music.content)

最后

某讯视频采用 m3u8 视频流格式：先找到所要爬取影片的 m3u8 文件的 url，再访问该 url 得到各个 .ts 分片的地址，按顺序下载并拼接即可。某音和音乐最简单，只要找到视频（或歌曲）的直链就能直接下载。某站的视频则有所不同：它的画面和声音是分开传输的，需要在网络请求中搜索 .m4s 文件，分别找到视频流和音频流的 url。两者下载下来会得到两个文件——一个 .mp3 音频文件和一个 .mp4 视频文件，最后借助第三方工具 ffmpeg 将二者合并，才能得到完整的视频资源。

上面爬取的某讯视频和音乐都是免费的,是会员的就不能爬(如果你充了会员的话那也可以爬),爬虫是可见即可爬,切记爬虫不等于破解!!!

上面代码仅供参考,请勿商用!!!



声明

本文内容仅代表作者观点,或转载于其他网站,本站不以此文作为商业用途
如有涉及侵权,请联系本站进行删除
转载本站原创文章,请注明来源及作者。