python 多线程元气壁纸爬虫
声明:此爬虫仅用于学习交流,且用且珍惜。技术点:threading、requests、json。完整代码见下文。
·
声明
此爬虫仅用于学习交流,且用且珍惜。这里的 id 已经锁定了动漫分类,如需其他类别请自行分析修改,可能是 ["cate_id":2] 这个参数(文件写完有一段时间了,具体位置记不清了)。另外,这里爬的是静态壁纸,动态壁纸原理上同样可行。
技术点
- threading
- requests
- json
代码
import requests
import json
import threading
import random
def get_requestHeaders():
    """Build the HTTP headers used for the wallpaper API requests.

    Returns:
        dict: header name -> value, mimicking a Chrome 89 browser request
        to wallpaper.zhhainiao.com.
    """
    # NOTE: the original code hard-coded 'content-length': '172'. That is a
    # bug: requests computes Content-Length from the actual body, and the
    # payload length varies with the page number, so a fixed value would be
    # wrong for most requests. It is intentionally omitted here.
    headers = {
        'accept': 'application/json, text/plain, */*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'content-type': 'application/json;charset=UTF-8',
        'dnt': '1',
        'origin': 'https://wallpaper.zhhainiao.com',
        'referer': 'https://wallpaper.zhhainiao.com/',
        'sec-ch-ua': '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
        'x-cf-device-id': 'xxxx-xxx-xxx',
        'x-cf-platform': 'webview',
    }
    return headers
def thd(dt, page):
    """Download every wallpaper described in one API result page.

    Args:
        dt: list of wallpaper dicts from the API; reads the "Jpg4kUrl",
            "Jpg1920Url" and "wname" keys of each entry.
        page: page number, used only for progress logging.
    """
    # Bug fix: iterate the `dt` argument. The original looped over the
    # global `dicts`, so every thread re-downloaded whatever page the main
    # loop had fetched most recently instead of its own page.
    for item in dt:
        if item["Jpg4kUrl"] != "":
            url = item["Jpg4kUrl"]  # prefer the 4k variant when available
        elif item["Jpg1920Url"] != "":
            url = item["Jpg1920Url"]
        else:
            # Bug fix: an entry with no URL originally reused the previous
            # iteration's url (or raised UnboundLocalError on the first one).
            continue
        name = item["wname"]
        if name == '':
            # No title supplied: make up a random file name.
            # Bug fix: the original appended ".jpg" here AND again in the
            # open() path below, producing "<name>.jpg.jpg" files.
            name = (str(random.randint(1, 9)) + str(random.randint(1, 99))
                    + str(random.randint(1, 999999)))
        print(url, name, page)
        pic = requests.get(url).content
        # Save path: z:/ is a RAM disk on the author's machine — adjust as needed.
        with open("z:/spider/" + name + ".jpg", 'wb') as f:
            f.write(pic)
if __name__ == '__main__':
    # Fetch result pages 1..99 and hand each page's list to its own
    # downloader thread.
    for page in range(1, 100):
        # Build the POST payload as a dict and serialise it with json.dumps
        # instead of the original hand-concatenated JSON string, which was
        # error-prone and forced a (wrong) fixed Content-Length header.
        payload = {
            "login_info": {},
            "cate_id": 2,  # 2 == the anime category; change for other categories
            "tag_id": None,
            "sort_type": 2,
            "page": page,
            "page_size": 24,
            "common": {
                "open_id": None,
                "token": None,
                "device_id": None,
                "player_version": 0,
                "platform": "pc",
            },
        }
        r = requests.post(
            "https://pcwallpaper.zhhainiao.com/wallpaper/static/list",
            headers=get_requestHeaders(),
            data=json.dumps(payload),
        )
        dicts = json.loads(r.text)["data"]["list"]
        # Pass this page's list to the thread explicitly so each worker
        # downloads its own page.
        threading.Thread(target=thd, args=(dicts, page)).start()
更多推荐
所有评论(0)