Python爬虫实战之表情包爬取:
1.斗图网表情包:GIF表情包 - 斗图发表情包 - 斗图王 (doutuwang.com)
直接展示代码:多线程爬取速度直接拉满
import requests
import re
from pyquery import PyQuery # 网页解析工具 相当于前端页面中的 jquery
from threading import Thread
# Cookie string sent with every request (presumably copied from a browser
# session -- it may need refreshing if the site starts rejecting requests).
_COOKIE = 'Hm_lvt_bc9e044718f666fbad26376b3f10b13a=1676212122; Hm_lpvt_bc9e044718f666fbad26376b3f10b13a=1676212128'
# Desktop Chrome user-agent string sent with every request.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'

# HTTP headers shared by the page request and the image requests.
headers = {
    'cookie': _COOKIE,
    'user-agent': _USER_AGENT,
}
def request_save(url, save_dir='斗图网表情包', timeout=10):
    """Download every thumbnail image found on one listing page.

    Fetches ``url``, selects the ``.thumbnail a img`` elements and writes
    each image to ``save_dir`` as ``<alt text>.gif``. Images that share the
    same alt text overwrite one another, as in the original implementation.

    Args:
        url: Address of the listing page to scrape.
        save_dir: Directory the images are written to; created if missing.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched. Failures of individual images are reported and skipped.
        OSError: If the directory or an image file cannot be written.
    """
    import os  # local import: keeps this block independent of the file's import list

    # Characters that cannot appear in a file name on Windows (this also
    # covers "/" on POSIX); each one is replaced by an underscore.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    html = requests.get(url, headers=headers, timeout=timeout).text
    os.makedirs(save_dir, exist_ok=True)

    for img in PyQuery(html)(".thumbnail a img").items():
        image = img.attr("src")
        name = img.attr("alt")
        if not image:
            # No usable source URL on this <img>; nothing to download.
            continue
        print(image, name)
        try:
            image_cont = requests.get(url=image, headers=headers, timeout=timeout)
            image_cont.raise_for_status()
        except requests.RequestException as exc:
            # Skip this image instead of writing stale or undefined data.
            print(f'skipped {image}: {exc}')
            continue
        # str() keeps the original "None.gif" naming when alt is missing.
        file_name = f'{str(name).translate(unsafe_chars)}.gif'
        with open(os.path.join(save_dir, file_name), 'wb') as f:
            f.write(image_cont.content)
if __name__ == '__main__':
    # The first page has no "/page/N" suffix; pages 2..9 follow the numbered
    # pattern. Raise the upper bound of range() to crawl more pages.
    start_urls = ['https://www.doutuwang.com/category/gxgif']
    start_urls.extend(
        f'https://www.doutuwang.com/category/gxgif/page/{i}' for i in range(2, 10)
    )
    # One thread per listing page.
    for page_url in start_urls:
        worker = Thread(target=request_save, args=(page_url,))
        worker.start()
2.闪萌表情包:闪萌 - 中文GIF搜索引擎 - gif搜索_动图搜索_gif_动图_表情_斗图 (weshineapp.com)
代码比较简单,这里我就不做过多解释,直接上代码:
import requests
from pprint import pprint
import os  # portable path handling for the output directory below

# API endpoint of the sticker package to download (package 6, described by
# the original author as the "confession" set); offset/limit page the results.
url = 'http://www.weshineapp.com/api/v1/index/package/6?offset=0&limit=18'
# Alternative package: use this URL instead to download package 117.
# url = 'http://www.weshineapp.com/api/v1/index/package/117?offset=0&limit=18'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.78'
}
# Directory the downloaded images are written to; created if missing.
save_dir = '表白表情包'
# Characters that cannot appear in a file name on Windows (also covers "/"
# on POSIX); each one is replaced by an underscore.
unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

html_url = requests.get(url=url, headers=headers, timeout=10)
payload = html_url.json()  # parse the response body once and reuse it
pprint(payload)

os.makedirs(save_dir, exist_ok=True)
# `s` numbers the entries from 1 so files from the same author do not
# collide; the number advances even when a download is skipped.
for s, item in enumerate(payload['data'], start=1):
    name = item['author_name']
    image_url = item['thumb_url']
    print(name, image_url)
    try:
        image = requests.get(url=image_url, headers=headers, timeout=10)
        image.raise_for_status()
    except requests.RequestException as exc:
        # One failed image should not abort the remaining downloads.
        print(f'skipped {image_url}: {exc}')
        continue
    file_name = f'{str(name).translate(unsafe_chars)}{s}.gif'
    with open(os.path.join(save_dir, file_name), 'wb') as f:
        f.write(image.content)