Python爬虫实战之表情包爬取,以后斗图再也不用担心斗不赢了!

652 阅读1分钟

Python爬虫实战之表情包爬取:

1.斗图网表情包:GIF表情包 - 斗图发表情包 - 斗图王 (doutuwang.com)

4.png

直接展示代码:使用多线程爬取,速度直接拉满

import requests
import re
from pyquery import PyQuery  # 网页解析工具  相当于前端页面中的 jquery
from threading import Thread



# HTTP headers attached to every request issued by request_save().
# NOTE(review): the cookie values look like they were copied from a browser
# session — confirm whether the site actually requires them.
headers = {
    'cookie': (
        'Hm_lvt_bc9e044718f666fbad26376b3f10b13a=1676212122; '
        'Hm_lpvt_bc9e044718f666fbad26376b3f10b13a=1676212128'
    ),
    # Browser-like user agent string.
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/99.0.4844.51 Safari/537.36'
    ),
}


def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced by ``_``."""
    return re.sub(r'[\\/:*?"<>|\r\n]', '_', name).strip()


def request_save(url):
    """Download every thumbnail image found on one listing page.

    The page at *url* is fetched, each ``.thumbnail a img`` element is read,
    and the image referenced by its ``src`` attribute is saved as
    ``斗图网表情包/<alt>.gif`` (the directory is created if it is missing).
    Images that fail to download are reported and skipped instead of being
    written from stale or undefined data.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        requests.RequestException: If the listing page itself cannot be fetched.
    """
    import os  # local import so this function does not rely on new file-level imports

    save_dir = '斗图网表情包'
    # exist_ok=True keeps this safe when several threads create it at once.
    os.makedirs(save_dir, exist_ok=True)

    html = requests.get(url, headers=headers, timeout=10).text
    document = PyQuery(html)
    for img in document(".thumbnail a img").items():
        image = img.attr("src")
        name = img.attr("alt")
        if not image:
            # No source address on this <img>; nothing to download.
            continue
        print(image, name)
        try:
            image_cont = requests.get(url=image, headers=headers, timeout=10)
            # Do not save an HTTP error page as if it were a GIF.
            image_cont.raise_for_status()
        except requests.RequestException as exc:
            # Skip this image; writing here would use an undefined response
            # or the previous iteration's bytes.
            print(f'skip {image}: {exc}')
            continue
        # alt may be missing or contain characters the file system rejects.
        filename = _safe_filename(name or '') or 'unnamed'
        # os.path.join yields a portable path; the old '\{' was an invalid
        # escape sequence that only worked as a separator on Windows.
        with open(os.path.join(save_dir, f'{filename}.gif'), 'wb') as f:
            f.write(image_cont.content)


if __name__ == '__main__':
    # The first page lives at the bare category URL; later pages use /page/<n>.
    page_urls = ['https://www.doutuwang.com/category/gxgif']
    # Raise the upper bound of range() to crawl more pages.
    for page_no in range(2, 10):
        page_urls.append(f'https://www.doutuwang.com/category/gxgif/page/{page_no}')
    # Start one thread per listing page.
    for page_url in page_urls:
        worker = Thread(target=request_save, args=(page_url,))
        worker.start()


2.png

3.png

2.闪萌表情包:闪萌 - 中文GIF搜索引擎 - gif搜索_动图搜索_gif_动图_表情_斗图 (weshineapp.com)

4.png

代码比较简单,这里我就不做过多解释,直接上代码:

import os
from pprint import pprint

import requests

# Script: fetch one sticker package listing from the weshineapp API and save
# every thumbnail it lists into the 表白表情包/ directory.

# API link for the "confession" image package (package id 6).
url = 'http://www.weshineapp.com/api/v1/index/package/6?offset=0&limit=18'
# Alternative: replace the id 6 with 117 to fetch the "瞎眼图" package instead.

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.78'
}

save_dir = '表白表情包'
# Create the output directory up front so open() below cannot fail on it.
os.makedirs(save_dir, exist_ok=True)

html_url = requests.get(url=url, headers=headers)
# Parse the JSON body once and reuse it for both printing and iteration.
payload = html_url.json()
pprint(payload)

# s is a 1-based counter appended to the file name so that several images
# from the same author do not overwrite each other.
for s, item in enumerate(payload['data'], start=1):
    name = item['author_name']
    image_url = item['thumb_url']
    print(name, image_url)
    image = requests.get(url=image_url, headers=headers)
    # os.path.join gives a portable path (the old '\{' was an invalid escape
    # that only acted as a separator on Windows); `with` closes the file even
    # if the write fails.
    with open(os.path.join(save_dir, f'{name}{s}.gif'), 'wb') as f:
        f.write(image.content)

5.png