Selenium 爬虫示例

19 阅读1分钟

引入依赖

import time
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium import webdriver

定义变量

# Containers filled in later: the elements matched on the page currently being
# read, and the text gathered from every matched element.
contents = []
TextContents = []

# Chrome options: disable the Blink "AutomationControlled" feature.
# NOTE: this flag does not turn on headless mode; the browser window is shown.
options = Options()
options.add_argument("--disable-blink-features=AutomationControlled")

# Start Chrome with those options and open the target song page.
driver = webdriver.Chrome(options=options)
driver.get('https://music.163.com/#/song?id=287035')
# Optional: call driver.maximize_window() here to maximize the browser window.

如果未将 chromedriver 所在目录加入 PATH 环境变量,则需要手动指定驱动路径

# Create the Chrome driver with an explicit path to the chromedriver binary.
# Selenium 4 deprecated the `executable_path` keyword and release 4.10 removed
# it, so the driver location is supplied through a Service object instead.
from selenium.webdriver.chrome.service import Service

driver = webdriver.Chrome(service=Service('path/to/chromedriver'), options=options)

爬取内容

# Number of pages to walk through.
PAGE_COUNT = 21

# Everything runs inside try/finally so the browser is always shut down, even
# when a lookup or click raises part-way through.
try:
    # Implicit wait: element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)
    # Switch into the first frame (index 0); later lookups run inside it.
    driver.switch_to.frame(0)
    # Simulate scrolling to the bottom of the document.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    for i in range(PAGE_COUNT):
        print('第' + str(i + 1) + '页')
        # Pause BEFORE locating the elements: locating first and reading the
        # text after the pause risks stale references once the page re-renders.
        time.sleep(5)
        contents = driver.find_elements(By.CSS_SELECTOR, '.cnt-wrap .cnt')
        print(len(contents))
        for content in contents:
            # Read the text once; every `.text` access is a driver round-trip.
            text = content.text
            print(text.replace("\n", ""))
            TextContents.append(text)
        # Move on to the next page, except after the last one where the click
        # would serve no purpose.
        if i < PAGE_COUNT - 1:
            element = driver.find_element(By.CLASS_NAME, 'znxt')
            element.click()
            # NOTE: implicitly_wait() does not pause; it only sets the lookup
            # timeout (15 seconds from here on).
            driver.implicitly_wait(15)

    print("结束")
finally:
    driver.quit()

写入文件

# Write the collected texts to example.txt as UTF-8, one entry per line.
# newline='' keeps the written "\n" untranslated on every platform.
with open("example.txt", "w", newline='', encoding='utf-8') as out:
    for entry in TextContents:
        # Echo the raw entry to the console before writing it.
        print(entry)
        # Remove embedded "\n" characters so the entry is written on one line.
        single_line = "".join(entry.split("\n"))
        out.write(single_line + "\n")