# 引入依赖
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
# 定义变量
# Browser setup: build the Chrome options, start exactly ONE driver, and open
# the song page whose comments are scraped further down.
options = Options()
# NOTE: this flag does not enable headless mode. It switches off Blink's
# "AutomationControlled" feature (presumably so the site is less likely to
# detect that the browser is driven by automation).
options.add_argument("--disable-blink-features=AutomationControlled")

# If chromedriver cannot be located automatically (no webdriver environment
# variable / not on PATH), set this to the chromedriver executable,
# e.g. 'path/to/chromedriver'. Leave it as None otherwise.
CHROMEDRIVER_PATH = None

if CHROMEDRIVER_PATH:
    # Selenium 4 receives the driver location through a Service object; the
    # old `executable_path=` keyword of webdriver.Chrome is deprecated and
    # rejected by newer releases.
    from selenium.webdriver.chrome.service import Service

    driver = webdriver.Chrome(
        service=Service(executable_path=CHROMEDRIVER_PATH),
        options=options,
    )
else:
    driver = webdriver.Chrome(options=options)

# Only one driver is created above, so the instance that loads the page is
# the same instance the scraping code uses (a second webdriver.Chrome(...)
# call would leak the first browser and start from a blank page).
driver.get('https://music.163.com/#/song?id=287035')
# driver.maximize_window()

# `contents` holds the comment elements of the page currently being read;
# `TextContents` accumulates the text of every comment collected so far.
contents = []
TextContents = []
# 爬取内容
# Scrape the comment text page by page, then always close the browser.
from selenium.common.exceptions import NoSuchElementException

PAGE_COUNT = 21       # number of comment pages to visit
PAGE_LOAD_DELAY = 5   # seconds to let a page of comments render before reading

try:
    # Implicit wait: element lookups poll for up to 10 seconds before failing.
    # It is a session-wide setting, not a pause, so it is configured once
    # here instead of being re-set on every loop iteration.
    driver.implicitly_wait(10)
    # Switch into the first frame (index 0); the elements looked up below are
    # searched inside that frame.
    driver.switch_to.frame(0)
    # Simulate scrolling to the bottom of the document.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    for i in range(PAGE_COUNT):
        print(f'第{i + 1}页')
        # Pause BEFORE locating the elements. Locating first and sleeping
        # afterwards risks reading nodes of the previous page (or stale
        # nodes) once the click below has replaced the comment list.
        time.sleep(PAGE_LOAD_DELAY)
        contents = driver.find_elements(By.CSS_SELECTOR, '.cnt-wrap .cnt')
        print(len(contents))
        for content in contents:
            # Read `.text` once: every access is a round-trip to the browser.
            text = content.text
            print(text.replace("\n", ""))
            TextContents.append(text)

        if i == PAGE_COUNT - 1:
            # Last requested page has been read; no need to page forward.
            break
        try:
            element = driver.find_element(By.CLASS_NAME, 'znxt')
        except NoSuchElementException:
            # No "next page" control found: stop early and keep what was
            # collected instead of crashing.
            break
        element.click()

    print("结束")
finally:
    # Close the browser even when scraping fails, so no Chrome/chromedriver
    # process is left running.
    driver.quit()
# 写入文件
# Persist the collected comments: one comment per line in example.txt (UTF-8).
# newline="" disables newline translation, so each line ends with a bare "\n".
with open("example.txt", mode="w", encoding="utf-8", newline="") as file:
    for item in TextContents:
        # Echo the raw comment text to stdout as it is saved.
        print(item)
        # Drop embedded line breaks so each comment occupies exactly one line.
        single_line = item.replace("\n", "")
        file.write(f"{single_line}\n")