使用以下代码获取driver,并将其赋值给browser变量,后面的所有API操作都通过这个变量来完成。
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
def get_browser(path=None):
    """Create and return a Chrome WebDriver instance.

    Args:
        path: Optional location of the ChromeDriver binary. It may be the
            path of ``chromedriver.exe`` itself or the directory that
            contains it. When ``None``, ``webdriver.Chrome()`` is called
            without an explicit service.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    if path is None:
        return webdriver.Chrome()

    # A path that does not already mention the binary is treated as the
    # directory holding it, so the file name is appended.
    executable = path if 'chromedriver.exe' in path else f"{path}/chromedriver.exe"
    return webdriver.Chrome(service=Service(executable_path=executable))


# Shared driver instance that the following examples operate on.
browser = get_browser(path="xxxxx")
目录
访问网页并获取源代码
def GetPageSource(url):
    """Open *url* in the shared ``browser`` and return the page's HTML.

    Args:
        url: Address of the page to load.

    Returns:
        ``browser.page_source`` read five seconds after navigation, or an
        empty string if loading the page raised an exception.
    """
    # Imported locally because no module-level ``import time`` has executed
    # at this point in the file. Without it, ``time.sleep`` raised NameError,
    # the handler below swallowed it, and the function always returned "".
    import logging
    import time

    try:
        browser.get(url)
        # Fixed 5 s pause after navigation (presumably to let the page
        # finish loading before the source is read).
        time.sleep(5)
        return browser.page_source
    except Exception:
        # Still best-effort (returns ""), but record why loading failed
        # instead of discarding the error silently.
        logging.getLogger(__name__).exception("Failed to load %s", url)
        return ""


print(GetPageSource("http://www.baidu.com"))
browser.quit()
查找单个节点
我们以淘宝网的搜索窗口为例。
通过id定位节点
from selenium.webdriver.common.by import By
def FindSinNodeById(url, byId):
    """Load *url* and print the single element whose id equals *byId*.

    Args:
        url: Address of the page to open in the shared ``browser``.
        byId: Value of the ``id`` attribute to look up.

    Prints the located element object followed by its ``text``; returns
    nothing.
    """
    # Local import: ``time`` is not imported at module level before this
    # function is first called, so ``time.sleep`` used to raise NameError.
    import time

    browser.get(url)
    time.sleep(5)  # fixed pause after navigation before searching the DOM
    node = browser.find_element(By.ID, byId)
    print(node)
    print(node.text)


FindSinNodeById("https://www.taobao.com/", "q")
通过css定位
from selenium.webdriver.common.by import By
def FindSinNodeByCss(url, byCss):
    """Load *url* and print the first element matching CSS selector *byCss*.

    Args:
        url: Address of the page to open in the shared ``browser``.
        byCss: CSS selector used to locate the element.

    Prints the located element object followed by its ``text``; returns
    nothing.
    """
    # Local import: ``time`` is not imported at module level before this
    # function is first called, so ``time.sleep`` used to raise NameError.
    import time

    browser.get(url)
    time.sleep(5)  # fixed pause after navigation before searching the DOM
    node = browser.find_element(By.CSS_SELECTOR, byCss)
    print(node)
    print(node.text)


FindSinNodeByCss("https://www.taobao.com/", "#q")
通过xpath定位
from selenium.webdriver.common.by import By
def FindSinNodeByXpath(url, byXpath):
    """Load *url* and print the first element matching XPath *byXpath*.

    Args:
        url: Address of the page to open in the shared ``browser``.
        byXpath: XPath expression used to locate the element.

    Prints the located element object followed by its ``text``; returns
    nothing.
    """
    # Local import: ``time`` is not imported at module level before this
    # function is first called, so ``time.sleep`` used to raise NameError.
    import time

    browser.get(url)
    time.sleep(5)  # fixed pause after navigation before searching the DOM
    node = browser.find_element(By.XPATH, byXpath)
    print(node)
    print(node.text)


FindSinNodeByXpath("https://www.taobao.com/", '//*[@id="q"]')
其他定位方式
| 定位方式 | 描述 | 示例 |
|---|---|---|
| By.NAME | 通过name进行定位 | "q" |
| By.CLASS_NAME | 通过class名称定位 | |
| By.LINK_TEXT | 专门通过链接(a标签)的文本进行定位,需要指定标签内全部的文本内容才能够定位成功 | |
| By.PARTIAL_LINK_TEXT | 指定某部分文本即可定位成功,不用将文本内容全部输入即可定位成功 | "淘宝" |
| By.TAG_NAME | tag表示标签,即通过标签名进行定位,也就是用来定位div、h2这一类标签 | "div" |
查找多个节点
使用流程与查找单个节点一样,只不过是将find_element换成了find_elements,例如:
from selenium.webdriver.common.by import By
# ``time`` has not been imported yet at this point in the file; without this
# import the ``time.sleep`` call below raises NameError.
import time

# Open the page and collect every link whose text contains the given fragment.
browser.get("https://www.taobao.com/")
node = browser.find_elements(By.PARTIAL_LINK_TEXT, "淘宝")
print(node)
# Print the visible text of each matched element in turn.
for i in node:
    print(i.text)
time.sleep(3)
browser.quit()
节点交互
from selenium.webdriver.common.by import By
# ``time`` has not been imported yet at this point in the file; without this
# import the ``time.sleep`` calls below raise NameError.
import time

browser.get("https://www.taobao.com/")
InputNode = browser.find_element(By.ID, "q")  # locate the search input box
InputNode.send_keys("懒人椅")  # type the first query
time.sleep(3)
InputNode.clear()  # empty the input box
InputNode.send_keys("沙发")  # type the second query
time.sleep(3)
SearchNode = browser.find_element(By.XPATH, '//button[@data-spm="d13"]')
SearchNode.click()  # click the button
time.sleep(5)
browser.quit()
动作链
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
import time
# Open the page, then wait five seconds.
browser.get("https://dun.163.com/trial/jigsaw")
time.sleep(5)

# Locate the slider node.
node = browser.find_element(By.XPATH, "//span[@class='yidun_slider__icon']")

# Build the whole drag gesture as one fluent chain; nothing is sent to the
# browser until perform() is reached at the end.
actions = ActionChains(browser)
(
    actions
    .move_to_element(node)    # move the mouse onto the slider
    .click_and_hold()         # press and hold the slider
    .move_by_offset(100, 0)   # 100 px to the right on x, y unchanged
    .release()                # release the mouse button
    .perform()                # execute the action chain
)
运行javascript
# Rebind `browser` to a new driver; the string is handed to get_browser as
# its `path` argument.
browser = get_browser(path="C:/Users/12043/Desktop/DragonW/SpiderCase/")
browser.get("https://www.zhihu.com/explore")
time.sleep(3)
# Run JavaScript inside the page: first scroll to the bottom of the document,
browser.execute_script("window.scrollTo(0, document.body.scrollHeight)")
# then open an alert dialog with the text 'To Bottom'.
browser.execute_script("alert('To Bottom')")
time.sleep(3)
获取节点信息
获取节点属性
from selenium.webdriver.common.by import By
# Open the page and pause three seconds before looking up the node.
browser.get("https://www.baidu.com/")
time.sleep(3)
# Select the <a> under the third <li> of the element with id
# "hotsearch-content-wrapper".
node = browser.find_element(By.XPATH, '//*[@id="hotsearch-content-wrapper"]/li[3]/a')
print(node)
print(node.get_attribute("href")) # read the node's href attribute
获取文本值
from selenium.webdriver.common.by import By
# Open the page and pause three seconds before looking up the node.
browser.get("https://www.baidu.com/")
time.sleep(3)
# Select the <a> under the third <li> of the element with id
# "hotsearch-content-wrapper".
node = browser.find_element(By.XPATH, '//*[@id="hotsearch-content-wrapper"]/li[3]/a')
print(node)
print(node.text) # read the node's text value
获取id、位置、标签名、大小
from selenium.webdriver.common.by import By
# Open the page and pause three seconds before looking up the node.
browser.get("https://www.baidu.com/")
time.sleep(3)
# Select the <a> under the third <li> of the element with id
# "hotsearch-content-wrapper".
node = browser.find_element(By.XPATH, '//*[@id="hotsearch-content-wrapper"]/li[3]/a')
# Print, in order, the element's id, location, tag name and size.
print(node.id)
print(node.location)
print(node.tag_name)
print(node.size)
切换浏览器窗口
from selenium.webdriver.common.by import By
import time
# Open the page
browser.get("https://www.zhihu.com/signin?next=%2F")
time.sleep(3)
# Click the button
node = browser.find_element(By.XPATH, '//*[@id="root"]/div/main/div/div/div/div/div[2]/div/div[3]/span/button[1]')
node.click()
time.sleep(3)
# Get all window handles
handles = browser.window_handles
browser.switch_to.window(handles[-1]) # switch to the last handle, i.e. the newly opened window
# In the now-active window, find the QR-code <img> and print its src attribute.
ele = browser.find_element(By.XPATH, "//img[@class='js_qrcode_img web_qrcode_img']")
print(ele.get_attribute("src"))