selenium常用api

99 阅读2分钟

使用以下代码获取driver,并将其赋值给browser变量,后面的示例都通过操作这个变量来调用各个api。

import logging
import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
def get_browser(path=None):
    """Create a Chrome WebDriver instance.

    Args:
        path: Location of the ChromeDriver binary. Accepts either the
            executable itself or the directory that holds
            ``chromedriver.exe``. When ``None``, ``webdriver.Chrome()`` is
            started without an explicit service.

    Returns:
        The new ``webdriver.Chrome`` instance.
    """
    if path is None:
        return webdriver.Chrome()

    # Only a path that *ends* with the binary name (or points at an existing
    # file) is the executable; a plain substring match would misclassify a
    # directory whose name merely contains "chromedriver.exe".
    if path.endswith("chromedriver.exe") or os.path.isfile(path):
        executable = path
    else:
        # os.path.join avoids a doubled separator when path ends with "/".
        executable = os.path.join(path, "chromedriver.exe")
    return webdriver.Chrome(service=Service(executable_path=executable))

# Shared driver used by the examples below. "xxxxx" is a placeholder: pass
# the chromedriver.exe path or the directory that contains it.
browser = get_browser(path="xxxxx")

目录

访问网页并获取源代码

def GetPageSource(url, wait=5):
    """Load ``url`` in the shared ``browser`` and return the page's HTML.

    Args:
        url: Address to open.
        wait: Seconds to sleep after navigation before reading the source.
            Defaults to 5, the pause the function always used before.

    Returns:
        The page source as a string, or ``""`` when anything fails. The call
        is best-effort: errors are logged, never raised.
    """
    try:
        browser.get(url)
        time.sleep(wait)
        return browser.page_source
    except Exception:
        # Keep the best-effort contract, but record the reason instead of
        # hiding it, so a broken setup is not mistaken for an empty page.
        logging.getLogger(__name__).exception(
            "Failed to fetch page source for %s", url
        )
        return ""
# Demo: fetch the Baidu home page, print its HTML, then close the browser.
page_html = GetPageSource("http://www.baidu.com")
print(page_html)
browser.quit()

查找单个节点

我们以淘宝网首页的搜索框为例。

selenium常用api1.png

通过id定位节点

from selenium.webdriver.common.by import By
def FindSinNodeById(url, byId):
    """Open ``url`` and print the element whose ``id`` equals ``byId``.

    Prints the element object first, then its ``text``. Returns nothing.
    """
    browser.get(url)
    time.sleep(5)  # crude fixed wait after navigation
    locator = (By.ID, byId)
    element = browser.find_element(*locator)
    print(element)
    print(element.text)
# Example: look up the element with id "q" on the Taobao home page.
FindSinNodeById("https://www.taobao.com/", "q")

通过css定位

from selenium.webdriver.common.by import By
def FindSinNodeByCss(url, byCss):
    """Open ``url`` and print the first element matching CSS selector ``byCss``.

    Prints the element object first, then its ``text``. Returns nothing.
    """
    browser.get(url)
    time.sleep(5)  # crude fixed wait after navigation
    locator = (By.CSS_SELECTOR, byCss)
    element = browser.find_element(*locator)
    print(element)
    print(element.text)
# Example: the CSS selector "#q" targets the element with id "q".
FindSinNodeByCss("https://www.taobao.com/", "#q")

通过xpath定位

from selenium.webdriver.common.by import By
def FindSinNodeByXpath(url, byXpath):
    """Open ``url`` and print the first element matching XPath ``byXpath``.

    Prints the element object first, then its ``text``. Returns nothing.
    """
    browser.get(url)
    time.sleep(5)  # crude fixed wait after navigation
    locator = (By.XPATH, byXpath)
    element = browser.find_element(*locator)
    print(element)
    print(element.text)
# Example: the XPath matches any element whose id attribute is "q".
FindSinNodeByXpath("https://www.taobao.com/", '//*[@id="q"]')

其他定位方式

| 定位方式 | 描述 | 示例 |
| --- | --- | --- |
| By.NAME | 通过name属性进行定位 | "q" |
| By.CLASS_NAME | 通过class名称定位 | |
| By.LINK_TEXT | 专门用于定位带文本的链接标签,需要指定标签内全部的文本内容才能够进行定位 | |
| By.PARTIAL_LINK_TEXT | 指定某部分文本即可定位成功,不用将文本内容全部输入 | "淘宝" |
| By.TAG_NAME | 通过标签名定位,也就是用来定位div、h2这一类标签 | "div" |

查找多个节点

使用流程与查找单个节点一样,只不过是将find_element换成了find_elements,例如:

from selenium.webdriver.common.by import By
# Collect every link whose text contains "淘宝", print the list, then print
# each link's text.
browser.get("https://www.taobao.com/")
matches = browser.find_elements(By.PARTIAL_LINK_TEXT, "淘宝")
print(matches)
for link in matches:
    print(link.text)
time.sleep(3)
browser.quit()

节点交互

from selenium.webdriver.common.by import By
# Type a keyword into the input, replace it with another, then click search.
browser.get("https://www.taobao.com/")
search_box = browser.find_element(By.ID, "q")  # the input element
search_box.send_keys("懒人椅")
time.sleep(3)
search_box.clear()  # empty the input before typing the next keyword
search_box.send_keys("沙发")
time.sleep(3)
search_button = browser.find_element(By.XPATH, '//button[@data-spm="d13"]')
search_button.click()  # click the button
time.sleep(5)
browser.quit()

动作链

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
import time

# Open the page.
browser.get("https://dun.163.com/trial/jigsaw")
time.sleep(5)

# Locate the slider handle.
slider = browser.find_element(By.XPATH, "//span[@class='yidun_slider__icon']")

# Queue the whole gesture as one fluent chain: move onto the handle, press
# and hold, move 100px right (y unchanged), release, then execute it.
(
    ActionChains(browser)
    .move_to_element(slider)
    .click_and_hold()
    .move_by_offset(100, 0)
    .release()
    .perform()
)

运行javascript

# Start a driver from a local chromedriver directory, then run two snippets
# of JavaScript in the page: scroll to the bottom and raise an alert.
browser = get_browser(path="C:/Users/12043/Desktop/DragonW/SpiderCase/")
browser.get("https://www.zhihu.com/explore")
time.sleep(3)
for script in (
    "window.scrollTo(0, document.body.scrollHeight)",
    "alert('To Bottom')",
):
    browser.execute_script(script)
time.sleep(3)

获取节点信息

获取节点属性

from selenium.webdriver.common.by import By
# Print the link inside the third <li> of #hotsearch-content-wrapper and the
# value of its href attribute.
browser.get("https://www.baidu.com/")
time.sleep(3)
hot_link = browser.find_element(
    By.XPATH, '//*[@id="hotsearch-content-wrapper"]/li[3]/a'
)
print(hot_link)
print(hot_link.get_attribute("href"))  # value of the href attribute

获取文本值

from selenium.webdriver.common.by import By
# Print the link inside the third <li> of #hotsearch-content-wrapper and its
# text value.
browser.get("https://www.baidu.com/")
time.sleep(3)
hot_link = browser.find_element(
    By.XPATH, '//*[@id="hotsearch-content-wrapper"]/li[3]/a'
)
print(hot_link)
print(hot_link.text)  # the element's text

获取id、位置、标签名、大小

from selenium.webdriver.common.by import By
# Print the element's id, location, tag name and size, in that order.
browser.get("https://www.baidu.com/")
time.sleep(3)
hot_link = browser.find_element(
    By.XPATH, '//*[@id="hotsearch-content-wrapper"]/li[3]/a'
)
for attribute in ("id", "location", "tag_name", "size"):
    print(getattr(hot_link, attribute))

切换浏览器窗口

from selenium.webdriver.common.by import By
import time

# Open the page.
browser.get("https://www.zhihu.com/signin?next=%2F")
time.sleep(3)

# Click the button.
trigger = browser.find_element(
    By.XPATH,
    '//*[@id="root"]/div/main/div/div/div/div/div[2]/div/div[3]/span/button[1]',
)
trigger.click()
time.sleep(3)

# Switch to the last window handle (assumes the newly opened window is the
# last entry in the list), then print the QR image's src.
newest_window = browser.window_handles[-1]
browser.switch_to.window(newest_window)
qr_image = browser.find_element(
    By.XPATH, "//img[@class='js_qrcode_img web_qrcode_img']"
)
print(qr_image.get_attribute("src"))