“用影刀通过XPath点击网页元素,最崩溃的是什么? ——不是XPath写不对,而是每次点击都要用两条指令!”
每次想用XPath点个元素,都得: 1️⃣ 先获取元素对象(web),填XPath,还要强行起个名字
2️⃣ 再使用指令点击元素(web),把刚才起名字的元素召唤出来点一下……
更绝望的是:遇到跨域或者iframe嵌套的页面,官方指令就只能点个寂寞!
🔧 于是,我搓了个“作弊指令” 指令简介:
1条指令=点击完成:不用分两步,不用起名字,XPath直接扔进去就能点!
无视跨域/iframe:管你元素藏多深,照样精准“狙杀”!
开发环境:
影刀RPA
对比体验:
以前(官方指令):
现在(我的指令):
技术黑科技: 1. 一键穿透iframe/跨域的万能元素点击器 1️⃣ 穿透iframe结界:用JS注入大法暴力操作DOM 2️⃣ 元素标记术:给每个元素打上唯一ID(像快递单号) 3️⃣ 多层搜索:就算元素藏在俄罗斯套娃式的iframe里也能挖出来。实现代码:
def print(*_, **__):
    """Module-local replacement for the built-in ``print`` that discards its arguments.

    Every ``print(...)`` call in this module resolves to this function, so the
    debug output scattered through the code is silenced.
    """
    return None
def get_uid(uids=None):
    """Return a millisecond-timestamp string that does not occur in ``uids``.

    Args:
        uids: Optional container of uid strings that are already taken.
            ``None`` (the default) means nothing is taken.

    Returns:
        A string of decimal digits: the current Unix time in milliseconds,
        bumped upwards by one until it is not a member of ``uids``.
    """
    taken = () if uids is None else uids
    candidate = int(time.time() * 1000)
    # Re-reading the clock inside the same millisecond would give the same
    # value again, so resolve a collision by counting upwards instead.
    while str(candidate) in taken:
        candidate += 1
    return str(candidate)
class IframePage:
    """Locate elements by XPath in a web page or in one of its nested iframes.

    An instance represents one frame: either the top-level page
    (``iframe is web_page``) or an iframe element inside it.  ``uids`` is the
    chain of ``diy-uid`` attribute values of the iframes leading from the top
    page down to this frame.  Elements inside an iframe are reached by
    rewriting ``resources/selectorsV2.xml`` with a selector that follows that
    chain and then resolving the selector through ``SelectorStore``.
    """

    # Injected into a frame's <html> element: find the single node matching
    # the XPath, tag it with a ``diy-uid`` attribute if it has none, and
    # return [uid, tagName, status message].  All names are declared with
    # const/let so nothing leaks into the page's global scope.
    _JS_FIND_UNIQUE = """
    function (element, xpath) {
        const $x = (xpath) => {
            try {
                let xpathResult = document.evaluate(xpath, element, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
                let nodes = []
                let node = null
                while (node = xpathResult.iterateNext()) {
                    nodes.push(node)
                }
                return nodes
            } catch (error) {
                return []
            }
        }
        const eles = $x(xpath)
        if (eles.length > 1) {
            return ["", "", "找到多个元素,无法唯一定位"]
        }
        if (eles.length == 0) {
            return ["", "", "未找到元素"]
        }
        const ele = eles[0]
        let uid = ele.getAttribute("diy-uid")
        if (uid == null || uid == "") {
            uid = new Date().getTime().toString()
            ele.setAttribute("diy-uid", uid)
        }
        return [uid, ele.tagName, "成功"]
    }
    """

    # Injected into a frame's <html> element: tag every node matching the
    # XPath with a ``diy-uid`` (unique within this call) and return a list of
    # [uid, tagName] pairs.
    _JS_FIND_ALL = """
    function (element, xpath) {
        function $x(xpath) {
            let xpathResult = document.evaluate(xpath, element, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
            let nodes = []
            let node = null
            while (node = xpathResult.iterateNext()) {
                nodes.push(node)
            }
            return nodes
        }
        function get_uid(uids) {
            let uid = new Date().getTime().toString()
            while (uids.includes(uid)) {
                uid += "0"
            }
            return uid
        }
        let uids = []
        let ele_info = []
        let eles = $x(xpath)
        for (let i = 0; i < eles.length; i++) {
            let ele = eles[i]
            let uid = ele.getAttribute("diy-uid")
            if (uid == null || uid == "") {
                uid = get_uid(uids)
                ele.setAttribute("diy-uid", uid)
            }
            uids.push(uid)
            ele_info.push([uid, ele.tagName])
        }
        return ele_info
    }
    """

    def __init__(self, web_page, iframe=None, uids=None, iframe_page_list=None):
        """Bind the instance to a page/frame and reset the resource files.

        Args:
            web_page: Browser page object used for all lookups.
            iframe: Element of the iframe this instance represents; ``None``
                means the top-level page itself.
            uids: ``diy-uid`` chain leading to ``iframe``.  Stored by
                reference when given; a fresh list is created when omitted so
                instances never share state through a default argument.
            iframe_page_list: Stored on the instance; not read by this class.
        """
        self.web_page = web_page
        self.iframe = web_page if iframe is None else iframe
        self.iframe_page_list = [] if iframe_page_list is None else iframe_page_list
        self.uids = [] if uids is None else uids
        self.resources_path = os.path.join(os.path.dirname(__file__), "resources")
        self.selectors_path = os.path.join(self.resources_path, "selectorsV2.xml")
        self.init_resources()
        SelectorStore(self.resources_path)

    def init_resources(self):
        """Initialise the resource files.

        Overwrites ``imagesV2.xml`` and ``selectorsV2.xml`` in
        ``self.resources_path`` with empty repositories, creating the
        directory first if it does not exist.
        """
        os.makedirs(self.resources_path, exist_ok=True)
        images_path = os.path.join(self.resources_path, "imagesV2.xml")
        images_xml_str = """
<?xml version="1.0" encoding="utf-8"?>
<repository xmlns:x="rpa://imageselector/core">
</repository>
"""
        selectors_xml_str = """
<?xml version="1.0" encoding="utf-8"?>
<repository xmlns:x="rpa://selector/core" xmlns:regex="rpa://selector/operator/regex" xmlns:wildcard="rpa://selector/operator/wildcard">
</repository>
"""
        with open(images_path, "w", encoding="utf-8") as f:
            f.write(images_xml_str.strip())
        with open(self.selectors_path, "w", encoding="utf-8") as f:
            f.write(selectors_xml_str.strip())

    def create_ele_libs(self, tag_name, uids, new_uid=None):
        """Create the element library.

        Rewrites ``self.selectors_path`` so that it holds one selector named
        ``list-iframe``: one ``web`` node per entry of ``uids`` (tag
        ``iframe`` with that ``diy-uid``), followed by a final ``web`` node
        for ``tag_name`` that carries ``diy-uid=new_uid`` when ``new_uid`` is
        given.
        """
        root = ET.Element("repository")
        root.set("xmlns:x", "rpa://selector/core")
        root.set("xmlns:regex", "rpa://selector/operator/regex")
        root.set("xmlns:wildcard", "rpa://selector/operator/wildcard")
        group_node = ET.SubElement(
            root, "group", {"id": "驿站", "name": "iframe", "type": "Web"}
        )
        selector_node = ET.SubElement(
            group_node, "selector", {"name": "list-iframe", "type": "simple"}
        )
        for uid in uids:
            ET.SubElement(selector_node, "web", {"x:name": "iframe", "diy-uid": uid})
        web_attrib = {"x:name": tag_name}
        if new_uid is not None:
            web_attrib["diy-uid"] = new_uid
        ET.SubElement(selector_node, "web", web_attrib)
        ET.ElementTree(root).write(
            self.selectors_path, encoding="utf-8", xml_declaration=True
        )

    def read_ele(self, single=True):
        """Read the element(s) described by the current ``list-iframe`` selector.

        Returns the result of ``web_page.find`` when ``single`` is true,
        otherwise the result of ``web_page.find_all``; both use ``timeout=0``.
        """
        _selector = SelectorStore(self.resources_path)("list-iframe")
        if single:
            return self.web_page.find(_selector, timeout=0)
        return self.web_page.find_all(_selector, timeout=0)

    def to_iframe(self, xpath, find_descendant_iframe=False):
        """Switch to an iframe located by XPath.

        Args:
            xpath: XPath of the iframe element.
            find_descendant_iframe: When true, try the XPath in this frame
                and in every descendant iframe and require exactly one hit.

        Returns:
            A new ``IframePage`` for the matched iframe.  This instance's own
            ``uids`` chain is left untouched.

        Raises:
            Exception: "未找到元素" when nothing matches, or one of the
                "找到多个…无法唯一定位" messages when the match is ambiguous.
        """
        if find_descendant_iframe:
            ambiguous = ("找到多个元素,无法唯一定位", "找到多个iframe元素,无法唯一定位")
            matched_page = None
            for frame_page in self.find_all_iframe():
                try:
                    candidate = frame_page.to_iframe(xpath)
                except Exception as e:
                    # Only ambiguity aborts the search; any other failure just
                    # means this frame does not contain the iframe.
                    msg = e.args[0] if e.args else ""
                    if msg in ambiguous:
                        raise
                    continue
                if matched_page is not None:
                    raise Exception("找到多个iframe元素,无法唯一定位")
                matched_page = candidate
            if matched_page is None:
                raise Exception("未找到元素")
            return matched_page
        iframe_ele, uid = self._locate(xpath)
        # Build the child's chain as a new list so the parent keeps its own.
        return IframePage(self.web_page, iframe_ele, self.uids + [uid])

    def _locate(self, xpath):
        """Find the unique element for ``xpath`` in this frame and tag it.

        Returns:
            ``(element, uid)`` where ``uid`` is the element's ``diy-uid``
            attribute value; an existing value is reused, otherwise a new one
            is assigned.

        Raises:
            Exception: with the status message returned by the injected
                script when the match inside an iframe is not unique or is
                missing.
        """
        if self.iframe != self.web_page:
            self.create_ele_libs("html", self.uids)
            html_ele = self.read_ele()
            uid, tag_name, msg = html_ele.execute_javascript(self._JS_FIND_UNIQUE, xpath)
            if msg != "成功":
                raise Exception(msg)
            self.create_ele_libs(tag_name, self.uids, uid)
            return self.read_ele(), uid
        ele = self.web_page.find_by_xpath(xpath, timeout=1)
        # Keep an existing tag: frame chains recorded earlier refer to it.
        uid = ele.get_attribute("diy-uid")
        if uid is None or uid == "":
            uid = get_uid()
            ele.set_attribute("diy-uid", uid)
        return ele, uid

    def find_ele(self, xpath, is_iframe=False):
        """Get the element object for ``xpath`` in this frame.

        Args:
            xpath: XPath of the element.
            is_iframe: When true, the element's ``diy-uid`` is appended to
                ``self.uids``.

        Returns:
            The matched element, tagged with a ``diy-uid`` attribute.
        """
        ele, uid = self._locate(xpath)
        if is_iframe:
            self.uids.append(uid)
        return ele

    def find_ele2(self, xpath):
        """Get the element object, searching across nested iframes.

        Tries ``find_ele`` in this frame and every descendant iframe; frames
        in which the lookup raises are skipped.

        Raises:
            Exception: "未找到元素" when no frame yields the element,
                "找到多个元素,无法唯一定位" when more than one frame does.
        """
        print(xpath)
        eles = []
        for frame_page in self.find_all_iframe():
            try:
                eles.append(frame_page.find_ele(xpath))
            except Exception:
                # A failed lookup in one frame must not stop the scan.
                continue
        if len(eles) == 1:
            return eles[0]
        if len(eles) == 0:
            raise Exception("未找到元素")
        raise Exception("找到多个元素,无法唯一定位")

    def find_all_iframe(self):
        """Find all descendant iframes of the current frame (including itself).

        Returns:
            A list of ``IframePage`` objects whose first entry is ``self``.
        """
        uids = self.uids.copy()
        if len(self.uids) == 0:
            iframe_eles = self.web_page.find_all_by_xpath("//iframe", timeout=0.1)
        else:
            self.create_ele_libs("iframe", uids)
            iframe_eles = self.read_ele(False)
        iframe_page_list = [self]
        self._find_all_iframe(iframe_eles, uids, iframe_page_list)
        print("iframe_page_list", len(iframe_page_list))
        return iframe_page_list

    def _find_all_iframe(self, iframe_eles, uids, iframe_page_list):
        """Recursively append an ``IframePage`` for each iframe and its descendants.

        ``uids`` is used as a stack: each iframe's uid is pushed while its
        children are explored and popped afterwards.
        """
        if len(iframe_eles) == 0:
            return
        for iframe_ele in iframe_eles:
            uid = iframe_ele.get_attribute("diy-uid")
            print("id", iframe_ele.get_attribute("id"))
            if uid is None or uid == "":
                uid = get_uid()
                iframe_ele.set_attribute("diy-uid", uid)
            uids.append(uid)
            iframe_page_list.append(IframePage(self.web_page, iframe_ele, uids.copy()))
            self.create_ele_libs("iframe", uids)
            child_eles = self.read_ele(False)
            self._find_all_iframe(child_eles, uids, iframe_page_list)
            uids.pop(-1)

    def find_all_ele(self, xpath, find_descendant_iframe=False):
        """Get the list of similar elements matching ``xpath``.

        Args:
            xpath: XPath of the elements.
            find_descendant_iframe: When true, search this frame and every
                descendant iframe; the matches must all come from one frame.

        Returns:
            A list of elements (possibly empty).

        Raises:
            Exception: "在多个iframe中找到相似元素,无法唯一定位" when more
                than one frame contains matches.
        """
        if find_descendant_iframe:
            hit_frames = 0
            found = []
            for frame_page in self.find_all_iframe():
                frame_eles = frame_page.find_all_ele(xpath)
                if len(frame_eles) != 0:
                    hit_frames += 1
                    found = frame_eles
                if hit_frames > 1:
                    raise Exception("在多个iframe中找到相似元素,无法唯一定位")
            return found
        if self.iframe != self.web_page:
            uids = self.uids.copy()
            self.create_ele_libs("html", uids)
            html_ele = self.read_ele()
            ele_info = html_ele.execute_javascript(self._JS_FIND_ALL, xpath)
            eles = []
            for uid, tag_name in ele_info:
                # Resolve each tagged node back into an element object.
                self.create_ele_libs(tag_name, uids, uid)
                eles.append(self.read_ele())
            return eles
        return self.web_page.find_all_by_xpath(xpath, timeout=3)
def main(args):
    """Demo entry point: find the element with id ``po_id`` and click it.

    ``args`` is accepted but not read.
    """
    edge_page = xbot.web.get("*", mode='edge', use_wildcard=True)
    # Cross-level lookup: the search covers the page and its nested iframes.
    target = IframePage(edge_page).find_ele2('//*[@id="po_id"]')
    target.click()
2. 调用一键穿透iframe/跨域的万能元素点击器
def main(args):
    """Look up an element by XPath across iframes and publish it in ``args``.

    Reads ``args["web_page"]`` (falling back to the active Chrome page) and
    ``args["xpathSelector"]``.  The result is stored under
    ``args["元素对象"]`` and returned.

    On any exception: re-raises when ``args["异常处理"]`` equals
    ``"停止运行"``; otherwise stores and returns ``args["失败返回值"]``.
    """
    try:
        page = args.get("web_page") or web.get_active(mode="chrome")
        xpath = args.get("xpathSelector")
        found = IframePage(page).find_ele2(xpath)
        args["元素对象"] = found
        return found
    except Exception:
        if args.get("异常处理") != "停止运行":
            args["元素对象"] = args.get("失败返回值")
            return args.get("失败返回值")
        raise
3.封装点击元素XPath指令
def test(xpath):
    """Look up the element for ``xpath`` via ``GetElementAcrossDomains`` and click it.

    ``web_page`` is passed as ``None`` in the lookup arguments.
    """
    lookup_args = {"web_page": None, "xpathSelector": xpath}
    GetElementAcrossDomains(lookup_args).click()
def simulative_click(page, element, delay_after):
    """Click ``element`` with the mouse at its on-screen centre, then click once more.

    The element's bounds are fetched relative to the screen, the mouse is
    moved to the centre and left-clicked; a second left click follows without
    moving the mouse, after which ``delay_after`` is applied.
    """
    bounds = xbot_visual.web.element.get_details(
        browser=page,
        element=element,
        operation="bound",
        absolute_url=False,
        attribute_name=None,
        relative_to="screen",
        to96dpi=True,
        timeout="20",
        _block=("main", 8, "获取元素信息(web)"),
    )
    # Settings shared by both mouse clicks.
    shared = dict(
        relative_to="screen",
        move_speed="middle",
        button="left",
        click_type="click",
        hardware_driver_click=False,
        keys="null",
    )
    xbot_visual.win32.click_mouse(
        is_move_mouse_before_click=True,
        point_x=bounds.center_x,
        point_y=bounds.center_y,
        delay_after="1",
        _block=("main", 9, "鼠标点击"),
        **shared,
    )
    # NOTE(review): with no mouse move requested, the "0"/"0" point is
    # presumably ignored and the click lands where the cursor is — confirm.
    xbot_visual.win32.click_mouse(
        is_move_mouse_before_click=False,
        point_x="0",
        point_y="0",
        delay_after=delay_after,
        _block=("main", 10, "鼠标点击"),
        **shared,
    )
def click_by_xpath(web_page, xpath, time_out=5, is_iframe_element=False, retry_cnt=1, refresh=False, button="left", simulative=False, delay_after=1):
    """Find exactly one element by XPath and click it.

    Args:
        web_page: Browser page object to search.
        xpath: XPath of the element to click.
        time_out: Timeout passed to each ``find_all_by_xpath`` attempt.
        is_iframe_element: When true, look the element up through
            ``GetElementAcrossDomains`` instead of searching the page directly.
        retry_cnt: Number of lookup attempts on the direct path.
        refresh: When true, reload the page after a failed attempt.
        button, simulative, delay_after: Forwarded to ``element.click``.

    Raises:
        Exception: when every attempt failed, when the XPath does not match
            exactly one element, or when the cross-iframe lookup yields
            ``None``.
    """
    browser = web_page
    element = None
    if is_iframe_element:
        # Cross-iframe lookup.
        element = GetElementAcrossDomains({"web_page": browser, "xpathSelector": xpath})
    else:
        list_web_element = None
        while retry_cnt > 0:
            try:
                list_web_element = browser.find_all_by_xpath(xpath, timeout=time_out)
                # An empty result raises IndexError here and counts as a
                # failed attempt, so it is retried like a timeout.
                element = list_web_element[0]
                break
            except Exception:
                if refresh:
                    browser.reload()
                retry_cnt -= 1
                print(f"查找元素【{xpath}】超时, 剩余重试次数【{retry_cnt}】")
        if list_web_element is None:
            raise Exception(f"xpath语法错误: {xpath}")
        if len(list_web_element) != 1:
            raise Exception(f"xpath不正确:找到了{len(list_web_element)}个元素!- 【{xpath}】")
    if element is None:
        raise Exception(f"查找跨域xpath元素不正确:【{xpath}】")
    element.click(button=button, simulative=simulative, delay_after=delay_after)
def main(args):
    """Instruction entry point: translate ``args`` into a ``click_by_xpath`` call.

    ``args["web_page"]`` falls back to the active Chrome page and
    ``args["xpathSelector"]`` is always forwarded.  Every other option is
    forwarded only when ``args`` holds a non-``None`` value for it, so a
    missing option leaves the corresponding ``click_by_xpath`` default in
    effect instead of overriding it with ``None``.
    """
    call_kwargs = {
        "web_page": args.get("web_page") or web.get_active(mode="chrome"),
        "xpath": args.get("xpathSelector"),
    }
    # click_by_xpath parameter name -> key under which args carries it.
    optional_keys = {
        "time_out": "获取元素超时",
        "refresh": "refresh",
        "retry_cnt": "retry_count",
        "simulative": "是否模拟人工",
        "button": "按钮",
        "delay_after": "执行成功后等待",
        "is_iframe_element": "是否为iframe对象",
    }
    for param, key in optional_keys.items():
        value = args.get(key)
        if value is not None:
            call_kwargs[param] = value
    click_by_xpath(**call_kwargs)
以上便是完整核心代码
🔧编辑指令借鉴
“不想再点击元素两步走?试试这个指令吧! ”
粉丝福利:
加关注的朋友,免费帮你解答RPA问题,并提供自动化流程建议噢!