from lxml import etree
import requests
def fetch_html(url, timeout=10):
    """
    Fetch the raw HTML content of a web page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get``; without a timeout a stalled
            server would make the call hang indefinitely.

    Returns:
        The response body exactly as ``response.content`` provides it
        (undecoded), so the HTML parser can do its own encoding detection.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.content
def get_xpath(element, root):
    """
    Build a tag-only path from ``root`` down to a single element.

    The path lists the tag of every node between ``root`` (exclusive) and
    ``element`` (inclusive), joined with ``/`` and prefixed with ``/``.
    No positional predicates are added, so siblings that share a tag
    produce the same path.

    Args:
        element: Node whose path is wanted. It must provide ``getparent()``
            and ``tag``.
        root: Ancestor at which the walk stops; its own tag is not part of
            the result.

    Returns:
        The path as a string, e.g. ``/body/div/p``. When ``element`` is
        ``root`` itself the result is ``/``.
    """
    # Non-element nodes carry a factory callable instead of a string in
    # ``tag``; formatting it directly yields "<cyfunction Comment at 0x...>",
    # which is not a usable path step. Map those callables (by name) to the
    # matching XPath node test instead.
    node_tests = {
        'Comment': 'comment()',
        'ProcessingInstruction': 'processing-instruction()',
    }
    components = []
    while element is not root:
        parent = element.getparent()
        if parent is None:
            # Ran out of ancestors without meeting ``root``; stop here and
            # leave this top-most node out of the path.
            break
        tag = element.tag
        if not isinstance(tag, str):
            # Anything unrecognised falls back to the generic node test.
            tag = node_tests.get(getattr(tag, '__name__', ''), 'node()')
        components.append(tag)
        element = parent
    # Steps were collected leaf-first; flip them to read root-first.
    components.reverse()
    return '/' + '/'.join(components)
def is_leaf(element):
    """
    Tell whether a node is a leaf, i.e. has no children.

    Args:
        element: Any object supporting ``len()``; its length is taken as
            the number of children.

    Returns:
        True when the length is zero, False otherwise.
    """
    child_count = len(element)
    return child_count == 0
def get_all_leaf_xpaths(element, root, leaf_xpaths):
    """
    Collect the path of every leaf node found at or below ``element``.

    The subtree is walked depth-first, left to right, and the path of each
    childless node (as produced by ``get_xpath`` relative to ``root``) is
    appended to ``leaf_xpaths``.

    Args:
        element: Node at which the walk starts.
        root: Reference node handed to ``get_xpath`` for every leaf.
        leaf_xpaths: List that receives the paths; it is modified in place.

    Returns:
        None. Results are delivered through ``leaf_xpaths``.
    """
    pending = [element]
    while pending:
        node = pending.pop()
        if is_leaf(node):
            leaf_xpaths.append(get_xpath(node, root))
            continue
        # Push children right-to-left so they are popped left-to-right,
        # which keeps the leaves in document order.
        pending.extend(reversed(list(node)))
# Download the page, parse it, and print each distinct leaf path once.
url = 'https://github.com/IEIT-Yuan/Yuan-2.0'
html_content = fetch_html(url)
parser = etree.HTMLParser()
# etree.HTML already returns the document's root element, so no detour via
# getroottree().getroot() is needed.
tree = etree.HTML(html_content, parser)
leaf_xpaths = []
root_element = tree
get_all_leaf_xpaths(root_element, root_element, leaf_xpaths)
# Many leaves share a tag-only path. dict.fromkeys removes the duplicates
# while keeping first-seen (document) order; going through set() would make
# the print order vary between runs.
leaf_xpaths = list(dict.fromkeys(leaf_xpaths))
for xpath in leaf_xpaths:
    print(xpath)
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/form/query-builder/div/div/div/div/div
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/form/query-builder/div/template/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/div/div/div/span/a/span/span/svg/path
/body/div/div/script
/body/div/include-fragment
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/p
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/div/div/div/h1
/body/div/div/div/main/div/div/div/details/details-dialog/a/div/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/button/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/div/div/a/span/span/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/div/div/a/span/span/span
/body/div/div/header/div/div/div/div/input
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/style
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/script
/body/div
/body/div/div/header/div/div/div/div/qbsearch-input/div/dialog-helper/dialog/div/div/div/h1
/body/div/div/header/div/div/div/div/qbsearch-input/div/dialog-helper/dialog/scrollable-region/div/form/label
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/h2
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/scrollable-region/div/div
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/div/div
/body/div/div/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/div/a
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/template
/head/meta
/body/div/div/div/main/div/div/div/div/strong/a
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/h2/a
/body/div/div/div/main/div/div/div/ul/li/a/svg/path
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/scrollable-region/div/div/<cyfunction Comment at 0x7f24a808df30>
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/div/div/div/h2
/body/div/div/header/div/div/div/nav/ul/li/div/div/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/button
/body/div/div/header/div/div/div/div/qbsearch-input/div/dialog-helper/dialog/scrollable-region/div/form/input
/body/div/div/div/main/div/div/div/div/div/tool-tip
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/button/span/span/div/div/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/div/h2
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/table/tbody/tr/td/sup/br
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/p
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/h2/a/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/ul/li/a/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/h2
/body/div/div/div/main/div/div/div/ul/li/tool-tip
/body/div/div/react-partial/script
/body/div/div/header/div/div/div/div/qbsearch-input/div/dialog-helper/dialog/scrollable-region/div/form/p
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/div/a/span
/body/div/div/div/main/div/div/div/ul/li/div/a/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/details/summary/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/h2
/body/div/div/div/main/turbo-frame/div/div/div/div/div/script
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/button/span/span/svg/path
/body/div/div/div/main/div/div/div/details/details-dialog/div/h3
/body/div/div/template/div/div/button/svg/path
/body/div/div/div/main/div/nav/div/action-menu/focus-group/anchored-position/div/div/action-list/div/ul/li/a/span
/body/div/template/details/summary
/body/div/template/details/details-dialog/div
/body/div/div/div/main/div/div/div/div/span/a
/body/div/div/div/main/div/div/div/div/a/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/ul/li/ul/li/ul/li/ul/li/a
/head/title
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/a/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/ul/li/strong
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/<cyfunction Comment at 0x7f24a808df30>
/body/div/div/div/main/turbo-frame/div/div/div/div/div/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/ol/li/p
/body/div/div/header/div/div/div/a
/body/div/div/header/div/div/div/div/qbsearch-input/div/dialog-helper/dialog/scrollable-region/div/form/textarea
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/button/span/span/div/div/span/<cyfunction Comment at 0x7f24a808df30>
/body/div/div/div/main/div/nav/ul/li/a/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/a/span/span
/body/div/div/div/main/div/div/div/ul/li
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/a/span/span/svg/path
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/form/query-builder/div/div/button/svg/path
/body/div/footer/h2
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/ul/li/ul/li/a
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/ul/li/ul/li/ul/li/a
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/form/query-builder/div
/body/div/div/div/main/div/div/div/details/details-dialog/a/div/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/h2
/body/div/div/div/main/div/nav/div/action-menu/focus-group/button/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/nav/ul/li/a/span/svg/path
/body/div/div/react-partial/div
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/scrollable-region/div/div/form/input
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/thead/tr/th/div/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/details/details-dialog/div/button/svg/path
/body/div/div/header/div/div/div/div/qbsearch-input/div/dialog-helper/dialog/scrollable-region/div/<cyfunction Comment at 0x7f24a808df30>
/body/div/div/template/div/div/div/div
/body/div/div/div/main/div/div/div/div/div/div/button/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/h2
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/form/query-builder/div/div/ul
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/div/div/div/button/svg/path
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/form/query-builder/div/div/span
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/div/a
/body/div/div/div/main/div/div/div/p
/body/div/div/div/main/div/nav/div/action-menu/focus-group/tool-tip
/body/div/div/div/button/svg/path
/body/div/div/div/tool-tip
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/details/details-dialog/a/div/div
/body/div/div/a
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/hr
/body/div/div/header/div/div/div/nav/ul/li/button/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/ul/li/a/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/p/a/code
/body/div/footer/div/nav/ul/li/cookie-consent-link/button
/body/div/div/header/div/div/div/nav/ul/li/div/div/ul/li/a/svg/path
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/div/button
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/span/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/input
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/include-fragment
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/div/div/div
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/scrollable-region/div/div/form/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/h3
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/include-fragment/ul/li/div
/body/div/div/header/div/div/a/svg/path
/body/div/div/div/main/div/div/div/div/div/a/svg/path
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/scrollable-region/div/div/form/div/label
/body/div/div/div/main/div/div/div/div/div/div/a/svg/path
/body/div/div/span/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/table/tbody/tr/td/sup/em
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/form/query-builder/div/div/div/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/ul/li/a
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/scrollable-region/div/div/form/div/input
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/div/h3/div/a
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/p/code
/body/div/div/div/main/div/div/div/h3
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/button/span/span/div/svg/path
/body/div/div/div/main/div/nav/ul/li/a/include-fragment
/body/div/div/div/main/div/div/div/div/div/div/a/span
/body/div/template/div/clipboard-copy/svg/path
/head/script
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/thead/tr/th/span
/body/div/div/header/div/div/div/div/div/a
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/button
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/div/div
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/<cyfunction Comment at 0x7f24a808df30>
/body/div/div/div/main/div/div/div/div/a/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/button/span/svg/path
/body/div/div/div/main/div/div/div/ul/li/a/span
/body/div/footer/div/nav/h3
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/div/div/h2
/body/div/div/header/div/div/div/div/qbsearch-input/div/button/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/div/a/strong
/body/div/div/div/main/div/nav/div/action-menu/focus-group/anchored-position/div/div/action-list/div/ul/li/a/span/svg/path
/body/div/div/div/span/a
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/<cyfunction Comment at 0x7f24a808df30>
/body/div/div/div/main/div/div/div/div/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/div/pre/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/details/details-dialog/div/h3
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/scrollable-region/div/div/form/div/auto-check/input
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/table/tbody/tr/td/div/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/p/strong
/body/div/div/div/svg/path
/body/div/footer/div/div/span
/body/div/div/div/main/div/div/div/div/span
/body/div/div/div/main/div/nav/ul/li/a/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/table/thead/tr/th
/body/div/div/header/div/div/div/div/qbsearch-input/div/div/modal-dialog/div/div/div/form/query-builder/div/div/span/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/table/tbody/tr/td/a
/body/div/div/header/button/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/button/span/span
/body/div/footer/div/nav/ul/li/a
/body/div/div/header/div/div/div/div/qbsearch-input/div/custom-scopes/dialog-helper/dialog/scrollable-region/div/div/form/p/a
/body/div/div/div/main/div/div/div/ul/li/div/a/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/div/a/svg/path
/body/div/div/div/main/turbo-frame/div/h1
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/nav/ul/li/a/span
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/div/h1
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/p/a/img
/body/div/div/header/div/div/div/button/span/span/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/div/div/div/div/br
/body/div/div/header/div/div/div/nav/ul/li/a
/body/div/div
/body/div/div/div
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/table/tbody/tr/td
/body/div/template/details/details-dialog/button/svg/path
/body/div/div/header/div/div/div/nav/ul/li/div/div/ul/li/a
/body/div/div/header/h2
/body/div/div/header/div/div/div/div/qbsearch-input/div/button/div/svg/path
/body/div/div/div/main/turbo-frame/div/div/div/div/div/react-partial/div/div/div/div/div/div/article/div/h4
# Evaluate every collected path against the parsed document and print the
# text of the first matching node, when it has any.
for xpath in leaf_xpaths:
    try:
        # The collected paths start below the <html> root, so prefix it.
        first_div_p = tree.xpath("/html" + xpath)
    except etree.XPathError:
        # Some collected paths are not valid XPath (e.g. steps produced for
        # comment nodes); skip those instead of hiding every possible error
        # behind a bare except.
        continue
    if first_div_p and first_div_p[0].text:
        print("xpath", xpath)
        print("text", first_div_p[0].text)