import requests
from lxml import etree
from bs4 import BeautifulSoup
import re
import scrapy
from scrapy.http import Request
from urllib import parse
import json
import time
import http.client
# Entry URL: Lishui municipal government information-disclosure column page.
url = "https://www.lishui.gov.cn/col/col1229265122/index.html?df=/col/col1229286995/index.html&isgk=1"

# Minimal browser-like request headers.
# NOTE: the original User-Agent was an unterminated string literal
# ('Mozilla/5.0 (Windows NT 10.0) — completed here with a standard desktop UA.
headers = {
    'Accept': '*/*',
    'Referer': 'https://www.lishui.gov.cn/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}

# timeout added so a hung connection cannot block the script forever.
response = requests.get(url=url, headers=headers, timeout=30)
# Decode once and reuse for both parsers (the original decoded twice).
html = response.content.decode(encoding='utf-8')

# --- lxml: XPath lookup ---
tree = etree.HTML(html)
print(11111, tree.text)
href = tree.xpath('//*[@id="7853602"]/div/table/tbody/tr[2]/td[1]/a/@href')
print(22222, href, response.status_code)

# --- BeautifulSoup: CSS-selector equivalent ---
# BeautifulSoup does NOT support XPath; soup.find(xpath=...) merely searches
# for a tag with an attribute literally named "xpath" and always returns None.
# Use select_one with the equivalent CSS selector instead. "tbody" is omitted
# because html.parser keeps the markup as-is and the tag may be absent from
# the raw HTML (browsers insert it when building the DOM).
soup = BeautifulSoup(html, 'html.parser')
anchor = soup.select_one('#7853602 div table tr:nth-of-type(2) td:nth-of-type(1) a')
href = anchor.get('href') if anchor is not None else None
print(333333, href)
```
```
1、首先,import requests库,url = "https://www.lishui.gov.cn/col/col1229265122/index.html?df=/col/col1229286995/index.html&isgk=1",这个url是入口url。
2、headers请求头包含Accept、Referer、User-Agent。
3、response = requests.get(url=url, headers=headers)
4、tree = etree.HTML(response.content.decode(encoding='utf8')) from lxml import etree
5、href = tree.xpath('//*[@id="7853602"]/div/table/tbody/tr[2]/td[1]/a/@href')
6、soup = BeautifulSoup(response.content.decode(encoding = 'utf-8'), 'html.parser') from bs4 import BeautifulSoup
```