Python-爬虫-request_header

318 阅读1分钟
import urllib.request


def load_baidu():
    """Fetch the Baidu home page with a browser User-Agent and show the request headers.

    Prints, in order: the request's full URL, the response object, the
    request's header dict, and the value of its User-Agent header. The
    decoded page body is then written to ``02header.html`` in the current
    working directory.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = "https://www.baidu.com/"
    header = {
        # Browser identification string sent with the request.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.100 Safari/537.36"
    }

    # Build the request object carrying the custom header.
    request = urllib.request.Request(url, headers=header)

    # Full URL the request will be sent to.
    final_url = request.get_full_url()
    print(final_url)

    # Perform the request; the context manager closes the connection once the
    # body has been read instead of leaving the response open.
    with urllib.request.urlopen(request) as response:
        print(response)
        data = response.read().decode("utf-8")
        # NOTE: the *response* headers are available as response.headers.

    # First way to inspect the *request* headers: the whole header dict.
    request_headers1 = request.headers
    print(request_headers1)

    # Second way: look up a single header. The key is stored with only its
    # first letter capitalized, hence "User-agent" rather than "User-Agent".
    request_headers2 = request.get_header("User-agent")
    print(request_headers2)

    with open("02header.html", "w", encoding="utf-8") as f:
        f.write(data)

# Run the demo at module level: this executes whenever the file is run or imported.
load_baidu()


"D:\Program Files\project\spider\venv\Scripts\python.exe" "D:/Program Files/project/02-get/request_header.py"
https://www.baidu.com/
<http.client.HTTPResponse object at 0x0000023D148D2B00>
{'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.100 Safari/537.36'}
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.100 Safari/537.36

Process finished with exit code 0