Python-爬虫-网页请求

141 阅读1分钟
  1. get 传参 (1) 汉字报错:解释器默认 ASCII,URL 中的汉字需要转码,用 urllib.parse.quote(text, safe=string.printable) (2) 字典传参用 urllib.parse.urlencode()

post 请求: urllib.request.urlopen(url, data=...),data 为服务器接收的字节数据(bytes 类型)


import urllib.request


def load_data():
    """Download the Sina homepage with an HTTP GET request and save it as sina.html.

    Prints the response object, the raw byte payload, and the decoded
    text along the way, then writes the UTF-8 text to disk.
    """
    target = "http://www.sina.com.cn/"
    # urlopen issues the GET request and returns an http response object.
    resp = urllib.request.urlopen(target)
    print(resp)
    # The body is returned as bytes.
    raw_bytes = resp.read()
    print(raw_bytes)
    # Decode the payload into a str (the page is UTF-8 encoded).
    page_text = raw_bytes.decode("utf-8")
    print(page_text)
    # Persist the decoded page to a local file.
    with open('sina.html', 'w', encoding="utf-8")as out:
        out.write(page_text)


# Guard the demo call so importing this module does not trigger a
# network fetch and a file write as an import-time side effect.
if __name__ == "__main__":
    load_data()

from lxml import etree

# Parse the Baidu homepage straight from its URL using the lenient HTML
# parser (libxml2 fetches http:// sources itself), then serialize the
# tree back to bytes and print it as UTF-8 text.
html_parser = etree.HTMLParser()
tree = etree.parse('http://www.baidu.com', html_parser)
serialized = etree.tostring(tree)
print(serialized.decode('utf-8'))
#
# html=etree.parse('http://www.baidu.com',etree.HTMLParser())
# # html = etree.parse ('text.html')
# result=html.xpath('/html/body/div[10]/div[1]/div[4]/ul[1]/li[2]/a')
#
# print(len(result))
# print(result)
# j=1
# for i in result:
#     print("*******",j,i.attrib,i.text)
#     j+=1