import urllib.error
import urllib.parse
import urllib.request

# Scratch examples from the tutorial follow. Each one is parked inside a bare
# triple-quoted string so that it is never executed; the text inside the
# strings is kept exactly as it appeared in the source.
# NOTE(review): two lines inside the examples end in "…" because they were
# already truncated in the source as received; restore them before re-enabling.

# POST request (simulating a real user's login)
"""
data = bytes(urllib.parse.urlencode({"hello":"world"}),encoding="utf-8")#伪造身份码
response = urllib.request.urlopen("httpbin.org/post",data=…
print(response.read().decode("utf-8"))
"""

# GET request
"""
response=urllib.request.urlopen("acm.zzuli.edu.cn/")#网站
print(response.read().decode("utf-8"))#获取网站源代码并用uft-8进行解码-
"""

# Timeout handling
"""
try:
    response = urllib.request.urlopen("httpbin.org/post",timeo…
    print(response.read().decode("utf-8"))
except urllib.error.URLError as e:#
    print("time out!")
"""

# Reading response header information (what the browser's F12 tools show)
"""
response = urllib.request.urlopen("baidu.com")

print(response.status)

print(response.getheaders("Server"))
"""

# Presenting a browser identity (User-Agent) when accessing zzulioj
"""
url="acm.zzuli.edu.cn/"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36 Edg/87.0.664.75"}
req=urllib.request.Request(url=url,headers=headers)#封装信息,如果是post则加入,method="POST"
response=urllib.request.urlopen(req)#发送网页请求
print(response.read().decode("utf-8"))
"""

def main():
    """Run the crawl: collect data for the base URL, then fetch that page.

    Calls ``getData`` with the base URL and then ``askURL`` on the same URL.
    Nothing is returned, and the collected data is not saved yet because the
    save step is still disabled.
    """
    # The scheme is required: urllib.request.Request rejects a scheme-less
    # URL with ValueError("unknown url type").
    baseurl = "http://acm.zzuli.edu.cn/"

    # Scrape the pages.
    datalist = getData(baseurl)

    # Destination for the save step below.
    savepath = "doubAmn"
    # TODO: save the data once saveData exists (datalist/savepath are kept
    # for that step).
    # saveData(savepath)

    askURL(baseurl)

def getData(baseurl):
    """Scrape the pages under *baseurl* and return the parsed records.

    Parsing is not implemented yet: *baseurl* is currently unused and the
    result is always empty.

    Args:
        baseurl: Base URL of the site to scrape.

    Returns:
        A new, empty list on every call.
    """
    datalist = []
    # TODO: parse the records one by one and append them to datalist.
    return datalist

def askURL(url):
    """Fetch the page at *url* and return its body decoded as UTF-8.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Absolute URL to fetch (scheme included).

    Returns:
        The decoded response body, or ``""`` when ``urlopen`` raises
        ``urllib.error.URLError``; in that case the error's ``code`` and/or
        ``reason`` are printed when present.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # User agent: tells the server what kind of client we are (in essence,
    # what kind of content we are able to accept).
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36 Edg/87.0.664.75"
    }
    request = urllib.request.Request(url, headers=head)
    html = ""
    try:
        # The context manager closes the response even if read/decode fails.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        # HTTPError carries a status code; a plain URLError only a reason.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html

# Introduction to web crawlers (original title: 爬虫入门)

# NOTE(review): no module-level assignment to `response` is visible in this
# file, so this statement presumably raises NameError when the module runs.
# It looks like a stray copy of a line from the header examples; confirm
# the intent and remove it if so.
print(response.status)