Python爬虫系列(2.3-requests库模拟用户登录)

382 阅读1分钟

一、模拟登录拉勾网



import re

import requests

class LoginLaGou:
    """Simulate a user login to lagou.com through a ``requests`` session.

    The flow is two requests on the same session: a GET of the login page,
    from whose body two anti-forgery values are scraped, followed by a POST
    of the credentials with those values attached as request headers.
    """

    # Patterns for the two anti-forgery values assigned in the login page
    # body.  Compiled once here instead of on every ``login_html`` call.
    _TOKEN_RE = re.compile(r"X_Anti_Forge_Token.*?=.*?'(.*?)'", re.S)
    _CODE_RE = re.compile(r"X_Anti_Forge_Code.*?=.*?'(.*?)'", re.S)

    def __init__(self, username='181****1666', password='root', timeout=10):
        """Prepare headers, form data, endpoint URLs and the HTTP session.

        :param username: account name sent in the ``username`` form field.
        :param password: password sent in the ``password`` form field.
        :param timeout: seconds to wait on each HTTP request before giving
            up; without one a stalled server would block the caller forever.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
            'Referer': 'https://passport.lagou.com/login/login.html',
        }
        self.data = {
            'isValidate': 'true',
            'username': username,
            'password': password,
            'request_form_verifyCode': '',
            'submit': '',
        }
        self.timeout = timeout
        self.lagou = 'https://www.lagou.com/'
        self.login_index_url = 'https://passport.lagou.com/login/login.html'
        self.login_url = 'https://passport.lagou.com/login/login.json'
        # One session for both requests so that cookies received with the
        # login page are sent back with the login POST.
        self.session = requests.Session()

    @staticmethod
    def _first_group(pattern, text, what):
        """Return the first capture of *pattern* in *text*.

        :raises IndexError: if the pattern does not match.  The exception
            type is the one the previous ``findall(...)[0]`` lookup raised,
            now with a message naming the missing value.
        """
        match = pattern.search(text)
        if match is None:
            raise IndexError('{} not found in the login page'.format(what))
        return match.group(1)

    def login_html(self):
        """Fetch the login page and store its anti-forgery values as headers.

        On a 200 response the two values are extracted from the page body
        and written into ``self.headers``.  On any other status nothing is
        extracted and the headers are left unchanged.

        :return: None
        :raises IndexError: if the page was fetched but a value is missing.
        """
        response = self.session.get(
            url=self.login_index_url,
            headers=self.headers,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            return

        page = response.text
        token = self._first_group(self._TOKEN_RE, page, 'X_Anti_Forge_Token')
        code = self._first_group(self._CODE_RE, page, 'X_Anti_Forge_Code')
        # NOTE(review): the header names are spelled "Anit", unlike the
        # "Anti" of the page variables.  Kept exactly as written --
        # presumably this is what the server expects; confirm before changing.
        self.headers['X-Anit-Forge-Code'] = code
        self.headers['X-Anit-Forge-Token'] = token

    def login(self):
        """Submit the credentials to the login endpoint.

        Prints the response body and the cookies set by the response.

        :return: the response object of the login POST.
        :raises IndexError: propagated from :meth:`login_html`.
        """
        self.login_html()  # visit the login page first to pick up the tokens
        response = self.session.post(
            url=self.login_url,
            headers=self.headers,
            data=self.data,
            timeout=self.timeout,
        )
        print(response.text)
        print(response.cookies.get_dict())
        return response

if __name__ == "__main__":
    # Run the Lagou login demo only when this file is executed as a script.
    lagou = LoginLaGou()
    lagou.login()

二、模拟登录GitHub

import re

import requests

class LoginGitHub:
    """Simulate a user login to GitHub through a ``requests`` session.

    The flow is two requests on the same session: a GET of the login page,
    from which the form's ``authenticity_token`` value is scraped, followed
    by a POST of the credentials together with that token.
    """

    # Pattern for the value of the ``authenticity_token`` form field.
    # Compiled once here instead of on every ``login_html`` call.
    _AUTH_TOKEN_RE = re.compile(
        '.*?name="authenticity_token".*?value="(.*?)"', re.S
    )

    def __init__(self, username='root', password='123456', timeout=10):
        """Prepare headers, form data, endpoint URLs and the HTTP session.

        :param username: account name sent in the ``login`` form field.
        :param password: password sent in the ``password`` form field.
        :param timeout: seconds to wait on each HTTP request before giving
            up; without one a stalled server would block the caller forever.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
            'Referer': 'https://github.com/',
        }
        self.data = {
            'commit': 'Sign in',
            'utf8': '✓',
            'authenticity_token': '',  # filled in by login_html()
            'login': username,
            'password': password,
        }
        self.timeout = timeout
        self.github = 'https://github.com/login'
        self.login_url = 'https://github.com/session'
        # One session for both requests so that cookies received with the
        # login page are sent back with the login POST.
        self.session = requests.Session()

    def login_html(self):
        """Fetch the login page and store its CSRF token in the form data.

        The first ``authenticity_token`` value found in the page body is
        written to ``self.data['authenticity_token']``.  The response status
        is not checked.

        :return: None
        :raises IndexError: if no token is found in the page.  The exception
            type is the one the previous ``findall(...)[0]`` lookup raised,
            now with a message naming the missing value.
        """
        response = self.session.get(
            url=self.github,
            headers=self.headers,
            timeout=self.timeout,
        )
        match = self._AUTH_TOKEN_RE.search(response.text)
        if match is None:
            raise IndexError('authenticity_token not found in the login page')
        self.data['authenticity_token'] = match.group(1)

    def login(self):
        """Submit the credentials and CSRF token to the session endpoint.

        Prints the response body.

        :return: the response object of the login POST.
        :raises IndexError: propagated from :meth:`login_html`.
        """
        self.login_html()  # fetch the CSRF token first
        response = self.session.post(
            url=self.login_url,
            headers=self.headers,
            data=self.data,
            timeout=self.timeout,
        )
        print(response.text)
        return response

if __name__ == "__main__":
    # Run the GitHub login demo only when this file is executed as a script.
    github = LoginGitHub()
    github.login()