# 使用selenium模拟登录,请在第一次登录页面之后用手机扫码登录
# webdriver跳转到对应frame之后,获取网页内容,使用正则表达式获取说说内容、浏览量、点赞人数并统计
# QQ号为本人使用,欢迎加好友
from selenium import webdriver
import time
from lxml import etree
# import pymysql
import xlwt
import re
from selenium.webdriver.common.by import By
# Profile URL of the Qzone account to scrape, and the number of scroll rounds
# to run; the round count has to be sized to the number of posts on the
# profile.
QZONE_URL = 'https://user.qzone.qq.com/2409927923'
SCROLL_ROUNDS = 365

# Open the Qzone profile in Chrome and capture the HTML of the feed list into
# ``page_text``. The browser session is always released, even when a step
# fails part-way (e.g. a frame or link cannot be found).
driver = webdriver.Chrome()
try:
    driver.get(QZONE_URL)
    # Login is manual: this pause is the window for scanning the QR code
    # with a phone.
    time.sleep(60)

    driver.get(QZONE_URL + '/main')
    time.sleep(10)
    driver.switch_to.frame("QM_Feeds_Iframe")
    time.sleep(10)

    # Click the "more feeds" link so that all posts can be loaded.
    driver.find_element(By.LINK_TEXT, "更多动态信息").click()
    time.sleep(10)

    # Scroll the top-level document repeatedly, pausing after each scroll.
    # The browsing context only changes through explicit switch_to calls, so
    # switching back to the top-level document once before the loop suffices.
    driver.switch_to.default_content()
    for _ in range(SCROLL_ROUNDS):
        driver.execute_script('window.scrollBy(0,5000)')
        time.sleep(10)

    # The page source is read from the nested frame
    # "ttinfo" > "frameFeedList"; pause between switches as the original
    # flow did.
    time.sleep(2)
    driver.switch_to.frame("ttinfo")
    time.sleep(2)
    driver.switch_to.frame("frameFeedList")
    time.sleep(2)
    page_text = driver.page_source
finally:
    # End the WebDriver session so Chrome/chromedriver do not linger.
    driver.quit()
# Cut the captured page into one HTML fragment per feed entry: everything
# between a "nameCard" marker and the following "很赞" text.
# NOTE(review): "." does not match newlines here, so a fragment that spans
# several source lines is not captured -- confirm this matches the markup.
ques_list = re.findall(r'nameCard(.*?)很赞', page_text)

# Per-feed fields. Each entry keeps the raw ``re.findall`` result (a list),
# with the name additionally stringified, so the output format is unchanged.
name_list = [str(re.findall(r'des_(.*?)</a>', feed)) for feed in ques_list]
cont_list = [
    re.findall(r'<div class="f-info">(.*?)</div>', feed) for feed in ques_list
]
numb_list = [
    re.findall(r'<span class="f-like-cnt">(.*?)</span>人觉得', feed)
    for feed in ques_list
]

# View counts and timestamps are matched against the whole page rather than
# per feed, so these lists are not guaranteed to have one item per feed.
view_list = re.findall(r'data-clicklog="visitor">(.*?)<', page_text)
time_list = re.findall(r'class=" ui-mr8 state">(.*?)</span>', page_text)

# Pad the page-level lists with empty strings up to the number of feeds so a
# missing view count or timestamp yields an empty field instead of an
# IndexError half-way through the file. (A negative repeat count gives an
# empty list, and zip() below ignores any surplus items.)
row_count = len(name_list)
padded_views = view_list + [''] * (row_count - len(view_list))
padded_times = time_list + [''] * (row_count - len(time_list))

# Write one ';'-separated line per feed: name;content;likes;views;time.
# The context manager closes the file even if a write fails.
with open('all_like_friend.txt', 'w', encoding="utf-8") as fo:
    for row in zip(name_list, cont_list, numb_list, padded_views, padded_times):
        fo.write(';'.join(str(field) for field in row) + '\n')