持续创作,加速成长!这是我参与「掘金日新计划 · 6 月更文挑战」的第31天,点击查看活动详情
最近几天各省的成绩陆续公布,接下来就是志愿填报的重要工作。考得好固然非常重要,志愿报得好也同样重要。一般来说,志愿填报过程中都会有心仪的学校和专业,这样只需要按照自己的目标去查询即可。但是也有一部分学生缺乏目标性,不清楚有哪些学校,分数线如何,学校是什么情况,在哪个地方,有哪些特色专业等等。
为了帮助高考生快速了解志愿填报,本人使用PyQt5+爬虫,制作了一款快速查询高校历年分数信息与特色专业的辅助软件。下面废话不多说,直接上代码。
首先是界面文件
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'untitled.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_Form(object):
    """Widget layout of the college-lookup window (pyuic5 output).

    ``setupUi`` builds the widgets on a host form and wires the two buttons;
    ``retranslateUi`` assigns every user-visible caption.
    """

    def setupUi(self, Form):
        """Create all child widgets on ``Form`` and connect the button signals.

        ``Form`` must provide ``getSchoolInfo`` and ``autodownload``: they are
        connected to the ``clicked`` signals of ``pushButton`` and
        ``pushButton_2`` respectively.
        """
        Form.setObjectName("Form")
        Form.resize(1124, 887)

        # Left column: a text area filling a vertical layout.
        self.verticalLayoutWidget = QtWidgets.QWidget(Form)
        self.verticalLayoutWidget.setGeometry(QtCore.QRect(10, 50, 291, 831))
        self.verticalLayoutWidget.setObjectName("verticalLayoutWidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget)
        self.verticalLayout.setContentsMargins(0, 0, 0, 0)
        self.verticalLayout.setObjectName("verticalLayout")
        self.textEdit = QtWidgets.QTextEdit(self.verticalLayoutWidget)
        self.textEdit.setObjectName("textEdit")
        self.verticalLayout.addWidget(self.textEdit)

        # Heading label placed above the left column.
        self.label = QtWidgets.QLabel(Form)
        self.label.setGeometry(QtCore.QRect(100, 20, 101, 21))
        self.label.setObjectName("label")

        # Right column: a second text area filling its own vertical layout.
        self.verticalLayoutWidget_2 = QtWidgets.QWidget(Form)
        self.verticalLayoutWidget_2.setGeometry(QtCore.QRect(300, 90, 781, 791))
        self.verticalLayoutWidget_2.setObjectName("verticalLayoutWidget_2")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.verticalLayoutWidget_2)
        self.verticalLayout_2.setContentsMargins(0, 0, 0, 0)
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        self.textEdit_2 = QtWidgets.QTextEdit(self.verticalLayoutWidget_2)
        self.textEdit_2.setObjectName("textEdit_2")
        self.verticalLayout_2.addWidget(self.textEdit_2)

        # Query row: prompt label, school-code input and the query button.
        self.horizontalLayoutWidget = QtWidgets.QWidget(Form)
        self.horizontalLayoutWidget.setGeometry(QtCore.QRect(300, 50, 381, 41))
        self.horizontalLayoutWidget.setObjectName("horizontalLayoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_3 = QtWidgets.QLabel(self.horizontalLayoutWidget)
        self.label_3.setObjectName("label_3")
        self.horizontalLayout.addWidget(self.label_3)
        self.lineEdit = QtWidgets.QLineEdit(self.horizontalLayoutWidget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtWidgets.QPushButton(self.horizontalLayoutWidget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)

        # Download row: description label and the bulk-download button.
        self.horizontalLayoutWidget_2 = QtWidgets.QWidget(Form)
        self.horizontalLayoutWidget_2.setGeometry(QtCore.QRect(690, 50, 391, 41))
        self.horizontalLayoutWidget_2.setObjectName("horizontalLayoutWidget_2")
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget_2)
        self.horizontalLayout_2.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.label_4 = QtWidgets.QLabel(self.horizontalLayoutWidget_2)
        self.label_4.setObjectName("label_4")
        self.horizontalLayout_2.addWidget(self.label_4)
        self.pushButton_2 = QtWidgets.QPushButton(self.horizontalLayoutWidget_2)
        self.pushButton_2.setObjectName("pushButton_2")
        self.horizontalLayout_2.addWidget(self.pushButton_2)

        # Title label near the top of the form.
        self.label_5 = QtWidgets.QLabel(Form)
        self.label_5.setGeometry(QtCore.QRect(580, 20, 261, 16))
        self.label_5.setObjectName("label_5")

        self.retranslateUi(Form)
        # Button clicks are handled by slots that the host form must define.
        self.pushButton.clicked.connect(Form.getSchoolInfo)
        self.pushButton_2.clicked.connect(Form.autodownload)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title and the captions of all labels and buttons."""
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Form"))
        # Caption: "School names and codes".
        self.label.setText(_translate("Form", "院校名称及代码"))
        # Caption: "Enter the school code here:".
        self.label_3.setText(_translate("Form", "在此输入院校代码:"))
        # Caption: "Click to query".
        self.pushButton.setText(_translate("Form", "点击查询"))
        # Caption: "Fetch the last three years of admission data for all schools:".
        self.label_4.setText(_translate("Form", "获取全国所有院校近三年录取信息:"))
        # Caption: "Auto download".
        self.pushButton_2.setText(_translate("Form", "自动下载"))
        # Window heading describing the tool's purpose.
        self.label_5.setText(_translate("Form", "志愿高考报名:查询近三年全国高校录取信息"))
界面文件大概长这个样子
每次启动软件后,爬虫自动获取全国高校的学校名称和代码,可供学生在右侧输入院校代码进行信息查询。
有很多很多,在这里就不一一展示了
下面是程序处理的核心代码
def process_url(self, url):
    """Build the "school_des" page address that corresponds to ``url``.

    ``url`` is split on the literal ``"HomePage"``.  In the part that follows
    it, the text after the first underscore is taken as the page file name
    (for example ``"123.html"``) and the text before that name's first dot as
    the school number.  The result is
    ``<prefix>HomePage/school_des/<number>/<file name>``.

    Args:
        url: Page address containing ``"HomePage"`` followed by a part with
            an underscore.

    Returns:
        The rewritten address as a string.

    Raises:
        IndexError: If ``url`` has no ``"HomePage"`` marker, or the part
            after the marker has no underscore.
    """
    marker = "HomePage"
    pieces = url.split(marker)
    page_name = pieces[1].split("_")[1]
    school_number = page_name.split(".")[0]
    return "/".join([pieces[0] + marker, "school_des", school_number, page_name])
def content(self, urls):
    """Download a page and return its introduction text without whitespace.

    The page is requested with ``requests.get``.  On an HTTP 200 response the
    body is decoded as UTF-8, the text nodes of every ``<p>`` directly under
    a ``<div class="intro_txt_y">`` are concatenated, and all carriage
    returns, line feeds and spaces are removed from the result.

    Args:
        urls: Address of the page to fetch; it is converted with ``str()``
            before the request is made.

    Returns:
        The cleaned text, or ``""`` when the status code is not 200 or no
        matching paragraph text is found.
    """
    # NOTE(review): no timeout is passed, so a stalled server blocks the
    # caller indefinitely -- consider adding one once callers handle it.
    res = requests.get(str(urls))
    print(urls)
    if res.status_code != 200:
        return ""

    res.encoding = "utf-8"
    selector = etree.HTML(res.text)
    if selector is None:
        # The parser may produce no tree for an empty document.
        return ""

    paragraphs = selector.xpath("//div[@class='intro_txt_y']/p/text()")
    str_content = "".join(str(item) for item in paragraphs)
    # str.replace returns a new string; the result has to be kept, otherwise
    # the whitespace stays in the returned text.
    return str_content.replace("\r", "").replace("\n", "").replace(" ", "")
查询院校信息、特色专业、近三年分数线等处理结果如下面所示
然后就是自动下载各个省市所有学校的专业排名、近三年分数线、特色专业以及学校所属位置等信息,并存储在csv文件中。
虽然这个工具不是万能的,但是至少能提供一些志愿填报的依据,同时对于院校选择较为迷茫的同学来说也是一种很好的参考。