大家知道,考研在很大程度上也是在考信息收集能力。每年都有很多人在这上面栽跟头:不能正确分析各大院校往年的录取信息,进而没能选择合适的报考院校。由于很多院校的录取信息是以 PDF 形式发布的(例如我手上的深大电通录取结果),我们需要先把 PDF 转化为 Excel。
(1)PDF
(2)Excel
有了 Excel,那我们就可以为所欲为了!
开始
import xlrd
import numpy as np
from pyecharts.charts import Bar
from pyecharts.charts import Pie, Grid
from pyecharts import options as opts
#==================== Prepare the data ====================
# Path of the Excel workbook to analyse.
# NOTE(review): xlrd 2.0+ reads only legacy .xls files — confirm that the
# installed xlrd version can still open this .xlsx workbook.
excel_path = "C:/深圳大学电子与信息工程学院2020年电子信息硕士生拟录取名单.xlsx"
data = xlrd.open_workbook(excel_path)

# Work on the first worksheet of the workbook.
table = data.sheet_by_index(0)

# 2. Extract the Excel table data
# Module-level list that Read_Excel appends the selected candidate records to.
tables = []
def Read_Excel(excel):
    """Append the admitted regular-plan candidates found in *excel* to ``tables``.

    Each kept row becomes a dict with the keys ``id``, ``name``, ``status``,
    ``preliminary_score``, ``retest_score``, ``total_score``, ``ranking`` and
    ``remarks``, read from columns 1-8 of the sheet.

    Args:
        excel: A sheet-like object exposing ``nrows`` and
            ``cell_value(row, col)`` (for example an ``xlrd`` sheet).

    Returns:
        None. Matching records are appended to the module-level ``tables``.
    """
    # Candidate rows start at the 4th row (index 3) of this workbook.
    # NOTE(review): the final row is skipped by the ``nrows - 1`` bound —
    # presumably a footer line; confirm against the spreadsheet.
    for row in range(3, excel.nrows - 1):
        # Read every cell from the sheet that was passed in, not from the
        # global ``table``, so the function works for any sheet.
        record = {
            "id": excel.cell_value(row, 1),
            "name": excel.cell_value(row, 2),
            "status": excel.cell_value(row, 3),
            "preliminary_score": excel.cell_value(row, 5),
            "retest_score": excel.cell_value(row, 6),
            "total_score": excel.cell_value(row, 7),
            "ranking": excel.cell_value(row, 8),
            "remarks": excel.cell_value(row, 4),
        }
        # Keep only candidates marked "拟录取" (provisionally admitted) under
        # the "普通计划" (regular plan); everything else is filtered out.
        if record["status"] == "拟录取" and record["remarks"] == "普通计划":
            tables.append(record)


# Print the extracted records to check that the data was read correctly:
# Run the function above
# Run the extraction on the loaded sheet and print every kept record.
Read_Excel(table)
for i in tables:
    print(i)
# As the printout shows, everything went smoothly.

# 3. Bucketed statistics
# This step is a matter of taste: here the preliminary scores are simply
# counted per 10-point band; adapt the processing to your own needs.

# Labels of the 10-point score bands, from 300 up to (but excluding) 410.
bar_x_axis_data = ("300-310", "310-320", "320-330", "330-340", "340-350", "350-360", "360-370", "370-380", "380-390", "390-400", "400-410")
bucket_low = 300
bucket_width = 10
bucket_counts = [0] * len(bar_x_axis_data)

min_score = 999
max_score = 0
# Count how many candidates fall into each band and track the extremes.
for i in tables:
    score = i["preliminary_score"]
    if score > max_score:
        max_score = score
    if score < min_score:
        min_score = score
    # Compute the band arithmetically rather than with ``score in range(...)``:
    # range membership only matches whole numbers, so a fractional score
    # would silently be left out of every band.
    slot = int((score - bucket_low) // bucket_width)
    if 0 <= slot < len(bucket_counts):
        bucket_counts[slot] += 1

# Build two tuples to make charting easier later on.
bar_y_axis_data = tuple(bucket_counts)

# Keep the individual per-band counters bound under their original names.
(num_score_300_310, num_score_310_320, num_score_320_330,
 num_score_330_340, num_score_340_350, num_score_350_360,
 num_score_360_370, num_score_370_380, num_score_380_390,
 num_score_390_400, num_score_400_410) = bar_y_axis_data
绘制可视化图形
# 1. Bar chart
#===================== Bar chart =====================
# Build the bar chart step by step: band labels on the x axis, per-band
# counts as the single series.
bar_chart = Bar()
bar_chart.add_xaxis(bar_x_axis_data)
bar_chart.add_yaxis("录取考生", bar_y_axis_data, color="#af00ff")
bar_chart.set_global_opts(title_opts=opts.TitleOpts(title="数量"))
# ``c`` receives whatever render() returns for the generated HTML page.
c = bar_chart.render("C:/录取数据图.html")

# 2. Pie chart
#====================== Pie chart ======================
# Rich-text styles referenced by the label formatter below.
pie_rich_styles = {
    "b": {"fontSize": 16, "lineHeight": 33},
    "per": {
        "color": "#eee",
        "backgroundColor": "#334455",
        "padding": [2, 4],
        "borderRadius": 2,
    },
}
pie_label_opts = opts.LabelOpts(
    formatter="{b|{b}: }{c} {per|{d}%} ",
    rich=pie_rich_styles,
)

# One [label, count] pair per score band.
pie_pairs = list(map(list, zip(bar_x_axis_data, bar_y_axis_data)))

pie_chart = Pie(init_opts=opts.InitOpts(height="800px", width="1200px"))
pie_chart.add(
    "录取分数概览",
    pie_pairs,
    center=["35%", "38%"],
    radius="40%",
    label_opts=pie_label_opts,
)
pie_chart.set_global_opts(
    title_opts=opts.TitleOpts(title="录取", subtitle='Made by 王昊'),
    legend_opts=opts.LegendOpts(pos_left="0%", pos_top="65%"),
)
# ``c`` receives whatever render() returns for the generated HTML page.
c = pie_chart.render("C:/录取饼图.html")
大功告成!!是不是超级直观哈哈!
版权声明:本文为 CSDN 博主「Waao666」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/weixin_40973138/article/details/106190092
推荐阅读