# [Code-hosting page notice, not part of the script] Code pull complete; the page will refresh automatically.
# coding: utf-8
import sys
import importlib
importlib.reload(sys)
import requests
from lxml import etree
import json
# Endpoint that the lookup request is sent to.
baseUrl = "http://njtczs.university-hr.cn/search_zs.php"

# HTTP headers sent with the request: a desktop Chrome user-agent string.
header = dict([
    ('user-agent', "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"),
])

# Query-string parameters; the first three are overwritten from the command
# line by the __main__ block.
param = dict(
    name_find='',    # exam number
    name_find2='',   # student name
    searchtype=0,    # 0 = candidate number, 1 = admission-ticket number
    actiontype=0,
)

# Result record that is filled in from the response page and printed as JSON.
# Every field starts out as an empty string.
infoDict = dict.fromkeys(
    (
        'name',            # student name
        'id_card',         # ID card number
        'gender',          # gender
        'nationality',     # ethnicity
        'admission_time',  # admission year
        'professional',    # admitted major
        'stu_type',        # candidate category
        'express',         # EMS tracking number of the admission notice
    ),
    '',
)
if __name__ == '__main__':
    # Command-line entry point: query the lookup page for one student and
    # print the extracted record as a single JSON object on stdout.
    #
    # Arguments:
    #   argv[1]  exam number
    #   argv[2]  student name
    #   argv[3]  search type (0 = candidate number, 1 = admission-ticket number)
    #
    # On any failure the script exits with a non-zero status and prints
    # nothing to stdout, so a caller never receives a partial record.
    if len(sys.argv) < 4:
        sys.exit('usage: %s <exam_number> <student_name> <searchtype: 0|1>'
                 % sys.argv[0])

    param['name_find'] = sys.argv[1]
    # The name is passed as GBK bytes instead of str; presumably the site
    # expects GBK-encoded query values, since its pages are GBK (see below).
    param['name_find2'] = sys.argv[2].encode('gbk')
    param['searchtype'] = sys.argv[3]

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(baseUrl, headers=header, params=param, timeout=10)
    # The returned page is GBK-encoded; without this line it would be decoded
    # as UTF-8 and come out garbled.
    response.encoding = 'gbk'
    tree = etree.HTML(response.text)

    # Every value lives in a cell of the third table under the third <div>;
    # only the row and column differ from field to field.
    cell_xpath = "/html/body/div[3]//table[3]/tr[%d]/td[%d]/text()"

    # Student name: all text nodes of the cell are joined, so an absent cell
    # simply yields an empty string.
    infoDict['name'] = ''.join(tree.xpath(cell_xpath % (1, 2))).strip()

    # (field, row, column) for the remaining cells; the first text node of
    # each cell is the value.
    required_cells = (
        ('id_card', 8, 2),         # ID card number
        ('gender', 7, 2),          # gender
        ('nationality', 7, 4),     # ethnicity
        ('admission_time', 2, 2),  # admission year
        ('professional', 6, 2),    # admitted major
        ('stu_type', 8, 4),        # candidate category
        ('express', 10, 2),        # EMS tracking number of the admission notice
    )
    for field, row, col in required_cells:
        texts = tree.xpath(cell_xpath % (row, col))
        if not texts:
            # Exit with a clear message instead of a bare IndexError.
            sys.exit('field %r not found in the response page '
                     '(no matching record, or the page layout changed)' % field)
        infoDict[field] = texts[0].strip()

    # ensure_ascii=False keeps Chinese characters readable in the output.
    print(json.dumps(infoDict, ensure_ascii=False))
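# [Code-hosting page notice, not part of the script] This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing functions.
# If you confirm that the content involves no inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false information / worthless content, and nothing that violates national laws and regulations, you may click submit to appeal and it will be handled as soon as possible.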