某班课网页爬取题库,自动答题(3) 此次更新的是题库不再重复爬取相同的题目
from selenium import webdriver
from lxml import etree
import requests
import time
import random
import json
from webelement import WebElement
# HTTP request headers: a desktop Edge (Chromium) user-agent string.
# The trailing space inside the value is part of the original string.
_USER_AGENT = "".join((
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ",
    "Chrome/90.0.4430.93 Safari/537.36 Edg/90.0.818.56 ",
))
headers = {"user-agent": _USER_AGENT}
# Start Microsoft Edge under Selenium control using a local driver binary.
# NOTE(review): the driver path looks truncated ("...\new_") - confirm that it
# points at the real Edge driver executable on this machine.
browser = webdriver.Edge(executable_path=r"C:\Users\baibe\PycharmProjects\reptile\new_")

# Ask the browser to open the login page.
# The original statement was garbled and only the "/login" path survived.
# TODO: set BASE_URL to the target site's scheme and host; while it is empty
# the navigation below cannot succeed.
BASE_URL = ""
browser.get(BASE_URL + "/login")
# --- Log in: type the credentials, solve the captcha by hand, submit ---

# Locate the account and password inputs and fill them in.
account_input = browser.find_element_by_xpath('/html/body/div[2]/div/div/div[2]/div/div[1]/form/div[2]/input')
account_input.send_keys('账号')
password_input = browser.find_element_by_xpath('/html/body/div[2]/div/div/div[2]/div/div[1]/form/div[3]/input')
password_input.send_keys('密码')

# Use page_source to get the current page's HTML, then pull the captcha
# image URL out of the login form.
response = browser.page_source
tree = etree.HTML(response)
code_url = tree.xpath('/html/body/div[2]/div/div/div[2]/div/div[1]/form/div[4]/img/@src')[0]

# Download the captcha image bytes and save them so a human can read them.
# NOTE(review): this request is made outside the browser session (no shared
# cookies), and code_url is used as-is - confirm the image fetched here is
# the same captcha the browser's login form expects.
text_response = requests.get(url=code_url, headers=headers).content
with open("../code_text.jpg", "wb") as fp:
    fp.write(text_response)

# The operator reads the saved image and types the captcha text.
code_text = input("请查看验证码,并在30秒内输⼊:")
code_text_input = browser.find_element_by_xpath('/html/body/div[2]/div/div/div[2]/div/div[1]/form/div[4]/input[1]')
# Interact with the input element: type the captcha text.
code_text_input.send_keys(code_text)

# Submit the form and give the next page time to load.
login = browser.find_element_by_id('submit')
login.click()
time.sleep(5)
# --- Navigate from the landing page to the exam list ---
_TOPIC_LINK_XPATH = '/html/body/div[2]/div[2]/table/tbody/tr/td/div[1]/div[3]/div[1]/a[2]'
_EXAM_LINK_XPATH = '/html/body/section/section/div[1]/div[4]/a'

# Follow the topic link and wait for the result to load.
topic_url = browser.find_element_by_xpath(_TOPIC_LINK_XPATH)
topic_url.click()
time.sleep(2)

# Continue in the second window handle (presumably the window/tab opened by
# the click above).
handles = browser.window_handles
browser.switch_to.window(handles[1])

# Follow the exam link in that window.
exam_url = browser.find_element_by_xpath(_EXAM_LINK_XPATH)
exam_url.click()
time.sleep(2)
# Scraped questions keyed by the question element's id attribute; each value
# is the list of text nodes found under that element.  Keying by id is what
# keeps a question from being stored twice across rounds.
#
# NOTE(review): the source of this script had lost all indentation, so the
# nesting below is a reconstruction; places where more than one reading was
# possible are marked individually.
questions = {}

for every in range(50):  # run 50 rounds
    # Open the first entry of the exam list and press the confirm button of
    # the dialog that follows.
    exam_detail = browser.find_element_by_xpath('/html/body/section/section/div[2]/div[3]/ul/li[1]/div[2]/a[2]')
    exam_detail.click()
    time.sleep(2)
    confirm_btn = browser.find_element_by_xpath('/html/body/div[12]/div[3]/div/div[1]')
    confirm_btn.click()
    time.sleep(5)

    # When a network-problem popup appears, close it automatically and
    # confirm again, retrying until it stops appearing.
    while True:
        try:
            network_anomaly = browser.find_element_by_xpath('/html/body/div[12]/div[3]/div/div/button')
            network_anomaly.click()
            # NOTE(review): the second click is present in the source; if it
            # raises once the popup is gone, the else branch never runs.
            network_anomaly.click()
        except Exception:
            # No popup (or it is already gone): nothing left to dismiss.
            break
        else:
            confirm_btn = browser.find_element_by_xpath('/html/body/div[12]/div[3]/div/div[1]')
            confirm_btn.click()

    # Collect every question board on the exam page.
    bodylist = browser.find_elements_by_xpath('/html/body/section/section/div[5]/div[@class="question-board"]')
    print(bodylist)
    browser.maximize_window()

    for each in bodylist:
        # NOTE(review): one statement at the top of this loop body was lost
        # from the source (only a stray ")" remained) and is not restored.
        templist = each.find_elements_by_tag_name('label')
        # Pick a random option; a small change here could make every answer
        # correct.
        islist = random.choice(templist)

        # Same popup handling as above, this time re-picking the option.
        while True:
            try:
                network_anomaly = browser.find_element_by_xpath('/html/body/div[7]/div[3]/div/div/button')
                network_anomaly.click()
            except Exception:
                break
            else:
                # NOTE(review): whether the click and sleep below belong in
                # this branch is a reconstruction - confirm.
                islist = random.choice(templist)
                islist.click()
                time.sleep(0.5)

        islist.click()
        time.sleep(0.5)
        print("选项已勾选!")

    # Hand in the exam, confirm, then open the result details.
    print("正在交卷!")
    submit = browser.find_element_by_xpath('/html/body/section/aside/div[2]/div[2]/div[3]/a[2]')
    submit.click()
    time.sleep(2)
    confirm_again = browser.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]')
    confirm_again.click()
    time.sleep(2)
    check_detail = browser.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]')
    check_detail.click()
    time.sleep(2)

    # Parse the page now shown and record every question not seen before.
    print("正在读取题⽬!")
    exam_answers = browser.page_source
    exam_answers_tree = etree.HTML(exam_answers)
    answers_list = exam_answers_tree.xpath('/html/body/section/section/div[3]/div[@class="question-board"]')
    for answers in answers_list:
        question_id = answers.xpath('./@id')[0]
        if question_id not in questions:  # skip ids that were already scraped
            questions[question_id] = answers.xpath('.//text()')
            print(question_id, "读取成功!")

    # Click the link that leads back so the next round can start.
    again_btn = browser.find_element_by_xpath('/html/body/section/div/a')
    again_btn.click()
# Append every collected question to the output file, then report the total
# and close the browser.
# Remember to change this path to one that exists on your own machine.
# The encoding is set explicitly so that writing the scraped (Chinese) text
# does not depend on the platform's default code page.
with open(r'C:\Users\baibe\PycharmProjects\reptile\new_reptile\yiban_', "a+", encoding="utf-8") as fb:
    for each in questions.values():
        # Each value is a list of text fragments; write them back to back.
        for i in each:
            fb.write(i)
        print(each, "存储成功!")

print("总共抓到%s道题!" % len(questions.keys()))
browser.quit()
>ie主页被篡改
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论