本次案例使用OpenCV和selenium來解決滑塊驗證碼。
先說一下思路:
- 彈出滑塊驗證碼後,使用selenium元素截圖將驗證碼整個背景圖截取出來
- 將需要滑動的小圖單獨截取出來,最好將小圖與背景圖頂部的像素距離獲取到,這樣可以將背景圖上下多餘的邊框截取掉
- 使用OpenCV將背景圖和小圖進行灰度處理,並對小圖再次進行二值化(全局閾值),這樣就可以利用OpenCV在背景圖中找到小圖所在的位置
- 用OpenCV獲取到相差的距離後,利用selenium的鼠標拖動方法將滑塊拖拉至終點。
詳細步驟:
先獲取驗證碼背景圖:在selenium中,對頁面元素對象使用screenshot方法可以將指定的元素圖片截取出來
import os
from selenium import webdriver
# Launch Chrome and open the page that triggers the slider captcha.
browser = webdriver.Chrome()
browser.get("https://www.toutiao.com/c/user/token/MS4wLjABAAAA4EKNlqVeNTTuEdWn0VytNS8cdODKTsNNwLTxOnigzZtclro2Kylvway5mTyTUKvz/")
# The screenshot is written to the current user's Desktop folder.
save_path = os.path.join(os.path.expanduser('~'), "Desktop", "background.png")
# "element_id_name" is a placeholder for the id of the captcha background
# element. The generic find_element(by, value) form is used because the
# find_element_by_* helpers were removed in Selenium 4.3; the locator
# strategy string "id" is the value of By.ID.
browser.find_element("id", "element_id_name").screenshot(save_path)
截取後的驗證碼背景圖和需要滑動的小圖如下:
再將小圖與背景圖頂部的像素距離獲取到,指的是下面圖中紅邊的高度:
如果小圖在HTML中是單獨存在的元素,那麼它距離頂部的高度會定義在頁面元素的樣式中,使用selenium頁面元素對象的value_of_css_property方法可以獲取到該像素距離。
獲取這個距離是因為要把背景圖上下兩邊的多餘部分切除,從而保留關鍵的圖像部位,能夠大幅度提高識別率。
# "xpath_element" is a placeholder for the XPath of the small puzzle-piece
# element. find_element("xpath", ...) replaces find_element_by_xpath, which
# was removed in Selenium 4.3; "xpath" is the value of By.XPATH.
element_object = browser.find_element("xpath", "xpath_element")
# CSS `top` of the piece, i.e. its vertical offset inside the background.
# The value is a string; presumably it carries a "px" suffix that must be
# stripped before converting to int - verify against the page.
px = element_object.value_of_css_property("top")
接下來就要對圖像進行灰度處理:
import numpy
import cv2
def make_threshold(img):
    """Binarise an image with a fixed global threshold.

    255 is subtracted from every pixel (with uint8 input the subtraction
    wraps around), then an inverted binary threshold at 127 is applied so
    the result contains only the values 0 and 255.

    :param img: image array to binarise
    :return: the thresholded image returned by ``cv2.threshold``
    """
    # Array of 255s with the same shape as the input.
    full_white = numpy.full(img.shape, 255, dtype=numpy.uint8)
    shifted = img - full_white
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    _, binary = cv2.threshold(shifted, 127, 255, cv2.THRESH_BINARY_INV)
    return binary
class ComputeDistance:
    """Find where the small puzzle piece belongs inside the captcha background.

    The piece is scaled, converted to grayscale and binarised, then located
    in a horizontal band of the grayscale background with OpenCV template
    matching. ``run()`` returns the top-left corner of the best match.
    """

    def __init__(self, Background_path: str, image_to_move: str, offset_top_px: int, show_img=False):
        """
        :param Background_path: path of the captcha background image
        :param image_to_move: path of the small piece image that has to be dragged
        :param offset_top_px: distance in pixels between the top of the
            background and the top of the piece
        :param show_img: stored on the instance; this version of the class
            does not read it
        :raises FileNotFoundError: if either image cannot be read
        """
        self.Background_img = cv2.imread(Background_path)
        small_img_data = cv2.imread(image_to_move, cv2.IMREAD_UNCHANGED)
        # cv2.imread reports an unreadable file by returning None instead of raising.
        if self.Background_img is None:
            raise FileNotFoundError(f"cannot read background image: {Background_path}")
        if small_img_data is None:
            raise FileNotFoundError(f"cannot read slider piece image: {image_to_move}")
        self.offset_px = offset_top_px
        self.show_img = show_img
        # Scale factor that brings the piece to a width of 50 px.
        # NOTE(review): 50 is presumably the width the piece is rendered at
        # on the page - confirm.
        scaleX = 50 / small_img_data.shape[1]
        # The same factor is applied to both axes, so the aspect ratio is
        # kept; no interpolation argument is passed, so cv2.resize uses its
        # default method.
        self.tpl_img = cv2.resize(small_img_data, (0, 0), fx=scaleX, fy=scaleX)
        # Band of the background that is searched; filled in by cutting_background().
        self.Background_cutting = None
        # Row of Background_img at which Background_cutting starts.
        self._crop_top = 0

    def tpl_op(self):
        """Match the piece against the background and mark the result.

        A red rectangle is drawn on ``self.Background_img`` at the matched
        position.

        :return: ``(x, y)`` of the match's top-left corner in background coordinates
        """
        tpl_gray = cv2.cvtColor(self.tpl_img, cv2.COLOR_BGR2GRAY)
        h, w = tpl_gray.shape
        # Search the cropped band when it exists, otherwise the whole background.
        if self.Background_cutting is None:
            search_area, first_row = self.Background_img, 0
        else:
            search_area, first_row = self.Background_cutting, self._crop_top
        Background_gray = cv2.cvtColor(search_area, cv2.COLOR_BGR2GRAY)
        threshold_img = make_threshold(tpl_gray)
        result = cv2.matchTemplate(Background_gray, threshold_img, cv2.TM_CCOEFF_NORMED)
        # With TM_CCOEFF_NORMED the best match is the maximum of the result map.
        _, _, _, max_loc = cv2.minMaxLoc(result)
        # NOTE(review): the 5 px shift to the left looks like an empirical
        # correction - confirm.
        # Rows are converted from band coordinates back to background coordinates.
        top_left = (max_loc[0] - 5, max_loc[1] + first_row)
        bottom_right = (top_left[0] + w, top_left[1] + h)
        # rectangle(image, top-left, bottom-right, BGR colour, line thickness)
        cv2.rectangle(self.Background_img, top_left, bottom_right, (0, 0, 255), 2)
        return top_left

    def cutting_background(self):
        """Crop the background to the rows around the piece, with a 10 px margin."""
        height = self.tpl_img.shape[0]
        # Clamp at 0: a negative start would be read by the slice as
        # "counted from the bottom" and select the wrong rows.
        self._crop_top = max(self.offset_px - 10, 0)
        self.Background_cutting = self.Background_img[self._crop_top: self.offset_px + height + 10, :]

    def run(self):
        """Crop the background, run the match and return the matched top-left corner."""
        # When the piece is as tall as the background the crop is unnecessary;
        # tpl_op() searches the whole background if cutting_background() is skipped.
        self.cutting_background()
        return self.tpl_op()
if __name__ == '__main__':
    # The three values below are placeholders that must be replaced before
    # running: two image file paths and the piece's top offset, which
    # ComputeDistance expects as an int.
    image_path1 = "背景圖路徑"
    image_path2 = "小圖路徑"
    distance_px = "像素距離"
    main = ComputeDistance(image_path1, image_path2, distance_px)
    main.run()
上面代碼可以返回小圖到凹點的距離,現在我們可以看一下灰度處理中的圖片樣子:
得到距離後還要對這個距離數字進行處理,把它拆分成若干個較小的整數。這麼做的目的是在拖動的時候不能一下拖動到終點,
要模仿人類的手速緩緩向前移動,不然很明顯是機器在操控。
比如到終點的距離為100,那麼要把它轉為類似 [8, 6, 11, 10, 3, 6, 3, -2, 4, 0, 15, 1, 9, 6, -2, 4, 1, -2, 15, 6, -2] 的列表,列表中的數加起來正好為100。
最簡單的轉換:
def handle_distance(distance):
    """Split a straight-line distance into a track of small random steps.

    Random steps between -2 and 15 are appended until their running total
    reaches the target; if the total overshoots, one final correcting step
    is appended so that the steps always add up to exactly ``distance``.

    :param distance: total horizontal distance to cover, in pixels
    :return: list of step sizes whose sum equals ``distance``; empty for 0
    """
    import random
    slow_distance = []
    # Running total, kept so the list is not re-summed on every iteration.
    travelled = 0
    while travelled < distance:
        step = random.randint(-2, 15)
        slow_distance.append(step)
        travelled += step
    if travelled != distance:
        # Cancel the overshoot (or cover a non-positive distance in one step).
        slow_distance.append(distance - travelled)
    return slow_distance
有了到終點的距離,接下來就開始拖動吧:
import time
from random import randint
from selenium.webdriver.common.action_chains import ActionChains
def move_slider(website, slider, track, **kwargs):
    """Drag the slider element along a track of horizontal steps.

    :param website: selenium browser (WebDriver) object
    :param slider: selenium element object of the slider handle
    :param track: sequence of horizontal offsets, one per mouse move
    :param kwargs: optional ``name`` used in the printed messages
    :return: ``True`` after a completed drag, ``False`` if any step raised;
        if the first step is larger than 200 that value is returned
        unchanged and nothing is dragged
    """
    name = kwargs.get('name', '滑塊')
    try:
        # NOTE(review): presumably a sanity check that rejects implausible
        # tracks - confirm the intent of returning the value itself.
        if track[0] > 200:
            return track[0]
        # Press the mouse button on the slider and keep it held.
        ActionChains(website).click_and_hold(slider).perform()
        time.sleep(0.15)
        for step in track:
            # Add a small random vertical wobble to every move.
            ActionChains(website).move_by_offset(xoffset=step, yoffset=randint(-2, 2)).perform()
        # Pause, then release the mouse button.
        time.sleep(1)
        ActionChains(website).release(slider).perform()
        time.sleep(1)
        # Move the pointer away by a random amount.
        ActionChains(website).move_by_offset(xoffset=randint(200, 300), yoffset=randint(200, 300)).perform()
        print(f'[網(wǎng)頁] 拖拽 {name}')
        return True
    except Exception as e:
        # Best effort: report the failure and tell the caller explicitly
        # instead of falling through and returning None.
        print(f'[網(wǎng)頁] 拖拽 {name} 失敗 {e}')
        return False
教程結束,讓我們結合上面代碼做一個案例吧。
訪問今日頭條某博主的主頁,直接打開主頁的鏈接會出現驗證碼。
下面的代碼在使用pip安裝好相關依賴庫後可直接運行。
調用ComputeDistance類時,傳入參數 show_img=True 可以在拖動驗證碼前展示在背景圖中識別出的終點區域,如:
# Illustrative call: background_path, small_path and px stand for the two
# image paths and the piece's top offset; show_img=True asks the full
# ComputeDistance class to display the annotated background image.
distance_obj = ComputeDistance(background_path, small_path, px, show_img=True)
OK,下面為案例代碼:
import os
import time
import requests
import cv2
import numpy
from random import randint
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
def show_image(img_array, name='img', resize_flag=False):
    """Show an image in an OpenCV window and wait for a key press.

    :param img_array: image array to display
    :param name: title of the window
    :param resize_flag: when true, an image larger than 960x540 is scaled
        down (keeping its aspect ratio) before being shown
    """
    maxHeight = 540
    maxWidth = 960
    # Largest factor that makes the image fit into the box on both axes.
    scale = min(maxWidth / img_array.shape[1], maxHeight / img_array.shape[0])
    # Only shrink; an image that already fits is shown as it is.
    if resize_flag and scale < 1:
        img_array = cv2.resize(img_array, (0, 0), fx=scale, fy=scale)
    cv2.imshow(name, img_array)
    # waitKey(0) blocks until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyWindow(name)
def make_threshold(img):
    """Binarise an image with a fixed global threshold.

    255 is subtracted from every pixel (with uint8 input the subtraction
    wraps around), then an inverted binary threshold at 127 is applied so
    the result contains only the values 0 and 255.

    :param img: image array to binarise
    :return: the thresholded image returned by ``cv2.threshold``
    """
    # Array of 255s with the same shape as the input.
    full_white = numpy.full(img.shape, 255, dtype=numpy.uint8)
    shifted = img - full_white
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    _, binary = cv2.threshold(shifted, 127, 255, cv2.THRESH_BINARY_INV)
    return binary
def move_slider(website, slider, track, **kwargs):
    """Drag the slider element along a track of horizontal steps.

    :param website: selenium browser (WebDriver) object
    :param slider: selenium element object of the slider handle
    :param track: sequence of horizontal offsets, one per mouse move
    :param kwargs: optional ``name`` used in the printed messages
    :return: ``True`` after a completed drag, ``False`` if any step raised;
        if the first step is larger than 200 that value is returned
        unchanged and nothing is dragged
    """
    name = kwargs.get('name', '滑塊')
    try:
        # NOTE(review): presumably a sanity check that rejects implausible
        # tracks - confirm the intent of returning the value itself.
        if track[0] > 200:
            return track[0]
        # Press the mouse button on the slider and keep it held.
        ActionChains(website).click_and_hold(slider).perform()
        time.sleep(0.15)
        for step in track:
            # Add a small random vertical wobble to every move.
            ActionChains(website).move_by_offset(xoffset=step, yoffset=randint(-2, 2)).perform()
        # Pause, then release the mouse button.
        time.sleep(1)
        ActionChains(website).release(slider).perform()
        time.sleep(1)
        # Move the pointer away by a random amount.
        ActionChains(website).move_by_offset(xoffset=randint(200, 300), yoffset=randint(200, 300)).perform()
        print(f'[網(wǎng)頁] 拖拽 {name}')
        return True
    except Exception as e:
        # Best effort: report the failure and tell the caller explicitly
        # instead of falling through and returning None.
        print(f'[網(wǎng)頁] 拖拽 {name} 失敗 {e}')
        return False
class ComputeDistance:
    """Find where the small puzzle piece belongs inside the captcha background.

    The piece is scaled, converted to grayscale and binarised, then located
    in a horizontal band of the grayscale background with OpenCV template
    matching. ``run()`` returns the top-left corner of the best match.
    """

    def __init__(self, Background_path: str, image_to_move: str, offset_top_px: int, show_img=False):
        """
        :param Background_path: path of the captcha background image
        :param image_to_move: path of the small piece image that has to be dragged
        :param offset_top_px: distance in pixels between the top of the
            background and the top of the piece
        :param show_img: whether to display the annotated background image
        :raises FileNotFoundError: if either image cannot be read
        """
        self.Background_img = cv2.imread(Background_path)
        small_img_data = cv2.imread(image_to_move, cv2.IMREAD_UNCHANGED)
        # cv2.imread reports an unreadable file by returning None instead of raising.
        if self.Background_img is None:
            raise FileNotFoundError(f"cannot read background image: {Background_path}")
        if small_img_data is None:
            raise FileNotFoundError(f"cannot read slider piece image: {image_to_move}")
        self.offset_px = offset_top_px
        self.show_img = show_img
        # Scale factor that brings the piece to a width of 50 px.
        # NOTE(review): 50 is presumably the width the piece is rendered at
        # on the page - confirm.
        scaleX = 50 / small_img_data.shape[1]
        # The same factor is applied to both axes, so the aspect ratio is
        # kept; no interpolation argument is passed, so cv2.resize uses its
        # default method.
        self.tpl_img = cv2.resize(small_img_data, (0, 0), fx=scaleX, fy=scaleX)
        # Band of the background that is searched; filled in by cutting_background().
        self.Background_cutting = None
        # Row of Background_img at which Background_cutting starts.
        self._crop_top = 0

    def show(self, img):
        """Display ``img`` with show_image() when ``show_img`` is enabled."""
        if self.show_img:
            show_image(img)

    def tpl_op(self):
        """Match the piece against the background and mark the result.

        A red rectangle is drawn on ``self.Background_img`` at the matched
        position, and the image is displayed when ``show_img`` is enabled.

        :return: ``(x, y)`` of the match's top-left corner in background coordinates
        """
        tpl_gray = cv2.cvtColor(self.tpl_img, cv2.COLOR_BGR2GRAY)
        h, w = tpl_gray.shape
        # Search the cropped band when it exists, otherwise the whole background.
        if self.Background_cutting is None:
            search_area, first_row = self.Background_img, 0
        else:
            search_area, first_row = self.Background_cutting, self._crop_top
        Background_gray = cv2.cvtColor(search_area, cv2.COLOR_BGR2GRAY)
        threshold_img = make_threshold(tpl_gray)
        result = cv2.matchTemplate(Background_gray, threshold_img, cv2.TM_CCOEFF_NORMED)
        # With TM_CCOEFF_NORMED the best match is the maximum of the result map.
        _, _, _, max_loc = cv2.minMaxLoc(result)
        # NOTE(review): the 5 px shift to the left looks like an empirical
        # correction - confirm.
        # Rows are converted from band coordinates back to background coordinates.
        top_left = (max_loc[0] - 5, max_loc[1] + first_row)
        bottom_right = (top_left[0] + w, top_left[1] + h)
        # rectangle(image, top-left, bottom-right, BGR colour, line thickness)
        cv2.rectangle(self.Background_img, top_left, bottom_right, (0, 0, 255), 2)
        self.show(self.Background_img)
        return top_left

    def cutting_background(self):
        """Crop the background to the rows around the piece, with a 10 px margin."""
        height = self.tpl_img.shape[0]
        # Clamp at 0: a negative start would be read by the slice as
        # "counted from the bottom" and select the wrong rows.
        self._crop_top = max(self.offset_px - 10, 0)
        self.Background_cutting = self.Background_img[self._crop_top: self.offset_px + height + 10, :]

    def run(self):
        """Crop the background, run the match and return the matched top-left corner."""
        # When the piece is as tall as the background the crop is unnecessary;
        # tpl_op() searches the whole background if cutting_background() is skipped.
        self.cutting_background()
        return self.tpl_op()
class TodayNews(object):
    """Open a Toutiao user page and solve the slider captcha it presents."""

    def __init__(self):
        self.url = (
            "https://www.toutiao.com/c/user/token/"
            "MS4wLjABAAAA4EKNlqVeNTTuEdWn0VytNS8cdODKTsNNwLTxOnigzZtclro2Kylvway5mTyTUKvz/"
        )
        # Working folder on the current user's Desktop for the captcha images.
        self.process_folder = os.path.join(os.path.expanduser('~'), "Desktop", "today_news")
        self.background_path = os.path.join(self.process_folder, "background.png")
        self.small_path = os.path.join(self.process_folder, "small.png")
        # Top offset of the piece, kept as the digit string read from the page.
        self.small_px = None
        # Element name -> XPath expression, filled in by add_page_element().
        self.xpath = {}
        self.browser = None

    def check_file_exist(self):
        """Create the working folder if it does not exist yet."""
        # makedirs also creates a missing parent folder and tolerates an
        # already existing target.
        os.makedirs(self.process_folder, exist_ok=True)

    def start_browser(self):
        """Start Chrome and maximise its window."""
        self.browser = webdriver.Chrome()
        self.browser.maximize_window()

    def close_browser(self):
        """Quit the browser."""
        self.browser.quit()

    def wait_element_loaded(self, xpath: str, timeout=10, close_browser=True):
        """Poll until the element exists or the timeout elapses.

        :param xpath: XPath expression of the element
        :param timeout: maximum waiting time in seconds
        :param close_browser: whether to quit the browser after a timeout
        :return: ``True`` as soon as the element is found, ``False`` on timeout
        """
        started = int(time.time())
        while int(time.time()) - started < timeout:
            # noinspection PyBroadException
            try:
                # find_element("xpath", ...) replaces find_element_by_xpath,
                # which was removed in Selenium 4.3.
                if self.browser.find_element("xpath", xpath):
                    return True
            except Exception:
                # A failed lookup only means "not there yet"; keep polling.
                pass
            # Sleep after a failed lookup as well, so the loop does not spin.
            time.sleep(1)
        if close_browser:
            self.close_browser()
        return False

    def add_page_element(self):
        """Register the XPath expressions of the captcha elements."""
        self.xpath['background_img'] = '//div[@role="dialog"]/div[2]/img[1]'
        self.xpath['small_img'] = '//div[@role="dialog"]/div[2]/img[2]'
        self.xpath['slider_button'] = '//div[@id="secsdk-captcha-drag-wrapper"]/div[2]'

    def process_main(self):
        """Load the page and try up to 10 times to solve the captcha."""
        self.browser.get(self.url)
        for _ in range(10):
            # No captcha within 5 seconds: nothing (more) to solve.
            if not self.wait_element_loaded(self.xpath['background_img'], timeout=5, close_browser=False):
                break
            time.sleep(1)
            # Screenshot of the background element.
            self.browser.find_element("xpath", self.xpath['background_img']).screenshot(self.background_path)
            small_img = self.browser.find_element("xpath", self.xpath['small_img'])
            # The piece image is downloaded from its src URL.
            small_url = small_img.get_attribute("src")
            # CSS top of the piece: drop the "px" unit and any fractional part.
            self.small_px = small_img.value_of_css_property("top").replace("px", "").split(".")[0]
            response = requests.get(small_url)
            if not response.ok:
                # Without a fresh piece image the match would use a stale or
                # missing file, so reload and try again.
                self.browser.refresh()
                continue
            with open(self.small_path, "wb") as file:
                file.write(response.content)
            time.sleep(1)
            if self.process_slider():
                break
            # The drag did not clear the captcha: reload the page and retry.
            self.browser.refresh()

    @staticmethod
    def handle_distance(distance):
        """Split a straight-line distance into a track of small random steps.

        Random steps between -2 and 15 are appended until their running
        total reaches the target; if the total overshoots, one final
        correcting step is appended so the steps add up to ``distance``.

        :param distance: total horizontal distance to cover, in pixels
        :return: list of step sizes whose sum equals ``distance``; empty for 0
        """
        import random
        slow_distance = []
        # Running total, kept so the list is not re-summed on every iteration.
        travelled = 0
        while travelled < distance:
            step = random.randint(-2, 15)
            slow_distance.append(step)
            travelled += step
        if travelled != distance:
            slow_distance.append(distance - travelled)
        return slow_distance

    def process_slider(self):
        """Compute the drag distance, drag the slider and report the outcome.

        :return: ``True`` if the slider element is gone after the drag, else ``False``
        """
        distance_obj = ComputeDistance(self.background_path, self.small_path, int(self.small_px), show_img=False)
        # run() returns the matched top-left corner; its x value is the distance.
        distance = distance_obj.run()
        track = self.handle_distance(distance[0])
        # NOTE(review): the extra -2 step pulls the slider back slightly at
        # the end, presumably an empirical tweak - confirm.
        track.append(-2)
        slider_element = self.browser.find_element("xpath", self.xpath['slider_button'])
        move_slider(self.browser, slider_element, track)
        time.sleep(2)
        # A slider that can no longer be found means the captcha was passed.
        return not self.wait_element_loaded(self.xpath['slider_button'], timeout=2, close_browser=False)

    def run(self):
        """Prepare the folder and browser, then process the page."""
        self.check_file_exist()
        self.start_browser()
        self.add_page_element()
        self.process_main()
        # The browser is left open afterwards; call close_browser() to quit it.
if __name__ == '__main__':
    # Script entry point: build the demo object and run it.
    main = TodayNews()
    main.run()
到此這篇關於OpenCV結合selenium實現滑塊驗證碼的文章就介紹到這了,更多相關OpenCV selenium滑塊驗證碼內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章,希望大家以後多多支持腳本之家!
您可能感興趣的文章:- Python Selenium破解滑塊驗證碼最新版(GEETEST 95%以上通過率)