本文實例為大家分享了python簡單實現圖片文字分割的具體代碼,供大家參考,具體內容如下
原圖:
圖片預處理:圖片二值化以及圖片降噪處理。
# 圖片二值化
def binarization(img, threshold):
    """Return a black-and-white copy of ``img``.

    The luminance of every pixel is computed from its first three band
    values as ``0.30 * R + 0.59 * G + 0.11 * B``. Pixels whose luminance is
    at or below ``threshold`` are painted black ``(0, 0, 0)``; all other
    pixels are painted white ``(255, 255, 255)``.

    Args:
        img: Image object exposing ``size``, ``copy()``, ``getpixel()`` and
            ``putpixel()``. Each pixel must have at least three bands
            (presumably a Pillow RGB image -- confirm against the caller).
        threshold: Luminance cut-off.

    Returns:
        The modified copy obtained from ``img.copy()``; ``img`` itself is
        only read.
    """
    width, height = img.size
    im_new = img.copy()
    for i in range(width):
        for j in range(height):
            r, g, b = img.getpixel((i, j))[:3]
            luminance = 0.30 * r + 0.59 * g + 0.11 * b
            # Dark pixels (ink) become black, everything else white.
            if luminance <= threshold:
                im_new.putpixel((i, j), (0, 0, 0))
            else:
                im_new.putpixel((i, j), (255, 255, 255))
    return im_new
# 圖片降噪處理
def clear_noise(img, threshold=600):
    """Whiten isolated dark pixels of a binarized image, in place.

    For every scanned pixel the score returned by ``sum_9_region`` is
    compared with ``threshold``; pixels scoring below it are painted white.
    ``sum_9_region`` returns 2550 for pixels that are already white, so with
    the default threshold those are never touched. On a pure black/white
    image each black neighbour contributes 255 to the score, so the default
    of 600 removes black pixels that have at most one other black pixel in
    their neighbourhood.

    The image is modified while it is being scanned, so pixels whitened
    earlier influence the score of pixels visited later.

    Args:
        img: Image object exposing ``width``, ``height``, ``getpixel()``,
            ``putpixel()`` and ``show()`` (presumably a binarized Pillow RGB
            image -- confirm against the caller).
        threshold: Scores strictly below this value are treated as noise.

    Returns:
        The same ``img`` object, after modification.
    """
    width, height = img.width, img.height
    # The last column and the last row are not scanned.
    for i in range(width - 1):
        for j in range(height - 1):
            if sum_9_region(img, i, j) < threshold:
                img.putpixel((i, j), (255, 255, 255))
    img.show()
    return img
# 獲取田字格內當前像素點的像素值
def sum_9_region(img, x, y):
    """Score how dark the 3x3 neighbourhood centred on pixel (x, y) is.

    Only the first band of each pixel is read. If the centre pixel is white
    (first band equal to 255) the neighbourhood is not inspected and the
    sentinel 2550 is returned. Otherwise the result is
    ``n * 255 - sum(first band of the n pixels)``, where the ``n`` pixels are
    the centre and those of its eight neighbours that lie inside the image:
    4 pixels at a corner, 6 on an edge and 9 in the interior. On a pure
    black/white image this equals 255 times the number of black pixels in
    the neighbourhood.

    Args:
        img: Image object exposing ``width``, ``height`` and ``getpixel()``
            returning an indexable pixel.
        x: Column of the centre pixel.
        y: Row of the centre pixel.

    Returns:
        2550 for a white centre pixel, otherwise the darkness score.
    """
    if img.getpixel((x, y))[0] == 255:
        return 2550

    width, height = img.width, img.height
    total = 0
    count = 0
    # Only in-bounds neighbours are read, so border pixels are safe to query.
    for nx in (x - 1, x, x + 1):
        if not 0 <= nx < width:
            continue
        for ny in (y - 1, y, y + 1):
            if 0 <= ny < height:
                total += img.getpixel((nx, ny))[0]
                count += 1
    return count * 255 - total
經過二值化和降噪后得到的圖片
對圖片進行水平投影與垂直投影:
# 傳入二值化后的圖片進行垂直投影
def vertical(img):
    """Project a binarized image onto the x axis and find the inked column runs.

    A column counts as inked when at least one of its pixels has a first
    band equal to 0. Consecutive inked columns form one run.

    Args:
        img: Image object exposing ``load()`` (returning an object indexable
            as ``pixels[x, y]``) and ``size`` as ``(width, height)``.

    Returns:
        A tuple ``(cuts, count)`` where ``cuts`` is a list of ``(left, right)``
        column indices, both inclusive, one per run, and ``count`` is
        ``len(cuts)``.
    """
    pixdata = img.load()
    w, h = img.size

    # Number of black pixels in every column.
    column_counts = [
        sum(1 for y in range(h) if pixdata[x, y][0] == 0) for x in range(w)
    ]

    cuts = []
    start = None  # first column of the run currently open, if any
    for i, count in enumerate(column_counts):
        if start is None:
            if count > 0:
                start = i
        elif count == 0:
            cuts.append((start, i - 1))
            start = None
    # A run that reaches the last column has no blank column to close it.
    if start is not None:
        cuts.append((start, w - 1))
    return cuts, len(cuts)
# 傳入二值化后的圖片進行水平投影
def horizontal(img):
    """Project a binarized image onto the y axis and find the inked row runs.

    A row counts as inked when at least one of its pixels has a first band
    equal to 0. Consecutive inked rows form one run.

    Args:
        img: Image object exposing ``load()`` (returning an object indexable
            as ``pixels[x, y]``) and ``size`` as ``(width, height)``.

    Returns:
        A tuple ``(cuts, count)`` where ``cuts`` is a list of ``(top, bottom)``
        row indices, both inclusive, one per run, and ``count`` is
        ``len(cuts)``.
    """
    pixdata = img.load()
    w, h = img.size

    # Number of black pixels in every row.
    row_counts = [
        sum(1 for x in range(w) if pixdata[x, y][0] == 0) for y in range(h)
    ]

    cuts = []
    start = None  # first row of the run currently open, if any
    for i, count in enumerate(row_counts):
        if start is None:
            if count > 0:
                start = i
        elif count == 0:
            cuts.append((start, i - 1))
            start = None
    # A run that reaches the last row has no blank row to close it.
    if start is not None:
        cuts.append((start, h - 1))
    return cuts, len(cuts)
這兩段代碼目的主要是為了分割得到水平和垂直位置的每個字所占的大小,接下來就是對預處理好的圖片文字進行分割。
# 創建獲得圖片路徑并處理圖片函數
def get_im_path():
    """Ask the user for an image, preprocess it and save one crop per text column run.

    Steps:
      1. Show a file-open dialog; return without doing anything if it is
         cancelled.
      2. Binarize the original image with ``getthreshold(im) - 16`` and show it.
      3. Equalize the histogram with ``his_bal``, show the result, binarize it
         with its own threshold and denoise it with ``clear_noise``.
      4. Take the column runs reported by ``vertical`` and crop each of them,
         at full image height, out of the denoised image.
      5. Show every crop and save it as ``c/<index>.png`` (index starts at 1).

    The thresholds, the image height and the crop boxes are printed as a
    debugging aid.

    NOTE(review): the column runs are measured on the directly binarized
    image while the crops are taken from the equalized and denoised one --
    confirm that this is intended.
    """
    import os

    dialog_root = tk.Tk()  # hidden root window that hosts the file dialog
    dialog_root.withdraw()
    file_path = filedialog.askopenfilename()
    dialog_root.destroy()  # do not leave one hidden root behind per call
    if not file_path:
        # The dialog was cancelled: there is nothing to process.
        return

    # The helpers index three bands per pixel, so normalise the mode first.
    im = Image.open(file_path).convert("RGB")

    # Direct binarization of the original image.
    th = getthreshold(im) - 16
    print(th)
    im_new1 = binarization(im, th)
    im_new1.show()

    # Histogram equalization followed by binarization.
    im1 = his_bal(im)
    im1.show()
    th1 = getthreshold(im1) - 16
    print(th1)
    im_new = binarization(im1, th1)

    # Denoising.
    im_new_cn = clear_noise(im_new)
    height = im_new_cn.size[1]
    print(height)

    # vertical() reports inclusive column bounds, whereas the right edge of
    # a crop box is exclusive, hence the + 1.
    v, vt = vertical(im_new1)
    a = [(left, 0, right + 1, height) for left, right in v]
    print(a)
    im_new.show()

    # Cut out and store every column run.
    os.makedirs("c", exist_ok=True)
    for i, n in enumerate(a, 1):
        temp = im_new_cn.crop(n)
        temp.show()
        temp.save("c/%s.png" % i)
至此大概就完成了。
接下來是文件的全部代碼:
import numpy as np
from PIL import Image
import queue
import matplotlib.pyplot as plt
import tkinter as tk
from tkinter import filedialog#導(dǎo)入文件對話框函數(shù)庫
# Main application window: titled, and sized via the geometry string '400x100'.
window = tk.Tk()
window.title('圖片選擇界面')
window.geometry('400x100')
# NOTE(review): `var` is not referenced anywhere in the visible code -- confirm before removing.
var = tk.StringVar()
# 創建獲得圖片路徑并處理圖片函數
def get_im_path():
    """Ask the user for an image, preprocess it and save one crop per text column run.

    Steps:
      1. Show a file-open dialog; return without doing anything if it is
         cancelled.
      2. Binarize the original image with ``getthreshold(im) - 16`` and show it.
      3. Equalize the histogram with ``his_bal``, show the result, binarize it
         with its own threshold and denoise it with ``clear_noise``.
      4. Take the column runs reported by ``vertical`` and crop each of them,
         at full image height, out of the denoised image.
      5. Show every crop and save it as ``c/<index>.png`` (index starts at 1).

    The thresholds, the image height and the crop boxes are printed as a
    debugging aid.

    NOTE(review): the column runs are measured on the directly binarized
    image while the crops are taken from the equalized and denoised one --
    confirm that this is intended.
    """
    import os

    dialog_root = tk.Tk()  # hidden root window that hosts the file dialog
    dialog_root.withdraw()
    file_path = filedialog.askopenfilename()
    dialog_root.destroy()  # do not leave one hidden root behind per call
    if not file_path:
        # The dialog was cancelled: there is nothing to process.
        return

    # The helpers index three bands per pixel, so normalise the mode first.
    im = Image.open(file_path).convert("RGB")

    # Direct binarization of the original image.
    th = getthreshold(im) - 16
    print(th)
    im_new1 = binarization(im, th)
    im_new1.show()

    # Histogram equalization followed by binarization.
    im1 = his_bal(im)
    im1.show()
    th1 = getthreshold(im1) - 16
    print(th1)
    im_new = binarization(im1, th1)

    # Denoising.
    im_new_cn = clear_noise(im_new)
    height = im_new_cn.size[1]
    print(height)

    # vertical() reports inclusive column bounds, whereas the right edge of
    # a crop box is exclusive, hence the + 1.
    v, vt = vertical(im_new1)
    a = [(left, 0, right + 1, height) for left, right in v]
    print(a)
    im_new.show()

    # Cut out and store every column run.
    os.makedirs("c", exist_ok=True)
    for i, n in enumerate(a, 1):
        temp = im_new_cn.crop(n)
        temp.show()
        temp.save("c/%s.png" % i)
# 傳入二值化后的圖片進行垂直投影
def vertical(img):
    """Project a binarized image onto the x axis and find the inked column runs.

    A column counts as inked when at least one of its pixels has a first
    band equal to 0. Consecutive inked columns form one run.

    Args:
        img: Image object exposing ``load()`` (returning an object indexable
            as ``pixels[x, y]``) and ``size`` as ``(width, height)``.

    Returns:
        A tuple ``(cuts, count)`` where ``cuts`` is a list of ``(left, right)``
        column indices, both inclusive, one per run, and ``count`` is
        ``len(cuts)``.
    """
    pixdata = img.load()
    w, h = img.size

    # Number of black pixels in every column.
    column_counts = [
        sum(1 for y in range(h) if pixdata[x, y][0] == 0) for x in range(w)
    ]

    cuts = []
    start = None  # first column of the run currently open, if any
    for i, count in enumerate(column_counts):
        if start is None:
            if count > 0:
                start = i
        elif count == 0:
            cuts.append((start, i - 1))
            start = None
    # A run that reaches the last column has no blank column to close it.
    if start is not None:
        cuts.append((start, w - 1))
    return cuts, len(cuts)
# 傳入二值化后的圖片進行水平投影
def horizontal(img):
    """Project a binarized image onto the y axis and find the inked row runs.

    A row counts as inked when at least one of its pixels has a first band
    equal to 0. Consecutive inked rows form one run.

    Args:
        img: Image object exposing ``load()`` (returning an object indexable
            as ``pixels[x, y]``) and ``size`` as ``(width, height)``.

    Returns:
        A tuple ``(cuts, count)`` where ``cuts`` is a list of ``(top, bottom)``
        row indices, both inclusive, one per run, and ``count`` is
        ``len(cuts)``.
    """
    pixdata = img.load()
    w, h = img.size

    # Number of black pixels in every row.
    row_counts = [
        sum(1 for x in range(w) if pixdata[x, y][0] == 0) for y in range(h)
    ]

    cuts = []
    start = None  # first row of the run currently open, if any
    for i, count in enumerate(row_counts):
        if start is None:
            if count > 0:
                start = i
        elif count == 0:
            cuts.append((start, i - 1))
            start = None
    # A run that reaches the last row has no blank row to close it.
    if start is not None:
        cuts.append((start, h - 1))
    return cuts, len(cuts)
# 獲得閾值算出平均像素
def getthreshold(im):
    """Return the mean grey level of ``im``, truncated to an int.

    The grey level of a pixel is ``int(0.3 * R + 0.59 * G + 0.11 * B)``
    computed from its first three band values. The truncated per-pixel
    values are summed and divided by the number of pixels.

    Args:
        im: Image object exposing ``size`` as ``(width, height)`` and
            ``getpixel()`` returning a pixel with at least three bands.

    Returns:
        The integer part of the average grey level.

    Raises:
        ZeroDivisionError: If the image has no pixels.
    """
    wid, hei = im.size
    total = 0
    for i in range(wid):
        for j in range(hei):
            r, g, b = im.getpixel((i, j))[:3]
            total += int(0.3 * r + 0.59 * g + 0.11 * b)
    return int(total / (wid * hei))
# 直方圖均衡化 提高對比度
def his_bal(im):
    """Return a histogram-equalized grey copy of ``im`` to raise its contrast.

    The grey level of a pixel is ``int(0.3 * R + 0.59 * G + 0.11 * B)``
    computed from its first three band values. A 256-bin histogram of those
    levels is accumulated into a cumulative distribution, which is scaled to
    0..255 and used as a lookup table. Every pixel of the copy is then set
    to the mapped level in its first three bands; any further bands (e.g.
    alpha) keep their original value.

    Args:
        im: Image object exposing ``size``, ``copy()``, ``getpixel()`` and
            ``putpixel()``; pixels must have at least three bands with grey
            levels in 0..255.

    Returns:
        The modified copy obtained from ``im.copy()``; ``im`` itself is only
        read. An image without pixels is returned as an unmodified copy.
    """
    im_new = im.copy()
    wid, hei = im.size
    pixel_count = wid * hei
    if pixel_count == 0:
        return im_new

    def luminance(pixel):
        return int(0.3 * pixel[0] + 0.59 * pixel[1] + 0.11 * pixel[2])

    # Grey-level histogram.
    hist = [0] * 256
    for i in range(wid):
        for j in range(hei):
            hist[luminance(im.getpixel((i, j)))] += 1

    # Cumulative distribution scaled to 0..255 (rounded) as a lookup table.
    new_gray = []
    running = 0
    for count in hist:
        running += count
        new_gray.append(int(running / pixel_count * 255 + 0.5))

    # Map every pixel through the lookup table.
    for i in range(wid):
        for j in range(hei):
            pixel = im.getpixel((i, j))
            level = new_gray[luminance(pixel)]
            # Multi-band images take a tuple; a bare int is not a grey level there.
            im_new.putpixel((i, j), (level, level, level) + tuple(pixel[3:]))
    return im_new
# 圖片二值化
def binarization(img, threshold):
    """Return a black-and-white copy of ``img``.

    The luminance of every pixel is computed from its first three band
    values as ``0.30 * R + 0.59 * G + 0.11 * B``. Pixels whose luminance is
    at or below ``threshold`` are painted black ``(0, 0, 0)``; all other
    pixels are painted white ``(255, 255, 255)``.

    Args:
        img: Image object exposing ``size``, ``copy()``, ``getpixel()`` and
            ``putpixel()``. Each pixel must have at least three bands
            (presumably a Pillow RGB image -- confirm against the caller).
        threshold: Luminance cut-off.

    Returns:
        The modified copy obtained from ``img.copy()``; ``img`` itself is
        only read.
    """
    width, height = img.size
    im_new = img.copy()
    for i in range(width):
        for j in range(height):
            r, g, b = img.getpixel((i, j))[:3]
            luminance = 0.30 * r + 0.59 * g + 0.11 * b
            # Dark pixels (ink) become black, everything else white.
            if luminance <= threshold:
                im_new.putpixel((i, j), (0, 0, 0))
            else:
                im_new.putpixel((i, j), (255, 255, 255))
    return im_new
# 圖片降噪處理
def clear_noise(img, threshold=600):
    """Whiten isolated dark pixels of a binarized image, in place.

    For every scanned pixel the score returned by ``sum_9_region`` is
    compared with ``threshold``; pixels scoring below it are painted white.
    ``sum_9_region`` returns 2550 for pixels that are already white, so with
    the default threshold those are never touched. On a pure black/white
    image each black neighbour contributes 255 to the score, so the default
    of 600 removes black pixels that have at most one other black pixel in
    their neighbourhood.

    The image is modified while it is being scanned, so pixels whitened
    earlier influence the score of pixels visited later.

    Args:
        img: Image object exposing ``width``, ``height``, ``getpixel()``,
            ``putpixel()`` and ``show()`` (presumably a binarized Pillow RGB
            image -- confirm against the caller).
        threshold: Scores strictly below this value are treated as noise.

    Returns:
        The same ``img`` object, after modification.
    """
    width, height = img.width, img.height
    # The last column and the last row are not scanned.
    for i in range(width - 1):
        for j in range(height - 1):
            if sum_9_region(img, i, j) < threshold:
                img.putpixel((i, j), (255, 255, 255))
    img.show()
    return img
# 獲取田字格內當前像素點的像素值
def sum_9_region(img, x, y):
    """Score how dark the 3x3 neighbourhood centred on pixel (x, y) is.

    Only the first band of each pixel is read. If the centre pixel is white
    (first band equal to 255) the neighbourhood is not inspected and the
    sentinel 2550 is returned. Otherwise the result is
    ``n * 255 - sum(first band of the n pixels)``, where the ``n`` pixels are
    the centre and those of its eight neighbours that lie inside the image:
    4 pixels at a corner, 6 on an edge and 9 in the interior. On a pure
    black/white image this equals 255 times the number of black pixels in
    the neighbourhood.

    Args:
        img: Image object exposing ``width``, ``height`` and ``getpixel()``
            returning an indexable pixel.
        x: Column of the centre pixel.
        y: Row of the centre pixel.

    Returns:
        2550 for a white centre pixel, otherwise the darkness score.
    """
    if img.getpixel((x, y))[0] == 255:
        return 2550

    width, height = img.width, img.height
    total = 0
    count = 0
    # Only in-bounds neighbours are read, so border pixels are safe to query.
    for nx in (x - 1, x, x + 1):
        if not 0 <= nx < width:
            continue
        for ny in (y - 1, y, y + 1):
            if 0 <= ny < height:
                total += img.getpixel((nx, ny))[0]
                count += 1
    return count * 255 - total
# Button placed in the main window; clicking it calls get_im_path.
btn_Open = tk.Button(window,
text='打開圖像', # label shown on the button
width=15, height=2,
command=get_im_path) # callback executed when the button is clicked
btn_Open.pack()
# Start the Tk event loop of the main window
window.mainloop()
以上就是本文的全部內容,希望對大家的學習有所幫助,也希望大家多多支持腳本之家。
您可能感興趣的文章:- python opencv實現圖片旋轉矩形分割
- python實現將文件夾內的每張圖片批量分割成多張
- python實現圖片中文字分割效果
- Python+opencv 實現圖片文字的分割的方法示例
- python 使用opencv 把視頻分割成圖片示例
- python實現圖片九宮格分割
- python中opencv實現文字分割的實踐