Image processing with OpenCV: removing background text and noise from an image

Problem description
I have these images, for which I want to remove the text in the background. Only the captcha characters should remain (i.e. K6PwKA, YabVzu). The task is to identify these characters later using tesseract.

This is what I have tried, but it isn't giving very good accuracy.
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Users\HPO2KOR\AppData\Local\Tesseract-OCR\tesseract.exe"

img = cv2.imread("untitled.png")
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_filtered = cv2.inRange(gray_image, 0, 75)
cv2.imwrite("cleaned.png", gray_filtered)
How can I improve this?
Note: I tried all the suggestions I got for this question and none of them worked for me.
EDIT: Following Elias's suggestion, I tried finding the color of the captcha text using Photoshop by converting the image to grayscale; it came out to be somewhere between [100, 105]. I then thresholded the image based on this range, but the result I got did not produce a satisfactory result from tesseract.
gray_filtered = cv2.inRange(gray_image, 100, 105)
cv2.imwrite("cleaned.png", gray_filtered)
gray_inv = ~gray_filtered
cv2.imwrite("cleaned.png", gray_inv)
data = pytesseract.image_to_string(gray_inv, lang='eng')
Output:

'KEP wKA'

Result:
EDIT 2:

def get_text(img_name):
    lower = (100, 100, 100)
    upper = (104, 104, 104)
    img = cv2.imread(img_name)
    img_rgb_inrange = cv2.inRange(img, lower, upper)
    neg_rgb_image = ~img_rgb_inrange
    cv2.imwrite('neg_img_rgb_inrange.png', neg_rgb_image)
    data = pytesseract.image_to_string(neg_rgb_image, lang='eng')
    return data
gives:

and the text as

GXuMuUZ

Is there any way to soften it a little?
Solution

Here are two potential approaches and a method to correct distorted text:
Method #1: Morphological operations + contour filtering
1. Obtain binary image. Load the image, convert to grayscale, then apply Otsu's threshold.

2. Remove text contours. Create a rectangular kernel with cv2.getStructuringElement() and then perform morphological operations to remove noise.

3. Filter and remove small noise. Find contours and filter using contour area to remove small particles. We effectively remove the noise by filling in the contour with cv2.drawContours().

4. Perform OCR. We invert the image then apply a slight Gaussian blur. We then OCR using Pytesseract with the --psm 6 configuration option to treat the image as a single uniform block of text (a character-whitelist variant is sketched after the code below). See Tesseract's page on improving output quality for other methods to improve detection, and the Pytesseract documentation for additional configuration options.
Input image -> Binary -> Morph opening -> Contour area filtering -> Invert -> Apply blur to get result

Result from OCR:
YabVzu
Code
import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morph open to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

# Find contours and remove small noise
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 50:
        cv2.drawContours(opening, [c], -1, 0, -1)

# Invert and apply slight Gaussian blur
result = 255 - opening
result = cv2.GaussianBlur(result, (3,3), 0)

# Perform OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey()
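Since a captcha alphabet is typically limited, a further option is to restrict which characters Tesseract may output. The snippet below is a minimal sketch, not part of the original answer: 'cleaned.png' is a hypothetical stand-in for any of the cleaned images produced above, and the alphanumeric alphabet is an assumption based on the sample captchas. tessedit_char_whitelist is a standard Tesseract config parameter, though note that Tesseract 4.0's LSTM engine ignored it (support returned in 4.1).

import cv2
import pytesseract

# Hypothetical input: any cleaned, binarized image from the pipeline above
result = cv2.imread('cleaned.png', cv2.IMREAD_GRAYSCALE)

# Assumption: the captcha only ever contains letters and digits
whitelist = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
data = pytesseract.image_to_string(
    result, lang='eng',
    config='--psm 6 -c tessedit_char_whitelist=' + whitelist)
print(data)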
Method #2: Color segmentation
With the observation that the desired text to extract has a distinguishable contrast from the noise in the image, we can use color thresholding to isolate the text. The idea is to convert to HSV format then color threshold to obtain a mask using a lower/upper color range. From here we use the same process to OCR with Pytesseract.
Input image -> Mask -> Result

Code
import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('2.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)

# Invert image and OCR
invert = 255 - mask
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

cv2.imshow('mask', mask)
cv2.imshow('invert', invert)
cv2.waitKey()
Correcting distorted text
OCR works best when the image is horizontal. To ensure the text is in an ideal format for OCR, we can perform a perspective transform. After removing all the noise to isolate the text, we can perform a morph close to combine individual text contours into a single contour. From here we can find the rotated bounding box using cv2.minAreaRect and then perform a four-point perspective transform using imutils.perspective.four_point_transform. Continuing from the cleaned mask, here are the results:

Mask -> Morph close -> Detected rotated bounding box -> Result

Output with the other image:
Updated code to include perspective transform
import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)

# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find rotated bounding box then perspective transform
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
rect = cv2.minAreaRect(cnts[0])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image, [box], 0, (36,255,12), 2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))

# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)

cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()
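If imutils is not available, the same warp can be reproduced with plain OpenCV. The helper below (four_point_transform_cv, a hypothetical name) is a minimal sketch of roughly what imutils.perspective.four_point_transform does internally: order the four corners, compute the output size from the edge lengths, and apply cv2.warpPerspective.

import cv2
import numpy as np

def four_point_transform_cv(image, pts):
    # Order points as top-left, top-right, bottom-right, bottom-left
    rect = np.zeros((4, 2), dtype='float32')
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]   # top-left has the smallest x + y
    rect[2] = pts[np.argmax(s)]   # bottom-right has the largest x + y
    d = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(d)]   # top-right has the smallest y - x
    rect[3] = pts[np.argmax(d)]   # bottom-left has the largest y - x
    (tl, tr, br, bl) = rect

    # Output size from the maximum edge lengths
    width = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    height = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

    dst = np.array([[0, 0], [width - 1, 0],
                    [width - 1, height - 1], [0, height - 1]], dtype='float32')
    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (width, height))

# Usage, continuing from the code above:
# warped = four_point_transform_cv(255 - mask, box.reshape(4, 2).astype('float32'))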
Note: The color threshold range was determined using this HSV threshold script
import cv2
import numpy as np

def nothing(x):
    pass

# Load image
image = cv2.imread('2.png')

# Create a window
cv2.namedWindow('image')

# Create trackbars for color change
# Hue is from 0-179 for OpenCV
cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
cv2.createTrackbar('VMax', 'image', 0, 255, nothing)

# Set default value for Max HSV trackbars
cv2.setTrackbarPos('HMax', 'image', 179)
cv2.setTrackbarPos('SMax', 'image', 255)
cv2.setTrackbarPos('VMax', 'image', 255)

# Initialize HSV min/max values
hMin = sMin = vMin = hMax = sMax = vMax = 0
phMin = psMin = pvMin = phMax = psMax = pvMax = 0

while(1):
    # Get current positions of all trackbars
    hMin = cv2.getTrackbarPos('HMin', 'image')
    sMin = cv2.getTrackbarPos('SMin', 'image')
    vMin = cv2.getTrackbarPos('VMin', 'image')
    hMax = cv2.getTrackbarPos('HMax', 'image')
    sMax = cv2.getTrackbarPos('SMax', 'image')
    vMax = cv2.getTrackbarPos('VMax', 'image')

    # Set minimum and maximum HSV values to display
    lower = np.array([hMin, sMin, vMin])
    upper = np.array([hMax, sMax, vMax])

    # Convert to HSV format and color threshold
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(image, image, mask=mask)

    # Print if there is a change in HSV value
    if((phMin != hMin) | (psMin != sMin) | (pvMin != vMin) | (phMax != hMax) | (psMax != sMax) | (pvMax != vMax)):
        print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % (hMin, sMin, vMin, hMax, sMax, vMax))
        phMin = hMin
        psMin = sMin
        pvMin = vMin
        phMax = hMax
        psMax = sMax
        pvMax = vMax

    # Display result image
    cv2.imshow('image', result)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()
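For reference, the tuple that the script prints maps directly onto the lower/upper bounds used in the color segmentation code above; the range in Method #2 was obtained this way.

import numpy as np

# Example output from the script:
# (hMin = 0 , sMin = 0, vMin = 0), (hMax = 100 , sMax = 175, vMax = 110)
lower = np.array([0, 0, 0])        # [hMin, sMin, vMin]
upper = np.array([100, 175, 110])  # [hMax, sMax, vMax]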