使用 OpenCV 从桌面游戏卡图像中提取艺术品
问题描述
我在 python 中编写了一个小脚本,我试图提取或裁剪扑克牌中仅代表艺术品的部分,删除所有其余部分.我一直在尝试各种阈值方法,但无法实现.另请注意,我不能简单地手动记录艺术品的位置,因为它并不总是处于相同的位置或大小,而是总是呈矩形,其他一切都只是文本和边框.
from matplotlib import pyplot as plt
import cv2
img = cv2.imread(filename)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
binary = cv2.bitwise_not(binary)
kernel = np.ones((15, 15), np.uint8)
closing = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
plt.imshow(closing),plt.show()
当前输出是我能得到的最接近的结果.我可能走在正确的道路上,可以再做一些处理,在白色部分周围画一个矩形,但我认为这不是一种可持续的方法:
最后一点,请参阅下面的卡片,并非所有框架的尺寸或位置都完全相同,但总有一件艺术品,其周围只有文字和边框.它不必被非常精确地切割,但很明显,艺术品是卡片的一个"区域",被包含一些文本的其他区域包围.我的目标是尽可能地捕捉艺术品的区域.
解决方案:我使用霍夫线变换来检测图像中的直线部分.所有直线的交点用于构建所有内部不包含其他交点的矩形.由于您要查找的卡片部分始终是这些矩形中最大的一个(至少在您提供的示例中),因此我直接选择其中最大的矩形作为结果.该脚本无需用户交互即可运行.
导入 cv2将 numpy 导入为 np从集合导入 defaultdictdef segment_by_angle_kmeans(lines, k=2, **kwargs):#使用k-means根据角度对线进行分组.#在单位圆上的角度坐标上使用k-means#to 分割 `lines` 内的 `k` 角度.# 定义标准 = (type, max_iter, epsilon)default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER标准 = kwargs.get('标准', (default_criteria_type, 10, 1.0))flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)尝试 = kwargs.get('尝试', 10)# 以弧度返回 [0, pi] 中的角度角度 = np.array([line[0][1] for line in lines])# 将角度乘以 2 并找到该角度的坐标pts = np.array([[np.cos(2*angle), np.sin(2*angle)]对于角度角度],dtype=np.float32)# 在坐标上运行 kmeans标签,中心= cv2.kmeans(pts,k,无,标准,尝试,标志)[1:]labels = labels.reshape(-1) # 转置为行 vec# 根据 kmeans 标签分割线分段 = 默认字典(列表)对于 i,zip 中的行(范围(len(行)),行):分段[labels[i]].append(line)分段 = 列表(分段.值())分段返回def 交叉点(第 1 行,第 2 行):#找到以Hesse范式给出的两条线的交点.#返回最近的整数像素位置.#参见 https://stackoverflow.com/a/383527/5087436rho1, theta1 = line1[0]rho2, theta2 = line2[0]A = np.array([[np.cos(theta1), np.sin(theta1)],[np.cos(theta2), np.sin(theta2)]])b = np.array([[rho1], [rho2]])x0, y0 = np.linalg.solve(A, b)x0, y0 = int(np.round(x0)), int(np.round(y0))返回 [[x0, y0]]def segmented_intersections(线):#查找线组之间的交点.交叉点 = []对于我,在枚举中分组(行 [:-1]):对于行 [i+1:] 中的 next_group:对于组中的第 1 行:对于 next_group 中的 line2:intersections.append(intersection(line1, line2))返回路口def rect_from_crossings(crossings):#查找内部没有其他点的所有矩形矩形 = []# 搜索所有可能的矩形对于我在范围内(len(crossings)):x1= int(交叉点[i][0][0])y1= int(交叉点[i][0][1])对于范围内的 j (len(crossings)):x2= int(交叉点[j][0][0])y2= int(交叉点[j][0][1])#搜索所有点标志 = 1对于范围内的k(len(crossings)):x3= int(过境点[k][0][0])y3= int(交叉点[k][0][1])#Dont count double (反向矩形)如果(x1 > x2 或 y1 > y2):标志 = 0#不要计算里面有点的矩形elif ((((x3 >= x1) and (x2 >= x3))and (y3 > y1) and (y2 > y3) or ((x3 > x1) and (x2 > x3))和 (y3 >= y1) 和 (y2 >= y3))):如果(i!=k 和 j!=k):标志 = 0如果标志:矩形.append([[x1,y1],[x2,y2]])返回矩形如果 __name__ == '__main__':#img = cv2.imread('TAJFp.jpg')#img = cv2.imread('Bj2uu.jpg')img = cv2.imread('yi8db.png')宽度 = int(img.shape[1])高度 = int(img.shape[0])比例 = 380/宽度暗淡=(int(宽度*比例),int(高度*比例))# 
调整图片大小img = cv2.resize(img,暗淡,插值 = cv2.INTER_AREA)img2 = img.copy()灰色 = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)灰色 = cv2.GaussianBlur(灰色,(5,5),cv2.BORDER_DEFAULT)# Canny 和 Hough 的参数可能需要调整以适用于尽可能多的卡片边缘 = cv2.Canny(灰色,10,45,孔径 = 7)线 = cv2.HoughLines(edges,1,np.pi/90,160)分段 = segment_by_angle_kmeans(线)交叉口 = 分段交点(分段)矩形 = rect_from_crossings(crossings)#找到最大的剩余矩形大小 = 0对于我在范围内(len(矩形)):x1 = 矩形[i][0][0]x2 = 矩形[i][1][0]y1 = 矩形[i][0][1]y2 = 矩形[i][1][1]如果(尺寸 < (abs(x1-x2)*abs(y1-y2))):大小 = abs(x1-x2)*abs(y1-y2)x1_rect = x1x2_rect = x2y1_rect = y1y2_rect = y2cv2.rectangle(img2, (x1_rect,y1_rect), (x2_rect,y2_rect), (0,0,255), 2)投资回报率 = img[y1_rect:y2_rect, x1_rect:x2_rect]cv2.imshow("输出",roi)cv2.imwrite("输出.png", roi)cv2.waitKey()
这些是您提供的样本的结果:
查找线交叉的代码可以在这里找到:查找使用 houghlines opencv 绘制的两条线的交点
您可以在这里阅读更多关于霍夫线的信息.
I wrote a small script in python where I'm trying to extract or crop the part of the playing card that represents the artwork only, removing all the rest. I've been trying various methods of thresholding but couldn't get there. Also note that I can't simply record manually the position of the artwork because it's not always in the same position or size, but always in a rectangular shape where everything else is just text and borders.
from matplotlib import pyplot as plt
import cv2
import numpy as np  # required for the structuring element built below

# Question snippet: threshold a card image and clean the mask up with a
# morphological operation, then display the result.
# NOTE: `filename` is not defined in this snippet; it must hold the path of
# the card image before this code runs.
img = cv2.imread(filename)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# THRESH_OTSU is combined with THRESH_BINARY, so the threshold value passed
# here (0) is only a placeholder.
ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
# Invert the binary mask.
binary = cv2.bitwise_not(binary)
kernel = np.ones((15, 15), np.uint8)
# NOTE: despite the variable name, the operation requested is MORPH_OPEN
# (an opening), not MORPH_CLOSE.
closing = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
plt.imshow(closing)
plt.show()
The current output is the closest thing I could get. I could be on the right track and try some further wrangling to draw a rectangle around the white parts, but I don't think it's a sustainable method:
As a last note, see the cards below, not all frames are exactly the same sizes or positions, but there's always a piece of artwork with only text and borders around it. It doesn't have to be super precisely cut, but clearly the art is a "region" of the card, surrounded by other regions containing some text. My goal is to try to capture the region of the artwork as well as I can.
解决方案: I used the Hough line transform to detect the linear parts of the image. The crossings of all lines were used to construct all possible rectangles that do not contain other crossing points. Since the part of the card you are looking for is always the biggest of those rectangles (at least in the samples you provided), I simply chose the biggest of those rectangles as the winner. The script works without user interaction.
import cv2
import numpy as np
from collections import defaultdict
def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """Group lines by their angle using k-means.

    Every angle is doubled and mapped to a point on the unit circle, and the
    resulting 2-D points are clustered into ``k`` groups. Doubling makes
    angles near 0 and angles near pi land close to each other on the circle.

    Args:
        lines: Sequence of lines where ``line[0][1]`` is the line angle in
            radians (the layout this script receives from ``cv2.HoughLines``).
            May be ``None`` or empty.
        k: Number of angle clusters to form.
        **kwargs: Optional overrides passed on to ``cv2.kmeans``:
            ``criteria`` (type, max_iter, epsilon), ``flags`` and
            ``attempts``.

    Returns:
        A list of line groups, one list per k-means label that was assigned,
        in order of first appearance. An empty list when there are no lines.
    """
    # cv2.HoughLines yields None when nothing is detected; there is nothing
    # to cluster in that case, so report "no groups" instead of crashing.
    if lines is None or len(lines) == 0:
        return []

    # Define criteria = (type, max_iter, epsilon)
    default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (default_criteria_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)

    # Angles in radians, one per line.
    angles = np.array([line[0][1] for line in lines])
    # Multiply the angles by two and find the coordinates of that angle.
    pts = np.array(
        [[np.cos(2 * angle), np.sin(2 * angle)] for angle in angles],
        dtype=np.float32,
    )

    # Run k-means on the coordinates; only the labels are needed.
    _, labels, _ = cv2.kmeans(pts, k, None, criteria, attempts, flags)
    labels = labels.reshape(-1)  # flatten to one label per line

    # Bucket the lines by their k-means label.
    segmented = defaultdict(list)
    for label, line in zip(labels, lines):
        segmented[label].append(line)
    return list(segmented.values())
def intersection(line1, line2):
    """Find the intersection of two lines given in Hesse normal form.

    Each line satisfies ``x*cos(theta) + y*sin(theta) = rho``; the two
    equations are solved as a 2x2 linear system.
    See https://stackoverflow.com/a/383527/5087436

    Args:
        line1: Line as ``[[rho, theta]]`` (``line1[0]`` unpacks to rho, theta).
        line2: Second line in the same layout.

    Returns:
        ``[[x0, y0]]`` with the intersection rounded to the closest integer
        pixel location.

    Raises:
        numpy.linalg.LinAlgError: If the coefficient matrix is singular
            (e.g. the two lines have the same angle).
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    coefficients = np.array([
        [np.cos(theta1), np.sin(theta1)],
        [np.cos(theta2), np.sin(theta2)],
    ])
    # A 1-D right-hand side makes solve() return a flat (2,) result, so x0 and
    # y0 are scalars and int() does not depend on array-to-scalar conversion.
    rhs = np.array([rho1, rho2])
    x0, y0 = np.linalg.solve(coefficients, rhs)
    return [[int(np.round(x0)), int(np.round(y0))]]
def segmented_intersections(lines):
    """Find the intersections between groups of lines.

    Every line of a group is intersected with every line of each later
    group; lines within the same group are never intersected with each other.

    Args:
        lines: Sequence of line groups (each group is a sequence of lines in
            the layout accepted by ``intersection``).

    Returns:
        A list with one ``intersection(line1, line2)`` result per pair, in
        group order. Empty when there are fewer than two groups.
    """
    intersections = []
    for i, group in enumerate(lines[:-1]):
        # Only look at groups after the current one so each pair of groups
        # is visited once.
        for next_group in lines[i + 1:]:
            for line1 in group:
                for line2 in next_group:
                    intersections.append(intersection(line1, line2))
    return intersections
def rect_from_crossings(crossings):
    """Find all axis-aligned rectangles that have no other crossing inside.

    Every ordered pair of crossings is treated as the two opposite corners
    ``(x1, y1)`` / ``(x2, y2)`` of a candidate rectangle. A candidate is kept
    when ``x1 <= x2`` and ``y1 <= y2`` and no third crossing lies inside it.
    A crossing on an edge (but not on a corner) counts as inside.

    Args:
        crossings: Sequence of points in ``[[x, y]]`` layout (``c[0][0]`` is
            x, ``c[0][1]`` is y).

    Returns:
        List of rectangles as ``[[x1, y1], [x2, y2]]``. A crossing paired
        with itself yields a zero-area rectangle, which is included.
    """
    # Convert the coordinates once instead of on every loop iteration.
    points = [(int(c[0][0]), int(c[0][1])) for c in crossings]

    rectangles = []
    for i, (x1, y1) in enumerate(points):
        for j, (x2, y2) in enumerate(points):
            # Don't count doubles (reversed rectangles). This only depends on
            # the two corners, so it is decided before scanning other points.
            if x1 > x2 or y1 > y2:
                continue

            # Don't count rectangles with points inside: strictly between the
            # corners on one axis and within the span on the other.
            has_point_inside = any(
                k != i and k != j and (
                    (x1 <= x3 <= x2 and y1 < y3 < y2)
                    or (x1 < x3 < x2 and y1 <= y3 <= y2)
                )
                for k, (x3, y3) in enumerate(points)
            )
            if not has_point_inside:
                rectangles.append([[x1, y1], [x2, y2]])

    return rectangles
if __name__ == '__main__':
    # Crop the artwork of a card image: detect lines, intersect them, build
    # the rectangles that contain no other crossing, and keep the largest one.
    # Other sample images used with this script: 'TAJFp.jpg', 'Bj2uu.jpg'
    image_path = 'yi8db.png'
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot
        # be read.
        raise SystemExit("Could not read image: %s" % image_path)

    # Resize to a fixed width of 380 px, keeping the aspect ratio.
    width = int(img.shape[1])
    height = int(img.shape[0])
    scale = 380 / width
    dim = (int(width * scale), int(height * scale))
    img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    img2 = img.copy()

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): the third positional argument of GaussianBlur is sigmaX,
    # so the numeric value of BORDER_DEFAULT is used as the sigma here. Kept
    # unchanged because the thresholds below were chosen with this blur.
    gray = cv2.GaussianBlur(gray, (5, 5), cv2.BORDER_DEFAULT)

    # Parameters of Canny and Hough may have to be tweaked to work for as
    # many cards as possible.
    edges = cv2.Canny(gray, 10, 45, apertureSize=7)
    lines = cv2.HoughLines(edges, 1, np.pi / 90, 160)
    if lines is None or len(lines) < 2:
        # At least two lines are needed to obtain a single crossing.
        raise SystemExit("Not enough lines detected in: %s" % image_path)

    segmented = segment_by_angle_kmeans(lines)
    crossings = segmented_intersections(segmented)
    rectangles = rect_from_crossings(crossings)

    # Find the biggest remaining rectangle (the first one wins on ties).
    best_rect = max(
        rectangles,
        key=lambda r: abs(r[0][0] - r[1][0]) * abs(r[0][1] - r[1][1]),
        default=None,
    )
    size = 0
    if best_rect is not None:
        size = (abs(best_rect[0][0] - best_rect[1][0])
                * abs(best_rect[0][1] - best_rect[1][1]))
    if size == 0:
        # Without this guard the corner variables below would be undefined.
        raise SystemExit("No rectangle with a positive area was found")

    (x1_rect, y1_rect), (x2_rect, y2_rect) = best_rect

    # Outline the chosen rectangle on the copy; the crop is taken from the
    # untouched image.
    cv2.rectangle(img2, (x1_rect, y1_rect), (x2_rect, y2_rect), (0, 0, 255), 2)
    roi = img[y1_rect:y2_rect, x1_rect:x2_rect]

    cv2.imshow("Output", roi)
    cv2.imwrite("Output.png", roi)
    cv2.waitKey()
These are the results with the samples you provided:
The code for finding line crossings can be found here: find intersection point of two lines drawn using houghlines opencv
You can read more about Hough Lines here.
相关文章