霍夫变换(Hough Transform)/直线检测——第二步:NMS
我们将在这里进行第2步。
在前文获得的表格中,在某个地方附近集中了很多票。这里,执行提取局部最大值的操作。
这一次,提取出投票前十名的位置,并将其表示出来。
NMS 的算法如下:
1. 在该表中,如果遍历到的像素的投票数大于其8近邻的像素值,则它不变。
2. 如果遍历到的像素的投票数小于其8近邻的像素值,则设置为0。
python实现:
import cv2
import numpy as np
import matplotlib.pyplot as plt
def Canny(img):
# Gray scale
def BGR2GRAY(img):
b = img[:, :, 0].copy()
g = img[:, :, 1].copy()
r = img[:, :, 2].copy()
# Gray scale
out = 0.2126 * r + 0.7152 * g + 0.0722 * b
out = out.astype(np.uint8)
return out
# Gaussian filter for grayscale
def gaussian_filter(img, K_size=3, sigma=1.3):
if len(img.shape) == 3:
H, W, C = img.shape
gray = False
else:
img = np.expand_dims(img, axis=-1)
H, W, C = img.shape
gray = True
## Zero padding
pad = K_size // 2
out = np.zeros([H + pad * 2, W + pad * 2, C], dtype=np.float)
out[pad : pad + H, pad : pad + W] = img.copy().astype(np.float)
## prepare Kernel
K = np.zeros((K_size, K_size), dtype=np.float)
for x in range(-pad, -pad + K_size):
for y in range(-pad, -pad + K_size):
K[y + pad, x + pad] = np.exp( - (x ** 2 + y ** 2) / (2 * sigma * sigma))
#K /= (sigma * np.sqrt(2 * np.pi))
K /= (2 * np.pi * sigma * sigma)
K /= K.sum()
tmp = out.copy()
# filtering
for y in range(H):
for x in range(W):
for c in range(C):
out[pad + y, pad + x, c] = np.sum(K * tmp[y : y + K_size, x : x + K_size, c])
out = np.clip(out, 0, 255)
out = out[pad : pad + H, pad : pad + W]
out = out.astype(np.uint8)
if gray:
out = out[..., 0]
return out
# sobel filter
def sobel_filter(img, K_size=3):
if len(img.shape) == 3:
H, W, C = img.shape
else:
H, W = img.shape
# Zero padding
pad = K_size // 2
out = np.zeros((H + pad * 2, W + pad * 2), dtype=np.float)
out[pad : pad + H, pad : pad + W] = img.copy().astype(np.float)
tmp = out.copy()
out_v = out.copy()
out_h = out.copy()
## Sobel vertical
Kv = [[1., 2., 1.],[0., 0., 0.], [-1., -2., -1.]]
## Sobel horizontal
Kh = [[1., 0., -1.],[2., 0., -2.],[1., 0., -1.]]
# filtering
for y in range(H):
for x in range(W):
out_v[pad + y, pad + x] = np.sum(Kv * (tmp[y : y + K_size, x : x + K_size]))
out_h[pad + y, pad + x] = np.sum(Kh * (tmp[y : y + K_size, x : x + K_size]))
out_v = np.clip(out_v, 0, 255)
out_h = np.clip(out_h, 0, 255)
out_v = out_v[pad : pad + H, pad : pad + W]
out_v = out_v.astype(np.uint8)
out_h = out_h[pad : pad + H, pad : pad + W]
out_h = out_h.astype(np.uint8)
return out_v, out_h
def get_edge_angle(fx, fy):
# get edge strength
edge = np.sqrt(np.power(fx.astype(np.float32), 2) + np.power(fy.astype(np.float32), 2))
edge = np.clip(edge, 0, 255)
fx = np.maximum(fx, 1e-10)
#fx[np.abs(fx) <= 1e-5] = 1e-5
# get edge angle
angle = np.arctan(fy / fx)
return edge, angle
def angle_quantization(angle):
angle = angle / np.pi * 180
angle[angle < -22.5] = 180 + angle[angle < -22.5]
_angle = np.zeros_like(angle, dtype=np.uint8)
_angle[np.where(angle <= 22.5)] = 0
_angle[np.where((angle > 22.5) & (angle <= 67.5))] = 45
_angle[np.where((angle > 67.5) & (angle <= 112.5))] = 90
_angle[np.where((angle > 112.5) & (angle <= 157.5))] = 135
return _angle
def non_maximum_suppression(angle, edge):
H, W = angle.shape
_edge = edge.copy()
for y in range(H):
for x in range(W):
if angle[y, x] == 0:
dx1, dy1, dx2, dy2 = -1, 0, 1, 0
elif angle[y, x] == 45:
dx1, dy1, dx2, dy2 = -1, 1, 1, -1
elif angle[y, x] == 90:
dx1, dy1, dx2, dy2 = 0, -1, 0, 1
elif angle[y, x] == 135:
dx1, dy1, dx2, dy2 = -1, -1, 1, 1
if x == 0:
dx1 = max(dx1, 0)
dx2 = max(dx2, 0)
if x == W-1:
dx1 = min(dx1, 0)
dx2 = min(dx2, 0)
if y == 0:
dy1 = max(dy1, 0)
dy2 = max(dy2, 0)
if y == H-1:
dy1 = min(dy1, 0)
dy2 = min(dy2, 0)
if max(max(edge[y, x], edge[y + dy1, x + dx1]), edge[y + dy2, x + dx2]) != edge[y, x]:
_edge[y, x] = 0
return _edge
def hysterisis(edge, HT=100, LT=30):
H, W = edge.shape
# Histeresis threshold
edge[edge >= HT] = 255
edge[edge <= LT] = 0
_edge = np.zeros((H + 2, W + 2), dtype=np.float32)
_edge[1 : H + 1, 1 : W + 1] = edge
## 8 - Nearest neighbor
nn = np.array(((1., 1., 1.), (1., 0., 1.), (1., 1., 1.)), dtype=np.float32)
for y in range(1, H+2):
for x in range(1, W+2):
if _edge[y, x] < LT or _edge[y, x] > HT:
continue
if np.max(_edge[y-1:y+2, x-1:x+2] * nn) >= HT:
_edge[y, x] = 255
else:
_edge[y, x] = 0
edge = _edge[1:H+1, 1:W+1]
return edge
# grayscale
gray = BGR2GRAY(img)
# gaussian filtering
gaussian = gaussian_filter(gray, K_size=5, sigma=1.4)
# sobel filtering
fy, fx = sobel_filter(gaussian, K_size=3)
# get edge strength, angle
edge, angle = get_edge_angle(fx, fy)
# angle quantization
angle = angle_quantization(angle)
# non maximum suppression
edge = non_maximum_suppression(angle, edge)
# hysterisis threshold
out = hysterisis(edge, 100, 30)
return out
def Hough_Line_step2(edge):
## Voting
def voting(edge):
H, W = edge.shape
drho = 1
dtheta = 1
# get rho max length
rho_max = np.ceil(np.sqrt(H ** 2 + W ** 2)).astype(np.int)
# hough table
hough = np.zeros((rho_max * 2, 180), dtype=np.int)
# get index of edge
ind = np.where(edge == 255)
## hough transformation
for y, x in zip(ind[0], ind[1]):
for theta in range(0, 180, dtheta):
# get polar coordinat4s
t = np.pi / 180 * theta
rho = int(x * np.cos(t) + y * np.sin(t))
# vote
hough[rho + rho_max, theta] += 1
out = hough.astype(np.uint8)
return out
# non maximum suppression
def non_maximum_suppression(hough):
rho_max, _ = hough.shape
## non maximum suppression
for y in range(rho_max):
for x in range(180):
# get 8 nearest neighbor
x1 = max(x-1, 0)
x2 = min(x+2, 180)
y1 = max(y-1, 0)
y2 = min(y+2, rho_max-1)
if np.max(hough[y1:y2, x1:x2]) == hough[y,x] and hough[y, x] != 0:
pass
#hough[y,x] = 255
else:
hough[y,x] = 0
# for hough visualization
# get top-10 x index of hough table
ind_x = np.argsort(hough.ravel())[::-1][:20]
# get y index
ind_y = ind_x.copy()
thetas = ind_x % 180
rhos = ind_y // 180
_hough = np.zeros_like(hough, dtype=np.int)
_hough[rhos, thetas] = 255
return _hough
# voting
hough = voting(edge)
# non maximum suppression
out = non_maximum_suppression(hough)
return out
# Read image
img = cv2.imread("thorino.jpg").astype(np.float32)
# Canny
edge = Canny(img)
# Hough
out = Hough_Line_step2(edge)
out = out.astype(np.uint8)
# Save result
cv2.imwrite("out.jpg", out)
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.destroyAllWindows()
c++实现:
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <math.h>
// RGB to Gray scale
cv::Mat BGR2GRAY(cv::Mat img){
// get height and width
int height = img.rows;
int width = img.cols;
int channel = img.channels();
// prepare output
cv::Mat out = cv::Mat::zeros(height, width, CV_8UC1);
// BGR -> Gray
for (int y = 0; y < height; y++){
for (int x = 0; x < width; x++){
out.at<uchar>(y, x) = (int)((float)img.at<cv::Vec3b>(y, x)[0] * 0.0722 + \
(float)img.at<cv::Vec3b>(y, x)[1] * 0.7152 + \
(float)img.at<cv::Vec3b>(y, x)[2] * 0.2126);
}
}
return out;
}
float clip(float value, float min, float max){
return fmin(fmax(value, 0), 255);
}
// gaussian filter
cv::Mat gaussian_filter(cv::Mat img, double sigma, int kernel_size){
int height = img.rows;
int width = img.cols;
int channel = img.channels();
// prepare output
cv::Mat out = cv::Mat::zeros(height, width, CV_8UC3);
if (channel == 1) {
out = cv::Mat::zeros(height, width, CV_8UC1);
}
// prepare kernel
int pad = floor(kernel_size / 2);
int _x = 0, _y = 0;
double kernel_sum = 0;
// get gaussian kernel
float kernel[kernel_size][kernel_size];
for (int y = 0; y < kernel_size; y++){
for (int x = 0; x < kernel_size; x++){
_y = y - pad;
_x = x - pad;
kernel[y][x] = 1 / (2 * M_PI * sigma * sigma) * exp( - (_x * _x + _y * _y) / (2 * sigma * sigma));
kernel_sum += kernel[y][x];
}
}
for (int y = 0; y < kernel_size; y++){
for (int x = 0; x < kernel_size; x++){
kernel[y][x] /= kernel_sum;
}
}
// filtering
double v = 0;
for (int y = 0; y < height; y++){
for (int x = 0; x < width; x++){
// for BGR
if (channel == 3){
for (int c = 0; c < channel; c++){
v = 0;
for (int dy = -pad; dy < pad + 1; dy++){
for (int dx = -pad; dx < pad + 1; dx++){
if (((x + dx) >= 0) && ((y + dy) >= 0) && ((x + dx) < width) && ((y + dy) < height)){
v += (double)img.at<cv::Vec3b>(y + dy, x + dx)[c] * kernel[dy + pad][dx + pad];
}
}
}
out.at<cv::Vec3b>(y, x)[c] = (uchar)clip(v, 0, 255);
}
} else {
// for Gray
v = 0;
for (int dy = -pad; dy < pad + 1; dy++){
for (int dx = -pad; dx < pad + 1; dx++){
if (((x + dx) >= 0) && ((y + dy) >= 0) && ((x + dx) < width) && ((y + dy) < height)){
v += (double)img.at<uchar>(y + dy, x + dx) * kernel[dy + pad][dx + pad];
}
}
}
out.at<uchar>(y, x) = (uchar)clip(v, 0, 255);
}
}
}
return out;
}
// Sobel filter
cv::Mat sobel_filter(cv::Mat img, int kernel_size, bool horizontal){
int height = img.rows;
int width = img.cols;
int channel = img.channels();
// prepare output
cv::Mat out = cv::Mat::zeros(height, width, CV_8UC1);
// prepare kernel
double kernel[kernel_size][kernel_size] = {{1, 2, 1}, {0, 0, 0}, {-1, -2, -1}};
if (horizontal){
kernel[0][1] = 0;
kernel[0][2] = -1;
kernel[1][0] = 2;
kernel[1][2] = -2;
kernel[2][0] = 1;
kernel[2][1] = 0;
}
int pad = floor(kernel_size / 2);
double v = 0;
// filtering
for (int y = 0; y < height; y++){
for (int x = 0; x < width; x++){
v = 0;
for (int dy = -pad; dy < pad + 1; dy++){
for (int dx = -pad; dx < pad + 1; dx++){
if (((y + dy) >= 0) && (( x + dx) >= 0) && ((y + dy) < height) && ((x + dx) < width)){
v += (double)img.at<uchar>(y + dy, x + dx) * kernel[dy + pad][dx + pad];
}
}
}
out.at<uchar>(y, x) = (uchar)clip(v, 0, 255);
}
}
return out;
}
// get edge
cv::Mat get_edge(cv::Mat fx, cv::Mat fy){
// get height and width
int height = fx.rows;
int width = fx.cols;
// prepare output
cv::Mat out = cv::Mat::zeros(height, width, CV_8UC1);
double _fx, _fy;
for(int y = 0; y < height; y++){
for(int x = 0; x < width; x++){
_fx = (double)fx.at<uchar>(y, x);
_fy = (double)fy.at<uchar>(y, x);
out.at<uchar>(y, x) = (uchar)clip(sqrt(_fx * _fx + _fy * _fy), 0, 255);
}
}
return out;
}
// get angle
cv::Mat get_angle(cv::Mat fx, cv::Mat fy){
// get height and width
int height = fx.rows;
int width = fx.cols;
// prepare output
cv::Mat out = cv::Mat::zeros(height, width, CV_8UC1);
double _fx, _fy;
double angle;
for(int y = 0; y < height; y++){
for(int x = 0; x < width; x++){
_fx = fmax((double)fx.at<uchar>(y, x), 0.000001);
_fy = (double)fy.at<uchar>(y, x);
angle = atan2(_fy, _fx);
angle = angle / M_PI * 180;
if(angle < -22.5){
angle = 180 + angle;
} else if (angle >= 157.5) {
angle = angle - 180;
}
// quantization
if (angle <= 22.5){
out.at<uchar>(y, x) = 0;
} else if (angle <= 67.5){
out.at<uchar>(y, x) = 45;
} else if (angle <= 112.5){
out.at<uchar>(y, x) = 90;
} else {
out.at<uchar>(y, x) = 135;
}
}
}
return out;
}
// non maximum suppression
cv::Mat non_maximum_suppression(cv::Mat angle, cv::Mat edge){
int height = angle.rows;
int width = angle.cols;
int channel = angle.channels();
int dx1, dx2, dy1, dy2;
int now_angle;
// prepare output
cv::Mat _edge = cv::Mat::zeros(height, width, CV_8UC1);
for (int y = 0; y < height; y++){
for (int x = 0; x < width; x++){
now_angle = angle.at<uchar>(y, x);
// angle condition
if (now_angle == 0){
dx1 = -1;
dy1 = 0;
dx2 = 1;
dy2 = 0;
} else if(now_angle == 45) {
dx1 = -1;
dy1 = 1;
dx2 = 1;
dy2 = -1;
} else if(now_angle == 90){
dx1 = 0;
dy1 = -1;
dx2 = 0;
dy2 = 1;
} else {
dx1 = -1;
dy1 = -1;
dx2 = 1;
dy2 = 1;
}
if (x == 0){
dx1 = fmax(dx1, 0);
dx2 = fmax(dx2, 0);
}
if (x == (width - 1)){
dx1 = fmin(dx1, 0);
dx2 = fmin(dx2, 0);
}
if (y == 0){
dy1 = fmax(dy1, 0);
dy2 = fmax(dy2, 0);
}
if (y == (height - 1)){
dy1 = fmin(dy1, 0);
dy2 = fmin(dy2, 0);
}
// if pixel is max among adjuscent pixels, pixel is kept
if (fmax(fmax(edge.at<uchar>(y, x), edge.at<uchar>(y + dy1, x + dx1)), edge.at<uchar>(y + dy2, x + dx2)) == edge.at<uchar>(y, x)) {
_edge.at<uchar>(y, x) = edge.at<uchar>(y, x);
}
}
}
return _edge;
}
// histerisis
cv::Mat histerisis(cv::Mat edge, int HT, int LT){
int height = edge.rows;
int width = edge.cols;
int channle = edge.channels();
// prepare output
cv::Mat _edge = cv::Mat::zeros(height, width, CV_8UC1);
int now_pixel;
for (int y = 0; y < height; y++){
for (int x = 0; x < width; x++){
// get pixel
now_pixel = edge.at<uchar>(y, x);
// if pixel >= HT
if (now_pixel >= HT){
_edge.at<uchar>(y, x) = 255;
}
// if LT < pixel < HT
else if (now_pixel > LT) {
for (int dy = -1; dy < 2; dy++){
for (int dx = -1; dx < 2; dx++){
// if 8 nearest neighbor pixel >= HT
if (edge.at<uchar>(fmin(fmax(y + dy, 0), 255), fmin(fmax(x + dx, 0), 255)) >= HT){
_edge.at<uchar>(y, x) = 255;
}
}
}
}
}
}
return _edge;
}
// Canny
cv::Mat Canny(cv::Mat img){
// BGR -> Gray
cv::Mat gray = BGR2GRAY(img);
// gaussian filter
cv::Mat gaussian = gaussian_filter(gray, 1.4, 5);
// sobel filter (vertical)
cv::Mat fy = sobel_filter(gaussian, 3, false);
// sobel filter (horizontal)
cv::Mat fx = sobel_filter(gaussian, 3, true);
// get edge
cv::Mat edge = get_edge(fx, fy);
// get angle
cv::Mat angle = get_angle(fx, fy);
// edge non-maximum suppression
edge = non_maximum_suppression(angle, edge);
// histerisis
edge = histerisis(edge, 100, 30);
return edge;
}
//------
// hough
const int ANGLE_T = 180;
const int RHO_MAX = 320;
// hough table
struct struct_hough_table {
int table[RHO_MAX * 2][ANGLE_T];
};
// hough vote
struct_hough_table Hough_vote(struct_hough_table hough_table, cv::Mat img){
int height = img.rows;
int width = img.cols;
int rho = 0;
double angle = 0;
for (int y = 0; y < height; y++){
for (int x = 0; x < width; x++){
// if not edge, skip
if (img.at<uchar>(y, x) != 255){
continue;
}
// 0 <= angle t < 180
for (int t = 0; t < ANGLE_T; t++){
angle = M_PI / 180 * t;
rho = (int)(x * cos(angle) + y * sin(angle));
hough_table.table[rho + RHO_MAX][t] ++;
}
}
}
return hough_table;
}
// hough nms
struct_hough_table Hough_NMS(struct_hough_table hough_table){
// output hough table
struct_hough_table output_hough_table;
// initialize 0
for (int rho = 0; rho < RHO_MAX * 2; rho++){
for (int t = 0; t < ANGLE_T; t++){
output_hough_table.table[rho][t] = 0;
}
}
// top N x, y
int N = 30;
int top_N_rho[N], top_N_t[N], top_N_vote[N];
int tmp_rho, tmp_t, tmp_vote, tmp_rho2, tmp_t2, tmp_vote2;
int rho, t;
for (int n = 0; n < N; n++){
top_N_rho[n] = -1;
top_N_t[n] = -1;
top_N_vote[n] = -1;
}
for (int rho = 0; rho < RHO_MAX * 2; rho++){
for (int t = 0; t < ANGLE_T; t++){
if (hough_table.table[rho][t] == 0){
continue;
}
// compare to left top
if (((t - 1) >= 0) && ((rho - 1) >= 0)){
if (hough_table.table[rho][t] < hough_table.table[rho - 1][t - 1]){
continue;
}
}
// comparet to top
if ((rho - 1) >= 0){
if (hough_table.table[rho][t] < hough_table.table[rho - 1][t]){
continue;
}
}
// compare to left top
if (((t + 1) < ANGLE_T) && ((rho - 1) >= 0)){
if (hough_table.table[rho][t] < hough_table.table[rho - 1][t + 1]){
continue;
}
}
// compare to left
if ((t - 1) >= 0){
if (hough_table.table[rho][t] < hough_table.table[rho][t - 1]){
continue;
}
}
// compare to right
if ((t + 1) < ANGLE_T){
if (hough_table.table[rho][t] < hough_table.table[rho][t + 1]){
continue;
}
}
// compare to left bottom
if (((t - 1) >= 0) && ((rho + 1) < RHO_MAX * 2)){
if (hough_table.table[rho][t] < hough_table.table[rho + 1][t - 1]){
continue;
}
}
// compare to bottom
if ((rho + 1) < RHO_MAX * 2){
if (hough_table.table[rho][t] < hough_table.table[rho + 1][t]){
continue;
}
}
// compare to right bottom
if (((t + 1) < ANGLE_T) && ((rho + 1) < RHO_MAX * 2)){
if (hough_table.table[rho][t] < hough_table.table[rho + 1][t + 1]){
continue;
}
}
// Select top N votes
for (int n = 0; n < N; n++){
if (top_N_vote[n] <= hough_table.table[rho][t]){
tmp_vote = top_N_vote[n];
tmp_rho = top_N_rho[n];
tmp_t = top_N_t[n];
top_N_vote[n] = hough_table.table[rho][t];
top_N_rho[n] = rho;
top_N_t[n] = t;
for (int m = n + 1; m < N - 1; m++){
tmp_vote2 = top_N_vote[m];
tmp_rho2 = top_N_rho[m];
tmp_t2 = top_N_t[m];
top_N_vote[m] = tmp_vote;
top_N_rho[m] = tmp_rho;
top_N_t[m] = tmp_t;
tmp_vote = tmp_vote2;
tmp_rho = tmp_rho2;
tmp_t = tmp_t2;
}
top_N_vote[N - 1] = tmp_vote;
top_N_rho[N - 1] = tmp_rho;
top_N_t[N - 1] = tmp_t;
break;
}
}
}
}
// get pixel for top N votes
for (int n = 0; n < N; n++){
if (top_N_rho[n] == -1){
break;
}
rho = top_N_rho[n];
t = top_N_t[n];
output_hough_table.table[rho][t] = hough_table.table[rho][t];
}
return output_hough_table;
}
// Inverse hough transformation
cv::Mat Hough_inverse(struct_hough_table hough_table, cv::Mat img){
int height = img.rows;
int width = img.cols;
double _cos, _sin;
int y, x;
for (int rho = 0; rho < RHO_MAX * 2; rho++){
for (int t = 0; t < ANGLE_T; t++){
// if not vote, skip
if (hough_table.table[rho][t] < 1){
continue;
}
_cos = cos(t * M_PI / 180);
_sin = sin(t * M_PI / 180);
if ((_sin == 0) || (_cos == 0)){
continue;
}
for (int x = 0; x < width; x++){
y = (int)(- _cos / _sin * x + (rho - RHO_MAX) / _sin);
if ((y >= 0) && (y < height)){
img.at<cv::Vec3b>(y, x) = cv::Vec3b(0, 0, 255);
}
}
for (int y = 0; y < height; y++){
x = (int)(- _sin / _cos * y + (rho - RHO_MAX) / _cos);
if ((x >= 0) && (x < width)){
img.at<cv::Vec3b>(y, x) = cv::Vec3b(0, 0, 255);
}
}
}
}
return img;
}
// hough step 2
int Hough_line(cv::Mat img){
// get edge by canny
cv::Mat edge = Canny(img);
// hough
struct_hough_table hough_table;
// initialize 0
for (int rho = 0; rho < RHO_MAX * 2; rho++){
for (int t = 0; t < ANGLE_T; t++){
hough_table.table[rho][t] = 0;
}
}
// hough vote
hough_table = Hough_vote(hough_table, edge);
// hough NMS
hough_table = Hough_NMS(hough_table);
return 0;
}
int main(int argc, const char* argv[]){
// read image
cv::Mat img = cv::imread("thorino.jpg", cv::IMREAD_COLOR);
// Hough line detection
Hough_line(img);
return 0;
}
输入:
输出: