k-平均聚类算法(k-means Clustering)第二步:聚类(Clustering)
在这里完成 OpenCV k-平均聚类算法第一步中所述算法的步骤4和步骤5,完成聚类吧!
在这里预测类别为0和1。
因此,k-平均聚类算法是一种完全按范围划分类别的方法。一条数据最后被划分到什么类别只有到最后才清楚。此外,必须预先知道类别的数量。
需要注意的是,k-平均聚类算法最初分配的类别对最后的结果有很大的影响。并且,数据量小的情况下极有可能失败。也就是说,数据量越大最后得到的数据分布越准确。
python实现:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
# Decrease color
def dic_color(img):
    """Quantize every value of *img* to one of four levels: 32, 96, 160, 224.

    Values are grouped into four equal-width bins of 64 (0-63, 64-127,
    128-191, 192-255) and each bin is replaced by its centre value.

    Args:
        img: array-like of intensities in the range 0-255 (any shape).

    Returns:
        A new array with the same shape as *img*; the input is not modified.
    """
    img = np.asarray(img)
    # Divide by the bin width (64), not 63: with 63 the bin edges drift and
    # values 252-255 land in a fifth bin whose centre (288) overflows uint8.
    # Using the non-in-place operator also leaves the caller's array intact.
    return (img // 64) * 64 + 32
# Database
def get_DB():
    """Build the colour-histogram feature database for the dataset images.

    Every file matching ``dataset/test_*`` is read, colour-reduced with
    ``dic_color`` and summarised as a 12-bin histogram: 4 quantization
    levels for each of the 3 image channels (channel 0 in columns 0-3,
    channel 1 in columns 4-7, channel 2 in columns 8-11).

    Returns:
        A tuple ``(db, pdb)`` where ``db`` is an ``int32`` array of shape
        ``(n_images, 13)`` holding the 12 histogram bins followed by the
        class label in the last column (0 for paths containing 'akahara',
        1 for 'madara', -1 when neither name appears), and ``pdb`` is the
        list of image paths in the same (sorted) row order.

    Raises:
        OSError: if a matched file cannot be decoded as an image.
    """
    # get training image path
    train = sorted(glob("dataset/test_*"))

    # prepare database
    db = np.zeros((len(train), 13), dtype=np.int32)
    pdb = []

    # each train
    for i, path in enumerate(train):
        # read image; cv2.imread signals failure by returning None
        img = cv2.imread(path)
        if img is None:
            raise OSError("cannot read image: {}".format(path))
        img = dic_color(img)

        # histogram: count pixels at each quantized level, per channel
        for j in range(4):
            level = 64 * j + 32
            for ch in range(3):
                db[i, 4 * ch + j] = np.count_nonzero(img[..., ch] == level)

        # get class; fall back to -1 so an unrecognised file name can never
        # silently inherit the previous image's label (or hit an unbound name)
        if 'akahara' in path:
            cls = 0
        elif 'madara' in path:
            cls = 1
        else:
            cls = -1

        # store class label
        db[i, -1] = cls

        # add image path
        pdb.append(path)

    return db, pdb
# k-Means step2
def k_means_step2(db, pdb, Class=2):
    """Cluster the rows of *db* with k-means and print each row's cluster.

    The first 12 columns of *db* are used as the feature vector; the last
    column is overwritten (on a copy) with the cluster index. Labels start
    from a seeded random assignment and are refined until no row changes
    cluster.

    Args:
        db: 2-D array whose first 12 columns are features and whose last
            column is a label slot. It is not modified.
        pdb: sequence of names (one per row of *db*) used in the printout.
        Class: number of clusters; must be at least 1.

    Returns:
        A copy of *db* whose last column holds the final cluster index of
        each row.

    Raises:
        ValueError: if *Class* is smaller than 1.
    """
    if Class < 1:
        raise ValueError("Class must be at least 1")

    # copy database
    feats = db.copy()
    n_samples = len(feats)

    # initiate random seed
    # NOTE: this reseeds NumPy's global generator, as the original did.
    np.random.seed(1)

    # assign random class: scale one uniform draw per row to 0..Class-1.
    # For Class == 2 this is exactly "draw < 0.5 -> 0, otherwise 1".
    for i in range(n_samples):
        feats[i, -1] = min(int(np.random.random() * Class), Class - 1)

    while True:
        labels = feats[:, -1]

        # compute gravity (centroid) of every cluster that has members
        gs = np.zeros((Class, 12), dtype=np.float32)
        occupied = np.zeros(Class, dtype=bool)
        for c in range(Class):
            members = feats[labels == c, :12]
            if len(members) > 0:
                gs[c] = members.mean(axis=0)
                occupied[c] = True

        # nothing to cluster
        if not occupied.any():
            break

        # Euclidean distance from every row to every centroid
        diff = feats[:, np.newaxis, :12] - gs[np.newaxis, :, :]
        dis = np.sqrt(np.sum(np.square(diff), axis=2))

        # An empty cluster has no meaningful centroid. Its mean would be NaN,
        # which argmin picks first and which can make labels flip forever,
        # so make such clusters unreachable instead.
        dis[:, ~occupied] = np.inf

        # re-labeling with the nearest centroid
        pred = np.argmin(dis, axis=1)
        change_count = np.count_nonzero(pred != labels)
        feats[:, -1] = pred

        # converged once no label changed
        if change_count < 1:
            break

    for i in range(n_samples):
        print(pdb[i], " Pred:", feats[i, -1])

    return feats
# Script entry: build the histogram database from the dataset images, then
# run k-means on it, which prints the predicted cluster for every image path.
db, pdb = get_DB()
k_means_step2(db, pdb)
答案:
test_akahara_1.jpg Pred: 0
test_akahara_2.jpg Pred: 1
test_madara_1.jpg Pred: 0
test_madara_2.jpg Pred: 0