1.K_Means算法核心思想:(计算,递归)
(1) 从 n个数据对象任意选择 k 个对象作为初始聚类中心;
(2) 根据每个聚类对象的均值(中心对象),计算每个对象与这些中心对象的距离;并根据最小距离重新对相应对象进行划分;
(3) 重新计算每个(有变化)聚类的均值(中心对象)
(4) 循环(2)到(3)直到每个聚类不再发生变化为止
2.简单实列:
问题:假设数据挖掘的任务是将8个点聚类成3个类别, A1(2,10), A2(2,5), A3(8,4), B1(5,8), B2(7,5), B3(6,4), C1(1,2), C2(3,7), C3(4,9),距离函数是Euclidean距离。 假设初始选择A1, B1, C1分别作为每个聚类的中心用 K-means来给出,
代码(C++实现):
第一次循环执行后的三个聚类中心是多少?最后的三个中心是多少?
(题目来自某位我喜欢的AI领域老师的课后习题。)

#include<iostream>
#include<math.h>
#include<vector>
#include<cstring>

using namespace std;


enum center_flag {
	set_1,
	set_2,
	set_3
};//集合标志

struct	Point {
	float					x{0};
	float					y{0};
	center_flag			flag{ set_1 };
	
}Point_A1, Point_A2, Point_A3,\\
Point_B1, Point_B2, Point_B3,\\
Point_C1,Point_C2,Point_C3;


Point cur_cluster_1{ 2,10,set_1 },\\
	cur_cluster_2{ 5,8,set_2 },\\
	cur_cluster_3{ 1,2,set_3 },//三个中心点
	new_cluster_1{0,0,set_1},\\
	new_cluster_2{ 0,0,set_2 },\\
	new_cluster_3{ 0,0,set_3 };

vector <Point> point_set_1{};
vector <Point> point_set_2{};
vector <Point> point_set_3{};

void Init_Point(){
	Point_A1.x = 2; Point_A1.y = 10;
	Point_A2.x = 2; Point_A2.y = 5;
	Point_A3.x = 8; Point_A3.y = 4;
	Point_B1.x = 5; Point_B1.y = 8;
	Point_B2.x = 7; Point_B2.y = 5;
	Point_B3.x = 6; Point_B3.y = 4;
	Point_C1.x = 1; Point_C1.y = 2;
	Point_C2.x = 3; Point_C2.y = 7;
	Point_C3.x = 4; Point_C3.y = 9;	
}


float Get_Euclidean_Distance(Point A,Point B){
	float d = sqrt(pow((A.x - B.x), 2) + pow((A.y - B.y), 2));
	return d;
}
//当前点获得归属集
center_flag Get_flag(Point curPoint,Point cluster1, Point cluster2, Point cluster3) {
	float a=0, b=0, c=0;
	a = Get_Euclidean_Distance(curPoint, cluster1);
	b = Get_Euclidean_Distance(curPoint, cluster2);
	c = Get_Euclidean_Distance(curPoint, cluster3);
	if (a <= b && a <= c)
		return set_1;
	else if (b <= c && b <= a)
		return set_2;
	else
		return set_3;
}
//求新的中心点
Point Get_new_cluaster(vector<Point>point_set) {
	float new_cluaster_x = 0;
	float new_cluaster_y = 0;
	int n = 0;
	center_flag new_cluaster_flag = set_1;
	Point point_temp;
	Point cur_cluaster;
	for (auto i = point_set.begin(); i != point_set.end(); i++) 
	{
		point_temp =*i;
		new_cluaster_x += point_temp.x;
		new_cluaster_y += point_temp.y;
	}
	
	n = point_set.size();
	new_cluaster_x = new_cluaster_x/n ;
	new_cluaster_y = new_cluaster_y/ n;
	new_cluaster_flag = point_temp.flag;
	cur_cluaster.x = new_cluaster_x;
	cur_cluaster.y = new_cluaster_y;
	cur_cluaster.flag = new_cluaster_flag;
	return cur_cluaster;

}
bool center_NotChange() {
	if(cur_cluster_1.x==new_cluster_1.x&&\\
		cur_cluster_1.y == new_cluster_1.y&&\\
		cur_cluster_2.x == new_cluster_2.x&&\\
		cur_cluster_2.y == new_cluster_2.y&&\\
		cur_cluster_3.x == new_cluster_3.x&&\\
		cur_cluster_3.y == new_cluster_3.y)
		return true;
	else
	{
		cur_cluster_1.x = new_cluster_1.x;
		cur_cluster_1.y = new_cluster_1.y;
		cur_cluster_2.x = new_cluster_2.x;
		cur_cluster_2.y = new_cluster_2.y;
		cur_cluster_3.x = new_cluster_3.x;
		cur_cluster_3.y = new_cluster_3.y;
		return false;
	}
	
}
int main() {
	int m = 0;
	Init_Point();
	const vector <Point> Allpoint{ \\
	Point_A1,Point_A2,Point_A3,\\
	Point_B1, Point_B2, Point_B3,\\
	Point_C1,Point_C2,Point_C3 };
	while (1)
	{
		for (auto point : Allpoint)
		{
			point.flag = Get_flag(point, cur_cluster_1, cur_cluster_2, cur_cluster_3);
			switch (point.flag)
			{
			case set_1: {point_set_1.push_back(point); break; }
			case set_2: {point_set_2.push_back(point); break; }
			case set_3: {point_set_3.push_back(point); break; }

			default:
				string errStr = \"there are something wrong with the point\";
				//cout << errStr << endl;
				break;
			}
		}
		point_set_1.push_back(cur_cluster_1);
		point_set_2.push_back(cur_cluster_2);
		point_set_3.push_back(cur_cluster_3);
		//重新计算中心/
		m++;
		new_cluster_1 = Get_new_cluaster(point_set_1);
		new_cluster_2 = Get_new_cluaster(point_set_2);
		new_cluster_3 = Get_new_cluaster(point_set_3);
		//清空集合容器
		point_set_1.clear();
		point_set_2.clear();
		point_set_3.clear();
		if (center_NotChange()==true) 
		{
			cout << \"最终中心点\"<<endl;
			cout << \"中心点1:	\" << new_cluster_1.x << \",	\" << new_cluster_1.y << endl;
			cout << \"中心点2:	\" << new_cluster_2.x << \",	\" << new_cluster_2.y << endl;
			cout << \"中心点3:	\" << new_cluster_3.x << \",	\" << new_cluster_3.y << endl;
			break;
		}
		else
		{
			cout << \"第\" << m << \"次计算后的中心点\" << endl;
			cout << \"中心点1:	\" << new_cluster_1.x << \",	\" << new_cluster_1.y << endl;
			cout << \"中心点2:	\" << new_cluster_2.x << \",	\" << new_cluster_2.y << endl;
			cout << \"中心点3:	\" << new_cluster_3.x << \",	\" << new_cluster_3.y << endl;
	
		//数组清零
	}
	//输出中心点
	cin >> m;
	return 0;
}

输出效果:
\"输出结果图\"
\"输出结果图\"

收藏 打印