using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Drawing;
using System.Drawing.Imaging;

namespace ConsoleApplication1
{
    /// <summary>
    /// Demo entry point: generates random points, clusters them with
    /// <see cref="KmeansPlus"/> and renders the result to a PNG file.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Let Random seed itself. The previous seed expression,
            // Math.Abs(unchecked((int)DateTime.Now.Ticks)), throws OverflowException
            // whenever the truncated tick count happens to be int.MinValue.
            Random random = new Random();

            List<Coordinate> rawData = new List<Coordinate>(500);
            for (int i = 0; i < 500; i++)
            {
                rawData.Add(new Coordinate { X = random.Next(0, 500), Y = random.Next(0, 500) });
            }

            KmeansPlus plus = new KmeansPlus();
            plus.Data = rawData;
            plus.K = 10;
            plus.ShortDistance = 20;
            plus.Start();

            DrawPoint(rawData, plus.Means, plus.Clustering);
            Console.ReadLine();
        }

        /// <summary>
        /// Renders the points, their cluster labels and the cluster centers onto a
        /// 600x600 white bitmap and saves it as a PNG under <c>E:\Debug\</c>.
        /// </summary>
        /// <param name="vector">Points to draw (each as a small black dot).</param>
        /// <param name="mean">Cluster centers (each drawn as a larger colored dot).</param>
        /// <param name="clustering">
        /// Cluster index of every point; <c>clustering[i]</c> belongs to <c>vector[i]</c>.
        /// </param>
        static void DrawPoint(List<Coordinate> vector, List<Coordinate> mean, int[] clustering)
        {
            Color[] palette = new Color[]
            {
                Color.MediumOrchid, Color.DeepPink, Color.Blue, Color.Brown,
                Color.Coral, Color.CornflowerBlue, Color.DarkCyan, Color.DarkGreen,
                Color.DarkMagenta, Color.DarkRed, Color.DodgerBlue
            };

            // One brush per palette entry, created once and disposed at the end,
            // instead of allocating (and leaking) a brush for every label and marker.
            SolidBrush[] brushes = new SolidBrush[palette.Length];
            try
            {
                for (int c = 0; c < palette.Length; c++)
                {
                    brushes[c] = new SolidBrush(palette[c]);
                }

                using (Bitmap bit = new Bitmap(600, 600))
                {
                    using (Graphics g = Graphics.FromImage(bit))
                    using (SolidBrush black = new SolidBrush(Color.Black))
                    using (Font f = new Font("宋体", 10))
                    {
                        g.Clear(Color.White);

                        // First pass: every point as a black dot.
                        for (int i = 0; i < vector.Count; i++)
                        {
                            Coordinate p = vector[i];
                            g.FillEllipse(black, Convert.ToInt32(p.X), Convert.ToInt32(p.Y), 5, 5);
                        }

                        // Second pass: cluster index drawn over each point in the cluster color.
                        for (int i = 0; i < vector.Count; i++)
                        {
                            Coordinate p = vector[i];
                            int cluster = clustering[i];
                            // Wrap around the palette so more clusters than colors cannot throw.
                            SolidBrush brush = brushes[cluster % brushes.Length];
                            g.DrawString(cluster.ToString(), f, brush, Convert.ToInt32(p.X), Convert.ToInt32(p.Y));
                        }

                        // Finally the cluster centers, colored by their index.
                        int j = 0;
                        foreach (Coordinate p in mean)
                        {
                            g.FillEllipse(brushes[j % brushes.Length], Convert.ToInt32(p.X), Convert.ToInt32(p.Y), 8, 8);
                            j++;
                        }
                    }

                    // The Graphics object is disposed (and therefore flushed) before saving.
                    bit.Save(@"E:\Debug\" + DateTime.Now.Millisecond + ".png", ImageFormat.Png);
                }
            }
            finally
            {
                foreach (SolidBrush brush in brushes)
                {
                    if (brush != null)
                    {
                        brush.Dispose();
                    }
                }
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConsoleApplication1
{
    /// <summary>
    /// Iterative k-means clustering of 2-D points: repeatedly assigns every point to
    /// its nearest center and moves each center to the average of its points.
    /// </summary>
    public class Kmeans
    {
        /// <summary>
        /// Clusters <paramref name="rawData"/> starting from the given initial centers.
        /// Iteration stops when no point changes cluster or after
        /// <c>rawData.Count * 20</c> rounds, whichever comes first.
        /// </summary>
        /// <param name="rawData">Points to cluster.</param>
        /// <param name="means">Initial centers; the list itself is not modified.</param>
        /// <param name="endmeans">Receives the final centers.</param>
        /// <returns>
        /// The cluster index of every point (<c>result[i]</c> belongs to <c>rawData[i]</c>);
        /// an empty array when <paramref name="rawData"/> is empty.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="rawData"/> or <paramref name="means"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// There are points to cluster but no initial centers.
        /// </exception>
        public int[] Cluster(List<Coordinate> rawData, List<Coordinate> means, ref List<Coordinate> endmeans)
        {
            if (rawData == null)
            {
                throw new ArgumentNullException("rawData");
            }
            if (means == null)
            {
                throw new ArgumentNullException("means");
            }
            if (rawData.Count > 0 && means.Count == 0)
            {
                throw new ArgumentException("At least one initial center is required.", "means");
            }

            int[] clustering = null;
            int maxCount = rawData.Count * 20; // sanity cap on the number of rounds
            bool changed = true;

            for (int round = 0; changed && round < maxCount; round++)
            {
                int[] nextClustering = null;
                List<Coordinate> nextMeans = null;
                changed = UpdateMeans(rawData, means, clustering, ref nextClustering, ref nextMeans);
                means = nextMeans;
                clustering = nextClustering;
            }

            endmeans = means;
            // With no data the loop never runs; hand back an empty array rather than null.
            return clustering ?? new int[0];
        }

        /// <summary>
        /// Performs one round: assigns every point to its nearest center and
        /// recomputes the centers from that assignment.
        /// </summary>
        /// <param name="data">Points to cluster.</param>
        /// <param name="means">Centers from the previous round.</param>
        /// <param name="clustering">Assignment from the previous round, or null on the first round.</param>
        /// <param name="newclustering">Receives the new assignment.</param>
        /// <param name="newmeans">Receives the new centers.</param>
        /// <returns>
        /// True when this is the first round or any point changed cluster; otherwise false.
        /// </returns>
        private static bool UpdateMeans(List<Coordinate> data, List<Coordinate> means, int[] clustering, ref int[] newclustering, ref List<Coordinate> newmeans)
        {
            int centerCount = means.Count;
            int[] assignment = new int[data.Count];
            int[] clusterCounts = new int[centerCount];
            double[] sumX = new double[centerCount];
            double[] sumY = new double[centerCount];

            for (int i = 0; i < data.Count; i++)
            {
                int nearest = MinIndex(data[i], means);
                assignment[i] = nearest;
                clusterCounts[nearest]++;
                sumX[nearest] += data[i].X;
                sumY[nearest] += data[i].Y;
            }

            List<Coordinate> updated = new List<Coordinate>(centerCount);
            for (int c = 0; c < centerCount; c++)
            {
                if (clusterCounts[c] == 0)
                {
                    // An empty cluster would give 0/0 = NaN, and a NaN center poisons
                    // every later distance comparison. Keep the previous position instead.
                    updated.Add(new Coordinate { X = means[c].X, Y = means[c].Y });
                }
                else
                {
                    updated.Add(new Coordinate { X = sumX[c] / clusterCounts[c], Y = sumY[c] / clusterCounts[c] });
                }
            }

            newclustering = assignment;
            newmeans = updated;

            if (clustering == null)
            {
                return true;
            }

            for (int i = 0; i < assignment.Length; i++)
            {
                if (assignment[i] != clustering[i])
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Finds the index of the center closest to <paramref name="p"/>.
        /// Ties go to the lowest index.
        /// </summary>
        /// <param name="p">Point to classify.</param>
        /// <param name="means">Candidate centers.</param>
        /// <returns>Index into <paramref name="means"/> of the nearest center.</returns>
        private static int MinIndex(Coordinate p, List<Coordinate> means)
        {
            int indexOfMin = 0;
            // Starting from +infinity means a NaN distance is never selected over a
            // real one, because "NaN < x" is always false.
            double smallDist = double.PositiveInfinity;

            for (int k = 0; k < means.Count; k++)
            {
                double dist = Distance(p, means[k]);
                if (dist < smallDist)
                {
                    smallDist = dist;
                    indexOfMin = k;
                }
            }

            return indexOfMin;
        }

        /// <summary>
        /// Euclidean distance between two points.
        /// </summary>
        /// <param name="tuple">First point.</param>
        /// <param name="meas">Second point.</param>
        /// <returns>The straight-line distance between the two points.</returns>
        public static double Distance(Coordinate tuple, Coordinate meas)
        {
            double dx = tuple.X - meas.X;
            double dy = tuple.Y - meas.Y;
            return Math.Sqrt(dx * dx + dy * dy);
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConsoleApplication1
{
    /// <summary>
    /// Seeds k-means with distance-weighted random centers, merges seeds that lie
    /// too close together, then runs <see cref="Kmeans"/> from those seeds.
    /// </summary>
    public class KmeansPlus
    {
        private int _k = 10;
        private int _shortdistance = 50;

        /// <summary>
        /// Number of seed centers to pick before merging. Defaults to 10.
        /// At least one seed is always picked.
        /// </summary>
        public int K
        {
            get { return this._k; }
            set { this._k = value; }
        }

        /// <summary>
        /// Merge threshold: a seed closer than this distance to an earlier seed is dropped.
        /// Defaults to 50.
        /// </summary>
        public int ShortDistance
        {
            get { return this._shortdistance; }
            set { this._shortdistance = value; }
        }

        /// <summary>
        /// Input points. Must be assigned before calling <see cref="Start"/>.
        /// </summary>
        public List<Coordinate> Data { get; set; }

        /// <summary>
        /// Final cluster centers, populated by <see cref="Start"/>.
        /// </summary>
        public List<Coordinate> Means { get; set; }

        /// <summary>
        /// Cluster index of every input point, populated by <see cref="Start"/>.
        /// </summary>
        public int[] Clustering { get; set; }

        /// <summary>
        /// Picks the seed centers, merges near-duplicates and runs k-means,
        /// storing the outcome in <see cref="Means"/> and <see cref="Clustering"/>.
        /// </summary>
        /// <exception cref="InvalidOperationException"><see cref="Data"/> has not been assigned.</exception>
        public void Start()
        {
            if (Data == null)
            {
                throw new InvalidOperationException("Data must be assigned before calling Start.");
            }

            if (Data.Count == 0)
            {
                // Nothing to cluster.
                Means = new List<Coordinate>();
                Clustering = new int[0];
                return;
            }

            // One generator for the whole run. Re-creating a time-seeded Random for
            // every pick can yield the same seed (and so the same numbers) repeatedly.
            Random random = new Random();

            // Work on a copy so the caller's Data list is left untouched.
            List<Coordinate> candidates = new List<Coordinate>(Data);
            List<Coordinate> means = new List<Coordinate>();

            // The first seed is uniform over ALL points. Random.Next(n) excludes n itself,
            // so the bound is Count rather than Count - 1.
            int first = random.Next(candidates.Count);
            means.Add(candidates[first]);
            candidates.RemoveAt(first);

            // Remaining seeds, K in total, stopping early if the points run out.
            while (means.Count < K && candidates.Count > 0)
            {
                int pick = PickNextCenter(candidates, means, random);
                means.Add(candidates[pick]);
                candidates.RemoveAt(pick);
            }

            means = MergeMeans(means, ShortDistance);

            List<Coordinate> finalMeans = new List<Coordinate>();
            Clustering = new Kmeans().Cluster(Data, means, ref finalMeans);
            Means = finalMeans;
        }

        /// <summary>
        /// Roulette-wheel selection of the next seed: each candidate is weighted by
        /// its distance to the nearest seed already chosen.
        /// </summary>
        /// <param name="candidates">Points still available as seeds; must not be empty.</param>
        /// <param name="centers">Seeds chosen so far; must not be empty.</param>
        /// <param name="random">Shared random number generator.</param>
        /// <returns>Index into <paramref name="candidates"/> of the selected point.</returns>
        private static int PickNextCenter(List<Coordinate> candidates, List<Coordinate> centers, Random random)
        {
            double[] weights = new double[candidates.Count];
            double total = 0;

            for (int j = 0; j < candidates.Count; j++)
            {
                double nearest = double.PositiveInfinity;
                for (int i = 0; i < centers.Count; i++)
                {
                    double d = Kmeans.Distance(candidates[j], centers[i]);
                    if (d < nearest)
                    {
                        nearest = d;
                    }
                }

                weights[j] = nearest;
                total += nearest;
            }

            // Every candidate coincides with a seed (or the total is not a usable
            // number): no meaningful weights, so fall back to a uniform pick.
            if (!(total > 0) || double.IsInfinity(total))
            {
                return random.Next(candidates.Count);
            }

            // Spin the wheel: walk the cumulative weights and stop at the FIRST slot
            // that contains the threshold.
            double threshold = random.NextDouble() * total;
            for (int j = 0; j < weights.Length; j++)
            {
                threshold -= weights[j];
                if (threshold < 0)
                {
                    return j;
                }
            }

            // Only reachable through floating-point rounding at the very end of the wheel.
            return weights.Length - 1;
        }

        /// <summary>
        /// Removes seeds that lie closer than <paramref name="len"/> to an earlier seed.
        /// </summary>
        /// <param name="means">Seed centers; modified in place.</param>
        /// <param name="len">Minimum distance two seeds must keep to both survive.</param>
        /// <returns>The same list instance, after merging.</returns>
        private static List<Coordinate> MergeMeans(List<Coordinate> means, int len)
        {
            for (int i = 0; i < means.Count - 1; i++)
            {
                // Walk backwards so RemoveAt never shifts an element not yet examined.
                for (int j = means.Count - 1; j > i; j--)
                {
                    if (Kmeans.Distance(means[i], means[j]) < len)
                    {
                        means.RemoveAt(j);
                    }
                }
            }

            return means;
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConsoleApplication1
{
    /// <summary>
    /// A mutable point described by two double-precision components.
    /// </summary>
    [Serializable]
    public class Coordinate
    {
        /// <summary>
        /// The X component of the point.
        /// </summary>
        public double X
        {
            get;
            set;
        }

        /// <summary>
        /// The Y component of the point.
        /// </summary>
        public double Y
        {
            get;
            set;
        }
    }
}