/// <summary>
/// Writes one tab-separated debug line per cluster: the cluster index, the
/// address and Google address of the record matched to the cluster's centroid,
/// the centroid latitude/longitude and the number of tuples in the cluster.
/// </summary>
/// <param name="rawData">Tuples that were clustered; only its length is used here.</param>
/// <param name="numClusters">Number of clusters to report.</param>
/// <param name="result">Clustering result; <c>Cluster[i]</c> is compared against each cluster index and <c>Centroids[k]</c> supplies the centroid.</param>
/// <param name="goes">Records searched by <c>Id</c> to find the centroid's addresses.</param>
static void ShowClustering2(GeoCoordinate[] rawData, int numClusters, myresult result, List<myGeo> goes)
{
    for (int k = 0; k < numClusters; ++k)
    {
        // Count the tuples assigned to cluster k.
        int count = 0;
        for (int i = 0; i < rawData.Length; ++i)
        {
            if (result.Cluster[i] == k)
            {
                count++;
            }
        }

        var centroid = result.Centroids[k];

        // The lookup treats the centroid's Speed value as a record Id.
        // A NaN Speed cannot be converted to an integer (Convert.ToInt32 would
        // throw), and an Id with no matching record must not crash the dump,
        // so both cases fall back to empty address fields.
        string address = string.Empty;
        string gaddress = string.Empty;
        double speed = centroid.Speed;
        if (!double.IsNaN(speed))
        {
            // Convert once instead of once per list element inside the predicate.
            int id = Convert.ToInt32(speed);
            int index = goes.FindIndex(x => x.Id == id);
            if (index >= 0)
            {
                myGeo geo = goes[index];
                if (geo.address != null)
                {
                    address = geo.address.ToString();
                }
                if (geo.gaddress != null)
                {
                    gaddress = geo.gaddress.ToString();
                }
            }
        }

        Debug.Write(k.ToString() + "\t");
        Debug.Write(address + "\t");
        Debug.Write(gaddress + "\t");
        Debug.Write(centroid.Latitude.ToString() + "\t");
        Debug.Write(centroid.Longitude.ToString() + "\t");
        Debug.Write(count.ToString() + "\t");
        Debug.WriteLine("");
    }
}
// 14 short static method definitions here

/*static int MinIndex(double[] distances)
 * {
 *     int indexOfMin = 0;
 *     double smallDist = distances[0];
 *     for (int k = 0; k < distances.Length; ++k)
 *     {
 *         if (distances[k] < smallDist)
 *         {
 *             smallDist = distances[k]; indexOfMin = k;
 *         }
 *     }
 *     return indexOfMin;
 * }
 */

/// <summary>
/// Clusters <paramref name="rawData"/> into <paramref name="numClusters"/> groups.
/// After an initial assignment, it repeats assign / update-means /
/// update-centroids until an assignment pass reports no change or
/// <paramref name="maxCount"/> passes have run.
/// </summary>
/// <param name="rawData">Tuples to cluster.</param>
/// <param name="numClusters">Number of clusters to produce.</param>
/// <param name="maxCount">Upper bound on the number of assignment passes.</param>
/// <returns>A <c>myresult</c> holding the final centroids and the per-tuple cluster assignment.</returns>
static myresult Cluster(GeoCoordinate[] rawData, int numClusters, int maxCount)
{
    // NOTE(review): the meaning of the third argument (0) is defined by
    // InitClustering, which is not visible here - confirm.
    int[] assignment = InitClustering(rawData.Length, numClusters, 0);
    GeoCoordinate[] means = Allocate(numClusters);
    GeoCoordinate[] centroids = Allocate(numClusters);

    UpdateMeans(rawData, assignment, means);
    UpdateCentroids(rawData, assignment, means, centroids);

    for (int pass = 0; pass < maxCount; ++pass)
    {
        bool moved = Assign(rawData, assignment, centroids);
        UpdateMeans(rawData, assignment, means);
        UpdateCentroids(rawData, assignment, means, centroids);

        // Stop as soon as a pass reports that no assignment changed.
        if (!moved)
        {
            break;
        }
    }

    return new myresult
    {
        Centroids = centroids,
        Cluster = assignment
    };
}
/// <summary>
/// Adds a "Station" sheet to <paramref name="wb"/> with one row per cluster:
/// 1-based cluster id, centroid latitude, centroid longitude and the number of
/// tuples assigned to the cluster. All values are written as text.
/// Does nothing when <paramref name="wb"/> is null.
/// </summary>
/// <param name="wb">Workbook that receives the new sheet.</param>
/// <param name="rawData">Clustered tuples; only its length is used here.</param>
/// <param name="numClusters">Number of clusters (data rows) to write.</param>
/// <param name="result">Clustering result supplying <c>Cluster</c> and <c>Centroids</c>.</param>
/// <param name="goes">Not used by this method.</param>
static void StoreStation(HSSFWorkbook wb, GeoCoordinate[] rawData, int numClusters, myresult result, List<myGeo> goes)
{
    if (wb == null)
    {
        return;
    }

    ISheet stationSheet = wb.CreateSheet("Station");

    // Column widths are passed as character count * 256.
    int[] widthsInChars = { 10, 20, 20, 10 };
    for (int col = 0; col < widthsInChars.Length; ++col)
    {
        stationSheet.SetColumnWidth(col, widthsInChars[col] * 256);
    }

    string[] titles = { "Clusterid", "Lat", "Long", "Nb" };
    var header = stationSheet.CreateRow(0);
    for (int col = 0; col < titles.Length; ++col)
    {
        header.CreateCell(col).SetCellValue(titles[col]);
    }

    for (int clusterIndex = 0; clusterIndex < numClusters; ++clusterIndex)
    {
        // Row 0 is the header, so cluster k lands on row k + 1.
        var dataRow = stationSheet.CreateRow(clusterIndex + 1);

        int members = 0;
        for (int tuple = 0; tuple < rawData.Length; ++tuple)
        {
            if (result.Cluster[tuple] == clusterIndex)
            {
                ++members;
            }
        }

        dataRow.CreateCell(0).SetCellValue((clusterIndex + 1).ToString());
        dataRow.CreateCell(1).SetCellValue(result.Centroids[clusterIndex].Latitude.ToString());
        dataRow.CreateCell(2).SetCellValue(result.Centroids[clusterIndex].Longitude.ToString());
        dataRow.CreateCell(3).SetCellValue(members.ToString());
    }
}
/// <summary>
/// Adds a "BusStop" sheet to <paramref name="wb"/> with one row per tuple,
/// grouped by cluster: the tuple's Speed value (used as the record id), the
/// matching record's address and Google address, the tuple's latitude and
/// longitude, the 1-based cluster id and the cluster centroid's
/// latitude/longitude. All values are written as text.
/// Does nothing when <paramref name="wb"/> is null.
/// </summary>
/// <param name="wb">Workbook that receives the new sheet.</param>
/// <param name="rawData">Clustered tuples.</param>
/// <param name="numClusters">Number of clusters to walk.</param>
/// <param name="result">Clustering result supplying <c>Cluster</c> and <c>Centroids</c>.</param>
/// <param name="goes">Records searched by <c>Id</c> to find each tuple's addresses.</param>
static void StoreBusStop(HSSFWorkbook wb, GeoCoordinate[] rawData, int numClusters, myresult result, List<myGeo> goes)
{
    if (wb == null)
    {
        return;
    }

    ISheet sheet = wb.CreateSheet("BusStop");

    // Column widths are passed as character count * 256.
    sheet.SetColumnWidth(0, 10 * 256);
    sheet.SetColumnWidth(1, 20 * 256);
    sheet.SetColumnWidth(2, 50 * 256);
    sheet.SetColumnWidth(3, 15 * 256);
    sheet.SetColumnWidth(4, 15 * 256);
    sheet.SetColumnWidth(5, 15 * 256);
    sheet.SetColumnWidth(6, 15 * 256);
    sheet.SetColumnWidth(7, 15 * 256);

    var headerRow = sheet.CreateRow(0);
    headerRow.CreateCell(0).SetCellValue("StudentId");
    headerRow.CreateCell(1).SetCellValue("Addr");
    headerRow.CreateCell(2).SetCellValue("Google Addr");
    headerRow.CreateCell(3).SetCellValue("RawLat");
    headerRow.CreateCell(4).SetCellValue("RawLong");
    headerRow.CreateCell(5).SetCellValue("ClusterId");
    headerRow.CreateCell(6).SetCellValue("CentroidsLat");
    headerRow.CreateCell(7).SetCellValue("CentroidsLong");

    int nextRow = 1; // row 0 is the header
    for (int k = 0; k < numClusters; ++k)
    {
        for (int i = 0; i < rawData.Length; ++i)
        {
            if (result.Cluster[i] != k)
            {
                continue;
            }

            // Resolve the addresses before touching the sheet. A NaN Speed
            // cannot be converted to an integer, and an id without a matching
            // record must not abort the whole export, so both cases produce
            // empty address cells instead of an exception.
            string address = string.Empty;
            string gaddress = string.Empty;
            double speed = rawData[i].Speed;
            if (!double.IsNaN(speed))
            {
                // Convert once instead of once per list element inside the predicate.
                int id = Convert.ToInt32(speed);
                int index = goes.FindIndex(x => x.Id == id);
                if (index >= 0)
                {
                    myGeo geo = goes[index];
                    if (geo.address != null)
                    {
                        address = geo.address.ToString();
                    }
                    if (geo.gaddress != null)
                    {
                        gaddress = geo.gaddress.ToString();
                    }
                }
            }

            var row = sheet.CreateRow(nextRow++);
            row.CreateCell(0).SetCellValue(rawData[i].Speed.ToString());
            row.CreateCell(1).SetCellValue(address);
            row.CreateCell(2).SetCellValue(gaddress);
            row.CreateCell(3).SetCellValue(rawData[i].Latitude.ToString());
            row.CreateCell(4).SetCellValue(rawData[i].Longitude.ToString());
            row.CreateCell(5).SetCellValue((k + 1).ToString()); // cluster ids are reported 1-based
            row.CreateCell(6).SetCellValue(result.Centroids[k].Latitude.ToString());
            row.CreateCell(7).SetCellValue(result.Centroids[k].Longitude.ToString());
        }

        Debug.WriteLine("");
    }
}
/// <summary>
/// Entry point. Loads records from the first sheet of ../../data/data.xls
/// (column 0 = id, 4 = address, 9 = latitude, 10 = longitude, 11 = Google
/// address; data starts at row index 2), clusters them, adds the "BusStop" and
/// "Station" sheets and hands the workbook to SaveExcel. Any exception is
/// reported through <c>Debug.WriteLine</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        Debug.WriteLine("\nBegin outlier data detection demo\n");
        Debug.WriteLine("Loading all (height-weight) data into memory");

        const string inputPath = "../../data/data.xls";
        if (!File.Exists(inputPath))
        {
            // Without the input workbook there is nothing to cluster; bail out
            // instead of dereferencing a null workbook.
            Debug.WriteLine("Input workbook not found: " + inputPath);
            return;
        }

        HSSFWorkbook wb;
        // The workbook is loaded by the constructor, so the stream can be
        // released right away instead of staying open for the whole run.
        using (FileStream file = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
        {
            wb = new HSSFWorkbook(file);
        }

        ISheet sheet = wb.GetSheetAt(0);
        List<myGeo> geos = new List<myGeo>();
        for (int i = 2; i <= sheet.LastRowNum; i++)
        {
            // GetRow returns null for rows that have no cells at all.
            IRow row = sheet.GetRow(i);
            if (row == null)
            {
                continue;
            }

            // GetCell addresses cells by column index; row.Cells is only the
            // list of cells physically present, so indexing it shifts columns
            // (or throws) as soon as a row has a gap.
            ICell latitudeCell = row.GetCell(9);
            if (latitudeCell == null || latitudeCell.CellType == CellType.Blank)
            {
                continue; // rows without a latitude are not clustered
            }

            myGeo geo = new myGeo();
            geo.geo = new GeoCoordinate();

            double coordinate;
            if (TryReadCellDouble(latitudeCell, out coordinate))
            {
                geo.geo.Latitude = coordinate;
            }
            if (TryReadCellDouble(row.GetCell(10), out coordinate))
            {
                geo.geo.Longitude = coordinate;
            }

            geo.address = ReadCellText(row.GetCell(4));
            geo.gaddress = ReadCellText(row.GetCell(11));

            // The record id is mirrored into GeoCoordinate.Speed so it travels
            // with the coordinate through the clustering code.
            ICell idCell = row.GetCell(0);
            if (idCell != null)
            {
                if (idCell.CellType == CellType.String && !string.IsNullOrEmpty(idCell.StringCellValue))
                {
                    geo.Id = Convert.ToInt32(idCell.StringCellValue);
                    geo.geo.Speed = geo.Id;
                }
                else if (idCell.CellType == CellType.Numeric)
                {
                    geo.Id = Convert.ToInt32(idCell.NumericCellValue);
                    geo.geo.Speed = geo.Id;
                }
            }

            geos.Add(geo);
        }

        GeoCoordinate[] rawData = new GeoCoordinate[geos.Count];
        for (int i = 0; i < geos.Count; i++)
        {
            rawData[i] = geos[i].geo;
        }

        // NOTE(review): maxcapacity is declared outside this method. If it is an
        // integer, the division truncates before the conversions run - confirm
        // that this rounding (plus the unconditional + 1) is intended.
        int numClusters = Convert.ToInt32(Convert.ToDouble(geos.Count() / maxcapacity)) + 1;
        int maxCount = 80;
        Debug.WriteLine("\nk = " + numClusters + " and maxCount = " + maxCount);

        myresult result = Cluster(rawData, numClusters, maxCount);
        Debug.WriteLine("\nClustering complete");
        Debug.WriteLine("\nClustering in internal format: \n");
        Debug.WriteLine("\nClustered data:");

        StoreBusStop(wb, rawData, numClusters, result, geos);
        StoreStation(wb, rawData, numClusters, result, geos);
        SaveExcel(wb);

        Debug.WriteLine("\nEnd demo\n");
    }
    catch (Exception ex)
    {
        Debug.WriteLine(ex.Message);
    }
} // Main

// Reads a number from a cell holding either a non-empty string or a numeric
// value; returns false (value = 0) for a missing cell or any other cell type.
static bool TryReadCellDouble(ICell cell, out double value)
{
    value = 0;
    if (cell == null)
    {
        return false;
    }
    if (cell.CellType == CellType.String && !string.IsNullOrEmpty(cell.StringCellValue))
    {
        value = Convert.ToDouble(cell.StringCellValue);
        return true;
    }
    if (cell.CellType == CellType.Numeric)
    {
        value = cell.NumericCellValue;
        return true;
    }
    return false;
}

// Returns the cell's string value, or an empty string when the cell is missing.
static string ReadCellText(ICell cell)
{
    return cell == null ? string.Empty : cell.StringCellValue;
}