public static double AverageWidth = 0.0; // Average Width each iteration

// Store the run configuration in the static Kmeans fields and, when a file name is
// supplied, load the points and their initial cluster assignments from that file.
//
//  PointPositionINPUT            Point coordinate arrays; kept by reference, not copied
//  _parallelOptionsINPUT         Parallel options saved in Kmeans._parallelOptions
//  FileName                      File to read; null or empty means nothing is read
//  ClusterPosition               Passed to ReadDataFromFile (column of the cluster label, or a negative option)
//  FirstClusterValue             Passed to ReadDataFromFile (label value subtracted to get a 0-based index)
//  StartingPosition              Passed to ReadDataFromFile (column of the first coordinate)
//  Ncent_GlobalINPUT             Saved in Kmeans.Ncent_Global
//  MaxNcent_GlobalINPUT          Saved in Kmeans.MaxNcent_Global
//  Ncent_Global_ParallelINPUT    Saved in Kmeans.Ncent_Global_Parallel
//  ParameterVectorDimensionINPUT Saved in Kmeans.ParameterVectorDimension
//  CenterChangeINPUT             Saved in Kmeans.KmeansCenterChangeCut
//  IterationCutINPUT             Saved in Kmeans.KmeansIterationCut
public static void InitializeKmeans(double[][] PointPositionINPUT, ParallelOptions _parallelOptionsINPUT, string FileName, int ClusterPosition, int FirstClusterValue, int StartingPosition, int Ncent_GlobalINPUT, int MaxNcent_GlobalINPUT, int Ncent_Global_ParallelINPUT, int ParameterVectorDimensionINPUT, double CenterChangeINPUT, int IterationCutINPUT)
{
    // Inherit data and execution settings
    Kmeans.PointPosition = PointPositionINPUT;
    Kmeans._parallelOptions = _parallelOptionsINPUT;

    // Cluster count limits
    Kmeans.Ncent_Global = Ncent_GlobalINPUT;
    Kmeans.MaxNcent_Global = MaxNcent_GlobalINPUT;
    Kmeans.Ncent_Global_Parallel = Ncent_Global_ParallelINPUT;

    // Termination criteria and vector size
    Kmeans.KmeansCenterChangeCut = CenterChangeINPUT;
    Kmeans.KmeansIterationCut = IterationCutINPUT;
    Kmeans.ParameterVectorDimension = ParameterVectorDimensionINPUT;

    Kmeans.SetParallelCenterDecomposition();

    // One initial assignment slot per point held by this process
    Kmeans.InitialPointAssignment = new int[DAVectorUtility.PointCount_Process];

    // Nothing to read. A null name is treated like an empty one, matching the
    // string.IsNullOrEmpty test used by ReadDataFromFile.
    if (string.IsNullOrEmpty(FileName))
    {
        return;
    }

    // Read Initial Assignments
    DAVectorUtility.SALSAPrint(0, "Kmeans Read File " + FileName + " Points " + DAVectorUtility.PointCount_Global.ToString()
        + " Starting at position " + StartingPosition.ToString() + " Dimension " + Kmeans.ParameterVectorDimension.ToString()
        + " Cluster Position " + ClusterPosition.ToString() + " Initial value " + FirstClusterValue.ToString());
    Kmeans.ReadDataFromFile(FileName, ClusterPosition, FirstClusterValue, StartingPosition);

} // End InitializeKmeans
} // End SetupKmeans

// Run the Kmeans iteration loop on the data set up by InitializeKmeans.
//
//  ClusterCenterINPUT   Array adopted as Kmeans.ClusterCenter (kept by reference)
//  ClusterSizeINPUT     Array adopted as Kmeans.ClusterSize (kept by reference)
//  ClusteRadiusINPUT    Array adopted as Kmeans.ClusterRadius (kept by reference)
//  Ncent_GlobalFINAL    Receives Kmeans.Ncent_Global when the loop has finished
//  AverageWidthFINAL    Receives Kmeans.AverageWidth when the loop has finished
//
// The loop ends when Kmeans.KmeansIterationCut iterations have been done, or earlier:
// once the average center change drops below AverageRadius * KmeansCenterChangeCut,
// further iterations are counted and the loop stops when that count exceeds 10.
// Per-iteration distance-calculation statistics are printed through SALSAPrint.
public static void RunKmeans(double[][] ClusterCenterINPUT, int[] ClusterSizeINPUT, double[] ClusteRadiusINPUT, out int Ncent_GlobalFINAL, out double AverageWidthFINAL)
{
    // Typed lists avoid the boxing and casts that ArrayList needed
    System.Collections.Generic.List<double> KeepPCfractions = new System.Collections.Generic.List<double>(200);
    System.Collections.Generic.List<double> KeepCCfractions = new System.Collections.Generic.List<double>(200);

    // Inherit Solution arrays
    Kmeans.ClusterCenter = ClusterCenterINPUT;
    Kmeans.ClusterSize = ClusterSizeINPUT;
    Kmeans.ClusterRadius = ClusteRadiusINPUT;
    Kmeans.ClusterWidth = new double[Kmeans.MaxNcent_Global];

    // Set up TriangleInequality
    KmeansTriangleInequality.SetExternalFunctions(GetClusterRadius, GetClusterCenters, FindClusterCenters);
    KmeansTriangleInequality.InitializeTriangleInequality(Kmeans.PointPosition, Kmeans._parallelOptions, Kmeans.ClusterCenter,
        Kmeans.Ncent_Global, Kmeans.MaxNcent_Global, Kmeans.Ncent_Global_Parallel, Kmeans.ParameterVectorDimension);

    DAVectorUtility.SALSAPrint(0, "Start Kmeans ****** Number of Centers " + Kmeans.Ncent_Global.ToString()
        + " Max Number of Centers " + Kmeans.MaxNcent_Global.ToString()
        + " Center Limit for Parallelism " + Kmeans.Ncent_Global_Parallel.ToString()
        + " Vector Dimension " + Kmeans.ParameterVectorDimension.ToString());

    Kmeans.FindClusterCenters(true, Kmeans.InitialPointAssignment, null, null);
    Kmeans.CountKmeansIterations = 0;
    bool StartStop = false;     // true once the center-change criterion has been met
    int CountStops = 0;         // iterations done after the criterion was first met

    while (Kmeans.CountKmeansIterations < Kmeans.KmeansIterationCut)
    {
        // Snapshot the cumulative counters so this iteration's share can be isolated
        double CCBeforeIteration = KmeansTriangleInequality.NumberFullDistancesCalculatedCC;
        double PCBeforeIteration = KmeansTriangleInequality.NumberFullDistancesCalculatedPC;

        KmeansTriangleInequality.NextIteration();
        ++Kmeans.CountKmeansIterations;

        bool WillStop = false;
        if (StartStop)
        {
            ++CountStops;
            if (CountStops > 10)
            {
                WillStop = true;
            }
        }
        else if (Kmeans.AverageCenterChange < Kmeans.AverageRadius * Kmeans.KmeansCenterChangeCut)
        {
            StartStop = true;
        }

        // Normalizers. The cumulative one is multiplied in double precision: an integer
        // product of point count and iteration count could overflow before the cast.
        double MaxPCPerIteration = (double)Kmeans.MaxNcent_Global * (double)DAVectorUtility.PointCount_Global;
        double MaxPCCumulative = (double)Kmeans.MaxNcent_Global
            * ((double)DAVectorUtility.PointCount_Global * (double)Kmeans.CountKmeansIterations);

        double CCperCenter = (KmeansTriangleInequality.NumberFullDistancesCalculatedCC - CCBeforeIteration) / (double)Kmeans.MaxNcent_Global;
        double PCperPointCenter = (KmeansTriangleInequality.NumberFullDistancesCalculatedPC - PCBeforeIteration) / MaxPCPerIteration;
        double CumulativePCFraction = KmeansTriangleInequality.NumberFullDistancesCalculatedPC / MaxPCCumulative;
        double CumulativePCandCCFraction = (KmeansTriangleInequality.NumberFullDistancesCalculatedPC + KmeansTriangleInequality.NumberFullDistancesCalculatedCC) / MaxPCCumulative;

        DAVectorUtility.SALSAPrint(0, "Iteration " + Kmeans.CountKmeansIterations.ToString()
            + " Average Center Change " + Kmeans.AverageCenterChange.ToString("E4")
            + " Average Radius " + Kmeans.AverageRadius.ToString("E4")
            + " Average Width " + Kmeans.AverageWidth.ToString("E4")
            + " CC calcs per C " + CCperCenter.ToString("F4")
            + " PC calcs per P&C " + PCperPointCenter.ToString("F6")
            + " Cumul PC / Max " + CumulativePCFraction.ToString("F6")
            + " Cumul PC+CC / PC Max " + CumulativePCandCCFraction.ToString("F6"));

        KeepPCfractions.Add(PCperPointCenter);
        KeepCCfractions.Add(CCperCenter / DAVectorUtility.PointCount_Global);

        // Cluster sizes are printed on iterations 1, 11, 21, ... and on the final one
        if (((Kmeans.CountKmeansIterations % 10) == 1) || WillStop)
        {
            System.Text.StringBuilder SizeMessage = new System.Text.StringBuilder(" Sizes");
            for (int CenterIndex = 0; CenterIndex < Kmeans.Ncent_Global; CenterIndex++)
            {
                SizeMessage.Append(" ").Append(Kmeans.ClusterSize[CenterIndex].ToString());
            }
            DAVectorUtility.SALSAPrint(0, SizeMessage.ToString());
        }

        if (WillStop)
        {
            break;
        }
    }

    DAVectorUtility.SALSAPrint(0, "End Kmeans Iterations " + Kmeans.CountKmeansIterations.ToString()
        + " Iteration Cut " + Kmeans.KmeansIterationCut.ToString()
        + " Average Center Change " + Kmeans.AverageCenterChange.ToString("E4")
        + " Average Radius " + Kmeans.AverageRadius.ToString("E4")
        + " Average Width " + Kmeans.AverageWidth.ToString("E4")
        + " Fractional Cut " + Kmeans.KmeansCenterChangeCut.ToString("F4"));
    KmeansTriangleInequality.PrintDiagnostics();

    // One comma-terminated entry per iteration for each of the two saved fractions
    System.Text.StringBuilder messagePC = new System.Text.StringBuilder("\nPC Calcs per Point iteration");
    System.Text.StringBuilder messageCC = new System.Text.StringBuilder("\nCC Calcs per Point iteration");
    int numPC = KeepPCfractions.Count;
    for (int linecount = 0; linecount < numPC; linecount++)
    {
        messagePC.Append(" ").Append(KeepPCfractions[linecount].ToString("F4")).Append(",");
        messageCC.Append(" ").Append(KeepCCfractions[linecount].ToString("F4")).Append(",");
    }
    DAVectorUtility.SALSAPrint(0, messagePC.ToString());
    DAVectorUtility.SALSAPrint(0, messageCC.ToString());

    Ncent_GlobalFINAL = Kmeans.Ncent_Global;
    AverageWidthFINAL = Kmeans.AverageWidth;

    // Print Histograms
    if (KmeansTriangleInequality.UseTriangleInequality != 0)
    {
        double HistogramScale = Math.Sqrt(AverageWidthFINAL);
        KmeansTriangleInequality.PlotPointHistograms(HistogramScale);
        KmeansTriangleInequality.PlotCenterHistograms(HistogramScale);
    }

} // End RunKmeans()
} // End FindClusterCenters(int[] NearestCentertoPoint, double[][] LastClusterCenter)

// Read this process's share of points, and their initial cluster assignment, from a text file.
//
//  fname               File to read; fields are separated by space, comma or tab
//  ClusterPosition     >= 0: column holding the cluster label of each point
//                      <  0: labels are drawn at random in [0, Program.InitialNcent), then
//                            -2  the first Program.InitialNcent points get their own index as label
//                            -3  every divisor-th point gets label (point index / divisor)
//                            -4  every point gets label (point index / divisor)
//                            where divisor = Program.NumberDataPoints / Program.InitialNcent
//  FirstClustervalue   Subtracted from the label read from file; also used as the label
//                      when the label column does not parse as an integer
//  StartPointPosition  Column of the first coordinate; must be >= 0
//
// Lines with too few fields are reported and skipped. Lines whose first coordinate is not
// a number are skipped silently as header lines. Points before
// DAVectorUtility.PointStart_Process are counted but not stored.
// Results go to Kmeans.PointPosition and Kmeans.InitialPointAssignment.
// Throws the exception built by DAVectorUtility.SALSAError for an illegal start position,
// a wrong point count, or when nothing could be read; I/O and parse exceptions are
// logged to the console and rethrown.
public static void ReadDataFromFile(string fname, int ClusterPosition, int FirstClustervalue, int StartPointPosition)
{
    char[] _sep = new[] { ' ', ',', '\t' };

    // ',' is a field separator, so numbers in the file can only use '.' as the decimal
    // mark: parse with the invariant culture rather than the machine's current culture.
    System.Globalization.CultureInfo DataCulture = System.Globalization.CultureInfo.InvariantCulture;
    System.Globalization.NumberStyles DataNumberStyle = System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    int FirstPointPosition = DAVectorUtility.PointStart_Process;
    int TotalNumberPointstoRead = DAVectorUtility.PointCount_Process;
    Random RandomObject = new Random(10101010 + DAVectorUtility.MPI_Rank);
    if (ClusterPosition < 0)
    {
        DAVectorUtility.SALSAPrint(0, "Random Start 10101010 plus rank ******************* Option " + ClusterPosition.ToString());
    }

    if (StartPointPosition < 0)
    {
        Exception e = DAVectorUtility.SALSAError("Illegal Start Position on Points file " + fname + " Rank " + DAVectorUtility.MPI_Rank.ToString()
            + " POsition " + StartPointPosition.ToString() + " Number to Read " + TotalNumberPointstoRead.ToString());
        throw (e);
    }

    // A usable line must reach both the label column and the last coordinate column
    int MinSplitSize = Math.Max(ClusterPosition + 1, StartPointPosition + Kmeans.ParameterVectorDimension);

    bool success = false;
    string line = " Unset";
    int CountLinesinFile = 0;   // counts accepted point lines, not raw file lines

    try
    {
        if (!string.IsNullOrEmpty(fname))
        {
            // using guarantees the file is closed even when a line fails to parse
            using (Stream stream = File.Open(fname, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (StreamReader sr = new StreamReader(stream))
            {
                while (!sr.EndOfStream)
                {
                    line = sr.ReadLine();
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    string[] splits = line.Trim().Split(_sep, StringSplitOptions.RemoveEmptyEntries);
                    if (splits.Length < MinSplitSize)
                    {
                        DAVectorUtility.SALSAPrint(0, "Count " + CountLinesinFile.ToString() + " Illegal data length on Point file "
                            + splits.Length.ToString() + " " + MinSplitSize.ToString() + " " + line);
                        continue;
                    }

                    // Skip header lines
                    double junk;
                    if (!Double.TryParse(splits[StartPointPosition], DataNumberStyle, DataCulture, out junk))
                    {
                        continue;
                    }

                    // Points that precede this process's range are only counted
                    if (CountLinesinFile < FirstPointPosition)
                    {
                        CountLinesinFile += 1;
                        continue;
                    }

                    int ActualPointPosition = CountLinesinFile - FirstPointPosition;

                    // The first two coordinates are always read, as in the original layout
                    Kmeans.PointPosition[ActualPointPosition][0] = double.Parse(splits[StartPointPosition], DataNumberStyle, DataCulture);
                    Kmeans.PointPosition[ActualPointPosition][1] = double.Parse(splits[StartPointPosition + 1], DataNumberStyle, DataCulture);
                    for (int VectorIndex = 2; VectorIndex < Kmeans.ParameterVectorDimension; VectorIndex++)
                    {
                        Kmeans.PointPosition[ActualPointPosition][VectorIndex] = double.Parse(splits[VectorIndex + StartPointPosition], DataNumberStyle, DataCulture);
                    }

                    if (ClusterPosition >= 0)
                    {
                        int label;
                        if (!Int32.TryParse(splits[ClusterPosition], out label))
                        {
                            label = FirstClustervalue;
                        }
                        Kmeans.InitialPointAssignment[ActualPointPosition] = label - FirstClustervalue;
                    }
                    else
                    {
                        // The random draw is always made, even when an option below overrides it
                        Kmeans.InitialPointAssignment[ActualPointPosition] = RandomObject.Next(Program.InitialNcent);

                        if (ClusterPosition == -2)
                        {
                            // Force each cluster to have one point
                            if (CountLinesinFile < Program.InitialNcent)
                            {
                                Kmeans.InitialPointAssignment[ActualPointPosition] = CountLinesinFile;
                            }
                        }
                        if (ClusterPosition == -3)
                        {
                            // NOTE(review): divisor is 0 when NumberDataPoints < InitialNcent -- confirm callers exclude this
                            int divisor = Program.NumberDataPoints / Program.InitialNcent;
                            if (CountLinesinFile % divisor == 0)
                            {
                                Kmeans.InitialPointAssignment[ActualPointPosition] = CountLinesinFile / divisor;
                            }
                        }
                        if (ClusterPosition == -4)
                        {
                            int divisor = Program.NumberDataPoints / Program.InitialNcent;
                            Kmeans.InitialPointAssignment[ActualPointPosition] = CountLinesinFile / divisor;
                        }
                    }

                    ++CountLinesinFile;
                    if (CountLinesinFile >= (FirstPointPosition + TotalNumberPointstoRead))
                    {
                        break;
                    }
                }

                if (CountLinesinFile != (FirstPointPosition + TotalNumberPointstoRead))
                {
                    Exception e = DAVectorUtility.SALSAError("Illegal count on Points file " + fname + " Rank " + DAVectorUtility.MPI_Rank.ToString()
                        + " Lines in File " + CountLinesinFile.ToString() + " Number to Read " + TotalNumberPointstoRead.ToString());
                    throw (e);
                }
                success = true;
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Failed reading Points data " + DAVectorUtility.MPI_Rank.ToString() + " " + CountLinesinFile.ToString()
            + " Start " + FirstPointPosition.ToString() + " Number " + TotalNumberPointstoRead.ToString() + " " + line + e);
        throw;  // rethrow without resetting the stack trace
    }

    // Reached with success == false only when no file name was given
    if (!success)
    {
        Exception e = DAVectorUtility.SALSAError("DA Vector File read error " + fname);
        throw (e);
    }

} // End ReadDataFromFile