Пример #1
0
        // Average width for the current iteration. The value is assigned outside this excerpt;
        // RunKmeans logs it after every iteration and copies it to its AverageWidthFINAL output.
        public static double AverageWidth          = 0.0;              // Average Width each iteration

        /// <summary>
        /// Stores the run configuration in the static <c>Kmeans</c> state, sets up the parallel center
        /// decomposition, allocates the per-process initial assignment array and, when a file name is
        /// supplied, loads the points and their initial assignments through <c>ReadDataFromFile</c>.
        /// </summary>
        /// <param name="PointPositionINPUT">Point coordinate storage; kept by reference in <c>Kmeans.PointPosition</c>.</param>
        /// <param name="_parallelOptionsINPUT">Parallel options kept in <c>Kmeans._parallelOptions</c>.</param>
        /// <param name="FileName">File to read. A null or empty name skips the read, leaving the assignment array at its default zeros.</param>
        /// <param name="ClusterPosition">Forwarded to <c>ReadDataFromFile</c> (cluster label column, or a negative start option).</param>
        /// <param name="FirstClusterValue">Forwarded to <c>ReadDataFromFile</c>; subtracted there from labels read from the file.</param>
        /// <param name="StartingPosition">Forwarded to <c>ReadDataFromFile</c> as the column of the first coordinate.</param>
        /// <param name="Ncent_GlobalINPUT">Stored in <c>Kmeans.Ncent_Global</c>.</param>
        /// <param name="MaxNcent_GlobalINPUT">Stored in <c>Kmeans.MaxNcent_Global</c>.</param>
        /// <param name="Ncent_Global_ParallelINPUT">Stored in <c>Kmeans.Ncent_Global_Parallel</c>.</param>
        /// <param name="ParameterVectorDimensionINPUT">Stored in <c>Kmeans.ParameterVectorDimension</c>.</param>
        /// <param name="CenterChangeINPUT">Stored in <c>Kmeans.KmeansCenterChangeCut</c>.</param>
        /// <param name="IterationCutINPUT">Stored in <c>Kmeans.KmeansIterationCut</c>.</param>
        public static void InitializeKmeans(double[][] PointPositionINPUT, ParallelOptions _parallelOptionsINPUT, string FileName, int ClusterPosition, int FirstClusterValue, int StartingPosition,
                                            int Ncent_GlobalINPUT, int MaxNcent_GlobalINPUT, int Ncent_Global_ParallelINPUT, int ParameterVectorDimensionINPUT, double CenterChangeINPUT, int IterationCutINPUT)
        {
            Kmeans.PointPosition    = PointPositionINPUT;
            Kmeans._parallelOptions = _parallelOptionsINPUT;

            Kmeans.Ncent_Global          = Ncent_GlobalINPUT;
            Kmeans.MaxNcent_Global       = MaxNcent_GlobalINPUT;
            Kmeans.Ncent_Global_Parallel = Ncent_Global_ParallelINPUT;
            Kmeans.KmeansCenterChangeCut = CenterChangeINPUT;
            Kmeans.KmeansIterationCut    = IterationCutINPUT;

            Kmeans.ParameterVectorDimension = ParameterVectorDimensionINPUT;

            Kmeans.SetParallelCenterDecomposition();

            Kmeans.InitialPointAssignment = new int[DAVectorUtility.PointCount_Process];

            // Treat a null name like an empty one (no file to read) instead of failing on FileName.Length.
            if (string.IsNullOrEmpty(FileName))
            {
                return;
            }

            //  Read Initial Assignments
            DAVectorUtility.SALSAPrint(0, "Kmeans Read File " + FileName + " Points " + DAVectorUtility.PointCount_Global.ToString() + " Starting at position "
                                       + StartingPosition.ToString() + " Dimension " + Kmeans.ParameterVectorDimension.ToString() + " Cluster Position " + ClusterPosition.ToString() + " Initial value " + FirstClusterValue.ToString());
            Kmeans.ReadDataFromFile(FileName, ClusterPosition, FirstClusterValue, StartingPosition);
        }   // End InitializeKmeans
Пример #2
0
        }         // End SetupKmeans

        /// <summary>
        /// Runs the K-means iteration loop through <c>KmeansTriangleInequality</c>, logging progress
        /// and distance-calculation statistics after every iteration.
        /// </summary>
        /// <remarks>
        /// The loop ends when <c>Kmeans.KmeansIterationCut</c> iterations have run, or on the 11th
        /// iteration after <c>AverageCenterChange</c> first drops below
        /// <c>AverageRadius * KmeansCenterChangeCut</c>. The criterion is not re-tested once it has been met.
        /// </remarks>
        /// <param name="ClusterCenterINPUT">Caller-owned array adopted as <c>Kmeans.ClusterCenter</c>.</param>
        /// <param name="ClusterSizeINPUT">Caller-owned array adopted as <c>Kmeans.ClusterSize</c>.</param>
        /// <param name="ClusteRadiusINPUT">Caller-owned array adopted as <c>Kmeans.ClusterRadius</c>.</param>
        /// <param name="Ncent_GlobalFINAL">Receives <c>Kmeans.Ncent_Global</c> when the loop has finished.</param>
        /// <param name="AverageWidthFINAL">Receives <c>Kmeans.AverageWidth</c> when the loop has finished.</param>
        public static void RunKmeans(double[][] ClusterCenterINPUT, int[] ClusterSizeINPUT, double[] ClusteRadiusINPUT,
                                     out int Ncent_GlobalFINAL, out double AverageWidthFINAL)
        {
            // Per-iteration fractions, kept so they can be printed as one summary line at the end.
            ArrayList KeepPCfractions = new ArrayList(200);
            ArrayList KeepCCfractions = new ArrayList(200);

            //  Inherit Solution arrays
            Kmeans.ClusterCenter = ClusterCenterINPUT;
            Kmeans.ClusterSize   = ClusterSizeINPUT;
            Kmeans.ClusterRadius = ClusteRadiusINPUT;
            Kmeans.ClusterWidth  = new double[Kmeans.MaxNcent_Global];

            //  Set up TriangleInequality
            KmeansTriangleInequality.SetExternalFunctions(GetClusterRadius, GetClusterCenters, FindClusterCenters);
            KmeansTriangleInequality.InitializeTriangleInequality(Kmeans.PointPosition, Kmeans._parallelOptions, Kmeans.ClusterCenter,
                                                                  Kmeans.Ncent_Global, Kmeans.MaxNcent_Global, Kmeans.Ncent_Global_Parallel, Kmeans.ParameterVectorDimension);

            DAVectorUtility.SALSAPrint(0, "Start Kmeans ****** Number of Centers " + Kmeans.Ncent_Global.ToString() + " Max Number of Centers " + Kmeans.MaxNcent_Global.ToString()
                                       + " Center Limit for Parallelism " + Kmeans.Ncent_Global_Parallel.ToString() + " Vector Dimension " + Kmeans.ParameterVectorDimension.ToString());

            Kmeans.FindClusterCenters(true, Kmeans.InitialPointAssignment, null, null);
            Kmeans.CountKmeansIterations = 0;
            bool StartStop  = false;    // Set once the center-change criterion has been met
            int  CountStops = 0;        // Iterations run since StartStop was set

            while (Kmeans.CountKmeansIterations < Kmeans.KmeansIterationCut)
            {
                // Snapshot the cumulative counters so this iteration's contribution can be isolated.
                double save1 = KmeansTriangleInequality.NumberFullDistancesCalculatedCC;
                double save2 = KmeansTriangleInequality.NumberFullDistancesCalculatedPC;

                KmeansTriangleInequality.NextIteration();
                ++Kmeans.CountKmeansIterations;
                bool WillStop = false;
                if (!StartStop)
                {
                    if (Kmeans.AverageCenterChange < Kmeans.AverageRadius * Kmeans.KmeansCenterChangeCut)
                    {
                        StartStop = true;
                    }
                }
                else
                {
                    ++CountStops;
                    if (CountStops > 10)
                    {
                        WillStop = true;
                    }
                }

                // Convert every factor to double BEFORE multiplying: points * iterations can exceed
                // the integer range for large data sets, which would corrupt the cumulative fractions.
                double MaxCenters          = (double)Kmeans.MaxNcent_Global;
                double GlobalPoints        = (double)DAVectorUtility.PointCount_Global;
                double PointIterations     = GlobalPoints * (double)Kmeans.CountKmeansIterations;
                double CumulativeFullCount = MaxCenters * PointIterations;

                double tmp1 = (KmeansTriangleInequality.NumberFullDistancesCalculatedCC - save1) / MaxCenters;
                double tmp2 = (KmeansTriangleInequality.NumberFullDistancesCalculatedPC - save2) / (MaxCenters * GlobalPoints);
                double tmp3 = KmeansTriangleInequality.NumberFullDistancesCalculatedPC / CumulativeFullCount;
                double tmp4 = (KmeansTriangleInequality.NumberFullDistancesCalculatedPC + KmeansTriangleInequality.NumberFullDistancesCalculatedCC) / CumulativeFullCount;
                DAVectorUtility.SALSAPrint(0, "Iteration " + Kmeans.CountKmeansIterations.ToString() + " Average Center Change " + Kmeans.AverageCenterChange.ToString("E4")
                                           + " Average Radius " + Kmeans.AverageRadius.ToString("E4") + " Average Width " + Kmeans.AverageWidth.ToString("E4")
                                           + " CC calcs per C " + tmp1.ToString("F4") + " PC calcs per P&C " + tmp2.ToString("F6")
                                           + " Cumul PC / Max " + tmp3.ToString("F6") + " Cumul PC+CC / PC Max " + tmp4.ToString("F6"));
                KeepPCfractions.Add(tmp2);
                KeepCCfractions.Add(tmp1 / DAVectorUtility.PointCount_Global);

                // Cluster sizes are printed on iterations 1, 11, 21, ... and on the final iteration.
                if (((Kmeans.CountKmeansIterations % 10) == 1) || WillStop)
                {
                    string message = " Sizes";
                    for (int CenterIndex = 0; CenterIndex < Kmeans.Ncent_Global; CenterIndex++)
                    {
                        message += " " + Kmeans.ClusterSize[CenterIndex].ToString();
                    }
                    DAVectorUtility.SALSAPrint(0, message);
                }
                if (WillStop)
                {
                    break;
                }
            }
            DAVectorUtility.SALSAPrint(0, "End Kmeans Iterations " + Kmeans.CountKmeansIterations.ToString() + " Iteration Cut " + Kmeans.KmeansIterationCut.ToString() +
                                       " Average Center Change " + Kmeans.AverageCenterChange.ToString("E4") + " Average Radius " + Kmeans.AverageRadius.ToString("E4") +
                                       " Average Width " + Kmeans.AverageWidth.ToString("E4") + " Fractional Cut " + Kmeans.KmeansCenterChangeCut.ToString("F4"));
            KmeansTriangleInequality.PrintDiagnostics();
            string messagePC = "\nPC Calcs per Point iteration";
            string messageCC = "\nCC Calcs per Point iteration";
            int    numPC     = KeepPCfractions.Count;

            for (int linecount = 0; linecount < numPC; linecount++)
            {
                messagePC += " " + ((double)KeepPCfractions[linecount]).ToString("F4") + ",";
                messageCC += " " + ((double)KeepCCfractions[linecount]).ToString("F4") + ",";
            }
            DAVectorUtility.SALSAPrint(0, messagePC);
            DAVectorUtility.SALSAPrint(0, messageCC);
            Ncent_GlobalFINAL = Kmeans.Ncent_Global;
            AverageWidthFINAL = Kmeans.AverageWidth;

            //  Print Histograms
            if (KmeansTriangleInequality.UseTriangleInequality != 0)
            {
                KmeansTriangleInequality.PlotPointHistograms(Math.Sqrt(AverageWidthFINAL));
                KmeansTriangleInequality.PlotCenterHistograms(Math.Sqrt(AverageWidthFINAL));
            }
        }   // End RunKmeans()
Пример #3
0
        }   // End FindClusterCenters(int[] NearestCentertoPoint, double[][] LastClusterCenter)

        /// <summary>
        /// Reads this process's slice of the points file into <c>Kmeans.PointPosition</c> and fills
        /// <c>Kmeans.InitialPointAssignment</c> with an initial cluster index for every point read.
        /// </summary>
        /// <remarks>
        /// Fields are separated by spaces, commas or tabs. Lines with too few fields, or whose first
        /// coordinate field is not numeric, are treated as headers: they are skipped and not counted.
        /// Data lines before <c>DAVectorUtility.PointStart_Process</c> are counted but not stored.
        /// Numbers are parsed with the invariant culture, because ',' is a field separator and so
        /// cannot also serve as a decimal mark.
        /// </remarks>
        /// <param name="fname">Path of the points file.</param>
        /// <param name="ClusterPosition">
        /// When non-negative, the column holding each point's cluster label. When negative, every point
        /// gets a random cluster in [0, Program.InitialNcent) from a generator seeded with 10101010 plus
        /// the MPI rank, refined as follows: -2 assigns the first InitialNcent data lines to clusters
        /// 0, 1, 2, ...; -3 seeds one point per cluster at evenly spaced line numbers; -4 assigns
        /// contiguous runs of lines to consecutive clusters.
        /// </param>
        /// <param name="FirstClustervalue">Value subtracted from file labels to make them zero-based; also used as the label when a label field fails to parse.</param>
        /// <param name="StartPointPosition">Column of the first coordinate; must be non-negative.</param>
        /// <exception cref="Exception">
        /// From <c>DAVectorUtility.SALSAError</c> when the start position is negative, the file name is
        /// null or empty, or the file ends before the expected number of data lines. I/O and parse
        /// failures are logged to the console and rethrown unchanged.
        /// </exception>
        public static void ReadDataFromFile(string fname, int ClusterPosition, int FirstClustervalue, int StartPointPosition)
        {
            char[] _sep = new[] { ' ', ',', '\t' };
            System.Globalization.CultureInfo FileCulture = System.Globalization.CultureInfo.InvariantCulture;

            int FirstPointPosition      = DAVectorUtility.PointStart_Process;
            int TotalNumberPointstoRead = DAVectorUtility.PointCount_Process;
            int EndPointPosition        = FirstPointPosition + TotalNumberPointstoRead;
            Random RandomObject = new Random(10101010 + DAVectorUtility.MPI_Rank);

            if (ClusterPosition < 0)
            {
                DAVectorUtility.SALSAPrint(0, "Random Start 10101010 plus rank ******************* Option " + ClusterPosition.ToString());
            }

            if (StartPointPosition < 0)
            {
                Exception e = DAVectorUtility.SALSAError("Illegal Start Position on Points file " + fname + " Rank " + DAVectorUtility.MPI_Rank.ToString()
                                                         + " POsition " + StartPointPosition.ToString() + " Number to Read " + TotalNumberPointstoRead.ToString());
                throw (e);
            }

            // A data line must reach both the label column and the last coordinate column.
            int MinSplitSize = Math.Max(ClusterPosition + 1, StartPointPosition + Kmeans.ParameterVectorDimension);

            bool   success          = false;
            string line             = " Unset";
            int    CountLinesinFile = 0;    // Data lines seen so far (headers excluded); a global line index

            try
            {
                if (!string.IsNullOrEmpty(fname))
                {
                    // using guarantees the file handle is released even when parsing throws.
                    using (Stream stream = File.Open(fname, FileMode.Open, FileAccess.Read, FileShare.Read))
                    using (StreamReader sr = new StreamReader(stream))
                    {
                        // Stop as soon as this process's slice is complete; a process with nothing to
                        // read (after its skipped prefix) therefore never stores a line.
                        while ((CountLinesinFile < EndPointPosition) && !sr.EndOfStream)
                        {
                            line = sr.ReadLine();
                            if (string.IsNullOrEmpty(line))
                            {
                                continue;
                            }

                            string[] splits = line.Trim().Split(_sep, StringSplitOptions.RemoveEmptyEntries);
                            if (splits.Length < MinSplitSize)
                            {
                                DAVectorUtility.SALSAPrint(0, "Count " + CountLinesinFile.ToString() + " Illegal data length on Point file " + splits.Length.ToString()
                                                           + " " + MinSplitSize.ToString() + " " + line);
                                continue;   // Skip header lines
                            }

                            double junk;
                            if (!Double.TryParse(splits[StartPointPosition], System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands, FileCulture, out junk))
                            {
                                continue;   // Skip header lines
                            }
                            if (CountLinesinFile < FirstPointPosition)
                            {
                                // Data line belonging to a lower-ranked process: count it, do not store it.
                                CountLinesinFile += 1;
                                continue;
                            }

                            int ActualPointPosition = CountLinesinFile - FirstPointPosition;

                            for (int VectorIndex = 0; VectorIndex < Kmeans.ParameterVectorDimension; VectorIndex++)
                            {
                                Kmeans.PointPosition[ActualPointPosition][VectorIndex] = double.Parse(splits[VectorIndex + StartPointPosition], FileCulture);
                            }

                            if (ClusterPosition >= 0)
                            {
                                int label;
                                if (!Int32.TryParse(splits[ClusterPosition], System.Globalization.NumberStyles.Integer, FileCulture, out label))
                                {
                                    label = FirstClustervalue;
                                }
                                Kmeans.InitialPointAssignment[ActualPointPosition] = label - FirstClustervalue;
                            }
                            else
                            {
                                // Always draw, even if overwritten below, so the random sequence does
                                // not depend on which option is active.
                                Kmeans.InitialPointAssignment[ActualPointPosition] = RandomObject.Next(Program.InitialNcent);

                                if (ClusterPosition == -2)
                                {   // Force each cluster to have one point
                                    if (CountLinesinFile < Program.InitialNcent)
                                    {
                                        Kmeans.InitialPointAssignment[ActualPointPosition] = CountLinesinFile;
                                    }
                                }
                                else if ((ClusterPosition == -3) || (ClusterPosition == -4))
                                {
                                    // At least 1, so more clusters than points cannot divide by zero.
                                    int divisor = Math.Max(1, Program.NumberDataPoints / Program.InitialNcent);
                                    int SeedCluster = CountLinesinFile / divisor;

                                    if (ClusterPosition == -3)
                                    {
                                        // Seed only valid clusters; leftover lines keep their random draw.
                                        if ((CountLinesinFile % divisor == 0) && (SeedCluster < Program.InitialNcent))
                                        {
                                            Kmeans.InitialPointAssignment[ActualPointPosition] = SeedCluster;
                                        }
                                    }
                                    else
                                    {
                                        // Lines beyond the last full run fold into the last cluster
                                        // instead of producing index InitialNcent.
                                        Kmeans.InitialPointAssignment[ActualPointPosition] = Math.Min(SeedCluster, Program.InitialNcent - 1);
                                    }
                                }
                            }
                            ++CountLinesinFile;
                        }

                        if (CountLinesinFile != EndPointPosition)
                        {
                            Exception e = DAVectorUtility.SALSAError("Illegal count on Points file " + fname + " Rank " + DAVectorUtility.MPI_Rank.ToString()
                                                                     + " Lines in File " + CountLinesinFile.ToString() + " Number to Read " + TotalNumberPointstoRead.ToString());
                            throw (e);
                        }
                        success = true;
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Failed reading Points data " + DAVectorUtility.MPI_Rank.ToString() + " " + CountLinesinFile.ToString() + " Start "
                                  + FirstPointPosition.ToString() + " Number " + TotalNumberPointstoRead.ToString() + " " + line + e);
                throw;  // Rethrow without resetting the stack trace
            }
            if (!success)
            {
                // Only reachable when no file name was supplied.
                Exception e = DAVectorUtility.SALSAError("DA Vector File read error " + fname);
                throw (e);
            }
        } // End ReadDataFromFile