Ejemplo n.º 1
0
        CreateUpdateDesc(
            KMeansOptions options,
            KMeansMapPartition[] workers,
            IEnumerable <Vector> vectors,
            int[] clusterIds,
            int nCenters,
            int nRank,
            int nIteration
            )
        {
            // Builds the descriptor consumed by the center-update step. The
            // descriptor receives this object's four events and its barrier,
            // the caller-supplied inputs, and cleared shared state (null
            // centers / group counts / counts, zero delta).
            // Members are listed in the order they must be assigned.
            return(new KMeansUpdateCenters
            {
                m_evtMapWorkComplete     = m_evtMapWorkComplete,
                m_evtMapWorkAvailable    = m_evtMapWorkAvailable,
                m_evtUpdateWorkComplete  = m_evtUpdateWorkComplete,
                m_evtUpdateWorkAvailable = m_evtUpdateWorkAvailable,
                m_barrier                = m_barrier,
                m_nIterations            = nIteration,
                m_nCenters               = nCenters,
                m_nRank                  = nRank,
                m_nPoints                = vectors.Count(),   // enumerates 'vectors' once
                m_vectors                = vectors,
                m_sharedCenters          = null,
                m_sharedGroupCounts      = null,
                m_counts                 = null,
                m_clusterIds             = clusterIds,
                m_fDelta                 = 0.0f,
                m_workers                = workers,
                m_options                = options
            });
        }
Ejemplo n.º 2
0
        CreateMapPartitionDesc(
            KMeansOptions options,
            Vector[] vectors,
            Vector[] initCenters,
            int[] clusterIds,
            int nCenters,
            int nRank,
            int nId,
            int nStartIndex,
            int nPartitionSize
            )
        {
            // Builds the descriptor for one map partition: the slice
            // [nStartIndex, nStartIndex + nPartitionSize) of 'vectors' handled
            // by worker 'nId'. The descriptor shares this object's events and
            // barrier and starts with no new centers / group counts and a
            // zero delta. The 'options' argument is not read here.
            return(new KMeansMapPartition
            {
                m_evtMapWorkComplete     = m_evtMapWorkComplete,
                m_evtMapWorkAvailable    = m_evtMapWorkAvailable,
                m_evtUpdateWorkComplete  = m_evtUpdateWorkComplete,
                m_evtUpdateWorkAvailable = m_evtUpdateWorkAvailable,
                m_barrier                = m_barrier,
                m_nId                    = nId,
                m_nStartIndex            = nStartIndex,
                m_nPartitionSize         = nPartitionSize,
                m_nCenters               = nCenters,
                m_nRank                  = nRank,
                m_nPoints                = vectors.Count(),
                m_oldCenters             = initCenters,
                m_newCenters             = null,
                m_newGroupCounts         = null,
                m_clusterIds             = clusterIds,
                m_fDelta                 = 0.0f,
                m_vectors                = vectors
            });
        }
Ejemplo n.º 3
0
        SelectVersionsToCompare(
            KMeansOptions options
            )
        {
            // Expands the implementation names requested in
            // options.m_vImplementations into the concrete list of runs.
            // A parallelizable implementation yields one "<name>-<threads>"
            // entry per thread count in the sweep; any other implementation
            // yields a single entry carrying just its name.
            //
            // The sweep starts at the configured minimum, doubles while below
            // 16 and then advances in steps of 8, and stops once it exceeds
            // either the machine's processor count or the configured maximum.
            List <String> runnableVersions = new List <string>();

            // A minimum of zero (or less) would never advance under doubling
            // (0 * 2 == 0), so the sweep would append entries forever.
            // Clamp to one thread so the loop always terminates.
            int nFirstDegree = System.Math.Max(1, options.m_nMinimumThreadsSweep);

            foreach (String sVersion in options.m_vImplementations.Keys.ToList())
            {
                if (!IsParallelizableVersion(sVersion))
                {
                    runnableVersions.Add(sVersion);
                    continue;
                }

                int procs = nFirstDegree;
                while (procs <= System.Environment.ProcessorCount &&
                       procs <= options.m_nMaximumThreadsSweep)
                {
                    runnableVersions.Add(sVersion + "-" + procs);
                    procs = (procs < 16) ? (procs * 2) : (procs + 8);
                }
            }
            return(runnableVersions);
        }
Ejemplo n.º 4
0
        execute(
            KMeansOptions options,
            IEnumerable <Vector> vectors,
            ref Vector[] centers,
            out int[] clusterIds,
            int nParallelDegree = 1,
            bool bAvoidLazyEval = true
            )
        {
            // Runs LINQKMeans.Steps for options.m_nMaxIterations iterations and
            // stores the resulting centers back into 'centers'.
            //
            // clusterIds is always set to null by this implementation.
            // The return value is options.m_nMaxIterations.
            m_pStopwatch.Start();
            m_nStartParallelPhase = m_pStopwatch.ElapsedMilliseconds;

            IEnumerable <Vector> result = LINQKMeans.Steps(vectors,
                                                           centers,
                                                           options.m_nMaxIterations,
                                                           nParallelDegree,
                                                           bAvoidLazyEval);

            // Materialize exactly once, inside the timed region. Forcing the
            // sequence with Count() and then calling ToArray() afterwards
            // would enumerate it twice, repeating the whole computation
            // (untimed) whenever the sequence is deferred.
            Vector[] computed = result.ToArray();

            m_pStopwatch.Stop();
            m_nEndParallelPhase = m_pStopwatch.ElapsedMilliseconds;

            clusterIds = null;
            centers    = computed;

            return(options.m_nMaxIterations);
        }
Ejemplo n.º 5
0
 // Body-less declaration of execute(...); the implementation is supplied elsewhere.
 // NOTE(review): the other execute(...) definitions in this listing take the same
 // parameter list and return an iteration count -- presumably they implement this
 // declaration; confirm against the enclosing type.
 execute(
     KMeansOptions options,
     IEnumerable <Vector> vectors,
     ref Vector[] centers,          // passed by reference: the callee may replace the array
     out int[] clusterIds,          // must be assigned by the callee before it returns
     int nMaxParallelism = 1,       // optional; 1 when omitted
     bool bAvoidLazyEval = true     // optional; true when omitted
     );
Ejemplo n.º 6
0
        CheckResult(
            KMeansOptions options,
            IEnumerable <Vector> centers,
            IEnumerable <Vector> refnewcenters,
            bool bVerbose   = true,
            bool bDumpLists = false
            )
        {
            // Compares candidate centers against reference centers.
            //
            // Returns false when the two sequences differ in length, or when
            // any candidate has no reference within options.m_fEpsilon (as
            // decided by ValuePresent). The match is one-directional: it does
            // not check that every reference is matched by some candidate.
            //
            // bVerbose   : print a short diagnostic line on failure.
            // bDumpLists : on a mismatch, also print every reference center
            //              and each candidate that could not be matched.

            // Materialize both inputs once. They are counted, iterated and
            // scanned repeatedly below, which would re-run a deferred query
            // each time.
            List <Vector> candidates = centers.ToList();
            List <Vector> references = refnewcenters.ToList();

            if (candidates.Count != references.Count)
            {
                if (bVerbose)
                {
                    Console.WriteLine("different lengths!");
                }
                return(false);
            }

            // Collect the unmatched candidates themselves instead of their
            // indexes, so the dump below needs no ElementAt() lookups.
            List <Vector> unmatched = new List <Vector>();
            foreach (Vector center in candidates)
            {
                if (!ValuePresent(options, center, references))
                {
                    unmatched.Add(center);
                }
            }

            bool bResult = (unmatched.Count == 0);
            if (!bResult && bDumpLists)
            {
                if (bVerbose)
                {
                    Console.WriteLine("center lists differ!");
                }
                Console.WriteLine("REF:");
                foreach (Vector center in references)
                {
                    Console.WriteLine("\t{0}", center);
                }
                Console.WriteLine("CAND: could not match:");
                foreach (Vector center in unmatched)
                {
                    Console.WriteLine("\t{0}", center);
                }
            }
            return(bResult);
        }
Ejemplo n.º 7
0
 ValuePresent(
     KMeansOptions options,
     Vector center,
     IEnumerable <Vector> refcenters
     )
 {
     // Reports whether any reference center lies strictly closer to
     // 'center' than options.m_fEpsilon, measured with Vector.Dist.
     // The scan stops at the first such reference.
     bool bFound = false;

     foreach (Vector candidate in refcenters)
     {
         if (Vector.Dist(center, candidate) < options.m_fEpsilon)
         {
             bFound = true;
             break;
         }
     }
     return(bFound);
 }
Ejemplo n.º 8
0
        execute(
            KMeansOptions options,
            IEnumerable <Vector> vectors,
            ref Vector[] centers,
            out int[] clusterIds,
            int nMaxParallelism,
            bool bAvoidLazyEval
            )
        {
            // Delegates to ComputeNewCenters, which assigns clusterIds and the
            // iteration count, and stores the centers it returns back into
            // 'centers'. Returns the number of iterations performed.
            //
            // nMaxParallelism and bAvoidLazyEval are not used by this
            // implementation. The former unused local (nCenters) and the
            // redundant pre-assignment of clusterIds have been removed.
            int nIterations;

            centers = ComputeNewCenters(options, vectors, centers, out clusterIds, out nIterations);
            return(nIterations);
        }
Ejemplo n.º 9
0
 SelectImplementation(
     KMeansOptions options,
     String sVersion,
     out int nMaxParallelism,
     out bool bAvoidLazyEval
     )
 {
     // Maps a version string to a calculator instance.
     //
     //   "PLINQ-<n>", "threaded-<n>", "tasks-<n>" : parallel implementations;
     //       <n> (the text after the first '-') becomes nMaxParallelism.
     //   "seq", "LINQ"                             : nMaxParallelism is 1.
     //
     // bAvoidLazyEval is always set to true. Returns null when the version
     // string is not recognized. Int32.Parse throws if a parallel version
     // string carries no parsable thread count.

     // Defaults; the parallelism is overwritten below for parallel versions.
     nMaxParallelism = 1;
     bAvoidLazyEval  = true;

     // The names are fixed identifiers, so match them ordinally rather than
     // with the culture-sensitive default of String.StartsWith(String).
     bool bIsPLINQ    = sVersion.StartsWith("PLINQ", StringComparison.Ordinal);
     bool bIsThreaded = sVersion.StartsWith("threaded", StringComparison.Ordinal);
     bool bIsTasks    = sVersion.StartsWith("tasks", StringComparison.Ordinal);

     if (bIsPLINQ || bIsThreaded || bIsTasks)
     {
         int nDashIndex = sVersion.IndexOf('-');
         nMaxParallelism = Int32.Parse(sVersion.Substring(nDashIndex + 1));

         if (bIsPLINQ)
         {
             return(new PLINQKMeans(options));
         }
         if (bIsThreaded)
         {
             return(new ThreadedKMeans(options));
         }
         return(new TaskingKMeans(options));
     }

     if (sVersion == "seq")
     {
         return(new SequentialKMeans(options));
     }
     if (sVersion == "LINQ")
     {
         return(new LINQKMeans(options));
     }
     return(null);
 }
Ejemplo n.º 10
0
        execute(
            KMeansOptions options,
            IEnumerable <Vector> vectors,
            ref Vector[] centers,
            out int[] clusterIds,
            int nMaxParallelism = 1,
            bool bAvoidLazyEval = true
            )
        {
            // Runs Compute for options.m_nMaxIterations iterations and stores
            // the resulting centers, converted to an array, into 'centers'.
            // clusterIds is always null; nMaxParallelism is not used here.
            // The return value is options.m_nMaxIterations.
            clusterIds = null;

            var computed = Compute(vectors,
                                   centers,
                                   options.m_nMaxIterations,
                                   bAvoidLazyEval);
            centers = computed.ToArray();

            return(options.m_nMaxIterations);
        }
Ejemplo n.º 11
0
        execute(
            KMeansOptions options,
            IEnumerable <Vector> vectors,
            ref Vector[] centers,
            out int[] clusterIds,
            int nMaxParallelism = 1,
            bool bAvoidLazyEval = true
            )
        {
            // Records the requested degree of parallelism on the options
            // object, then delegates to ComputeNewCenters, which assigns
            // clusterIds and the iteration count. The centers it returns
            // replace 'centers'. bAvoidLazyEval is not used here.
            options.m_nCurrentThreadsSweep = nMaxParallelism;

            int nCompletedIterations;
            centers = ComputeNewCenters(options, vectors, centers, options.m_nClusters,
                                        out clusterIds, out nCompletedIterations);

            return(nCompletedIterations);
        }
Ejemplo n.º 12
0
        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Program entry point: parses the options, then either generates an
        ///             input file or loads one and compares the implementations. </summary>
        ///
        /// <remarks>   crossbac, 8/6/2013. </remarks>
        ///
        /// <param name="args"> Command-line arguments, forwarded to KMeansOptions.getOptions. </param>
        ///-------------------------------------------------------------------------------------------------

        static void Main(string[] args)
        {
            KMeansOptions options = KMeansOptions.getOptions(args);

            // No options object means there is nothing to run.
            if (options == null)
            {
                return;
            }

            // Data-generation mode writes an input file and exits.
            if (options.m_bGenerateData)
            {
                GenerateRandomInput(options.m_strFileName,
                                    options.m_nGenerateElems,
                                    options.m_nGenerateDims);
                return;
            }

            // Load the input in the format selected on the command line.
            Vector[] attributes;
            if (options.m_bBinaryInput)
            {
                attributes = KMeansCalculator.ReadBinaryInput(options.m_strFileName);
            }
            else
            {
                attributes = KMeansCalculator.ReadTextInput(options.m_strFileName);
            }

            ComparePerformance(options, attributes);
        }
Ejemplo n.º 13
0
        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Creates a threaded calculator bound to the given options. </summary>
        ///
        /// <remarks>   Chris Rossbach, 8/2/2012. </remarks>
        ///
        /// <param name="options">  Options object, kept in m_options. </param>
        ///-------------------------------------------------------------------------------------------------

        public ThreadedKMeans(KMeansOptions options)
        {
            this.m_options = options;
        }
Ejemplo n.º 14
0
        ComputeNewCenters(
            KMeansOptions options,
            IEnumerable <Vector> vectors,
            Vector[] centers,
            out int[] clusterIds,
            out int nIterations
            )
        {
            // Single-threaded k-means loop.
            //
            // Each iteration assigns every point to its nearest center (via
            // FindNearestCenter), averages the points of each cluster into a
            // new center, and measures the fraction of points whose cluster
            // changed. The loop ends when that fraction is no greater than
            // options.m_fConvergenceThreshold or after
            // options.m_nMaxIterations iterations.
            //
            // On return 'centers' holds the final centers (the caller's array
            // is overwritten in place and also returned), clusterIds holds the
            // cluster index of each point, and nIterations the number of
            // iterations performed. Throws if 'vectors' is empty.

            // Materialize once so the per-point access below is an O(1)
            // indexer call even when the caller passes a non-list sequence.
            IList <Vector> points = (vectors as IList <Vector>) ?? vectors.ToList();

            float delta      = 0.0f;
            int   iterations = 0;
            int   nRank      = points.ElementAt(0).Rank;
            int   nPoints    = points.Count;
            int   nCenters   = centers.Length;

            // -1 marks "not yet assigned", so every point counts as changed
            // in the first iteration.
            clusterIds = new int[nPoints];
            for (int i = 0; i < nPoints; i++)
            {
                clusterIds[i] = -1;
            }

            Vector[] newCenters  = new Vector[nCenters];
            Vector[] oldCenters  = new Vector[nCenters];
            int[]    groupCounts = new int[nCenters];
            for (int i = 0; i < nCenters; i++)
            {
                groupCounts[i] = 0;
                newCenters[i]  = Vector.ZeroVector(nRank);
                oldCenters[i]  = new Vector(centers[i]);
            }

            m_pStopwatch.Start();
            m_nStartParallelPhase = m_pStopwatch.ElapsedMilliseconds;

            do
            {
                delta = 0.0f;
                for (int i = 0; i < nPoints; i++)
                {
                    Vector vec    = points[i];
                    int    nIndex = FindNearestCenter(vec, oldCenters);
                    if (clusterIds[i] != nIndex)
                    {
                        delta += 1.0f;
                    }
                    clusterIds[i]        = nIndex;
                    newCenters[nIndex]  += vec;
                    groupCounts[nIndex] += 1;
                }

                for (int i = 0; i < nCenters; i++)
                {
                    if (groupCounts[i] > 0)
                    {
                        newCenters[i] /= groupCounts[i];
                    }
                    else
                    {
                        // Empty cluster: dividing by a zero count would
                        // corrupt the center, so keep its previous position.
                        newCenters[i] = new Vector(oldCenters[i]);
                    }
                }
                delta /= nPoints;
                iterations++;

                // Swap the buffers: the freshly computed centers become the
                // reference set, and the other buffer is cleared for reuse.
                Vector[] tCenters = oldCenters;
                oldCenters = newCenters;
                newCenters = tCenters;
                for (int i = 0; i < nCenters; i++)
                {
                    groupCounts[i] = 0;
                    newCenters[i].clear();
                }
            } while((delta > options.m_fConvergenceThreshold) && (iterations < options.m_nMaxIterations));

            for (int i = 0; i < nCenters; i++)
            {
                centers[i] = oldCenters[i];
            }

            nIterations = iterations;

            // Stop the stopwatch before recording the end time, as the other
            // implementations in this listing do.
            m_pStopwatch.Stop();
            m_nEndParallelPhase = m_pStopwatch.ElapsedMilliseconds;

            return(centers);
        }
Ejemplo n.º 15
0
        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Runs every selected implementation options.m_nTotalRuns times on the same
        ///             input and initial centers, optionally checks each result against the
        ///             reference implementation, and prints the per-version runtimes. </summary>
        ///
        /// <remarks>   Chris Rossbach, 8/7/2012.
        ///             Prints "FAILED" and returns early when a result check fails or when a
        ///             requested version name matches no known implementation; otherwise prints
        ///             "SUCCEEDED" followed by one line per version. </remarks>
        ///
        /// <param name="options">      Options for controlling the operation. </param>
        /// <param name="attributes">   The input vectors to cluster. </param>
        ///-------------------------------------------------------------------------------------------------

        static void ComparePerformance(
            KMeansOptions options,
            IEnumerable <Vector> attributes
            )
        {
            List <String> runnableVersions         = SelectVersionsToCompare(options);
            Dictionary <String, Sample[]> perfdata = new Dictionary <string, Sample[]>();

            // One sample slot per (version, run).
            foreach (String version in runnableVersions)
            {
                Sample[] vdata = new Sample[options.m_nTotalRuns];
                for (int i = 0; i < options.m_nTotalRuns; i++)
                {
                    vdata[i] = new Sample();
                }
                perfdata[version] = vdata;
            }

            IEnumerable <Vector> goldcenters =
                SelectInitialCenters(attributes,
                                     options.m_nClusters,
                                     options.m_nRandomSeed);

            for (int i = 0; i < options.m_nTotalRuns; i++)
            {
                long lReferenceImplTime            = 0;
                IEnumerable <Vector> refnewcenters = null;
                if (options.m_bCheckResult)
                {
                    Vector[]        refcenters = DuplicateCenters(goldcenters);
                    ReferenceKMeans refkmeans  = new ReferenceKMeans();
                    refnewcenters      = refkmeans.Compute(attributes, refcenters, options.m_nMaxIterations, true);
                    lReferenceImplTime = refkmeans.RuntimeMilliseconds;
                }

                foreach (String sVersion in runnableVersions)
                {
                    bool             bAvoidLazyEval  = true;
                    int              nMaxParallelism = 1;
                    int[]            rClusterIds     = null;
                    Vector[]         newcenters      = DuplicateCenters(goldcenters);
                    KMeansCalculator kmeans          = SelectImplementation(options, sVersion, out nMaxParallelism, out bAvoidLazyEval);

                    // SelectImplementation returns null for a name it does not
                    // recognize; report it instead of dereferencing null.
                    if (kmeans == null)
                    {
                        Console.WriteLine("Unknown implementation: " + sVersion);
                        Console.WriteLine("FAILED");
                        return;
                    }

                    int    nIterations = kmeans.execute(options, attributes, ref newcenters, out rClusterIds, nMaxParallelism, bAvoidLazyEval);
                    Sample isample     = perfdata[sVersion][i];
                    isample.m_impltime = kmeans.RuntimeMilliseconds;
                    isample.m_success  = true;
                    isample.m_reftime  = 0;
                    if (options.m_bCheckResult)
                    {
                        isample.m_reftime = lReferenceImplTime;
                        isample.m_success = KMeansCalculator.CheckResult(options,
                                                                         newcenters,
                                                                         refnewcenters,
                                                                         options.m_bVerbose,
                                                                         options.m_bVerbose);
                        if (!isample.m_success)
                        {
                            Console.WriteLine("FAILED");
                            return;
                        }
                    }
                }
            }

            Console.WriteLine("SUCCEEDED");
            foreach (String v in perfdata.Keys)
            {
                Sample[]    samples = perfdata[v];
                Performance perf    = new Performance(samples);
                Console.WriteLine("{0,-15}: {1}, avg={2}", v, perf.RawRuntimes(), perf.m_impltime.ToString("f1"));
            }
        }
Ejemplo n.º 16
0
        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Creates a task-based calculator bound to the given options. </summary>
        ///
        /// <remarks>   Chris Rossbach, 8/2/2012. </remarks>
        ///
        /// <param name="options">  Options object, kept in m_options. </param>
        ///-------------------------------------------------------------------------------------------------

        public TaskingKMeans(KMeansOptions options)
        {
            this.m_options = options;
        }
Ejemplo n.º 17
0
        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Parses the command line into a KMeansOptions object. </summary>
        ///
        /// <remarks>   Returns null after printing the usage text when -h is given, when an
        ///             unknown option is seen, or when an option value cannot be parsed. </remarks>
        ///
        /// <param name="args"> Array of command-line argument strings. </param>
        ///
        /// <returns>   The populated options, or null if the program should not proceed. </returns>
        ///-------------------------------------------------------------------------------------------------

        public static KMeansOptions getOptions(string[] args)
        {
            // The option string must list every letter handled in the switch
            // below. 'C' and 'h' were missing from it, which made their cases
            // unreachable (such letters are reported as unknown options).
            Getopt.Getopt g       = new Getopt.Getopt("KMeans", args, "cCr:x:i:m:T:e:bR:X:K:I:vV:gd:E:h");
            KMeansOptions options = new KMeansOptions();

            try {
                int c;
                while ((c = g.getopt()) != -1)
                {
                    switch (c)
                    {
                    case 'g':
                        options.m_bGenerateData = true;
                        break;

                    case 'd':
                        options.m_nGenerateDims = Int32.Parse(g.Optarg);
                        break;

                    case 'E':
                        options.m_nGenerateElems = Int32.Parse(g.Optarg);
                        break;

                    case 'v':
                        options.m_bVerbose = true;
                        break;

                    case 'V':
                        // Only the key matters: it names an implementation to run.
                        options.m_vImplementations[g.Optarg] = null;
                        break;

                    case 'X':
                        options.m_nMaximumThreadsSweep = Int32.Parse(g.Optarg);
                        break;

                    case 'K':
                        options.m_nPartitionSize = Int32.Parse(g.Optarg);
                        break;

                    case 'R':
                        options.m_nRandomSeed = Int32.Parse(g.Optarg);
                        break;

                    case 'c':
                        options.m_bCheckResult = true;
                        break;

                    case 'C':
                        options.m_bComparePerformance = true;
                        break;

                    case 'r':
                        options.m_nTotalRuns = Int16.Parse(g.Optarg);
                        break;

                    case 'T':
                        options.m_fConvergenceThreshold = float.Parse(g.Optarg);
                        break;

                    case 'e':
                        options.m_fEpsilon = float.Parse(g.Optarg);
                        break;

                    case 'x':
                        options.m_nMinimumThreadsSweep = int.Parse(g.Optarg);
                        break;

                    case 'i':
                        options.m_strFileName = g.Optarg;
                        break;

                    case 'I':
                        options.m_nMaxIterations = int.Parse(g.Optarg);
                        break;

                    case 'm':
                        options.m_nClusters = int.Parse(g.Optarg);
                        break;

                    case 'b':
                        options.m_bBinaryInput = true;
                        break;

                    case 'h':
                        PrintUsage();
                        return(null);

                    default:
                        Console.WriteLine("Unknown option: " + (Char)c);
                        PrintUsage();
                        return(null);
                    }
                }
            } catch (Exception e) {
                // Covers malformed numeric values as well as parser failures.
                Console.WriteLine("Options exception: " + e.Message);
                PrintUsage();
                return(null);
            }
            return(options);
        }
Ejemplo n.º 18
0
        ComputeNewCenters(
            KMeansOptions options,
            IEnumerable <Vector> vectors,
            Vector[] centers,
            int nCenters,
            out int[] clusterIds,
            out int nIterations
            )
        {
            // Thread-based k-means driver.
            //
            // Splits the input into options.m_nCurrentThreadsSweep contiguous
            // partitions, starts one mapper thread per partition
            // (ThreadedKMeans.MapPartition) plus one updater thread
            // (ThreadedKMeans.UpdateCenters), signals that map work is
            // available, and waits for the updater to finish.
            //
            // On return clusterIds holds the array created by
            // CreateInitialClusterIds (shared with all workers), nIterations
            // is the updater's iteration count, and the returned array is the
            // updater's m_sharedCenters. Throws if 'vectors' is empty.
            int nRank   = vectors.ElementAt(0).Rank;
            int nPoints = vectors.Count();

            clusterIds = CreateInitialClusterIds(nPoints);

            m_bMapPhasesComplete    = false;
            m_bUpdatePhasesComplete = false;
            m_evtMapWorkAvailable.Reset();
            m_evtMapWorkComplete.Reset();

            // Post-phase action: runs once each time all participants have
            // reached the barrier.
            m_barrier = new Barrier(options.m_nCurrentThreadsSweep, (b) => {
                m_evtMapWorkAvailable.Reset();
                m_evtMapWorkComplete.Set();
                if (m_bVerbose)
                {
                    Console.WriteLine("reached barrier!");
                }
            });

            KMeansMapPartition[] partitions;
            Thread[]             mappers;
            int nVectors = nPoints;
            int nWorkers = options.m_nCurrentThreadsSweep;

            // Round the partition size UP so nWorkers partitions always cover
            // every vector. Rounding to nearest left the trailing vectors
            // unassigned whenever the quotient rounded down (10 vectors over
            // 4 workers covered only 8).
            int nPartitionSize = (int)Math.Ceiling((double)nVectors / (double)nWorkers);

            partitions = new KMeansMapPartition[nWorkers];
            mappers    = new Thread[nWorkers];

            for (int i = 0; i < nWorkers; i++)
            {
                // Clamp both values so that workers beyond the end of the
                // input receive an empty partition, never a negative size.
                int nStartIndex          = Math.Min(i * nPartitionSize, nVectors);
                int nWorkerPartitionSize = Math.Min(nPartitionSize, nVectors - nStartIndex);

                partitions[i] = CreateMapPartitionDesc(options,
                                                       vectors,
                                                       centers,
                                                       clusterIds,
                                                       nCenters,
                                                       nRank,
                                                       i,
                                                       nStartIndex,
                                                       nWorkerPartitionSize);
                mappers[i] = new Thread(ThreadedKMeans.MapPartition);
                mappers[i].Start(partitions[i]);
            }

            KMeansUpdateCenters update;

            update = CreateUpdateDesc(options,
                                      partitions,
                                      vectors,
                                      clusterIds,
                                      nCenters,
                                      nRank,
                                      0);

            Thread updater = new Thread(ThreadedKMeans.UpdateCenters);

            // The updater is started before timing begins; the timed region
            // opens just before map work is signalled as available.
            updater.Start(update);
            m_pStopwatch.Start();
            m_nStartParallelPhase = m_pStopwatch.ElapsedMilliseconds;
            m_evtMapWorkAvailable.Set();
            updater.Join();
            m_pStopwatch.Stop();
            nIterations         = update.m_nIterations;
            centers             = update.m_sharedCenters;
            m_nEndParallelPhase = m_pStopwatch.ElapsedMilliseconds;

            return(centers);
        }
Ejemplo n.º 19
0
        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Creates a sequential calculator bound to the given options. </summary>
        ///
        /// <remarks>   Chris Rossbach, 8/2/2012. </remarks>
        ///
        /// <param name="options">  Options object, kept in m_options. </param>
        ///-------------------------------------------------------------------------------------------------

        public SequentialKMeans(KMeansOptions options)
        {
            this.m_options = options;
        }
Ejemplo n.º 20
0
 /// <summary>   Creates a LINQ-based calculator bound to the given options. </summary>
 /// <param name="options">  Options object, kept in m_options. </param>
 public LINQKMeans(KMeansOptions options)
 {
     this.m_options = options;
 }