CreateMapPartitionDesc(
    KMeansOptions options,
    Vector[] vectors,
    Vector[] initCenters,
    int[] clusterIds,
    int nCenters,
    int nRank,
    int nId,
    int nStartIndex,
    int nPartitionSize
    )
{
    // Builds the descriptor handed to one map worker. The descriptor carries
    // the shared synchronization objects (events and barrier), the worker's id
    // and index range, the problem dimensions, and references to the input
    // vectors, initial centers and cluster-id array. The per-iteration
    // accumulators start out null and the delta starts at zero.
    // Note: 'options' is accepted but not read by this method.
    KMeansMapPartition descriptor = new KMeansMapPartition
    {
        // shared synchronization state
        m_evtMapWorkComplete = m_evtMapWorkComplete,
        m_evtMapWorkAvailable = m_evtMapWorkAvailable,
        m_evtUpdateWorkComplete = m_evtUpdateWorkComplete,
        m_evtUpdateWorkAvailable = m_evtUpdateWorkAvailable,
        m_barrier = m_barrier,

        // identity and index range of this worker
        m_nId = nId,
        m_nStartIndex = nStartIndex,
        m_nPartitionSize = nPartitionSize,

        // problem dimensions
        m_nCenters = nCenters,
        m_nRank = nRank,
        m_nPoints = vectors.Count(),

        // working data
        m_oldCenters = initCenters,
        m_newCenters = null,
        m_newGroupCounts = null,
        m_clusterIds = clusterIds,
        m_fDelta = 0.0f,
        m_vectors = vectors
    };
    return descriptor;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Thread entry point for a map worker. Each time map work is made available, assigns
///             every vector in this worker's index range to its nearest center, records how many
///             assignments changed, and accumulates per-center vector sums and member counts.
///             Then waits at the barrier with the other workers. Returns once the map phases
///             are flagged complete. </summary>
///
/// <remarks>   Chris Rossbach ([email protected]), 8/7/2012. </remarks>
///
/// <exception cref="ArgumentException">    Thrown when <paramref name="oPartition"/> is null or
///                                         is not a KMeansMapPartition. </exception>
///
/// <param name="oPartition">   The KMeansMapPartition describing this worker's slice of the
///                             input (passed as object because this is a thread start routine).
///                             </param>
///-------------------------------------------------------------------------------------------------
public static void MapPartition(object oPartition)
{
    KMeansMapPartition partition = oPartition as KMeansMapPartition;
    if (partition == null)
    {
        throw new ArgumentException("MapPartition requires a non-null KMeansMapPartition argument.", "oPartition");
    }

    while (!m_bMapPhasesComplete)
    {
        // Block until new work is published (or shutdown is signalled).
        partition.m_evtMapWorkAvailable.WaitOne();
        if (m_bMapPhasesComplete)
        {
            break;
        }

        // Reset the per-iteration accumulators only now, after work has been made
        // available again. Resetting them immediately after the barrier (as before)
        // could replace them while the updater thread was still reading this
        // worker's results from the previous iteration.
        partition.m_newCenters = KMeansCalculator.CreateCenterAccumulatorList(partition.m_nCenters, partition.m_nRank);
        partition.m_newGroupCounts = KMeansCalculator.CreateGroupCountList(partition.m_nCenters, partition.m_nRank);
        partition.m_fDelta = 0.0f;

        // m_nPartitionSize is a count, not an end index: the range handled by this
        // worker is [m_nStartIndex, m_nStartIndex + m_nPartitionSize).
        int nEndIndex = partition.m_nStartIndex + partition.m_nPartitionSize;
        for (int i = partition.m_nStartIndex; i < nEndIndex; i++)
        {
            Vector vec = partition.m_vectors.ElementAt(i);
            int nIndex = FindNearestCenter(vec, partition.m_oldCenters);
            if (partition.m_clusterIds[i] != nIndex)
            {
                // membership changed; counted towards the convergence delta
                partition.m_fDelta += 1.0f;
            }
            partition.m_clusterIds[i] = nIndex;
            partition.m_newCenters[nIndex] += vec;
            partition.m_newGroupCounts[nIndex] += 1;
        }

        // Rendezvous with the other map workers before the next round.
        partition.m_barrier.SignalAndWait();
    }
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Thread entry point for the update (reduce) step. After each map round completes,
///             sums the per-worker center accumulators, group counts and deltas, divides each
///             accumulated center by its group count, and either publishes the new centers to
///             the workers for another round or flags both phases complete and returns. </summary>
///
/// <remarks>   Chris Rossbach ([email protected]), 8/7/2012. </remarks>
///
/// <exception cref="ArgumentException">    Thrown when <paramref name="oUpdate"/> is null or is
///                                         not a KMeansUpdateCenters. </exception>
///
/// <param name="oUpdate">  The KMeansUpdateCenters descriptor (passed as object because this is
///                         a thread start routine). </param>
///-------------------------------------------------------------------------------------------------
public static void UpdateCenters(object oUpdate)
{
    KMeansUpdateCenters update = oUpdate as KMeansUpdateCenters;
    if (update == null)
    {
        throw new ArgumentException("UpdateCenters requires a non-null KMeansUpdateCenters argument.", "oUpdate");
    }

    update.m_nIterations = 0;
    while (!m_bUpdatePhasesComplete)
    {
        // Fresh accumulators for this round. A new list is allocated every round,
        // so the list handed to the workers as m_oldCenters in the previous round
        // is never overwritten.
        update.m_sharedCenters = KMeansCalculator.CreateCenterAccumulatorList(update.m_nCenters, update.m_nRank);
        update.m_sharedGroupCounts = KMeansCalculator.CreateGroupCountList(update.m_nCenters, update.m_nRank);
        update.m_fDelta = 0.0f;

        // Wait for the map round to finish.
        update.m_evtMapWorkComplete.WaitOne();
        if (m_bVerbose)
        {
            Console.WriteLine("update...");
        }

        // Fold every worker's partial results into the shared accumulators.
        foreach (KMeansMapPartition worker in update.m_workers)
        {
            update.m_fDelta += worker.m_fDelta;
            for (int i = 0; i < update.m_nCenters; i++)
            {
                update.m_sharedCenters[i] += worker.m_newCenters[i];
                update.m_sharedGroupCounts[i] += worker.m_newGroupCounts[i];
            }
        }

        // Turn the sums into means.
        // NOTE(review): a center with a zero group count is divided by zero here;
        // the outcome depends on the Vector division operator - confirm.
        for (int i = 0; i < update.m_nCenters; i++)
        {
            update.m_sharedCenters[i] /= update.m_sharedGroupCounts[i];
        }

        // Fraction of points whose assignment changed in this round.
        update.m_fDelta /= update.m_nPoints;
        update.m_nIterations++;

        bool bKeepGoing =
            update.m_nIterations < update.m_options.m_nMaxIterations &&
            update.m_fDelta >= update.m_options.m_fConvergenceThreshold;

        if (bKeepGoing)
        {
            // Publish the new centers, then release the workers for another round.
            foreach (KMeansMapPartition worker in update.m_workers)
            {
                worker.m_oldCenters = update.m_sharedCenters;
            }
            update.m_evtMapWorkComplete.Reset();
            update.m_evtMapWorkAvailable.Set();
        }
        else
        {
            // Iteration limit reached or delta below threshold: flag completion
            // and wake the workers so they can observe the flag and exit.
            m_bMapPhasesComplete = true;
            m_bUpdatePhasesComplete = true;
            update.m_evtMapWorkAvailable.Set(); // shouldn't be needed, but harmless
            return;
        }
    }
}
ComputeNewCenters(
    KMeansOptions options,
    IEnumerable<Vector> vectors,
    Vector[] centers,
    int nCenters,
    out int[] clusterIds,
    out int nIterations
    )
{
    // Runs the threaded k-means loop: starts one map thread per worker
    // (options.m_nCurrentThreadsSweep of them) over contiguous slices of the
    // input plus one updater thread, releases the first map round, waits for
    // the updater to finish, and returns the updater's final centers.
    // Outputs: clusterIds receives the per-vector assignment array shared with
    // the workers; nIterations receives the updater's iteration count.
    // Throws ArgumentOutOfRangeException when the worker count is below 1.
    int nWorkers = options.m_nCurrentThreadsSweep;
    if (nWorkers < 1)
    {
        // With no workers nothing would ever signal map completion and the
        // updater would wait forever; fail fast instead.
        throw new ArgumentOutOfRangeException("options", "m_nCurrentThreadsSweep must be at least 1.");
    }

    int nRank = vectors.ElementAt(0).Rank;
    int nVectors = vectors.Count();   // enumerate once; used for ids and partitioning
    clusterIds = CreateInitialClusterIds(nVectors);

    // Reset the shared phase flags and events for this run.
    m_bMapPhasesComplete = false;
    m_bUpdatePhasesComplete = false;
    m_evtMapWorkAvailable.Reset();
    m_evtMapWorkComplete.Reset();

    // When every map worker reaches the barrier, close the "work available"
    // gate and tell the updater that the map round is complete.
    m_barrier = new Barrier(nWorkers, (b) =>
    {
        m_evtMapWorkAvailable.Reset();
        m_evtMapWorkComplete.Set();
        if (m_bVerbose)
        {
            Console.WriteLine("reached barrier!");
        }
    });

    KMeansMapPartition[] partitions = new KMeansMapPartition[nWorkers];
    Thread[] mappers = new Thread[nWorkers];

    // Split [0, nVectors) into nWorkers contiguous ranges whose sizes differ by
    // at most one. The previous Math.Round-based size could drop trailing
    // vectors (when rounding down) or yield negative sizes for late workers.
    int nBaseSize = nVectors / nWorkers;
    int nRemainder = nVectors % nWorkers;
    int nStartIndex = 0;
    for (int i = 0; i < nWorkers; i++)
    {
        int nWorkerPartitionSize = nBaseSize + (i < nRemainder ? 1 : 0);
        partitions[i] = CreateMapPartitionDesc(options,
                                               vectors,
                                               centers,
                                               clusterIds,
                                               nCenters,
                                               nRank,
                                               i,
                                               nStartIndex,
                                               nWorkerPartitionSize);
        mappers[i] = new Thread(ThreadedKMeans.MapPartition);
        mappers[i].Start(partitions[i]);
        nStartIndex += nWorkerPartitionSize;
    }

    KMeansUpdateCenters update = CreateUpdateDesc(options, partitions, vectors, clusterIds, nCenters, nRank, 0);
    Thread updater = new Thread(ThreadedKMeans.UpdateCenters);
    updater.Start(update);

    // Timed region: release the first map round and wait for the updater.
    m_pStopwatch.Start();
    m_nStartParallelPhase = m_pStopwatch.ElapsedMilliseconds;
    m_evtMapWorkAvailable.Set();
    updater.Join();
    m_pStopwatch.Stop();
    m_nEndParallelPhase = m_pStopwatch.ElapsedMilliseconds;

    // Outside the timed region: wait for the map threads to exit so none of
    // them is still running when a later call resets the shared flags/events.
    foreach (Thread mapper in mappers)
    {
        mapper.Join();
    }

    nIterations = update.m_nIterations;
    return update.m_sharedCenters;
}