///-------------------------------------------------------------------------------------------------
/// <summary>   Builds a KMeansUpdateCenters descriptor populated from the arguments and from
///             this object's synchronization members (events and barrier). </summary>
///
/// <param name="options">      Stored in the descriptor's m_options. </param>
/// <param name="workers">      Map partitions, stored in m_workers. </param>
/// <param name="vectors">      Input vectors; also counted to fill m_nPoints. </param>
/// <param name="clusterIds">   Cluster id array, stored in m_clusterIds. </param>
/// <param name="nCenters">     Number of centers. </param>
/// <param name="nRank">        Rank value stored in m_nRank. </param>
/// <param name="nIteration">   Initial value for m_nIterations. </param>
///
/// <returns>   The new descriptor. </returns>
///-------------------------------------------------------------------------------------------------
CreateUpdateDesc(
    KMeansOptions options,
    KMeansMapPartition[] workers,
    IEnumerable<Vector> vectors,
    int[] clusterIds,
    int nCenters,
    int nRank,
    int nIteration
    )
{
    // Members are assigned in the same order as a statement-by-statement setup would;
    // the shared buffers start out null and the delta starts at zero.
    return new KMeansUpdateCenters
    {
        m_evtMapWorkComplete = m_evtMapWorkComplete,
        m_evtMapWorkAvailable = m_evtMapWorkAvailable,
        m_evtUpdateWorkComplete = m_evtUpdateWorkComplete,
        m_evtUpdateWorkAvailable = m_evtUpdateWorkAvailable,
        m_barrier = m_barrier,
        m_nIterations = nIteration,
        m_nCenters = nCenters,
        m_nRank = nRank,
        m_nPoints = vectors.Count(),
        m_vectors = vectors,
        m_sharedCenters = null,
        m_sharedGroupCounts = null,
        m_counts = null,
        m_clusterIds = clusterIds,
        m_fDelta = 0.0f,
        m_workers = workers,
        m_options = options
    };
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Builds a KMeansMapPartition descriptor for one worker, populated from the
///             arguments and from this object's synchronization members. </summary>
///
/// <param name="options">          Not read by this method. </param>
/// <param name="vectors">          Input vectors; also counted to fill m_nPoints. </param>
/// <param name="initCenters">      Stored as the partition's m_oldCenters. </param>
/// <param name="clusterIds">       Cluster id array, stored in m_clusterIds. </param>
/// <param name="nCenters">         Number of centers. </param>
/// <param name="nRank">            Rank value stored in m_nRank. </param>
/// <param name="nId">              Identifier stored in m_nId. </param>
/// <param name="nStartIndex">      Start index stored in m_nStartIndex. </param>
/// <param name="nPartitionSize">   Size stored in m_nPartitionSize. </param>
///
/// <returns>   The new descriptor. </returns>
///-------------------------------------------------------------------------------------------------
CreateMapPartitionDesc(
    KMeansOptions options,
    Vector[] vectors,
    Vector[] initCenters,
    int[] clusterIds,
    int nCenters,
    int nRank,
    int nId,
    int nStartIndex,
    int nPartitionSize
    )
{
    // The "new" center and group-count buffers are left null here; the delta starts at zero.
    return new KMeansMapPartition
    {
        m_evtMapWorkComplete = m_evtMapWorkComplete,
        m_evtMapWorkAvailable = m_evtMapWorkAvailable,
        m_evtUpdateWorkComplete = m_evtUpdateWorkComplete,
        m_evtUpdateWorkAvailable = m_evtUpdateWorkAvailable,
        m_barrier = m_barrier,
        m_nId = nId,
        m_nStartIndex = nStartIndex,
        m_nPartitionSize = nPartitionSize,
        m_nCenters = nCenters,
        m_nRank = nRank,
        m_nPoints = vectors.Count(),
        m_oldCenters = initCenters,
        m_newCenters = null,
        m_newGroupCounts = null,
        m_clusterIds = clusterIds,
        m_fDelta = 0.0f,
        m_vectors = vectors
    };
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Expands the implementation names requested in the options into the list of
///             concrete versions to run. Names for which IsParallelizableVersion returns true
///             are expanded into one "name-N" entry per thread count in the sweep; all other
///             names are passed through unchanged. </summary>
///
/// <param name="options">  Supplies m_vImplementations and the minimum/maximum thread
///                         sweep bounds. </param>
///
/// <returns>   The list of version names to run. </returns>
///-------------------------------------------------------------------------------------------------
SelectVersionsToCompare(
    KMeansOptions options
    )
{
    List <String> runnableVersions = new List <string>();
    List <String> specifiedVersions = options.m_vImplementations.Keys.ToList();
    foreach (String sVersion in specifiedVersions)
    {
        if (!IsParallelizableVersion(sVersion))
        {
            runnableVersions.Add(sVersion);
            continue;
        }

        // Start the sweep at one thread at minimum: a start of 0 (or a negative value) would
        // never grow under doubling, so the loop below would never terminate.
        int procs = System.Math.Max(1, options.m_nMinimumThreadsSweep);
        while (procs <= System.Environment.ProcessorCount && procs <= options.m_nMaximumThreadsSweep)
        {
            runnableVersions.Add(sVersion + "-" + procs);

            // Double up to 16 threads, then step linearly by 8.
            if (procs < 16)
            {
                procs *= 2;
            }
            else
            {
                procs += 8;
            }
        }
    }
    return(runnableVersions);
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Runs LINQKMeans.Steps with the requested degree of parallelism, recording the
///             stopwatch readings around the call in m_nStartParallelPhase and
///             m_nEndParallelPhase. </summary>
///
/// <param name="options">          Supplies m_nMaxIterations. </param>
/// <param name="vectors">          Input vectors. </param>
/// <param name="centers">          [in,out] Initial centers on entry; replaced by the result. </param>
/// <param name="clusterIds">       [out] Always set to null by this implementation. </param>
/// <param name="nParallelDegree">  Forwarded to LINQKMeans.Steps. </param>
/// <param name="bAvoidLazyEval">   Forwarded to LINQKMeans.Steps. </param>
///
/// <returns>   options.m_nMaxIterations. </returns>
///-------------------------------------------------------------------------------------------------
execute(
    KMeansOptions options,
    IEnumerable<Vector> vectors,
    ref Vector[] centers,
    out int[] clusterIds,
    int nParallelDegree = 1,
    bool bAvoidLazyEval = true
    )
{
    m_pStopwatch.Start();
    m_nStartParallelPhase = m_pStopwatch.ElapsedMilliseconds;

    IEnumerable<Vector> computedCenters = LINQKMeans.Steps(
        vectors,
        centers,
        options.m_nMaxIterations,
        nParallelDegree,
        bAvoidLazyEval);

    // Enumerate the result while the stopwatch is still running
    // (presumably to force evaluation of a deferred query inside the timed region).
    computedCenters.Count();

    m_pStopwatch.Stop();
    m_nEndParallelPhase = m_pStopwatch.ElapsedMilliseconds;

    clusterIds = null;
    centers = computedCenters.ToArray();
    return options.m_nMaxIterations;
}
// Body-less declaration of the k-means entry point: takes the options, the input vectors,
// the centers by reference and an out array of cluster ids, plus an optional parallelism
// limit (default 1) and an optional lazy-evaluation flag (default true).
// NOTE(review): the implementations visible in this file return an iteration count from
// execute - confirm that is the contract intended here.
execute( KMeansOptions options, IEnumerable <Vector> vectors, ref Vector[] centers, out int[] clusterIds, int nMaxParallelism = 1, bool bAvoidLazyEval = true );
///-------------------------------------------------------------------------------------------------
/// <summary>   Checks that every candidate center has a match (per ValuePresent) among the
///             reference centers, optionally dumping both lists on mismatch. </summary>
///
/// <param name="options">          Forwarded to ValuePresent. </param>
/// <param name="centers">          Candidate centers. </param>
/// <param name="refnewcenters">    Reference centers. </param>
/// <param name="bVerbose">         Enables the "different lengths!" and "center lists differ!"
///                                 messages. </param>
/// <param name="bDumpLists">       When true and a mismatch is found, prints the reference list
///                                 and the unmatched candidates. </param>
///
/// <returns>   true if the lists have equal length and every candidate matched. </returns>
///-------------------------------------------------------------------------------------------------
CheckResult(
    KMeansOptions options,
    IEnumerable<Vector> centers,
    IEnumerable<Vector> refnewcenters,
    bool bVerbose = true,
    bool bDumpLists = false
    )
{
    // Lists of different length can never match.
    if (centers.Count() != refnewcenters.Count())
    {
        if (bVerbose)
        {
            Console.WriteLine("different lengths!");
        }
        return false;
    }

    // Collect the positions of the candidates that have no counterpart in the reference list.
    List<int> unmatched = new List<int>();
    int position = 0;
    foreach (Vector candidate in centers)
    {
        if (!ValuePresent(options, candidate, refnewcenters))
        {
            unmatched.Add(position);
        }
        position++;
    }

    bool allMatched = (unmatched.Count == 0);
    if (allMatched || !bDumpLists)
    {
        return allMatched;
    }

    if (bVerbose)
    {
        Console.WriteLine("center lists differ!");
    }
    Console.WriteLine("REF:");
    foreach (Vector reference in refnewcenters)
    {
        Console.WriteLine("\t{0}", reference);
    }
    Console.WriteLine("CAND: could not match:");
    foreach (int missing in unmatched)
    {
        Console.WriteLine("\t{0}", centers.ElementAt(missing));
    }
    return false;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Tests whether any reference center lies within options.m_fEpsilon of the given
///             center, as measured by Vector.Dist. </summary>
///
/// <param name="options">      Supplies m_fEpsilon. </param>
/// <param name="center">       The center to look for. </param>
/// <param name="refcenters">   The centers to search. </param>
///
/// <returns>   true if a close-enough reference center exists, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
ValuePresent(
    KMeansOptions options,
    Vector center,
    IEnumerable<Vector> refcenters
    )
{
    bool bFound = false;
    foreach (Vector candidate in refcenters)
    {
        var distance = Vector.Dist(center, candidate);
        if (distance < options.m_fEpsilon)
        {
            // The first match is enough; stop scanning.
            bFound = true;
            break;
        }
    }
    return bFound;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Runs k-means by delegating to ComputeNewCenters. </summary>
///
/// <param name="options">          Forwarded to ComputeNewCenters. </param>
/// <param name="vectors">          Input vectors. </param>
/// <param name="centers">          [in,out] Initial centers on entry; replaced by the value
///                                 ComputeNewCenters returns. </param>
/// <param name="clusterIds">       [out] Cluster ids produced by ComputeNewCenters. </param>
/// <param name="nMaxParallelism">  Not used by this implementation. </param>
/// <param name="bAvoidLazyEval">   Not used by this implementation. </param>
///
/// <returns>   The iteration count reported by ComputeNewCenters. </returns>
///-------------------------------------------------------------------------------------------------
execute(
    KMeansOptions options,
    IEnumerable <Vector> vectors,
    ref Vector[] centers,
    out int[] clusterIds,
    int nMaxParallelism,
    bool bAvoidLazyEval
    )
{
    // clusterIds and nIterations are both assigned by the callee through its out parameters,
    // so no pre-initialization is needed here.
    int nIterations;
    centers = ComputeNewCenters(options, vectors, centers, out clusterIds, out nIterations);
    return(nIterations);
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Maps a version name to a calculator instance. Names beginning with "PLINQ",
///             "threaded" or "tasks" carry a thread count after the first '-', which is parsed
///             into nMaxParallelism; "seq" and "LINQ" run with a parallelism of 1. </summary>
///
/// <param name="options">          Passed to the calculator's constructor. </param>
/// <param name="sVersion">         The version name. </param>
/// <param name="nMaxParallelism">  [out] The parsed thread count, or 1. </param>
/// <param name="bAvoidLazyEval">   [out] Always set to true. </param>
///
/// <returns>   The calculator, or null when the name is not recognised. </returns>
///-------------------------------------------------------------------------------------------------
SelectImplementation(
    KMeansOptions options,
    String sVersion,
    out int nMaxParallelism,
    out bool bAvoidLazyEval
    )
{
    bool bPlinq = sVersion.StartsWith("PLINQ");
    bool bThreaded = sVersion.StartsWith("threaded");
    bool bTasks = sVersion.StartsWith("tasks");

    if (!(bPlinq || bThreaded || bTasks))
    {
        // Single-threaded implementations are matched on the exact name.
        nMaxParallelism = 1;
        bAvoidLazyEval = true;
        switch (sVersion)
        {
            case "seq":
                return new SequentialKMeans(options);
            case "LINQ":
                return new LINQKMeans(options);
            default:
                return null;
        }
    }

    // Everything after the first '-' is the thread count.
    String strMaxParallelism = sVersion.Substring(sVersion.IndexOf('-') + 1);
    nMaxParallelism = Int32.Parse(strMaxParallelism);
    bAvoidLazyEval = true;

    if (bPlinq)
    {
        return new PLINQKMeans(options);
    }
    if (bThreaded)
    {
        return new ThreadedKMeans(options);
    }
    return new TaskingKMeans(options);
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Runs k-means by delegating to Compute and materializing its result into an
///             array. </summary>
///
/// <param name="options">          Supplies m_nMaxIterations. </param>
/// <param name="vectors">          Input vectors. </param>
/// <param name="centers">          [in,out] Initial centers on entry; replaced by the result. </param>
/// <param name="clusterIds">       [out] Always set to null by this implementation. </param>
/// <param name="nMaxParallelism">  Not used by this implementation. </param>
/// <param name="bAvoidLazyEval">   Forwarded to Compute. </param>
///
/// <returns>   options.m_nMaxIterations. </returns>
///-------------------------------------------------------------------------------------------------
execute(
    KMeansOptions options,
    IEnumerable<Vector> vectors,
    ref Vector[] centers,
    out int[] clusterIds,
    int nMaxParallelism = 1,
    bool bAvoidLazyEval = true
    )
{
    clusterIds = null;

    var computed = Compute(vectors, centers, options.m_nMaxIterations, bAvoidLazyEval);
    centers = computed.ToArray();

    // The configured maximum is reported as the iteration count.
    return options.m_nMaxIterations;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Runs k-means by recording the requested parallelism in the options and
///             delegating to ComputeNewCenters. </summary>
///
/// <param name="options">          Receives nMaxParallelism in m_nCurrentThreadsSweep and
///                                 supplies m_nClusters. </param>
/// <param name="vectors">          Input vectors. </param>
/// <param name="centers">          [in,out] Initial centers on entry; replaced by the value
///                                 ComputeNewCenters returns. </param>
/// <param name="clusterIds">       [out] Cluster ids produced by ComputeNewCenters. </param>
/// <param name="nMaxParallelism">  Written to options.m_nCurrentThreadsSweep. </param>
/// <param name="bAvoidLazyEval">   Not used by this implementation. </param>
///
/// <returns>   The iteration count reported by ComputeNewCenters. </returns>
///-------------------------------------------------------------------------------------------------
execute(
    KMeansOptions options,
    IEnumerable<Vector> vectors,
    ref Vector[] centers,
    out int[] clusterIds,
    int nMaxParallelism = 1,
    bool bAvoidLazyEval = true
    )
{
    // The callee reads the worker count from the options object.
    options.m_nCurrentThreadsSweep = nMaxParallelism;

    int iterationsRun = 0;
    Vector[] updatedCenters = ComputeNewCenters(
        options,
        vectors,
        centers,
        options.m_nClusters,
        out clusterIds,
        out iterationsRun);

    centers = updatedCenters;
    return iterationsRun;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Program entry point: parses the command line, then either generates a random
///             input file or loads the input and compares the implementations. </summary>
///
/// <remarks>   crossbac, 8/6/2013. </remarks>
///
/// <param name="args"> Array of command-line argument strings. </param>
///-------------------------------------------------------------------------------------------------
static void Main(string[] args)
{
    KMeansOptions options = KMeansOptions.getOptions(args);
    if (options == null)
    {
        // getOptions returns null after printing usage or an error.
        return;
    }

    if (options.m_bGenerateData)
    {
        GenerateRandomInput(options.m_strFileName, options.m_nGenerateElems, options.m_nGenerateDims);
    }
    else
    {
        Vector[] attributes;
        if (options.m_bBinaryInput)
        {
            attributes = KMeansCalculator.ReadBinaryInput(options.m_strFileName);
        }
        else
        {
            attributes = KMeansCalculator.ReadTextInput(options.m_strFileName);
        }
        ComparePerformance(options, attributes);
    }
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Creates a threaded k-means calculator that keeps the supplied options. </summary>
///
/// <remarks>   Chris Rossbach ([email protected]), 8/2/2012. </remarks>
///
/// <param name="options">  Options stored in m_options. </param>
///-------------------------------------------------------------------------------------------------
public ThreadedKMeans(KMeansOptions options)
{
    this.m_options = options;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Single-threaded k-means loop. Each iteration assigns every vector to its nearest
///             center (via FindNearestCenter), averages the vectors of each cluster into a new
///             center, and repeats until the fraction of vectors that changed cluster drops to
///             options.m_fConvergenceThreshold or options.m_nMaxIterations is reached. </summary>
///
/// <param name="options">      Supplies m_fConvergenceThreshold and m_nMaxIterations. </param>
/// <param name="vectors">      Input vectors; must contain at least one element, since the rank
///                             is read from the first one. </param>
/// <param name="centers">      Initial centers; overwritten in place with the final centers. </param>
/// <param name="clusterIds">   [out] For each vector, the index of the center it was last
///                             assigned to. </param>
/// <param name="nIterations">  [out] Number of iterations executed. </param>
///
/// <returns>   The centers array passed in, holding the final centers. </returns>
///-------------------------------------------------------------------------------------------------
ComputeNewCenters(
    KMeansOptions options,
    IEnumerable <Vector> vectors,
    Vector[] centers,
    out int[] clusterIds,
    out int nIterations
    )
{
    float delta = 0.0f;
    int iterations = 0;
    int nRank = vectors.ElementAt(0).Rank;
    int nPoints = vectors.Count();
    int nCenters = centers.Length;

    // -1 marks "not assigned yet", so every vector counts as changed in the first iteration.
    clusterIds = new int[nPoints];
    for (int i = 0; i < nPoints; i++)
    {
        clusterIds[i] = -1;
    }

    Vector[] newCenters = new Vector[nCenters];
    Vector[] oldCenters = new Vector[nCenters];
    int[] groupCounts = new int[nCenters];
    for (int i = 0; i < nCenters; i++)
    {
        groupCounts[i] = 0;
        newCenters[i] = Vector.ZeroVector(nRank);
        oldCenters[i] = new Vector(centers[i]);
    }

    m_pStopwatch.Start();
    m_nStartParallelPhase = m_pStopwatch.ElapsedMilliseconds;
    do
    {
        delta = 0.0f;

        // Assignment step: accumulate each vector into the sum for its nearest center.
        for (int i = 0; i < nPoints; i++)
        {
            Vector vec = vectors.ElementAt(i);
            int nIndex = FindNearestCenter(vec, oldCenters);
            if (clusterIds[i] != nIndex)
            {
                delta += 1.0f;
            }
            clusterIds[i] = nIndex;
            newCenters[nIndex] += vec;
            groupCounts[nIndex] += 1;
        }

        // Update step: turn the sums into means.
        for (int i = 0; i < nCenters; i++)
        {
            if (groupCounts[i] > 0)
            {
                newCenters[i] /= groupCounts[i];
            }
            else
            {
                // No vector was assigned to this center. Dividing the (zero) sum by a zero
                // count cannot produce a meaningful center, so carry the previous one forward.
                newCenters[i] = new Vector(oldCenters[i]);
            }
        }
        delta /= nPoints;
        iterations++;

        // Swap the buffers: this iteration's result becomes the input of the next one, and
        // the previous input is cleared for reuse as the accumulator.
        Vector[] tCenters = oldCenters;
        oldCenters = newCenters;
        newCenters = tCenters;
        for (int i = 0; i < nCenters; i++)
        {
            groupCounts[i] = 0;
            newCenters[i].clear();
        }
    } while((delta > options.m_fConvergenceThreshold) && (iterations < options.m_nMaxIterations));

    // After the swap the latest result lives in oldCenters.
    for (int i = 0; i < nCenters; i++)
    {
        centers[i] = oldCenters[i];
    }
    nIterations = iterations;
    m_nEndParallelPhase = m_pStopwatch.ElapsedMilliseconds;
    return(centers);
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Compare performance of several implementations. Runs every selected version
///             options.m_nTotalRuns times from the same initial centers, optionally checking
///             each result against ReferenceKMeans, and prints the collected runtimes. </summary>
///
/// <remarks>   Chris Rossbach ([email protected]), 8/7/2012. </remarks>
///
/// <param name="options">      Options for controlling the operation. </param>
/// <param name="attributes">   The input vectors. </param>
///-------------------------------------------------------------------------------------------------
static void ComparePerformance(
    KMeansOptions options,
    IEnumerable <Vector> attributes
    )
{
    List <String> runnableVersions = SelectVersionsToCompare(options);

    // One pre-allocated sample slot per (version, run).
    Dictionary <String, Sample[]> perfdata = new Dictionary <string, Sample[]>();
    foreach (String version in runnableVersions)
    {
        Sample[] vdata = new Sample[options.m_nTotalRuns];
        for (int i = 0; i < options.m_nTotalRuns; i++)
        {
            vdata[i] = new Sample();
        }
        perfdata[version] = vdata;
    }

    // Every run of every version starts from a copy of the same initial centers.
    IEnumerable <Vector> goldcenters = SelectInitialCenters(attributes, options.m_nClusters, options.m_nRandomSeed);

    for (int i = 0; i < options.m_nTotalRuns; i++)
    {
        long lReferenceImplTime = 0;
        IEnumerable <Vector> refnewcenters = null;
        if (options.m_bCheckResult)
        {
            Vector[] refcenters = DuplicateCenters(goldcenters);
            ReferenceKMeans refkmeans = new ReferenceKMeans();
            refnewcenters = refkmeans.Compute(attributes, refcenters, options.m_nMaxIterations, true);
            lReferenceImplTime = refkmeans.RuntimeMilliseconds;
        }

        foreach (String sVersion in runnableVersions)
        {
            bool bAvoidLazyEval = true;
            int nMaxParallelism = 1;
            int[] rClusterIds = null;
            Vector[] newcenters = DuplicateCenters(goldcenters);
            KMeansCalculator kmeans = SelectImplementation(options, sVersion, out nMaxParallelism, out bAvoidLazyEval);
            if (kmeans == null)
            {
                // SelectImplementation returns null for names it does not recognise; report
                // that instead of failing with a NullReferenceException on the call below.
                Console.WriteLine("unknown implementation: {0}", sVersion);
                return;
            }
            int nIterations = kmeans.execute(options, attributes, ref newcenters, out rClusterIds, nMaxParallelism, bAvoidLazyEval);

            Sample isample = perfdata[sVersion][i];
            isample.m_impltime = kmeans.RuntimeMilliseconds;
            isample.m_success = true;
            isample.m_reftime = 0;
            if (options.m_bCheckResult)
            {
                isample.m_reftime = lReferenceImplTime;
                isample.m_success = KMeansCalculator.CheckResult(options, newcenters, refnewcenters, options.m_bVerbose, options.m_bVerbose);
                if (!isample.m_success)
                {
                    // Stop at the first mismatch; no timings are reported.
                    Console.WriteLine("FAILED");
                    return;
                }
            }
        }
    }

    Console.WriteLine("SUCCEEDED");
    foreach (String v in perfdata.Keys)
    {
        Sample[] samples = perfdata[v];
        Performance perf = new Performance(samples);
        Console.WriteLine("{0,-15}: {1}, avg={2}", v, perf.RawRuntimes(), perf.m_impltime.ToString("f1"));
    }
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Creates a task-based k-means calculator that keeps the supplied options. </summary>
///
/// <remarks>   Chris Rossbach ([email protected]), 8/2/2012. </remarks>
///
/// <param name="options">  Options stored in m_options. </param>
///-------------------------------------------------------------------------------------------------
public TaskingKMeans(KMeansOptions options)
{
    this.m_options = options;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Parses the command line into a KMeansOptions object. </summary>
///
/// <param name="args"> Array of command-line argument strings. </param>
///
/// <returns>   The populated options, or null when usage was printed (help requested, unknown
///             option, or an argument that failed to parse). </returns>
///-------------------------------------------------------------------------------------------------
public static KMeansOptions getOptions(string[] args)
{
    // 'C' and 'h' are listed in the option string so that the corresponding cases in the
    // switch below can actually be selected; a letter followed by ':' takes an argument.
    Getopt.Getopt g = new Getopt.Getopt("KMeans", args, "cCr:x:i:m:T:e:bR:X:K:I:vV:gd:E:h");
    KMeansOptions options = new KMeansOptions();
    try
    {
        int c;
        while ((c = g.getopt()) != -1)
        {
            switch (c)
            {
                case 'g':
                    options.m_bGenerateData = true;
                    break;

                case 'd':
                    options.m_nGenerateDims = Int32.Parse(g.Optarg);
                    break;

                case 'E':
                    options.m_nGenerateElems = Int32.Parse(g.Optarg);
                    break;

                case 'v':
                    options.m_bVerbose = true;
                    break;

                case 'V':
                    // Only the key matters; the value is left null.
                    options.m_vImplementations[g.Optarg] = null;
                    break;

                case 'X':
                    options.m_nMaximumThreadsSweep = Int32.Parse(g.Optarg);
                    break;

                case 'K':
                    options.m_nPartitionSize = Int32.Parse(g.Optarg);
                    break;

                case 'R':
                    options.m_nRandomSeed = Int32.Parse(g.Optarg);
                    break;

                case 'c':
                    options.m_bCheckResult = true;
                    break;

                case 'C':
                    options.m_bComparePerformance = true;
                    break;

                case 'r':
                    options.m_nTotalRuns = Int16.Parse(g.Optarg);
                    break;

                case 'T':
                    options.m_fConvergenceThreshold = float.Parse(g.Optarg);
                    break;

                case 'e':
                    options.m_fEpsilon = float.Parse(g.Optarg);
                    break;

                case 'x':
                    options.m_nMinimumThreadsSweep = int.Parse(g.Optarg);
                    break;

                case 'i':
                    options.m_strFileName = g.Optarg;
                    break;

                case 'I':
                    options.m_nMaxIterations = int.Parse(g.Optarg);
                    break;

                case 'm':
                    options.m_nClusters = int.Parse(g.Optarg);
                    break;

                case 'b':
                    options.m_bBinaryInput = true;
                    break;

                case 'h':
                    PrintUsage();
                    return(null);

                default:
                    Console.WriteLine("Unknown option: " + (Char)c);
                    PrintUsage();
                    return(null);
            }
        }
    }
    catch (Exception e)
    {
        // Any parse failure is reported together with the usage text.
        Console.WriteLine("Options exception: " + e.Message);
        PrintUsage();
        return(null);
    }
    return(options);
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Thread-based k-means driver. Splits the vectors into one contiguous partition
///             per worker, starts one mapper thread per partition plus one updater thread,
///             signals that map work is available and waits for the updater to finish. </summary>
///
/// <param name="options">      Supplies m_nCurrentThreadsSweep, the number of workers. </param>
/// <param name="vectors">      Input vectors; must contain at least one element, since the rank
///                             is read from the first one. </param>
/// <param name="centers">      Initial centers handed to every partition. </param>
/// <param name="nCenters">     Number of centers. </param>
/// <param name="clusterIds">   [out] Array created by CreateInitialClusterIds and shared with
///                             the partitions and the updater. </param>
/// <param name="nIterations">  [out] The updater's m_nIterations after it finished. </param>
///
/// <returns>   The updater's m_sharedCenters. </returns>
///-------------------------------------------------------------------------------------------------
ComputeNewCenters(
    KMeansOptions options,
    IEnumerable <Vector> vectors,
    Vector[] centers,
    int nCenters,
    out int[] clusterIds,
    out int nIterations
    )
{
    int nRank = vectors.ElementAt(0).Rank;
    int nPoints = vectors.Count();
    clusterIds = CreateInitialClusterIds(nPoints);

    m_bMapPhasesComplete = false;
    m_bUpdatePhasesComplete = false;
    m_evtMapWorkAvailable.Reset();
    m_evtMapWorkComplete.Reset();

    // Once all participants arrive, the post-phase action closes the map phase.
    m_barrier = new Barrier(options.m_nCurrentThreadsSweep, (b) => {
        m_evtMapWorkAvailable.Reset();
        m_evtMapWorkComplete.Set();
        if (m_bVerbose)
        {
            Console.WriteLine("reached barrier!");
        }
    });

    KMeansMapPartition[] partitions;
    Thread[] mappers;
    int nVectors = vectors.Count();
    int nWorkers = options.m_nCurrentThreadsSweep;
    partitions = new KMeansMapPartition[nWorkers];
    mappers = new Thread[nWorkers];
    for (int i = 0; i < nWorkers; i++)
    {
        // Proportional boundaries: worker i covers [i*n/w, (i+1)*n/w). The ranges are
        // contiguous, never negative, and together cover every vector exactly once. A size
        // obtained by rounding n/w could leave trailing vectors unassigned (rounding down)
        // or yield a negative size for the last workers (rounding up).
        // NOTE(review): assumes a partition covers m_nStartIndex .. m_nStartIndex +
        // m_nPartitionSize - 1 and that sizes may differ by one between workers - confirm
        // against MapPartition.
        int nStartIndex = (int)(((long)i * nVectors) / nWorkers);
        int nEndIndex = (int)(((long)(i + 1) * nVectors) / nWorkers);
        int nWorkerPartitionSize = nEndIndex - nStartIndex;

        partitions[i] = CreateMapPartitionDesc(options, vectors, centers, clusterIds, nCenters, nRank, i, nStartIndex, nWorkerPartitionSize);
        mappers[i] = new Thread(ThreadedKMeans.MapPartition);
        mappers[i].Start(partitions[i]);
    }

    KMeansUpdateCenters update;
    update = CreateUpdateDesc(options, partitions, vectors, clusterIds, nCenters, nRank, 0);
    Thread updater = new Thread(ThreadedKMeans.UpdateCenters);
    updater.Start(update);

    // Timing starts just before map work is signalled as available; the call returns once
    // the updater thread has finished.
    m_pStopwatch.Start();
    m_nStartParallelPhase = m_pStopwatch.ElapsedMilliseconds;
    m_evtMapWorkAvailable.Set();
    updater.Join();
    m_pStopwatch.Stop();

    nIterations = update.m_nIterations;
    centers = update.m_sharedCenters;
    m_nEndParallelPhase = m_pStopwatch.ElapsedMilliseconds;
    return(centers);
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Creates a sequential k-means calculator that keeps the supplied options. </summary>
///
/// <remarks>   Chris Rossbach ([email protected]), 8/2/2012. </remarks>
///
/// <param name="options">  Options stored in m_options. </param>
///-------------------------------------------------------------------------------------------------
public SequentialKMeans(KMeansOptions options)
{
    this.m_options = options;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Creates a LINQ-based k-means calculator that keeps the supplied options. </summary>
///
/// <param name="options">  Options stored in m_options. </param>
///-------------------------------------------------------------------------------------------------
public LINQKMeans(KMeansOptions options)
{
    this.m_options = options;
}