/// <summary>
/// Returns a random sample of distinct rows of <paramref name="data"/>.
/// </summary>
/// <param name="data">Rows to sample from. Every sampled row is copied into a buffer
/// that has the length of the first row.</param>
/// <param name="nofSamples">Number of distinct rows to draw; must not exceed the number of rows.</param>
/// <returns>A new jagged array with <paramref name="nofSamples"/> rows, in the order they were drawn.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="nofSamples"/> is larger than the number of rows, so that many distinct
/// rows do not exist (previously this case looped forever).
/// </exception>
private float[][] GetUniqueRandomNumbers(float[][] data, int nofSamples)
{
    // Rejection sampling below can only terminate if enough distinct indices exist.
    if (nofSamples > 0 && nofSamples > data.Length)
    {
        throw new ArgumentOutOfRangeException(
            "nofSamples",
            nofSamples,
            "Cannot draw more unique samples than there are rows in data.");
    }

    float[][] sample = new float[nofSamples][];
    if (nofSamples == 0)
    {
        return sample;
    }

    int rowLength = data[0].Length;
    HashSet<int> usedIndices = new HashSet<int>();
    int destinationIndex = 0;

    while (destinationIndex < nofSamples)
    {
        int candidate = RandomGen.Next(0, data.Length);

        // HashSet.Add returns false when the index was drawn before; retry in that case.
        if (!usedIndices.Add(candidate))
        {
            continue;
        }

        sample[destinationIndex] = new float[rowLength];
        CopyArray(data, sample, candidate, destinationIndex);
        destinationIndex++;
    }

    return sample;
}
/// <summary>
/// Chooses <paramref name="k"/> starting cluster centers with a k-means++ style seeding
/// that uses <c>GetEarthMoverDistance</c> as the distance measure.
/// </summary>
/// <remarks>
/// To keep seeding cheap, centers are chosen from a random subset of at most
/// <c>k * 20</c> rows. The first center is uniform random; every further center is
/// sampled with probability proportional to the squared distance to the closest
/// center chosen so far.
/// </remarks>
/// <param name="data">All rows to be clustered.</param>
/// <param name="k">Number of centers to choose; must not exceed the number of rows.</param>
/// <returns>A <c>k x rowLength</c> matrix holding one center per row.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="k"/> is larger than the number of rows, so k distinct rows do not
/// exist (previously this case looped forever while resampling).
/// </exception>
private float[,] FindStartingCentersEMD(float[][] data, int k)
{
    if (k > data.Length)
    {
        throw new ArgumentOutOfRangeException(
            "k",
            k,
            "Cannot choose more starting centers than there are rows in data.");
    }

    // select random centers
    Console.WriteLine("K-means++ finding good starting centers...");

    // first get some samples of all data to speed up the algorithm
    int maxSamples = Math.Min(k * 20, data.Length);
    float[][] dataTemp = GetUniqueRandomNumbers(data, maxSamples);
    int sampleCount = dataTemp.Length;

    float[,] centers = new float[k, dataTemp[0].Length];
    HashSet<int> centerIndices = new HashSet<int>();

    using (var progress = new ProgressBar())
    {
        progress.Report((double)(1) / k, 1);

        for (int c = 0; c < k; ++c) // get a new cluster center one by one
        {
            if (c == 0)
            {
                // first cluster center is random
                int firstIndex = RandomGen.Next(0, sampleCount);
                centerIndices.Add(firstIndex);
                CopyArray(dataTemp, centers, firstIndex, c);
            }
            else
            {
                float[] distancesToBestCenter = Enumerable.Repeat(float.MaxValue, sampleCount).ToArray();

                Parallel.For(0, Global.NOF_THREADS, i =>
                {
                    // Look the work range up once per worker instead of on every loop test.
                    var range = Util.GetWorkItemsIndices(sampleCount, Global.NOF_THREADS, i);
                    int first = range.Item1;
                    var end = range.Item2;

                    for (int j = first; j < end; ++j) // go through all dataTemp
                    {
                        float closest = distancesToBestCenter[j];
                        for (int m = 0; m < c; ++m) // go through centers chosen so far
                        {
                            float tempDistance = GetEarthMoverDistance(dataTemp, centers, j, m);
                            if (tempDistance < closest)
                            {
                                closest = tempDistance;
                            }
                        }
                        distancesToBestCenter[j] = closest;
                    }
                });

                SquareArray(distancesToBestCenter);
                float sum = distancesToBestCenter.Sum();

                int centerIndexSample;
                if (sum > 0)
                {
                    for (int p = 0; p < distancesToBestCenter.Length; ++p)
                    {
                        distancesToBestCenter[p] /= sum;
                    }

                    centerIndexSample = Util.SampleDistribution(distancesToBestCenter);
                    while (centerIndices.Contains(centerIndexSample))
                    {
                        centerIndexSample = Util.SampleDistribution(distancesToBestCenter);
                    }
                }
                else
                {
                    // Every sample coincides with an already chosen center (or the sum is NaN),
                    // so normalising would divide by zero. Fall back to a uniform draw over the
                    // unused samples; one always exists because sampleCount >= k > c.
                    do
                    {
                        centerIndexSample = RandomGen.Next(0, sampleCount);
                    }
                    while (centerIndices.Contains(centerIndexSample));
                }

                CopyArray(dataTemp, centers, centerIndexSample, c);
                centerIndices.Add(centerIndexSample);
            }

            progress.Report((double)(c + 1) / k, c + 1);
        }
    }

    return centers;
}
/// <summary>
/// Clusters <paramref name="data"/> into <paramref name="k"/> clusters with k-means
/// (k-means++ seeding, <c>GetEarthMoverDistance</c> as metric) and returns an array
/// where the element at index i is the cluster index assigned to <c>data[i]</c>.
/// </summary>
/// <remarks>
/// Each run iterates until the average distance is exactly equal to that of the previous
/// iteration. The assignment with the lowest average distance seen over all iterations
/// and runs is returned; the centers themselves are discarded. After every iteration the
/// best assignment so far is written to <c>EMDTable_temp_&lt;random id&gt;.txt</c>.
/// </remarks>
/// <param name="data">Rows to cluster.</param>
/// <param name="k">Number of clusters.</param>
/// <param name="nofRuns">Number of runs; the best result over all runs is kept.</param>
/// <param name="_bestCenters">
/// Optional assignment from an earlier call. When given, the first run derives its
/// centers from it via <c>CalculateNewCenters</c> instead of seeding new ones.
/// </param>
/// <returns>Cluster index per row of <paramref name="data"/>.</returns>
public int[] ClusterEMD(float[][] data, int k, int nofRuns, int[] _bestCenters = null)
{
    // Number of processed rows after which a worker publishes its progress.
    const int ReportInterval = 100000;

    Console.WriteLine("K-means++ (EMD) clustering {0} elements into {1} clusters with {2} runs...", data.Length, k, nofRuns);

    int filenameId = RandomGen.Next(0, 10000000);
    DateTime start = DateTime.UtcNow;

    int[] bestCenters = new int[data.Length];
    int[] recordCenters = new int[data.Length]; // we return indices only, the centers are discarded

    // load previous indices if passed
    bool skipInit = false;
    if (_bestCenters != null)
    {
        skipInit = true;
        Array.Copy(_bestCenters, bestCenters, _bestCenters.Length);
        Array.Copy(_bestCenters, recordCenters, _bestCenters.Length);
    }

    double recordDistance = double.MaxValue;

    for (int run = 0; run < nofRuns; ++run)
    {
        Console.WriteLine("K-means++ starting clustering...");
        double lastDistance = double.MaxValue;
        bool distanceChanged = true;

        float[,] centers;
        if (!skipInit)
        {
            bestCenters = new int[data.Length];
            centers = FindStartingCentersEMD(data, k);
        }
        else
        {
            // find new cluster centers
            // todo: it isnt theoretically sound to take the mean when using EMD distance metric
            centers = CalculateNewCenters(data, bestCenters, k);
            skipInit = false;
        }

        float[,] centerCenterDistances = new float[k, k];

        while (distanceChanged)
        {
            // calculate cluster-cluster distances to use triangle inequality
            CalculateClusterDistancesEMD(centerCenterDistances, centers);

            // find closest cluster for each element
            long sharedLoopCounter = 0;

            // One slot per worker. The slots are summed in index order afterwards so the
            // total does not depend on the order in which workers finish; the convergence
            // test below relies on exact equality of two consecutive totals.
            double[] partialDistances = new double[Global.NOF_THREADS];

            using (var progress = new ProgressBar())
            {
                Parallel.For(0, Global.NOF_THREADS, i =>
                {
                    double threadDistance = 0;
                    long iter = 0;

                    // Look the work range up once per worker instead of on every loop test.
                    var range = Util.GetWorkItemsIndices(data.Length, Global.NOF_THREADS, i);
                    int first = range.Item1;
                    var end = range.Item2;

                    for (int j = first; j < end; ++j) // go through all data
                    {
                        // assume previous cluster was good, this is better for the triangle inequality
                        int bestIndex = bestCenters[j];
                        double distance = GetEarthMoverDistance(data, centers, j, bestIndex);

                        for (int m = 0; m < k; m++) // go through centers
                        {
                            // A center at least 2 * distance away from the current best
                            // cannot be closer to the point, so it is skipped.
                            if (centerCenterDistances[bestIndex, m] < 2 * distance && bestIndex != m)
                            {
                                double tempDistance = GetEarthMoverDistance(data, centers, j, m);
                                if (tempDistance < distance)
                                {
                                    distance = tempDistance;
                                    bestIndex = m;
                                }
                            }
                        }

                        bestCenters[j] = bestIndex;
                        threadDistance += distance;
                        iter++;

                        if (iter % ReportInterval == 0)
                        {
                            Interlocked.Add(ref sharedLoopCounter, ReportInterval);
                            progress.Report((double)Interlocked.Read(ref sharedLoopCounter) / data.Length, sharedLoopCounter);
                        }
                    }

                    Interlocked.Add(ref sharedLoopCounter, iter % ReportInterval);
                    progress.Report((double)Interlocked.Read(ref sharedLoopCounter) / data.Length, sharedLoopCounter);
                    partialDistances[i] = threadDistance;
                });
            }

            double totalDistance = 0;
            for (int t = 0; t < partialDistances.Length; ++t)
            {
                totalDistance += partialDistances[t];
            }

            centers = CalculateNewCenters(data, bestCenters, k);
            totalDistance /= data.Length;

            // Converged once an iteration reproduces the previous average exactly.
            distanceChanged = totalDistance != lastDistance;
            double diff = lastDistance - totalDistance;

            // Update the record first so the file written below contains the best
            // assignment including this iteration (it used to lag one iteration behind).
            if (totalDistance < recordDistance)
            {
                recordDistance = totalDistance;
                Array.Copy(bestCenters, recordCenters, recordCenters.Length);
            }

            Console.WriteLine("Saving intermediate table to file...");
            FileHandler.SaveToFile(recordCenters, "EMDTable_temp_" + filenameId + ".txt");

            Console.WriteLine("Current average distance: {0} Improvement: {1}, {2}%", totalDistance, diff,
                100.0 * (1.0 - totalDistance / lastDistance));

            lastDistance = totalDistance;
        }
    }

    Console.WriteLine("Best distance found: " + recordDistance);
    TimeSpan elapsed = DateTime.UtcNow - start;
    Console.WriteLine("K-means++ clustering (EMD) completed in {0}d {1}h {2}m {3}s", elapsed.Days, elapsed.Hours, elapsed.Minutes, elapsed.Seconds);

    return recordCenters;
}
/// <summary>
/// Fills <c>histogramsPreflop</c> with one equity histogram per preflop hand index
/// (169 entries) using Monte Carlo sampling, then prints the histograms.
/// </summary>
/// <remarks>
/// For every hand index and every simulation step a random five-card board is drawn.
/// The hand is then compared against every two-card opponent holding that does not
/// collide with the hand or the board. The resulting equity (wins plus half the ties,
/// divided by all comparisons) increments one bin of that hand's histogram.
/// </remarks>
private static void CalculateOCHSOpponentClusters()
{
    const int NofPreflopHands = 169;
    const int DeckSize = 52;
    const int BoardSize = 5;

    Console.WriteLine("Calculating {0} opponent clusters for OCHS using Monte Carlo Sampling...", Global.nofOpponentClusters);
    DateTime start = DateTime.UtcNow;

    histogramsPreflop = new float[NofPreflopHands][];
    for (int i = 0; i < NofPreflopHands; ++i)
    {
        histogramsPreflop[i] = new float[Global.preflopHistogramSize];
    }

    long sharedLoopCounter = 0;
    using (var progress = new ProgressBar())
    {
        progress.Report((double)Interlocked.Read(ref sharedLoopCounter) / (NofPreflopHands * Global.nofMCSimsPerPreflopHand), sharedLoopCounter);

        Parallel.For(0, NofPreflopHands, i =>
        {
            int[] cards = new int[2];
            Global.indexer_2.unindex(Global.indexer_2.rounds - 1, i, cards);

            // The hole cards are fixed for this hand index, so build their masks once.
            long holeDeadMask = (1L << cards[0]) + (1L << cards[1]);
            ulong holeMask = (1uL << cards[0]) + (1uL << cards[1]);

            for (int steps = 0; steps < Global.nofMCSimsPerPreflopHand; steps++)
            {
                // Cards that may not be dealt again in this step: hole cards plus board so far.
                long deadCardMask = holeDeadMask;
                ulong boardMask = 0;

                // Draw flop, turn and river by rejection sampling against the dead cards.
                for (int b = 0; b < BoardSize; ++b)
                {
                    int card = RandomGen.Next(0, DeckSize);
                    while (((1L << card) & deadCardMask) != 0)
                    {
                        card = RandomGen.Next(0, DeckSize);
                    }
                    deadCardMask |= (1L << card);
                    boardMask += (1uL << card);
                }

                // The hero's seven cards do not depend on the opponent holding, so the
                // hand is evaluated once per board instead of once per opponent combo.
                // NOTE(review): assumes Evaluate is a pure function of its argument - confirm.
                int valueSevenCards = Global.handEvaluator.Evaluate(holeMask + boardMask);

                int wins = 0;
                int ties = 0;
                int losses = 0;

                // card2Opponent always exceeds card1Opponent, so card1Opponent never has
                // to be marked dead to avoid pairing a card with itself.
                for (int card1Opponent = 0; card1Opponent < DeckSize - 1; card1Opponent++)
                {
                    if (((1L << card1Opponent) & deadCardMask) != 0)
                    {
                        continue;
                    }

                    for (int card2Opponent = card1Opponent + 1; card2Opponent < DeckSize; card2Opponent++)
                    {
                        if (((1L << card2Opponent) & deadCardMask) != 0)
                        {
                            continue;
                        }

                        ulong handOpponentSevenCards = boardMask + (1uL << card1Opponent) + (1uL << card2Opponent);
                        int valueOpponentSevenCards = Global.handEvaluator.Evaluate(handOpponentSevenCards);

                        if (valueSevenCards > valueOpponentSevenCards)
                        {
                            wins++;
                        }
                        else if (valueSevenCards == valueOpponentSevenCards)
                        {
                            ties++;
                        }
                        else
                        {
                            losses++;
                        }
                    }
                }

                float equity = (wins + ties / 2.0f) / (wins + ties + losses);

                // An equity of exactly 1 would index one past the end, hence the clamp.
                int bin = Math.Min(Global.preflopHistogramSize - 1, (int)(equity * (float)Global.preflopHistogramSize));
                histogramsPreflop[i][bin] += 1;

                Interlocked.Add(ref sharedLoopCounter, 1);
                progress.Report((double)Interlocked.Read(ref sharedLoopCounter) / (NofPreflopHands * Global.nofMCSimsPerPreflopHand), sharedLoopCounter);
            }
        });
    }

    TimeSpan elapsed = DateTime.UtcNow - start;
    Console.WriteLine("Calculating opponent clusters completed in {0}d {1}h {2}m {3}s", elapsed.Days, elapsed.Hours, elapsed.Minutes, elapsed.Seconds);

    Console.WriteLine("Calculated histograms: ");
    for (int i = 0; i < NofPreflopHands; ++i)
    {
        int[] cardsOutput = new int[2];
        Global.indexer_2.unindex(Global.indexer_2.rounds - 1, i, cardsOutput);

        Hand hand = new Hand();
        hand.Cards.Add(new Card(cardsOutput[0]));
        hand.Cards.Add(new Card(cardsOutput[1]));
        hand.PrintColoredCards();
        Console.Write(": ");

        for (int j = 0; j < Global.preflopHistogramSize; ++j)
        {
            Console.Write(histogramsPreflop[i][j] + " ");
        }
        Console.WriteLine();
    }
}