/// <summary>
/// Chooses <paramref name="k"/> starting centers for EMD k-means using k-means++ style seeding.
/// </summary>
/// <remarks>
/// To keep seeding cheap, only a subset of at most <c>k * 20</c> rows (obtained from
/// <c>GetUniqueRandomNumbers</c>) is considered. The first center is picked with
/// <c>RandomGen.Next</c>; every further center is drawn with <c>Util.SampleDistribution</c>
/// from the normalised squared distance of each sample to its nearest already chosen center.
/// </remarks>
/// <param name="data">Rows to choose the centers from.</param>
/// <param name="k">Number of centers to pick.</param>
/// <returns>A matrix with <paramref name="k"/> rows, each a copy of one sampled data row.</returns>
/// <exception cref="ArgumentException">
/// Fewer sample rows than <paramref name="k"/> are available, so <paramref name="k"/> distinct centers cannot be drawn.
/// </exception>
private float[,] FindStartingCentersEMD(float[][] data, int k)
{
    Console.WriteLine("K-means++ finding good starting centers...");

    // Work on a subset of the data to speed the seeding up.
    int maxSamples = Math.Min(k * 20, data.Length);
    float[][] dataTemp = GetUniqueRandomNumbers(data, maxSamples);
    int sampleCount = dataTemp.Length;

    // Every center must come from a distinct sample index; with fewer samples than
    // centers the retry loops below could never terminate.
    if (sampleCount < k)
    {
        throw new ArgumentException("Cannot pick " + k + " distinct centers from " + sampleCount + " samples.", nameof(k));
    }

    float[,] centers = new float[k, dataTemp[0].Length];
    List<int> centerIndices = new List<int>();

    using (var progress = new ProgressBar())
    {
        progress.Report((double)(1) / k, 1);
        for (int c = 0; c < k; ++c) // get a new cluster center one by one
        {
            if (c == 0)
            {
                // first cluster center is random
                int firstIndex = RandomGen.Next(0, sampleCount);
                centerIndices.Add(firstIndex);
                CopyArray(dataTemp, centers, firstIndex, c);
                continue;
            }

            float[] distancesToBestCenter = Enumerable.Repeat(float.MaxValue, sampleCount).ToArray();
            int chosenCenters = c;

            Parallel.For(0, Global.NOF_THREADS, i =>
            {
                // The work range is loop invariant, so look it up once per worker
                // (assumes GetWorkItemsIndices is a pure function of its arguments).
                var range = Util.GetWorkItemsIndices(sampleCount, Global.NOF_THREADS, i);
                var upper = range.Item2;
                for (int j = range.Item1; j < upper; ++j) // go through this worker's samples
                {
                    float nearest = distancesToBestCenter[j];
                    for (int m = 0; m < chosenCenters; ++m) // go through the centers chosen so far
                    {
                        float tempDistance = GetEarthMoverDistance(dataTemp, centers, j, m);
                        if (tempDistance < nearest)
                        {
                            nearest = tempDistance;
                        }
                    }
                    distancesToBestCenter[j] = nearest;
                }
            });

            SquareArray(distancesToBestCenter);
            float sum = distancesToBestCenter.Sum();

            int centerIndexSample;
            if (sum > 0f)
            {
                // Normalise the squared distances into sampling weights.
                for (int p = 0; p < distancesToBestCenter.Length; ++p)
                {
                    distancesToBestCenter[p] /= sum;
                }

                centerIndexSample = Util.SampleDistribution(distancesToBestCenter);
                while (centerIndices.Contains(centerIndexSample))
                {
                    centerIndexSample = Util.SampleDistribution(distancesToBestCenter);
                }
            }
            else
            {
                // Degenerate case: every sample coincides with a chosen center (sum is 0 or NaN),
                // so normalising would only produce NaN weights. All remaining samples are
                // equally good; pick an unused one uniformly instead.
                do
                {
                    centerIndexSample = RandomGen.Next(0, sampleCount);
                }
                while (centerIndices.Contains(centerIndexSample));
            }

            CopyArray(dataTemp, centers, centerIndexSample, c);
            centerIndices.Add(centerIndexSample);

            progress.Report((double)(c + 1) / k, c + 1);
        }
    }

    return centers;
}
/// <summary>
/// Builds one equity histogram for each of the 169 preflop hand indices by Monte Carlo
/// sampling of boards, stores them in <c>histogramsPreflop</c> and prints them.
/// </summary>
/// <remarks>
/// For every sampled five-card board the hero hand is compared against every opponent
/// holding that does not collide with the hero or board cards. The resulting equity
/// (wins plus half the ties, divided by all match-ups) selects the histogram bucket
/// that is incremented.
/// </remarks>
private static void CalculateOCHSOpponentClusters()
{
    Console.WriteLine("Calculating {0} opponent clusters for OCHS using Monte Carlo Sampling...", Global.nofOpponentClusters);
    DateTime start = DateTime.UtcNow;

    histogramsPreflop = new float[169][];
    for (int i = 0; i < 169; ++i)
    {
        histogramsPreflop[i] = new float[Global.preflopHistogramSize];
    }

    long sharedLoopCounter = 0;
    // Computed in double so the product cannot overflow an integer type.
    double totalSteps = 169.0 * Global.nofMCSimsPerPreflopHand;

    using (var progress = new ProgressBar())
    {
        progress.Report((double)Interlocked.Read(ref sharedLoopCounter) / totalSteps, sharedLoopCounter);

        Parallel.For(0, 169, i =>
        {
            int[] cards = new int[2];
            Global.indexer_2.unindex(Global.indexer_2.rounds - 1, i, cards);

            long heroDeadMask = (1L << cards[0]) + (1L << cards[1]);
            ulong heroMask = (1uL << cards[0]) + (1uL << cards[1]);

            for (int steps = 0; steps < Global.nofMCSimsPerPreflopHand; steps++)
            {
                // Draw flop (3), turn and river by rejection sampling against the cards already in use.
                long deadCardMask = heroDeadMask;
                ulong boardMask = 0;
                for (int boardCard = 0; boardCard < 5; boardCard++)
                {
                    int card = RandomGen.Next(0, 52);
                    while (((1L << card) & deadCardMask) != 0)
                    {
                        card = RandomGen.Next(0, 52);
                    }
                    deadCardMask |= (1L << card);
                    boardMask |= (1uL << card);
                }

                // The hero's seven-card value does not depend on the opponent's cards,
                // so evaluate it once per board instead of once per opponent holding.
                int valueSevenCards = Global.handEvaluator.Evaluate(heroMask | boardMask);

                int wins = 0;
                int ties = 0;
                int losses = 0;
                for (int card1Opponent = 0; card1Opponent < 51; card1Opponent++)
                {
                    if (((1L << card1Opponent) & deadCardMask) != 0)
                    {
                        continue;
                    }

                    // card2Opponent is always greater than card1Opponent, so the first
                    // opponent card never needs to be added to the dead mask.
                    for (int card2Opponent = card1Opponent + 1; card2Opponent < 52; card2Opponent++)
                    {
                        if (((1L << card2Opponent) & deadCardMask) != 0)
                        {
                            continue;
                        }

                        ulong handOpponentSevenCards = boardMask | (1uL << card1Opponent) | (1uL << card2Opponent);
                        int valueOpponentSevenCards = Global.handEvaluator.Evaluate(handOpponentSevenCards);

                        if (valueSevenCards > valueOpponentSevenCards)
                        {
                            wins++;
                        }
                        else if (valueSevenCards == valueOpponentSevenCards)
                        {
                            ties++;
                        }
                        else
                        {
                            losses++;
                        }
                    }
                }

                float equity = (wins + ties / 2.0f) / (wins + ties + losses);
                // Clamp so an equity of exactly 1.0 lands in the last bucket.
                int bucket = Math.Min(Global.preflopHistogramSize - 1, (int)(equity * (float)Global.preflopHistogramSize));
                histogramsPreflop[i][bucket] += 1;

                // Report the value returned by the atomic increment rather than re-reading the shared counter.
                long done = Interlocked.Increment(ref sharedLoopCounter);
                progress.Report((double)done / totalSteps, done);
            }
        });
    }

    TimeSpan elapsed = DateTime.UtcNow - start;
    Console.WriteLine("Calculating opponent clusters completed in {0}d {1}h {2}m {3}s", elapsed.Days, elapsed.Hours, elapsed.Minutes, elapsed.Seconds);

    Console.WriteLine("Calculated histograms: ");
    for (int i = 0; i < 169; ++i)
    {
        int[] cardsOutput = new int[2];
        Global.indexer_2.unindex(Global.indexer_2.rounds - 1, i, cardsOutput);

        Hand hand = new Hand();
        hand.Cards.Add(new Card(cardsOutput[0]));
        hand.Cards.Add(new Card(cardsOutput[1]));
        hand.PrintColoredCards();
        Console.Write(": ");

        for (int j = 0; j < Global.preflopHistogramSize; ++j)
        {
            Console.Write(histogramsPreflop[i][j] + " ");
        }
        Console.WriteLine();
    }
}
/// <summary>
/// Clusters the rows of <paramref name="data"/> into <paramref name="k"/> clusters with
/// k-means (k-means++ seeding) under the earth mover's distance and returns, for every row,
/// the index of the cluster it was assigned to.
/// </summary>
/// <param name="data">Rows to cluster.</param>
/// <param name="k">Number of clusters.</param>
/// <param name="nofRuns">
/// Number of clustering runs. The assignment with the lowest average distance seen in any
/// iteration of any run is the one returned.
/// </param>
/// <param name="_bestCenters">
/// Optional previous assignment (one cluster index per row). When given, the first run starts
/// from centers computed from this assignment instead of from fresh seeding.
/// </param>
/// <returns>
/// An array where the element at index i is the cluster index of row i. The cluster centers
/// themselves are discarded.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public int[] ClusterEMD(float[][] data, int k, int nofRuns, int[] _bestCenters = null)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    Console.WriteLine("K-means++ (EMD) clustering {0} elements into {1} clusters with {2} runs...", data.Length, k, nofRuns);
    int filenameId = RandomGen.Next(0, 10000000);
    DateTime start = DateTime.UtcNow;

    int[] bestCenters = new int[data.Length];
    int[] recordCenters = new int[data.Length]; // we return indices only, the centers are discarded

    // load previous indices if passed
    bool skipInit = false;
    if (_bestCenters != null)
    {
        skipInit = true;
        Array.Copy(_bestCenters, bestCenters, _bestCenters.Length);
        Array.Copy(_bestCenters, recordCenters, _bestCenters.Length);
    }

    double recordDistance = double.MaxValue;
    for (int run = 0; run < nofRuns; ++run)
    {
        float[,] centers = new float[k, data[0].Length];
        Console.WriteLine("K-means++ starting clustering...");
        double lastDistance = double.MaxValue;
        bool distanceChanged = true;

        if (!skipInit)
        {
            bestCenters = new int[data.Length];
            centers = FindStartingCentersEMD(data, k);
        }
        else
        {
            // Resume from the supplied assignment: derive the centers from it.
            // todo: it isnt theoretically sound to take the mean when using EMD distance metric
            centers = CalculateNewCenters(data, bestCenters, k);
            skipInit = false;
        }

        float[,] centerCenterDistances = new float[k, k];
        while (distanceChanged)
        {
            // calculate cluster-cluster distances to use triangle inequality
            CalculateClusterDistancesEMD(centerCenterDistances, centers);

            // find closest cluster for each element
            long sharedLoopCounter = 0;
            double totalDistance = 0;
            using (var progress = new ProgressBar())
            {
                Parallel.For(0, Global.NOF_THREADS, i =>
                {
                    double threadDistance = 0;
                    long iter = 0;

                    // The work range is loop invariant, so look it up once per worker
                    // (assumes GetWorkItemsIndices is a pure function of its arguments).
                    var range = Util.GetWorkItemsIndices(data.Length, Global.NOF_THREADS, i);
                    var upper = range.Item2;
                    for (int j = range.Item1; j < upper; ++j) // go through this worker's rows
                    {
                        // assume previous cluster was good, this is better for the triangle inequality
                        double distance = GetEarthMoverDistance(data, centers, j, bestCenters[j]);
                        int bestIndex = bestCenters[j];
                        for (int m = 0; m < k; m++) // go through centers
                        {
                            // A center at least twice as far from the current best center as the row is
                            // cannot be closer to the row, so its distance is not computed.
                            if (centerCenterDistances[bestIndex, m] < 2 * distance && bestIndex != m)
                            {
                                double tempDistance = GetEarthMoverDistance(data, centers, j, m);
                                if (tempDistance < distance)
                                {
                                    distance = tempDistance;
                                    bestIndex = m;
                                }
                            }
                        }
                        bestCenters[j] = bestIndex;
                        threadDistance += distance;

                        iter++;
                        if (iter % 100000 == 0)
                        {
                            // Publish progress and the partial distance sum in batches.
                            long done = Interlocked.Add(ref sharedLoopCounter, 100000);
                            AddDouble(ref totalDistance, threadDistance);
                            threadDistance = 0;
                            progress.Report((double)done / data.Length, done);
                        }
                    }

                    // Publish whatever is left over from the last, incomplete batch.
                    long finished = Interlocked.Add(ref sharedLoopCounter, iter % 100000);
                    progress.Report((double)finished / data.Length, finished);
                    AddDouble(ref totalDistance, threadDistance);
                });
            }

            centers = CalculateNewCenters(data, bestCenters, k);
            totalDistance /= data.Length;

            // NOTE(review): convergence relies on exact floating-point equality of two sums that are
            // accumulated across threads; if the accumulation order can affect rounding this may
            // run extra iterations - confirm whether a tolerance is wanted.
            distanceChanged = !(totalDistance == lastDistance);
            double diff = lastDistance - totalDistance;

            // Update the record first so the checkpoint written below contains the best
            // assignment found so far, including the one from this iteration.
            if (totalDistance < recordDistance)
            {
                recordDistance = totalDistance;
                Array.Copy(bestCenters, recordCenters, recordCenters.Length);
            }

            Console.WriteLine("Saving intermediate table to file...");
            FileHandler.SaveToFile(recordCenters, "EMDTable_temp_" + filenameId + ".txt");

            Console.WriteLine("Current average distance: {0} Improvement: {1}, {2}%", totalDistance, diff, 100.0 * (1.0 - totalDistance / lastDistance));
            lastDistance = totalDistance;
        }
    }

    Console.WriteLine("Best distance found: " + recordDistance);
    TimeSpan elapsed = DateTime.UtcNow - start;
    Console.WriteLine("K-means++ clustering (EMD) completed in {0}d {1}h {2}m {3}s", elapsed.Days, elapsed.Hours, elapsed.Minutes, elapsed.Seconds);

    return recordCenters;
}
/// <summary>
/// Runs Monte Carlo counterfactual regret minimisation on <c>Global.NOF_THREADS</c> parallel workers.
/// </summary>
/// <remarks>
/// Every worker owns its own <c>Trainer</c> and loops without an exit condition, so this method
/// only ends if a worker throws. Strategy updates, sample games, saving and discounting are
/// performed by worker 0 only. All interval values are expressed in per-worker iterations.
/// </remarks>
private static void Train()
{
    Console.WriteLine("Starting Monte Carlo Counterfactual Regret Minimization (MCCFRM)...");

    // Intervals are used as modulo divisors below, so each is clamped to at least 1.
    long strategyInterval = Math.Max(1, 1000 / Global.NOF_THREADS);       // bb rounds before updating player strategy (recursive through tree) 10k
    long pruneThreshold = 20000000 / Global.NOF_THREADS;                  // bb rounds after this time we stop checking all actions, 200 minutes
    long lcfrThreshold = 20000000 / Global.NOF_THREADS;                   // bb rounds when to stop discounting old regrets, no clue what it should be
    long discountInterval = Math.Max(1, 1000000 / Global.NOF_THREADS);    // bb rounds, discount values periodically but not every round, 10 minutes
    long saveToDiskInterval = Math.Max(1, 1000000 / Global.NOF_THREADS);
    long testGamesInterval = Math.Max(1, 100000 / Global.NOF_THREADS);

    long sharedLoopCounter = 0;

    LoadFromFile();
    LoadFromFile_d();

    // A throw-away trainer enumerates the action space once before the workers start.
    // It is deliberately not given a name: the workers declare their own 'trainer' local,
    // and re-using that name in the enclosing scope is an error before C# 8 and confusing
    // shadowing afterwards.
    new Trainer(0).EnumerateActionSpace();

    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    Parallel.For(0, Global.NOF_THREADS, index =>
    {
        Trainer trainer = new Trainer(index);

        // NOTE(review): 't' is an int and the loop never ends, so it will eventually overflow;
        // widening it depends on the parameter type of TraverseMCCFR - confirm.
        for (int t = 1; ; t++) // bb rounds
        {
            if (t % 1000 == 0)
            {
                long total = Interlocked.Add(ref sharedLoopCounter, 1000);
                Console.WriteLine("Training steps " + total);
            }

            if (t % testGamesInterval == 0 && index == 0) // implement progress bar later
            {
                trainer.PrintStartingHandsChart();
                trainer.PrintStatistics(Interlocked.Read(ref sharedLoopCounter));

                Console.WriteLine("Sample games (against self)");
                for (int z = 0; z < 20; z++)
                {
                    trainer.PlayOneGame();
                }

                //Console.WriteLine("Sample games (against baseline)");
                //float mainScore = 0.0f;
                //for (int x = 0; x < 100; x++) // 100 games not statistically significant
                //{
                //    if (x < 20)
                //    {
                //        mainScore += trainer.PlayOneGame_d(x % 2, true);
                //    }
                //    mainScore += trainer.PlayOneGame_d(x % 2, false);
                //}
                //WritePlotStatistics((mainScore / 10000) / Global.BB);
                //Console.WriteLine("BBs per hand: {0}", (mainScore / 10000) / Global.BB);

                // +1 avoids a division by zero right after the stopwatch was started.
                Console.WriteLine("Iterations per second: {0}", 1000 * Interlocked.Read(ref sharedLoopCounter) / (stopwatch.ElapsedMilliseconds + 1));
                Console.WriteLine();
            }

            for (int traverser = 0; traverser < Global.nofPlayers; traverser++) // traverser
            {
                if (t % strategyInterval == 0 && index == 0)
                {
                    trainer.UpdateStrategy(traverser);
                }

                if (t > pruneThreshold)
                {
                    // Past the threshold, only a small share of traversals stays unpruned.
                    float q = RandomGen.NextFloat();
                    if (q < 0.05)
                    {
                        trainer.TraverseMCCFR(traverser, t);
                    }
                    else
                    {
                        trainer.TraverseMCCFRPruned(traverser);
                    }
                }
                else
                {
                    trainer.TraverseMCCFR(traverser, t);
                }
            }

            if (t % saveToDiskInterval == 0 && index == 0) // allow only one thread to do saving
            {
                Console.WriteLine("Saving nodeMap to disk disabled!");
                //SaveToFile();
            }

            // discount all infosets (for all players)
            if (t < lcfrThreshold && t % discountInterval == 0 && index == 0) // allow only one thread to do discounting
            {
                float ratio = (float)t / discountInterval;
                float d = ratio / (ratio + 1);
                trainer.DiscountInfosets(d);
            }
        }
    });
}