예제 #1
0
        /// <summary>
        /// Buckets the hands for one node of the cluster tree and then recurses into each of its
        /// children with the bucket produced for that child. Does nothing once
        /// <paramref name="round"/> equals 4.
        /// </summary>
        /// <param name="path">Comma-separated child indexes identifying this node; used in verbose output and extended for children.</param>
        /// <param name="round">Round of this node; children are processed with <c>round + 1</c>.</param>
        /// <param name="parentNode">Node whose children are visited.</param>
        /// <param name="parentBucket">Bucket handed to <c>CreateMcHands</c> to produce the hands for this node.</param>
        void CalculateNode(string path, int round, IClusterNode parentNode, Bucket parentBucket)
        {
            // Recursion terminates at round 4.
            if (round == 4)
            {
                return;
            }

            if (IsVerbose)
            {
                Console.WriteLine("Start node: {0}.", path);
            }

            McHand[] sampledHands = CreateMcHands(round, parentBucket);
            Bucket[] childBuckets = Clusterizer.BucketizeHands(round, sampledHands, parentNode);

            // Child k is processed with the k-th bucket returned by the clusterizer.
            int childIndex = 0;
            while (childIndex < parentNode.ChildrenCount)
            {
                string childPath = path + "," + childIndex.ToString();
                CalculateNode(childPath, round + 1, parentNode.GetChild(childIndex), childBuckets[childIndex]);
                ++childIndex;
            }

            if (IsVerbose)
            {
                Console.WriteLine("Finish node: {0}.", path);
            }
        }
        /// <summary>
        /// Builds a <c>Prediction</c> for <paramref name="song"/> based on the cluster whose index is
        /// returned by <c>Clusterizer.GetNearestClusterIndex</c>.
        /// </summary>
        /// <param name="song">Song to evaluate; also stored in the result as <c>AssociatedSong</c>.</param>
        /// <returns>
        /// A prediction carrying the nearest cluster's index, centroid, size, probability entry and
        /// distance, together with the same quantities for all clusters.
        /// </returns>
        public Prediction PredictSuccess(Song song)
        {
            // Values specific to the nearest cluster are all looked up through this index.
            int nearestIndex = Clusterizer.GetNearestClusterIndex(song);
            Song nearestCentroid = Clusterizer.Centroids[nearestIndex];

            double nearestPortion = Probabilities[nearestIndex];
            List<double> allPortions = new List<double>(Probabilities);

            double nearestDistance = DistanceFunc.GetDistance(song, nearestCentroid);
            List<double> allDistances =
                (from centroid in Clusterizer.Centroids
                 select DistanceFunc.GetDistance(song, centroid)).ToList();

            List<int> allSizes =
                (from cluster in Clusterizer.Clusters
                 select cluster.Count).ToList();
            int nearestSize = Clusterizer.Clusters[nearestIndex].Count;

            Prediction prediction = new Prediction
            {
                ClusterIndex = nearestIndex,
                ClusterPortion = nearestPortion,
                ClusterPortions = allPortions,
                CentroidDistance = nearestDistance,
                CentroidDistances = allDistances,
                ClusterSizes = allSizes,
                ClusterSize = nearestSize,
                ClusterCentroid = nearestCentroid,
                AssociatedSong = song,
            };

            return prediction;
        }
        /// <summary>
        /// Adds a fixed set of 28 edges over the ids 1..8 to a <c>Clusterizer</c> constructed with
        /// the argument 3, runs it, and checks that <c>Spacing</c> equals 15.
        /// </summary>
        public void TestClusterizer()
        {
            // One row per AddEdge call, in call order: { first id, second id, third argument }.
            // NOTE(review): the third argument is presumably the edge cost/distance - confirm against AddEdge.
            int[,] edges =
            {
                { 1, 2, 4 },  { 1, 3, 6 },  { 1, 4, 20 }, { 1, 5, 22 }, { 1, 6, 30 }, { 1, 7, 32 }, { 1, 8, 34 },
                { 2, 3, 5 },  { 2, 4, 21 }, { 2, 5, 23 }, { 2, 6, 31 }, { 2, 7, 33 }, { 2, 8, 35 },
                { 3, 4, 22 }, { 3, 5, 24 }, { 3, 6, 32 }, { 3, 7, 34 }, { 3, 8, 36 },
                { 4, 5, 2 },  { 4, 6, 15 }, { 4, 7, 17 }, { 4, 8, 19 },
                { 5, 6, 16 }, { 5, 7, 18 }, { 5, 8, 19 },
                { 6, 7, 2 },  { 6, 8, 1 },
                { 7, 8, 3 },
            };

            var clusterizer = new Clusterizer(3);

            for (int row = 0; row < edges.GetLength(0); ++row)
            {
                clusterizer.AddEdge(edges[row, 0], edges[row, 1], edges[row, 2]);
            }

            clusterizer.Execute();
            Assert.AreEqual(15, clusterizer.Spacing);
        }
        /// <summary>
        /// Adds four 24-character bit strings to a <c>Clusterizer</c> and checks that it reports
        /// two disjoint sets.
        /// </summary>
        public void TestClusterizer()
        {
            var clusterizer = new Clusterizer();

            // Inputs in the order they are added.
            string[] bitStrings =
            {
                "000000000000000000000000",
                "111000000000000000000000",
                "000000000000000000000111",
                "000000000000000000000101",
            };

            foreach (string bits in bitStrings)
            {
                clusterizer.Add(bits);
            }

            Assert.AreEqual(2, clusterizer.DisjointSetsCount);
        }
예제 #5
0
        /// <summary>
        /// Program entry point. Resolves the logger and file writer from the service provider,
        /// parses <paramref name="args"/> into one of the supported option types and dispatches to
        /// the matching handler.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        /// <returns>
        /// The value returned by the selected handler, or by <c>HandleParseError</c> when the
        /// error callback is invoked.
        /// </returns>
        static int Main(string[] args)
        {
            ServiceProvider services = CreateServiceProvider();
            var log = services.GetService<ILogger<Program>>();
            var fileWriter = services.GetService<FileWriter>();

            var parsed = Parser.Default
                .ParseArguments<NormalizeOptions, GenerateOptions, PaintOptions, ClusterizationOptions, ForecastOptions>(args);

            // One callback per option type, followed by the error callback.
            return parsed.MapResult(
                (GenerateOptions options) => Generator.Generate(options, log, fileWriter),
                (NormalizeOptions options) => Normalizer.Normalize(options, log),
                (PaintOptions options) => Painter.Paint(options, fileWriter, log),
                (ClusterizationOptions options) => Clusterizer.Clusterize(options, log, fileWriter),
                (ForecastOptions options) => Forecaster.Forecast(options, fileWriter, log),
                errors => HandleParseError(errors, log));
        }
예제 #6
0
        /// <summary>
        /// Reads <c>clustering_big.txt</c> line by line, adds every line (with spaces removed) to a
        /// <c>Clusterizer</c>, prints the running line count every 1000 lines, then prints
        /// <c>DisjointSetsCount</c> and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var linesRead = 0;
            var clusterizer = new Clusterizer();

            // The reader is disposed as soon as the file has been consumed, so the file handle is
            // released even if Add throws and is not held while waiting for the key press.
            using (var reader = new StreamReader("clustering_big.txt"))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    linesRead += 1;
                    if (linesRead % 1000 == 0)
                    {
                        // Progress indicator.
                        Console.WriteLine(linesRead);
                    }
                    clusterizer.Add(line.Replace(" ", ""));
                }
            }

            Console.WriteLine(clusterizer.DisjointSetsCount);
            Console.ReadKey();
        }
예제 #7
0
        /// <summary>
        /// Creates <paramref name="preflopBucketsCount"/> buckets and places one hand for every
        /// pocket of every pocket kind into the bucket selected by
        /// <c>Clusterizer.GetAbstractCard</c>.
        /// </summary>
        /// <param name="preflopBucketsCount">Number of buckets to allocate.</param>
        /// <returns>The array of filled buckets.</returns>
        private Bucket[] CreatePreflopBuckets(int preflopBucketsCount)
        {
            Bucket[] result = new Bucket[preflopBucketsCount].Fill(index => new Bucket());
            int handsAdded = 0;

            for (int kindIndex = 0; kindIndex < HePocket.Count; ++kindIndex)
            {
                // Use all possible pockets for each pocket kind. This ensures
                // that they occur with the natural frequency in a typical case where
                // a bucket contains pocket kinds with different numbers of pockets (e.g. AA - 6, AKs - 4, AKo - 12).
                foreach (CardSet pocketCards in HePocket.KindToRange((HePocketKind)kindIndex))
                {
                    McHand hand = new McHand();
                    int[] pocketIndexes = StdDeck.Descriptor.GetIndexesAscending(pocketCards).ToArray();

                    // Everything in the full deck except the pocket cards.
                    CardSet remainingCards = StdDeck.Descriptor.FullDeck;
                    remainingCards.Remove(pocketCards);
                    int[] remainingIndexes = StdDeck.Descriptor.GetIndexesAscending(remainingCards).ToArray();
                    Debug.Assert(pocketIndexes.Length + remainingIndexes.Length == 52);

                    // Pocket cards go first, the rest of the deck follows from index 2;
                    // only the first two cards count as the hand's current length.
                    pocketIndexes.CopyTo(hand.Cards, 0);
                    remainingIndexes.CopyTo(hand.Cards, 2);
                    hand.Length = 2;

                    int bucketIndex = Clusterizer.GetAbstractCard(hand.Cards, hand.Length);
                    result[bucketIndex].Hands.Add(hand);
                    ++handsAdded;
                }
            }

            // Sanity check on the total number of pockets processed.
            Debug.Assert(handsAdded == 1326);

            if (IsVerbose)
            {
                Console.WriteLine("Preflop buckets created, buckets: {0}, hands: {1}", result.Length, handsAdded);
            }

            return result;
        }
예제 #8
0
        /// <summary>
        /// Generates the cluster tree: resets the random generator and sample counter, obtains the
        /// root from <c>Clusterizer.OnGenerateBegin</c>, creates the preflop buckets, processes
        /// every child of the root starting at round 1, optionally prints statistics, and finally
        /// calls <c>Clusterizer.OnGenerateEnd</c>.
        /// </summary>
        /// <returns>The root node obtained from the clusterizer.</returns>
        public IClusterNode Generate()
        {
            _rng = new MersenneTwister(RngSeed);
            _totalMcSamples = 0;

            Clusterizer.IsVerbose = IsVerbose;

            IClusterNode root = Clusterizer.OnGenerateBegin();
            Bucket[] preflopBuckets = CreatePreflopBuckets(root.ChildrenCount);

            int childIndex = 0;
            while (childIndex < root.ChildrenCount)
            {
                CalculateNode(childIndex.ToString(), 1, root.GetChild(childIndex), preflopBuckets[childIndex]);
                ++childIndex;
            }

            if (IsVerbose)
            {
                // Start from the extreme values so CalculateStatistics can narrow them down.
                double[] minClusters = new double[4].Fill(index => double.MaxValue);
                double[] maxClusters = new double[4].Fill(index => double.MinValue);

                CalculateStatistics(root, 0, minClusters, maxClusters);

                Console.Write("Min clusters count:");
                foreach (double clusterCount in minClusters)
                {
                    Console.Write("{0,3} ", clusterCount);
                }
                Console.WriteLine();

                Console.Write("Max clusters count:");
                foreach (double clusterCount in maxClusters)
                {
                    Console.Write("{0,3} ", clusterCount);
                }
                Console.WriteLine();

                Console.WriteLine("Samples: {0:#,#}", _totalMcSamples);
            }

            Clusterizer.OnGenerateEnd(root);
            return root;
        }
예제 #9
0
        /// <summary>
        /// Click handler: generates random cluster cores and dots on a square field, clusterizes
        /// them, renders the result into the "before" picture box, then optimizes the
        /// clusterization and renders the result into the "after" picture box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnGenerate_Click(object sender, EventArgs e)
        {
            const int fieldSize = 420;        // width and height of the field and of both bitmaps
            const int coreCount = 20;         // number of cluster cores
            const int totalPointCount = 100000; // cores + dots; dots take the indexes coreCount..totalPointCount-1

            var drawer = new Drawer();
            var rand   = new Random();

            var cores = new List<ClusterCore>();
            for (var i = 0; i < coreCount; i++)
            {
                var coreLocation = new Point(rand.Next(0, fieldSize), rand.Next(0, fieldSize));
                cores.Add(new ClusterCore(coreLocation));
            }

            var dots = new List<Dot>();
            for (var i = coreCount; i < totalPointCount; i++)
            {
                dots.Add(new Dot(new Point(rand.Next(0, fieldSize), rand.Next(0, fieldSize))));
            }

            var clusterizer = new Clusterizer();
            clusterizer.Clusterize(cores, dots);

            // Graphics objects wrap native GDI+ handles; dispose each one as soon as drawing is done
            // so repeated clicks do not leak handles. The bitmaps stay alive because the picture
            // boxes keep displaying them.
            var beforeBitmap = new Bitmap(fieldSize, fieldSize);
            using (var graphics = Graphics.FromImage(beforeBitmap))
            {
                drawer.DrawField(graphics, dots, cores);
            }
            picboxBeforeClusterization.Image = beforeBitmap;

            clusterizer.OptimizeClusterisation(cores, dots);

            var afterBitmap = new Bitmap(fieldSize, fieldSize);
            using (var graphics = Graphics.FromImage(afterBitmap))
            {
                drawer.DrawField(graphics, dots, cores);
            }
            picBoxAfterClusterization.Image = afterBitmap;
        }
예제 #10
0
    /// <summary>
    /// Loads a word2vec model, reads a fixed number of tab-separated ngrams with scores from the
    /// ngram data file, builds one concatenated word-vector per ngram, and passes everything to
    /// <c>Clusterizer.Fit</c>.
    /// </summary>
    /// <param name="args">Command-line arguments (unused; all paths and sizes are hard-coded).</param>
    /// <exception cref="ApplicationException">
    /// The ngram data file is empty, contains a line with too few fields, or holds fewer than
    /// <c>max_nb_ngrams</c> lines.
    /// </exception>
    static void Main(string[] args)
    {
        string w2v_path      = @"f:\Word2Vec\word_vectors_cbow=1_win=5_dim=32.txt";
        string ngrams_path   = @"f:\tmp\mutual_info_2_ru.dat";
        string clusters_path = @"f:\tmp\clusters.txt";

        int max_nb_ngrams = 2000000;
        int nb_clusters   = 1000;

        Console.WriteLine($"Load w2v model from {w2v_path}");
        W2V_Lib.W2V_Model w2v = new W2V_Lib.W2V_Model(w2v_path, lazy_load: true);
        int veclen            = w2v.GetVectorLen();

        // The ngram arity is derived from the first line: every field except the last one is a word.
        int ngram_arity = 0;

        using (System.IO.StreamReader rdr = new System.IO.StreamReader(ngrams_path))
        {
            string first_line = rdr.ReadLine();
            if (first_line == null)
            {
                // ReadLine returns null for an empty file; fail with a clear message instead of a NullReferenceException.
                throw new ApplicationException($"Datafile {ngrams_path} is empty");
            }

            ngram_arity = first_line.Split('\t').Length - 1;
        }


        Console.WriteLine($"Load and vectorize ngrams from {ngrams_path}");
        // An ngram vector is the concatenation of the vectors of its words.
        int ngram_dim = ngram_arity * veclen;

        string[]  ngram_texts   = new string[max_nb_ngrams];
        float[][] ngram_vectors = new float[max_nb_ngrams][];
        for (int i = 0; i < max_nb_ngrams; ++i)
        {
            ngram_vectors[i] = new float[ngram_dim];
        }

        float[] ngram_scores  = new float[max_nb_ngrams]; // running (cumulative) sum of the scores
        float[] ngram_scores0 = new float[max_nb_ngrams]; // score of each ngram as read from the file
        float   sum_scores    = 0.0f;

        using (System.IO.StreamReader rdr = new System.IO.StreamReader(ngrams_path))
        {
            int idata = 0;
            while (!rdr.EndOfStream && idata < max_nb_ngrams)
            {
                string   line = rdr.ReadLine();
                string[] toks = line.Split('\t');

                if (toks.Length <= ngram_arity)
                {
                    // A short or blank line would otherwise surface as a bare IndexOutOfRangeException below.
                    throw new ApplicationException(
                        $"Malformed line {idata + 1} in datafile {ngrams_path}: expected {ngram_arity + 1} tab-separated fields, found {toks.Length}");
                }

                ngram_texts[idata] = string.Join(" ", toks.Take(ngram_arity));

                // Copy the vector of the j-th word into the j-th slice of the ngram vector.
                for (int j = 0; j < ngram_arity; ++j)
                {
                    string  word     = toks[j];
                    float[] word_vec = w2v[word];
                    for (int k = 0; k < veclen; ++k)
                    {
                        ngram_vectors[idata][j * veclen + k] = word_vec[k];
                    }
                }

                // The score is the field that follows the words.
                float score = float.Parse(toks[ngram_arity], System.Globalization.CultureInfo.InvariantCulture);
                ngram_scores0[idata] = score;
                sum_scores          += score;
                ngram_scores[idata]  = sum_scores;
                idata++;
            }

            if (idata != max_nb_ngrams)
            {
                throw new ApplicationException($"Not enough ngrams in datafile {ngrams_path}");
            }
        }


        Console.WriteLine("Clusterize");
        Clusterizer clusterizer = new Clusterizer(nb_clusters);

        clusterizer.Fit(
            ngram_arity,
            veclen,
            ngram_texts,
            ngram_vectors,
            ngram_scores,
            ngram_scores0,
            10000000,
            2e-2f,
            1000000,
            clusters_path
            );
        Console.WriteLine("All done.");
    }