Example #1
0
        /// <summary>
        /// Fills the queue with 1000 pseudo-random values (fixed seed 6754), then walks the
        /// handles in shuffled order: each stored value is read back through the handle
        /// indexer, overwritten with a second pseudo-random value, and read back again.
        /// <c>queue.Check()</c> is asserted after every mutation.
        /// </summary>
        public void RandomIndexing()
        {
            const int count = 1000;

            var rng         = new Random(6754);
            var initial     = new int[count];
            var replacement = new int[count];
            var order       = new ArrayList<int>(count);
            var handles     = new IPriorityQueueHandle<int>[count];

            for (var k = 0; k < count; k++)
            {
                order.Add(k);

                // Draw order is significant for reproducibility:
                // the initial value is drawn first, the replacement second.
                var first = rng.Next();
                initial[k] = first;
                queue.Add(ref handles[k], first);
                replacement[k] = rng.Next();

                Assert.IsTrue(queue.Check());
            }

            Assert.IsTrue(queue.Check());
            order.Shuffle(rng);

            foreach (var slot in order)
            {
                var handle = handles[slot];

                Assert.AreEqual(initial[slot], queue[handle]);
                queue[handle] = replacement[slot];
                Assert.AreEqual(replacement[slot], queue[handle]);
                Assert.IsTrue(queue.Check());
            }
        }
Example #2
0
        /// <summary>
        /// Adds 1000 pseudo-random values (fixed seed 6754) to the queue, then deletes them
        /// through their handles in shuffled order, asserting that each delete returns the
        /// value originally stored under that handle and that the queue ends up empty.
        /// </summary>
        public void RandomWithDeleteHandles()
        {
            const int count = 1000;

            Random                      rng     = new Random(6754);
            int[]                       values  = new int[count];
            ArrayList<int>              order   = new ArrayList<int>(count);
            IPriorityQueueHandle<int>[] handles = new IPriorityQueueHandle<int>[count];

            for (int k = 0; k < count; k++)
            {
                order.Add(k);

                int value = rng.Next();
                values[k] = value;
                queue.Add(ref handles[k], value);

                Assert.IsTrue(queue.Check());
            }

            Assert.IsTrue(queue.Check());
            order.Shuffle(rng);

            foreach (int slot in order)
            {
                Assert.AreEqual(values[slot], queue.Delete(handles[slot]));
                Assert.IsTrue(queue.Check());
            }

            Assert.IsTrue(queue.IsEmpty);
        }
Example #3
0
 /// <summary>
 /// Checks the events recorded when shuffling: after shuffling the whole three-item
 /// list the expected log passed to <c>seen.Check</c> is a single <c>Changed</c> event,
 /// and after shuffling a zero-length view the expected log is empty.
 /// </summary>
 public void Shuffle()
 {
     list.Add(4);
     list.Add(56);
     list.Add(8);
     listen();

     // Whole-list shuffle.
     list.Shuffle();
     CollectionEvent<int>[] expectedAfterListShuffle = new CollectionEvent<int>[]
     {
         new CollectionEvent<int>(EventType.Changed, new EventArgs(), guarded)
     };
     seen.Check(expectedAfterListShuffle);

     // Shuffle of an empty view (offset 1, length 0).
     var emptyView = list.View(1, 0);
     emptyView.Shuffle();
     CollectionEvent<int>[] expectedAfterViewShuffle = new CollectionEvent<int>[0];
     seen.Check(expectedAfterViewShuffle);
 }
Example #4
0
 /// <summary>
 /// Replaces <c>mItems</c> with a new list in which the examples are grouped by
 /// <c>Label</c> (via <c>GroupBy</c>) and the groups are concatenated.
 /// </summary>
 /// <param name="shuffle">When true, the members of each label group are shuffled before concatenation.</param>
 /// <param name="random">
 /// Random source handed to <c>Shuffle</c>; when null the parameterless <c>Shuffle()</c> is used instead.
 /// Ignored when <paramref name="shuffle"/> is false.
 /// </param>
 public void GroupLabels(bool shuffle, Random random = null)
 {
     var byLabel = mItems.GroupBy(example => example.Label);

     var regrouped = byLabel.SelectMany(group =>
     {
         var members = new ArrayList<LabeledExample<LblT, ExT>>(group);
         if (!shuffle)
         {
             return members;
         }

         if (random != null)
         {
             members.Shuffle(random);
         }
         else
         {
             members.Shuffle();
         }
         return members;
     });

     // The query is deferred; it is enumerated by the constructor, before mItems is reassigned.
     mItems = new ArrayList<LabeledExample<LblT, ExT>>(regrouped);
 }
Example #5
0
        /// <summary>
        /// Assigns each instance index in <c>[0, instCount)</c> to the centroid with the highest
        /// similarity value returned by <c>ModelUtils.GetDotProductSimilarity</c>. Exact ties are
        /// broken by shuffling the tied centroid indices with <c>mRnd</c> and taking the first.
        /// </summary>
        /// <param name="centroids">Centroids to assign to; the chosen centroid's <c>Items</c> receives <c>instIdx + offs</c>.</param>
        /// <param name="dataMtx">Data matrix passed through to <c>ModelUtils.GetDotProductSimilarity</c>.</param>
        /// <param name="instCount">Number of instances to assign.</param>
        /// <param name="offs">Offset added to every instance index before it is stored.</param>
        /// <param name="clustQual">Receives the sum of the winning similarities divided by <paramref name="instCount"/>.</param>
        internal void Assign(ArrayList <CentroidData> centroids, SparseMatrix <double> dataMtx, int instCount, int offs, out double clustQual)
        {
            int centroidCount = centroids.Count;
            double[][] simByCentroid = new double[centroidCount][];
            clustQual = 0;

            // One similarity row per centroid, in enumeration order.
            int slot = 0;
            foreach (CentroidData centroid in centroids)
            {
                simByCentroid[slot] = ModelUtils.GetDotProductSimilarity(dataMtx, instCount, centroid.GetSparseVector());
                slot++;
            }

            for (int inst = 0; inst < instCount; inst++)
            {
                double bestSim = double.MinValue;
                ArrayList<int> tied = new ArrayList<int>();

                for (int c = 0; c < centroidCount; c++)
                {
                    double sim = simByCentroid[c][inst];
                    if (sim > bestSim)
                    {
                        // Strictly better: forget earlier candidates.
                        bestSim = sim;
                        tied.Clear();
                    }
                    else if (sim != bestSim)
                    {
                        // Worse (or not comparable): not a candidate.
                        continue;
                    }
                    tied.Add(c);
                }

                if (tied.Count > 1)
                {
                    tied.Shuffle(mRnd);
                }

                int winner = tied[0];
                centroids[winner].Items.Add(inst + offs);
                clustQual += bestSim;
            }

            clustQual /= (double)instCount;
        }
        }//End SeedMasterList();

        /// <summary>
        /// StandardBracketGeneration()
        ///
        /// Generates brackets from the entries in <c>distinctPlayerGroups</c>.
        /// The method seeds the master list, verifies that at least <c>bracketSize</c>
        /// players are present, calls <c>SeparatePlayerLists</c>,
        /// <c>CalculateBracketsPossible</c> and <c>GeneratePairsAndRemoveDuplicates</c>,
        /// and then repeatedly shuffles the entry list and selects
        /// <c>bracketSize / 2</c> entries per bracket. Every completed bracket is cloned
        /// into <c>generatedBrackets</c>, which is finally added to <c>CompleteBrackets</c>.
        /// </summary>
        /// <remarks>
        /// Shows a message box and returns early when there are fewer players than
        /// <c>bracketSize</c>; shows another message box with the number of brackets made
        /// when generation finishes. On the normal path it calls <c>ClearLists()</c> and
        /// disposes <c>Master</c>, <c>NumericBrackets</c> and <c>AllBrackets</c>.
        /// The method takes no parameters.
        /// </remarks>
        public void StandardBracketGeneration()
        {
            // Planned parameters (not implemented yet):
            //   int sizeOfBracket, int numberGames, double bracketVal  generate all distinct pairs.
            //TODO: will code this as a parameter later

            /* this.bracketSize = sizeOfBracket;
             * this.numOfGames = numberGames;
             * this.BracketValue = bracketVal*this.bracketSize;
             * this.HouseProfit = bracketVal;
             */
            //allocate the CompletedBrackets ArrayList (nested).
            //will automate this later with database values from random bowlers that will
            //be pulled from a separate associative array that indicates the number of
            //available bowlers who want to play.
            SeedMasterList();

            //Check to see if bracket size is greater than the list of players
            if (Master.Count < bracketSize)
            {
                MessageBox.Show("Not Enough Players to Make a Bracket of " + bracketSize + ",\n" +
                                "How Many Players Entered: " + Master.Count + "\n" +
                                "Please Add more players or choose a smaller bracket size");
                return;
            }

            //Separate Players into Numeric / All Bracket types
            SeparatePlayerLists();
            //Calculate the possible number of brackets
            CalculateBracketsPossible();
            GeneratePairsAndRemoveDuplicates();

            // Outer loop: one iteration per bracket attempt. Runs while the remaining
            // count is positive and not below bracketSize / 2.
            while (numberBracketsRemaining > 0 && !((numberBracketsRemaining) < bracketSize / 2))
            {
                // Re-randomise the entry order before every bracket attempt.
                distinctPlayerGroups.Shuffle();

                // Cursor into distinctPlayerGroups.
                int i = 0;

                //While the bracket is not full, there is still budget (or a partially
                //filled bracket), and there are more entries than a bracket needs.
                // NOTE(review): when no entry satisfies the selection condition below, the
                // else-branch only moves the cursor (wrapping to 0) and none of the loop
                // conditions change — this looks like it can spin forever; confirm a
                // termination guarantee.
                while (ChosenPairs.Count < bracketSize / 2 && (numberBracketsRemaining > 0 || ChosenPairs.Count > 0) &&
                       distinctPlayerGroups.Count > bracketSize / 2)
                {
                    //If there aren't enough players to generate a bracket- stop.
                    if (numberBracketsRemaining < bracketSize / 2)
                    {
                        break;
                    }

                    // Clamp the cursor to the last valid index.
                    if (!(i < distinctPlayerGroups.Count))
                    {
                        i = distinctPlayerGroups.Count - 1;
                    }
                    //if the current entry is not chosen and both of its players still have brackets remaining.
                    if (distinctPlayerGroups[i].hasBeenUsed == false && ((distinctPlayerGroups[i].player1.currentNumOfBrackets < distinctPlayerGroups[i].player1.maxBrackets) && (distinctPlayerGroups[i].player2.currentNumOfBrackets < distinctPlayerGroups[i].player2.maxBrackets)))
                    {
                        //check to see if there is a duplicate name in the bracket
                        bool stopChecking = false;
                        if (NamesInBracket.Count >= 2)
                        {
                            //The bracket already holds names: search for an eligible entry
                            //whose two players are both absent from NamesInBracket.
                            // NOTE(review): if the search finds nothing, the entry at the
                            // current cursor is used anyway, without a name check — confirm
                            // this is intended.
                            foreach (Entry e in distinctPlayerGroups)
                            {
                                if (stopChecking == true)
                                {
                                    break;
                                }
                                if (NamesInBracket.Contains(e.player1.wholeName) || NamesInBracket.Contains(e.player2.wholeName))
                                {
                                    continue;
                                }
                                else
                                {
                                    //otherwise, we've now found the entry with no names in the list
                                    i = distinctPlayerGroups.IndexOf(e);
                                    if (distinctPlayerGroups[i].hasBeenUsed == false && ((distinctPlayerGroups[i].player1.currentNumOfBrackets < distinctPlayerGroups[i].player1.maxBrackets) && (distinctPlayerGroups[i].player2.currentNumOfBrackets < distinctPlayerGroups[i].player2.maxBrackets)))
                                    {
                                        //Console.WriteLine("Index: " + i + " GOOD ENTRY FOUND: " + distinctPlayerPairs[i].player1.wholeName + " AND " + distinctPlayerPairs[i].player2.wholeName);
                                        NamesInBracket.Add(distinctPlayerGroups[i].player1.wholeName);
                                        NamesInBracket.Add(distinctPlayerGroups[i].player2.wholeName);
                                        stopChecking = true;
                                        break;
                                    }
                                    else
                                    {
                                        continue;
                                    }
                                }
                            }
                        }

                        //mark that entry as chosen and charge one bracket to each player
                        distinctPlayerGroups[i].hasBeenUsed = true;
                        distinctPlayerGroups[i].player1.currentNumOfBrackets++;
                        distinctPlayerGroups[i].player2.currentNumOfBrackets++;
                        //add them to the chosen list
                        ChosenPairs.Add(distinctPlayerGroups[i]);
                        // First entry of a bracket: record its names here (later entries
                        // have their names recorded inside the search above).
                        if (NamesInBracket.Count == 0)
                        {
                            NamesInBracket.Add(distinctPlayerGroups[i].player1.wholeName);
                            NamesInBracket.Add(distinctPlayerGroups[i].player2.wholeName);
                        }

                        //for each player of the entry that is not an "all" type, deduct one
                        //from the overall number of brackets remaining.
                        if (distinctPlayerGroups[i].player1._isAllType == false)
                        {
                            numberBracketsRemaining--;
                        }
                        if (distinctPlayerGroups[i].player2._isAllType == false)
                        {
                            numberBracketsRemaining--;
                        }
                    }
                    else
                    {
                        //if we're at the end of the list
                        if (i == (distinctPlayerGroups.Count - 1))
                        {
                            //move the index position to the beginning of the list
                            //distinctPlayerPairs.Remove(distinctPlayerPairs[i]);
                            i = 0;
                            continue;    //and move to the next iteration of the selection loop.
                        }

                        //or just move to the next element;
                        i++;
                        continue;    // and onto the next iteration of the selection loop
                    }
                    i++;
                }
                //The selection loop has ended: the bracket is complete only if exactly
                //bracketSize / 2 entries were chosen.
                if (ChosenPairs.Count == bracketSize / 2)
                {
                    // Store a copy, because ChosenPairs is cleared and reused below.
                    var temp = ChosenPairs.Clone();

                    generatedBrackets.Add((ArrayList <Entry>)temp);
                }
                else
                {
                    //Incomplete bracket: discard it and stop generating.
                    ChosenPairs.Clear();
                    NamesInBracket.Clear();
                    break;
                }
                ChosenPairs.Clear();
                NamesInBracket.Clear();

                OnPropertyChanged("Brackets");
            }
            CompleteBrackets.Add(generatedBrackets);

            // NOTE(review): sb is never used after construction in this method.
            StringBuilder sb = new StringBuilder();

            //Report how many brackets were generated.
            MessageBox.Show("Bracket Generation Complete. \n Brackets Made: " + generatedBrackets.Count);
            ClearLists();

            Master.Dispose();
            NumericBrackets.Dispose();
            AllBrackets.Dispose();
        }//End StandardBracketGeneration();
Example #7
0
        /// <summary>
        /// Scratch/demo entry point for the collection classes.
        /// The only statements that execute are the ones before the first <c>return;</c>:
        /// a <c>HashedArrayList&lt;string&gt;</c> is built from ten strings and the results of
        /// <c>Contains("10")</c> and <c>Add("10")</c> are written to the console.
        /// Everything after that first <c>return;</c> is unreachable and kept as a usage sample.
        /// </summary>
        public static void Main()
        {
            //var eq = new C6.ComparerFactory.EqualityComparer<string>(ReferenceEquals,
            //    SCG.EqualityComparer<string>.Default.GetHashCode);

            //var items = new[] { "-8", "Ab", "6", "-4", "5", "-2", "-1", "1", "10", "8" };
            //var al = new ArrayList<string>(items);
            //var v1 = al.View(al.Count - 2, 2);
            //var v2 = al.View(al.Count - 2, 2);

            // Active experiment: membership test and add of an item already in the initial data ("10").
            var items      = new[] { "-8", "Ab", "6", "-4", "5", "-2", "-1", "1", "10", "8" };
            var collection = new HashedArrayList <string>(items);

            Console.WriteLine(collection.Contains("10"));
            Console.WriteLine(collection.Add("10"));



            // BUG: Sorting
            //var items = new[] { "-8", "Ab", "6", "-4", "5", "-2", "-1", "1", "10", "8" };
            //var collection = new HashedLinkedList<string>(items);

            //var v0 = collection.View(0, 2);
            //var v2 = collection.View(1, 2);
            //var v4 = collection.View(4, 2);
            //var v6 = collection.View(7, 1);
            //var vCount2 = collection.View(collection.Count - 2, 2);

            //Console.WriteLine("Views before calling Sort()");
            //Console.WriteLine($"v0 = {v0}");
            //Console.WriteLine($"v2 = {v2}");
            //Console.WriteLine($"v4 = {v4}");
            //Console.WriteLine($"v6 = {v6}");
            //Console.WriteLine($"vCount2 = {vCount2}");

            //v4.Sort();

            //Console.WriteLine("Views after calling Sort()");
            //Console.WriteLine($"v0 = {v0}");
            //Console.WriteLine($"v2 = {v2}");
            //Console.WriteLine($"v4 = {v4}");
            //Console.WriteLine($"v6 = {v6}");
            //Console.WriteLine($"vCount2 = {vCount2}");



            // ==============================
            // RemoveRange
            //var items = new[] { "8", "Ab", "3", "4", "5", "6", "7", "9" };
            //var collection = new ArrayList<string>(items);
            //var view1 = collection.View(0, 1); // longer
            //var view2 = collection.View(0, 2);
            //var item = view1.Choose();
            //var itms = new ArrayList<string>(new[] { item });

            //view1.RemoveRange(itms);
            //Console.WriteLine(view2);


            //var items = new[] { "8", "Ab", "3", "4", "5", "6", "7", "9" };
            // HLL.Reverse
            //var items = new[] { "a", "b", "c", "d", "e" };
            //var linkedList = new ArrayList<string>(items);
            //var v1 = linkedList.View(0, linkedList.Count);
            //var v2 = linkedList.View(0, 2);
            //v1.Reverse();
            //v1.Reverse();
            //Console.WriteLine(v2);

            // HLL.Sort
            //var items = new[] { "b", "a", "c", "e", "d" };
            //var linkedList = new HashedLinkedList<string>(items);
            //var v1 = linkedList.View(0, 3);
            //var v2 = linkedList.View(3, 2);
            //v1.Sort();
            //Console.WriteLine(v1);
            //Console.WriteLine(v2);

            // HAL.Add()
            //var items = new[] { "8", "Ab", "3", "4", "5", "6", "7", "9" };
            //var arrayList = new LinkedList<string>(items);
            //var v1 = arrayList.View(0, 7);
            //var v2 = arrayList.View(0, 7);
            //v1.Add("333333333");
            //Console.WriteLine(v1);
            //Console.WriteLine(v2);


            //Console.WriteLine(view1.IsValid);
            //Console.WriteLine(view);
            //Console.WriteLine(collection);



            // Execution stops here; all code below this line is unreachable.
            return;

            // ---- Unreachable sample 1: Backwards() view validity and FindDuplicates ----

            // Construct list using collection initializer
            //var list = new ArrayList<int>() { 2, 3, 5, 5, 7, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33};
            var list = new ArrayList <int>()
            {
                2, 3
            };
            var backList = list.Backwards();

            backList.ToList().ForEach(x => Console.Write(x + ", "));
            Console.WriteLine(backList.IsValid);

            // Prints IsValid again after the underlying list has been modified.
            list.Add(10);
            Console.WriteLine(backList.IsValid);
            //backList.ToList().ForEach(x => Console.Write(x));


            //var list = list1.View(2, list1.Count-2);
            //var v = list.View(3,4);
            //var v2 = v.View(1, 2);
            //var items = new ArrayList<int>() { 3, 13, 7, 17};
            //Console.WriteLine(ArrayList<int>.EmptyArray);



            var dupl = list.FindDuplicates(5);

            Console.WriteLine(dupl);
            // The list is modified before the duplicates result is materialised with ToArray().
            list.Add(-100);
            var arr = dupl.ToArray();

            list.Dispose();



            //en.ToList().ForEach(x => Console.WriteLine(x));


            //Console.WriteLine(v);
            //Console.WriteLine(v2);
            //Console.WriteLine(list);

            return;

            // ---- Unreachable sample 2: tour of the list API ----

            // Get index of item
            var index = list.IndexOf(23);

            // Get an index range
            var range = list.GetIndexRange(index, 4);

            // Print range in reverse order
            foreach (var prime in range.Backwards())
            {
                Console.WriteLine(prime);
            }

            // Remove items within index range
            list.RemoveIndexRange(10, 3);

            // Remove item at index
            var second = list.RemoveAt(1);

            // Remove first item
            var first = list.RemoveFirst();

            // Remove last item
            var last = list.RemoveLast();

            // Create array with items in list
            var array = list.ToArray();

            // Clear list
            list.Clear();

            // Check if list is empty
            var isEmpty = list.IsEmpty;

            // Add item
            list.Add(first);

            // Add items from enumerable
            list.AddRange(array);

            // Insert item into list
            list.Insert(1, second);

            // Add item to the end
            list.Add(last);

            // Check if list is sorted
            var isSorted = list.IsSorted();

            // Reverse list
            list.Reverse();

            // Check if list is sorted according to a reversed comparer
            var reverseComparer = ComparerFactory.CreateComparer <int>((x, y) => y.CompareTo(x));

            isSorted = list.IsSorted(reverseComparer);

            // Shuffle list with a fixed seed
            var random = new Random(0);

            list.Shuffle(random);

            // Print list using indexer
            for (var i = 0; i < list.Count; i++)
            {
                Console.WriteLine($"{i,2}: {list[i],2}");
            }

            // Check if list contains all items in enumerable
            var containsRange = list.ContainsRange(array);

            // Construct list using enumerable
            var otherList = new ArrayList <int>(array);

            // Add every third item from list
            otherList.AddRange(list.Where((x, i) => i % 3 == 0));

            containsRange = list.ContainsRange(otherList);

            // Remove all items not in enumerable
            otherList.RetainRange(list);

            // Remove all items in enumerable from list
            list.RemoveRange(array);

            // Sort list
            list.Sort();

            // Copy to array, starting at array index 2
            list.CopyTo(array, 2);

            return;
        }
Example #8
0
        /// <summary>
        /// Demo entry point: loads documents from a fixed path, builds a TF-IDF bag-of-words
        /// space, computes a 2D semantic-space layout, and writes three output files:
        /// the elevation matrix (<c>c:\elev.txt</c>, tab-separated, one matrix row per line),
        /// an elevation bitmap (<c>c:\elev.bmp</c>) and the layout coordinates
        /// (<c>c:\layout.tsv</c>, one "X&lt;TAB&gt;Y" line per document).
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <remarks>
        /// Both output writers are wrapped in <c>using</c> so the underlying files are flushed
        /// and closed even if the elevation computation or a write throws.
        /// </remarks>
        static void Main(string[] args)
        {
            // load documents
            Utils.VerboseLine("Loading documents ...");
            string[] docs = File.ReadAllLines("C:\\newwork\\testclustering\\data\\yahoofinance.txt");
            BowSpace bowSpace = new BowSpace();
            bowSpace.StopWords = StopWords.EnglishStopWords;
            bowSpace.Stemmer = new PorterStemmer();
            bowSpace.WordWeightType = WordWeightType.TfIdf;
            RegexTokenizer tokenizer = new RegexTokenizer();
            tokenizer.IgnoreUnknownTokens = true;
            bowSpace.Tokenizer = tokenizer;
            bowSpace.Initialize(docs);
            // compute layout
            SemanticSpaceLayout semSpc = new SemanticSpaceLayout(bowSpace);
            Vector2D[] coords = semSpc.ComputeLayout();
            // build spatial index (disabled)
            //Utils.VerboseLine("Building spatial index ...");
            //SpatialIndex2D spatIdx = new SpatialIndex2D();
            //spatIdx.BuildIndex(coords);
            //spatIdx.InsertPoint(9000, new Vector2D(1000, 1000));
            //ArrayList<IdxDat<Vector2D>> points = spatIdx.GetPoints(new Vector2D(0.5, 0.5), 0.1);
            //Utils.VerboseLine("Number of retrieved points: {0}.", points.Count);

            // The elevation is computed on a shuffled copy; coords itself keeps its order.
            ArrayList<Vector2D> tmp = new ArrayList<Vector2D>(coords);
            tmp.Shuffle();
            //tmp.RemoveRange(1000, tmp.Count - 1000);

            // compute elevation
            using (StreamWriter writer = new StreamWriter("c:\\elev.txt"))
            {
                LayoutSettings ls = new LayoutSettings(800, 600);
                ls.AdjustmentType = LayoutAdjustmentType.Soft;
                ls.StdDevMult = 2;
                ls.FitToBounds = true;
                ls.MarginVert = 50;
                ls.MarginHoriz = 50;
                double[,] zMtx = VisualizationUtils.ComputeLayoutElevation(tmp, ls, 150, 200);
                VisualizationUtils.__DrawElevation__(tmp, ls, 300, 400).Save("c:\\elev.bmp");
                for (int row = 0; row < zMtx.GetLength(0); row++)
                {
                    for (int col = 0; col < zMtx.GetLength(1); col++)
                    {
                        writer.Write("{0}\t", zMtx[row, col]);
                    }
                    writer.WriteLine();
                }
            }

            // output coordinates
            using (StreamWriter tsvWriter = new StreamWriter("c:\\layout.tsv"))
            {
                for (int i = 0; i < coords.Length; i++)
                {
                    // Disabled variant that also wrote the spatial-index points:
                    //if (i < points.Count)
                    //{
                    //    tsvWriter.WriteLine("{0}\t{1}\t{2}\t{3}", coords[i].X, coords[i].Y, points[i].Dat.X, points[i].Dat.Y);
                    //}
                    //else
                    tsvWriter.WriteLine("{0}\t{1}", coords[i].X, coords[i].Y);
                }
            }
            //// get document names
            //int k = 0;
            //ArrayList<Pair<string, Vector2D>> layout = new ArrayList<Pair<string, Vector2D>>();
            //foreach (string doc in docs)
            //{
            //    string[] docInfo = doc.Split(' ');
            //    layout.Add(new Pair<string, Vector2D>(docInfo[0], coords[k++]));
            //}
            //Console.WriteLine(coords.Length);
            //Console.WriteLine(layout.Count);
            //StreamWriter writer = new StreamWriter("c:\\vidCoords.txt");
            //foreach (Pair<string, Vector2D> docPos in layout)
            //{
            //    writer.WriteLine("{0}\t{1}\t{2}", docPos.First, docPos.Second.X, docPos.Second.Y);
            //}
            //writer.Close();
        }
Example #9
0
 /// <summary>
 /// Forwards to <c>innerlist.Shuffle()</c>; this member adds no logic of its own.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the source of randomness is whatever the inner list's parameterless
 /// <c>Shuffle()</c> uses — it is not visible from here.
 /// </remarks>
 public void Shuffle( )
 {
     innerlist.Shuffle( );
 }
Example #10
0
        /// <summary>
        /// Partitions <paramref name="dataset"/> into <c>mK</c> clusters. Runs <c>mTrials</c>
        /// independent trials and returns the clustering whose final quality (average
        /// similarity of each example to its assigned centroid) is highest.
        /// </summary>
        /// <remarks>
        /// Each trial:
        /// 1. draws three candidate seed sets (the first <c>mK</c> items of a shuffled index
        ///    list) and keeps the one with the lowest average pairwise similarity;
        /// 2. alternates between assigning every example to its most similar centroid
        ///    (exact ties broken at random via <c>mRnd</c>) and recomputing the centroids,
        ///    until the quality improves by no more than <c>mEps</c> over the previous iteration.
        /// </remarks>
        /// <param name="dataset">Examples to cluster. An <c>ArgumentNullException</c> is handed to
        /// <c>Utils.ThrowException</c> when null, and an <c>ArgumentValueException</c> when it
        /// holds fewer than <c>mK</c> items.</param>
        /// <returns>The best clustering found, or null if <c>mTrials</c> is less than 1.</returns>
        public ClusteringResult Cluster(IUnlabeledExampleCollection<SparseVector<double>> dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count < mK ? new ArgumentValueException("dataset") : null);
            ClusteringResult clustering          = null;
            ClusteringResult bestClustering      = null;
            double           globalBestClustQual = 0;

            for (int trial = 1; trial <= mTrials; trial++)
            {
                mLogger.Trace("Cluster", "Clustering trial {0} of {1} ...", trial, mTrials);
                ArrayList<SparseVector<double>> centroids = null;
                clustering = new ClusteringResult();
                for (int i = 0; i < mK; i++)
                {
                    clustering.AddRoot(new Cluster());
                }
                // select seed items
                double         minSim = double.MaxValue;
                ArrayList<int> tmp    = new ArrayList<int>(dataset.Count);
                for (int i = 0; i < dataset.Count; i++)
                {
                    tmp.Add(i);
                }
                for (int k = 0; k < 3; k++)
                {
                    ArrayList<SparseVector<double>> seeds = new ArrayList<SparseVector<double>>(mK);
                    tmp.Shuffle(mRnd);
                    for (int i = 0; i < mK; i++)
                    {
                        seeds.Add(ModelUtils.ComputeCentroid(new SparseVector<double>[] { dataset[tmp[i]] }, mCentroidType));
                    }
                    // assess quality of seed items: average similarity over all ordered pairs of distinct seeds
                    double simAvg = 0;
                    foreach (SparseVector<double> seed1 in seeds)
                    {
                        foreach (SparseVector<double> seed2 in seeds)
                        {
                            if (seed1 != seed2)
                            {
                                simAvg += mSimilarity.GetSimilarity(seed1, seed2);
                            }
                        }
                    }
                    // With a single seed there are no pairs. Dividing by zero would give NaN,
                    // "NaN < minSim" would never hold, and centroids would stay null (the main
                    // loop would then fail with a NullReferenceException). Leave simAvg at 0.
                    int pairCount = mK * mK - mK;
                    if (pairCount > 0)
                    {
                        simAvg /= (double)pairCount;
                    }
                    if (simAvg < minSim)
                    {
                        minSim    = simAvg;
                        centroids = seeds;
                    }
                }
                // main loop
                int    iter          = 0;
                double bestClustQual = 0;
                double clustQual;
                while (true)
                {
                    iter++;
                    mLogger.Trace("Cluster", "Iteration {0} ...", iter);
                    clustQual = 0;
                    // assign items to clusters
                    foreach (Cluster cluster in clustering.Roots)
                    {
                        cluster.Items.Clear();
                    }
                    for (int i = 0; i < dataset.Count; i++)
                    {
                        SparseVector<double> example    = dataset[i];
                        double               maxSim     = double.MinValue;
                        ArrayList<int>       candidates = new ArrayList<int>();
                        for (int j = 0; j < mK; j++)
                        {
                            SparseVector<double> centroid = centroids[j];
                            double sim = mSimilarity.GetSimilarity(example, centroid);
                            if (sim > maxSim)
                            {
                                maxSim = sim;
                                candidates.Clear();
                                candidates.Add(j);
                            }
                            else if (sim == maxSim)
                            {
                                // exact tie: keep all equally similar centroids as candidates
                                candidates.Add(j);
                            }
                        }
                        if (candidates.Count > 1)
                        {
                            // break ties at random
                            candidates.Shuffle(mRnd);
                        }
                        clustering.Roots[candidates[0]].Items.Add(i);
                        clustQual += maxSim;
                    }
                    clustQual /= (double)dataset.Count;
                    mLogger.Trace("Cluster", "Quality: {0:0.0000}", clustQual);
                    // check if done: after the first iteration, stop once the improvement is at most mEps
                    if (iter > 1 && clustQual - bestClustQual <= mEps)
                    {
                        break;
                    }
                    bestClustQual = clustQual;
                    // compute new centroids
                    for (int i = 0; i < mK; i++)
                    {
                        centroids[i] = clustering.Roots[i].ComputeCentroid(dataset, mCentroidType);
                    }
                }
                // keep the best trial; the first trial always initialises the best-so-far
                if (trial == 1 || clustQual > globalBestClustQual)
                {
                    globalBestClustQual = clustQual;
                    bestClustering      = clustering;
                }
            }
            return bestClustering;
        }
Example #11
0
        public ClusteringResult Cluster(IExampleCollection <LblT, SparseVector <double> .ReadOnly> dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count < m_k ? new ArgumentValueException("dataset") : null);
            ClusteringResult clustering             = null;
            ClusteringResult best_clustering        = null;
            double           global_best_clust_qual = 0;

            for (int trial = 1; trial <= m_trials; trial++)
            {
                Utils.VerboseLine("*** CLUSTERING TRIAL {0} OF {1} ***", trial, m_trials);
                ArrayList <SparseVector <double> .ReadOnly> centroids = null;
                clustering = new ClusteringResult();
                for (int i = 0; i < m_k; i++)
                {
                    clustering.Roots.Add(new Cluster());
                }
                // select seed items
                double          min_sim = double.MaxValue;
                ArrayList <int> tmp     = new ArrayList <int>(dataset.Count);
                for (int i = 0; i < dataset.Count; i++)
                {
                    tmp.Add(i);
                }
                for (int k = 0; k < 3; k++)
                {
                    ArrayList <SparseVector <double> .ReadOnly> seeds = new ArrayList <SparseVector <double> .ReadOnly>(m_k);
                    tmp.Shuffle(m_rnd);
                    for (int i = 0; i < m_k; i++)
                    {
                        seeds.Add(ModelUtils.ComputeCentroid(new SparseVector <double> .ReadOnly[] { dataset[tmp[i]].Example }, m_centroid_type));
                    }
                    // assess quality of seed items
                    double sim_avg = 0;
                    foreach (SparseVector <double> .ReadOnly seed_1 in seeds)
                    {
                        foreach (SparseVector <double> .ReadOnly seed_2 in seeds)
                        {
                            if (seed_1 != seed_2)
                            {
                                sim_avg += m_similarity.GetSimilarity(seed_1, seed_2);
                            }
                        }
                    }
                    sim_avg /= (double)(m_k * m_k - m_k);
                    //Console.WriteLine(sim_avg);
                    if (sim_avg < min_sim)
                    {
                        min_sim   = sim_avg;
                        centroids = seeds;
                    }
                }
                // main loop
                int    iter            = 0;
                double best_clust_qual = 0;
                double clust_qual;
                while (true)
                {
                    iter++;
                    clust_qual = 0;
                    // assign items to clusters
                    foreach (Cluster cluster in clustering.Roots)
                    {
                        cluster.Items.Clear();
                    }
                    for (int i = 0; i < dataset.Count; i++)
                    {
                        SparseVector <double> .ReadOnly example = dataset[i].Example;
                        double          max_sim    = double.MinValue;
                        ArrayList <int> candidates = new ArrayList <int>();
                        for (int j = 0; j < m_k; j++)
                        {
                            SparseVector <double> .ReadOnly centroid = centroids[j];
                            double sim = m_similarity.GetSimilarity(example, centroid);
                            if (sim > max_sim)
                            {
                                max_sim = sim;
                                candidates.Clear();
                                candidates.Add(j);
                            }
                            else if (sim == max_sim)
                            {
                                candidates.Add(j);
                            }
                        }
                        if (candidates.Count > 1)
                        {
                            candidates.Shuffle(m_rnd);
                        }
                        if (candidates.Count > 0) // *** is this always true?
                        {
                            clustering.Roots[candidates[0]].Items.Add(new Pair <double, int>(1, i));
                            clust_qual += max_sim;
                        }
                    }
                    clust_qual /= (double)dataset.Count;
                    Utils.VerboseLine("*** Iteration {0} ***", iter);
                    Utils.VerboseLine("Quality: {0:0.0000}", clust_qual);
                    // check if done
                    if (iter > 1 && clust_qual - best_clust_qual <= m_eps)
                    {
                        break;
                    }
                    best_clust_qual = clust_qual;
                    // compute new centroids
                    for (int i = 0; i < m_k; i++)
                    {
                        centroids[i] = clustering.Roots[i].ComputeCentroid(dataset, m_centroid_type);
                    }
                }
                if (trial == 1 || clust_qual > global_best_clust_qual)
                {
                    global_best_clust_qual = clust_qual;
                    best_clustering        = clustering;
                }
            }
            return(best_clustering);
        }
Example #12
0
 /// <summary>
 /// Forwards to <c>m_items.Shuffle()</c>, using that method's parameterless overload.
 /// </summary>
 public void Shuffle() => m_items.Shuffle();
Example #13
0
        /// <summary>
        /// Walks through the basic data structures (ArrayList, Set, Pair, KeyDat, IdxDat and an
        /// ArrayList of KeyDat) and writes every intermediate result to <c>Output</c>.
        /// </summary>
        /// <param name="args">Not read by this method.</param>
        public override void Run(object[] args)
        {
            // ---------------------------------------------------------------
            // ArrayList
            // ---------------------------------------------------------------
            Output.WriteLine("*** ArrayList ***");
            Output.WriteLine();
            // construct from an array
            Output.WriteLine("Create an ArrayList ...");
            var list = new ArrayList<int>(new[] { 1, 2, 3 });
            Output.WriteLine(list);
            // append a second batch
            Output.WriteLine("Add more items ...");
            list.AddRange(new[] { 6, 5, 4 });
            Output.WriteLine(list);
            // order from largest to smallest
            Output.WriteLine("Sort descendingly ...");
            list.Sort(DescSort<int>.Instance);
            Output.WriteLine(list);
            // randomize the order; the fixed seed is passed straight to Random
            Output.WriteLine("Shuffle ...");
            list.Shuffle(new Random(1));
            Output.WriteLine(list);
            // element-wise conversion to double
            Output.WriteLine("Convert to array of double ...");
            double[] array = list.ToArray<double>();
            Output.WriteLine(new ArrayList<double>(array));
            // element-wise conversion to string
            Output.WriteLine("Convert to ArrayList of string ...");
            var list2 = new ArrayList<string>(list.ToArray<string>());
            Output.WriteLine(list2);
            // read through the indexer
            Output.WriteLine("Get items ...");
            Output.WriteLine(list[0]);
            Output.WriteLine(list[1]);
            // write through the indexer
            Output.WriteLine("Set items ...");
            list[0] = 3;
            list[1] = 2;
            Output.WriteLine(list);
            // number of items
            Output.WriteLine("Get length ...");
            Output.WriteLine(list.Count);
            Output.WriteLine();

            // ---------------------------------------------------------------
            // Set
            // ---------------------------------------------------------------
            Output.WriteLine("*** Set ***");
            Output.WriteLine();
            // construct from an array
            Output.WriteLine("Create Set ...");
            var set = new Set<int>(new[] { 1, 2, 3 });
            Output.WriteLine(set);
            // membership tests
            Output.WriteLine("Check for items ...");
            Output.WriteLine(set.Contains(1));
            Output.WriteLine(set.Contains(4));
            // append a batch; 3 is already present
            Output.WriteLine("Add more items ...");
            set.AddRange(new[] { 6, 5, 4, 3 });
            Output.WriteLine(set);
            // remove a batch and then a single item
            Output.WriteLine("Remove some items ...");
            set.RemoveRange(new[] { 1, 3 });
            set.Remove(5);
            Output.WriteLine(set);
            // second operand for the set algebra below
            Output.WriteLine("Create another Set ...");
            var set2 = new Set<int>(new[] { 1, 2, 3, 4, 5 });
            Output.WriteLine(set2);
            // union
            Output.WriteLine("Compute union ...");
            Output.WriteLine(Set<int>.Union(set, set2));
            // difference
            Output.WriteLine("Compute difference ...");
            Output.WriteLine(Set<int>.Difference(set, set2));
            // intersection
            Output.WriteLine("Compute intersection ...");
            Output.WriteLine(Set<int>.Intersection(set, set2));
            // Jaccard similarity
            Output.WriteLine("Compute Jaccard similarity ...");
            Output.WriteLine(Set<int>.JaccardSimilarity(set, set2));
            // copy out to an array
            Output.WriteLine("Convert to array ...");
            int[] array2 = set2.ToArray();
            Output.WriteLine(new ArrayList<int>(array2));
            // element-wise conversion to string
            Output.WriteLine("Convert to Set of string ...");
            var set3 = new Set<string>(set2.ToArray<string>());
            Output.WriteLine(set3);
            // number of items
            Output.WriteLine("Get length ...");
            Output.WriteLine(set3.Count);
            Output.WriteLine();

            // ---------------------------------------------------------------
            // BinaryVector (section disabled; kept for reference)
            // ---------------------------------------------------------------
            //Output.WriteLine("*** BinaryVector ***");
            //Output.WriteLine();
            //// create BinaryVector
            //Output.WriteLine("Create BinaryVector ...");
            //BinaryVector<char> binVec = new BinaryVector<char>(new char[] { 'a', 'b', 'c' });
            //Output.WriteLine((object) binVec);
            //// check for items
            //Output.WriteLine("Check for items ...");
            //Output.WriteLine((bool) binVec.Contains('a'));
            //Output.WriteLine((bool) binVec.Contains('d'));
            //// add more items (note the duplicate)
            //Output.WriteLine("Add more items ...");
            //binVec.AddRange(new char[] { 'f', 'e', 'd', 'c' });
            //Output.WriteLine((object) binVec);
            //// remove some items
            //Output.WriteLine("Remove some items ...");
            //binVec.RemoveRange(new char[] { 'a', 'c' });
            //binVec.Remove('e');
            //Output.WriteLine((object) binVec);
            //// convert to array
            //Output.WriteLine("Convert to array ...");
            //char[] array3 = binVec.ToArray();
            //Output.WriteLine(new ArrayList<char>(array3));
            //// convert to BinaryVector of string
            //Output.WriteLine("Convert to BinaryVector of string ...");
            //BinaryVector<string> binVec2 = new BinaryVector<string>(binVec.ToArray<string>());
            //Output.WriteLine((object) binVec2);
            //// get items
            //Output.WriteLine("Get items ...");
            //Output.WriteLine((int) binVec2[0]);
            //Output.WriteLine((int) binVec2[1]);
            //// get length
            //Output.WriteLine("Get length ...");
            //Output.WriteLine((int) binVec2.Count);
            //Output.WriteLine();

            // ---------------------------------------------------------------
            // Pair
            // ---------------------------------------------------------------
            Output.WriteLine("*** Pair ***");
            Output.WriteLine();
            // first pair
            Output.WriteLine("Create Pair ...");
            var pair = new Pair<int, string>(3, "dogs");
            Output.WriteLine(pair);
            // second pair, differing only in the second component
            Output.WriteLine("Create another Pair ...");
            var pair2 = new Pair<int, string>(3, "cats");
            Output.WriteLine(pair2);
            // equality before the change
            Output.WriteLine("Compare ...");
            Output.WriteLine(pair == pair2);
            // give both pairs the same second component
            Output.WriteLine("Make a change ...");
            pair.Second = "cats";
            Output.WriteLine(pair);
            // equality after the change
            Output.WriteLine("Compare again ...");
            Output.WriteLine(pair == pair2);
            Output.WriteLine();

            // ---------------------------------------------------------------
            // KeyDat
            // ---------------------------------------------------------------
            Output.WriteLine("*** KeyDat ***");
            Output.WriteLine();
            // first key/data item
            Output.WriteLine("Create KeyDat ...");
            var keyDat = new KeyDat<int, string>(3, "dogs");
            Output.WriteLine(keyDat);
            // second item with the same key and different data
            Output.WriteLine("Create another KeyDat ...");
            var keyDat2 = new KeyDat<int, string>(3, "cats");
            Output.WriteLine(keyDat2);
            // equality before the change
            Output.WriteLine("Compare ...");
            Output.WriteLine(keyDat == keyDat2);
            // change the key of the first item
            Output.WriteLine("Make a change ...");
            keyDat.Key = 4;
            Output.WriteLine(keyDat);
            // equality and ordering after the change
            Output.WriteLine("Compare again ...");
            Output.WriteLine(keyDat == keyDat2);
            Output.WriteLine(keyDat > keyDat2);
            Output.WriteLine();

            // ---------------------------------------------------------------
            // IdxDat
            // ---------------------------------------------------------------
            Output.WriteLine("*** IdxDat ***");
            Output.WriteLine();
            // first index/data item
            Output.WriteLine("Create an IdxDat ...");
            var idxDat = new IdxDat<string>(3, "dogs");
            Output.WriteLine(idxDat);
            // second item with a different index
            Output.WriteLine("Create another IdxDat ...");
            var idxDat2 = new IdxDat<string>(4, "cats");
            Output.WriteLine(idxDat2);
            // equality before the change
            Output.WriteLine("Compare ...");
            Output.WriteLine(idxDat == idxDat2);
            // only the data can be changed here; the index is not assignable
            //idxDat.Idx = 4; // not possible to change index
            idxDat.Dat = "cats";
            Output.WriteLine(idxDat);
            // equality and ordering after the change
            Output.WriteLine("Compare again ...");
            Output.WriteLine(idxDat == idxDat2);
            Output.WriteLine(idxDat < idxDat2);
            Output.WriteLine();

            // ---------------------------------------------------------------
            // ArrayList of KeyDat
            // ---------------------------------------------------------------
            Output.WriteLine("*** ArrayList of KeyDat ***");
            Output.WriteLine();
            // construct from three key/data items
            Output.WriteLine("Create an ArrayList of KeyDat ...");
            var listKeyDat = new ArrayList<KeyDat<double, string>>(new[]
            {
                new KeyDat<double, string>(2.4, "cats"),
                new KeyDat<double, string>(3.3, "dogs"),
                new KeyDat<double, string>(4.2, "lizards")
            });
            Output.WriteLine(listKeyDat);
            // the same descending comparer drives both the sort and the searches below
            var descending = DescSort<KeyDat<double, string>>.Instance;
            Output.WriteLine("Sort descendingly ...");
            listKeyDat.Sort(descending);
            Output.WriteLine(listKeyDat);
            // search for a key that was added above, then for one that was not
            Output.WriteLine("Find item with bisection ...");
            int idx = listKeyDat.BinarySearch(new KeyDat<double, string>(3.3), descending);
            Output.WriteLine(idx);
            idx = listKeyDat.BinarySearch(new KeyDat<double, string>(3), descending);
            // the bitwise complement of the second result is what gets printed
            Output.WriteLine(~idx);
            // remove by key
            Output.WriteLine("Remove item ...");
            listKeyDat.Remove(new KeyDat<double, string>(3.3));
            Output.WriteLine(listKeyDat);
            // ends of the list
            Output.WriteLine("Get first and last item ...");
            Output.WriteLine(listKeyDat.First);
            Output.WriteLine(listKeyDat.Last);
        }
Example #14
0
        /// <summary>
        /// Runs <c>mTrials</c> k-means restarts over <paramref name="dataset"/> and stores the
        /// centroids of the best-scoring trial in <c>mCentroids</c>.
        /// </summary>
        /// <remarks>
        /// Each trial shuffles the item indices three times, takes the first <paramref name="k"/>
        /// items of each shuffle as seed candidates, and keeps the candidate set with the lowest
        /// average pairwise dot-product similarity.
        /// </remarks>
        /// <param name="dataset">Items to cluster.</param>
        /// <param name="k">Number of clusters; 1 short-circuits to <c>CreateSingleCluster</c>.</param>
        /// <returns>The value of <c>GetClusteringResult()</c> after all trials have run.</returns>
        protected ClusteringResult kMeans(IUnlabeledExampleCollection<SparseVector<double>> dataset, int k)
        {
            // border case: one cluster needs neither seeding nor iteration
            if (k == 1)
            {
                return CreateSingleCluster(dataset);
            }

            double globalBestClustQual = 0;
            for (int trial = 1; trial <= mTrials; trial++)
            {
                mLogger.Trace("Cluster", "Clustering trial {0} of {1} ...", trial, mTrials);

                // one empty centroid per cluster
                var centroids = new ArrayList<CentroidData>(k);
                for (int c = 0; c < k; c++)
                {
                    centroids.Add(new CentroidData());
                }

                // item indices 0..Count-1; reshuffled for every seeding attempt
                var order = new ArrayList<int>(dataset.Count);
                for (int idx = 0; idx < dataset.Count; idx++)
                {
                    order.Add(idx);
                }

                // select seed items: the least mutually similar of three random draws wins
                ArrayList<int> bestSeeds = null;
                double minSim = double.MaxValue;
                for (int attempt = 0; attempt < 3; attempt++)
                {
                    order.Shuffle(mRnd);
                    var seeds = new ArrayList<SparseVector<double>>(k);
                    for (int s = 0; s < k; s++)
                    {
                        seeds.Add(dataset[order[s]]);
                    }

                    // sum the similarity over all ordered pairs of different seeds
                    double simAvg = 0;
                    for (int a = 0; a < seeds.Count; a++)
                    {
                        for (int b = 0; b < seeds.Count; b++)
                        {
                            if (seeds[a] != seeds[b])
                            {
                                simAvg += DotProductSimilarity.Instance.GetSimilarity(seeds[a], seeds[b]);
                            }
                        }
                    }
                    simAvg /= (double)(k * k - k);

                    if (simAvg < minSim)
                    {
                        minSim = simAvg;
                        // remember the indices behind this draw, not the vectors
                        bestSeeds = new ArrayList<int>(k);
                        for (int s = 0; s < k; s++)
                        {
                            bestSeeds.Add(order[s]);
                        }
                    }
                }

                // seed every centroid with exactly one item
                for (int c = 0; c < k; c++)
                {
                    CentroidData centroid = centroids[c];
                    centroid.Items.Add(bestSeeds[c]);
                    centroid.Update(dataset);
                    centroid.UpdateCentroidLen();
                }

                // iterate, then keep this trial if it is the first or the best so far
                double clustQual;
                kMeansMainLoop(dataset, centroids, out clustQual);
                bool isBestSoFar = trial == 1 || clustQual > globalBestClustQual;
                if (isBestSoFar)
                {
                    globalBestClustQual = clustQual;
                    mCentroids = centroids;
                }
            }
            return GetClusteringResult();
        }
Example #15
0
        /// <summary>
        /// Fills the queue with 1000 seeded pseudo-random values, then visits the handles in
        /// shuffled order, checking that each handle reads back its original value, accepts a
        /// replacement value, and leaves the queue passing <c>Check()</c>.
        /// </summary>
        public void RandomIndexing()
        {
            const int length = 1000;
            var ran = new Random(6754);
            var a = new int[length];       // values inserted first
            var b = new int[length];       // replacement values
            var shuffle = new ArrayList<int>(length);
            var h = new IPriorityQueueHandle<int>[length];

            for (var i = 0; i < length; i++)
            {
                shuffle.Add(i);
                // draw order matters for the seeded sequence: a[i] first, then b[i]
                a[i] = ran.Next();
                queue.Add(ref h[i], a[i]);
                b[i] = ran.Next();
                Assert.IsTrue(queue.Check());
            }

            Assert.IsTrue(queue.Check());
            shuffle.Shuffle(ran);

            foreach (var j in shuffle)
            {
                var handle = h[j];
                Assert.AreEqual(a[j], queue[handle]);
                queue[handle] = b[j];
                Assert.AreEqual(b[j], queue[handle]);
                Assert.IsTrue(queue.Check());
            }
        }
Example #16
0
        //private double GetQual()
        //{
        //    double clustQual = 0;
        //    foreach (Centroid centroid in mCentroids)
        //    {
        //        foreach (int itemIdx in centroid.CurrentItems)
        //        {
        //            clustQual += centroid.GetDotProduct(mDataset[itemIdx]);
        //        }
        //    }
        //    clustQual /= (double)mDataset.Count;
        //    return clustQual;
        //}

        /// <summary>
        /// Slides the clustered window: removes the first <paramref name="dequeueN"/> items of
        /// <c>mDataset</c>, appends <paramref name="addList"/>, assigns the new items to their most
        /// similar centroid, and then re-runs the k-means loop until the quality gain is no
        /// greater than <c>mEps</c>.
        /// </summary>
        /// <param name="dequeueN">Number of items to drop from the front of the dataset.</param>
        /// <param name="addList">Items to append to the dataset; enumerated exactly once.</param>
        /// <param name="iter">On return, the number of main-loop iterations that were executed.</param>
        /// <returns>A clustering with one root per centroid, filled from that centroid's <c>Items</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="addList"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="dequeueN"/> is negative or larger than the current dataset size.
        /// </exception>
        public ClusteringResult Update(int dequeueN, IEnumerable<SparseVector<double>> addList, ref int iter)
        {
            // Reject bad arguments before any state is touched; failing later would leave the
            // centroids already rewritten while the dataset is still unchanged.
            if (addList == null)
            {
                throw new ArgumentNullException("addList");
            }
            if (dequeueN < 0 || dequeueN > mDataset.Count)
            {
                throw new ArgumentOutOfRangeException("dequeueN");
            }

            StopWatch stopWatch = new StopWatch();

            // update centroid data (1): keep only the items that survive the dequeue
            foreach (CentroidData centroid in mCentroids)
            {
                foreach (int item in centroid.CurrentItems)
                {
                    if (item >= dequeueN)
                    {
                        centroid.Items.Add(item);
                    }
                }
                centroid.Update(mDataset);
                centroid.UpdateCentroidLen();
            }
            //Console.WriteLine(">>> {0} >>> update centroid data (1)", stopWatch.TotalMilliseconds);
            stopWatch.Reset();
            // update dataset
            mDataset.RemoveRange(0, dequeueN);
            int ofs = mDataset.Count; // index at which the first appended item will land

            mDataset.AddRange(addList);
            //Console.WriteLine(">>> {0} >>> update dataset", stopWatch.TotalMilliseconds);
            stopWatch.Reset();
            // update centroid data (2): shift the surviving indices down by dequeueN
            foreach (CentroidData centroid in mCentroids)
            {
                Set<int> itemsOfs = new Set<int>();
                foreach (int item in centroid.CurrentItems)
                {
                    itemsOfs.Add(item - dequeueN);
                }
                centroid.CurrentItems.Inner.SetItems(itemsOfs);
                centroid.Items.SetItems(itemsOfs);
            }
            //Console.WriteLine(">>> {0} >>> update centroid data (2)", stopWatch.TotalMilliseconds);
            stopWatch.Reset();
            // assign new instances
            double bestClustQual = 0;

            {
                mLogger.Info("Update", "Initializing ...");
                // Walk the range that AddRange just appended instead of enumerating addList a
                // second time: a lazy or one-shot sequence would not replay the same items.
                for (int itemIdx = ofs; itemIdx < mDataset.Count; itemIdx++)
                {
                    SparseVector<double> example = mDataset[itemIdx];
                    double          maxSim     = double.MinValue;
                    ArrayList<int>  candidates = new ArrayList<int>();
                    for (int j = 0; j < mK; j++)
                    {
                        double sim = mCentroids[j].GetDotProduct(example);
                        if (sim > maxSim)
                        {
                            maxSim = sim;
                            candidates.Clear();
                            candidates.Add(j);
                        }
                        else if (sim == maxSim)
                        {
                            candidates.Add(j);
                        }
                    }
                    // ties are broken at random
                    if (candidates.Count > 1)
                    {
                        candidates.Shuffle(mRnd);
                    }
                    // empty only if mK < 1 or every similarity compared false (NaN)
                    if (candidates.Count > 0)
                    {
                        mCentroids[candidates[0]].Items.Add(itemIdx);
                    }
                }
                // update centroids
                foreach (CentroidData centroid in mCentroids)
                {
                    centroid.Update(mDataset);
                    centroid.UpdateCentroidLen();
                }
                //Console.WriteLine(GetQual());
                foreach (CentroidData centroid in mCentroids)
                {
                    foreach (int itemIdx in centroid.CurrentItems)
                    {
                        bestClustQual += centroid.GetDotProduct(mDataset[itemIdx]);
                    }
                }
                bestClustQual /= (double)mDataset.Count;
                mLogger.Info("Update", "Quality: {0:0.0000}", bestClustQual);
            }
            //Console.WriteLine(">>> {0} >>> assign new instances", stopWatch.TotalMilliseconds);
            stopWatch.Reset();
            // main k-means loop
            iter = 0;
            while (true)
            {
                iter++;
                mLogger.Info("Update", "Iteration {0} ...", iter);
                // assign items to clusters
                for (int i = 0; i < mDataset.Count; i++)
                {
                    SparseVector<double> example = mDataset[i];
                    double          maxSim        = double.MinValue;
                    ArrayList<int>  candidates    = new ArrayList<int>();
                    for (int j = 0; j < mK; j++)
                    {
                        double sim = mCentroids[j].GetDotProduct(example);
                        if (sim > maxSim)
                        {
                            maxSim = sim;
                            candidates.Clear();
                            candidates.Add(j);
                        }
                        else if (sim == maxSim)
                        {
                            candidates.Add(j);
                        }
                    }
                    // ties are broken at random
                    if (candidates.Count > 1)
                    {
                        candidates.Shuffle(mRnd);
                    }
                    // empty only if mK < 1 or every similarity compared false (NaN)
                    if (candidates.Count > 0)
                    {
                        mCentroids[candidates[0]].Items.Add(i);
                    }
                }
                //
                // *** OPTIMIZE THIS with GetDotProductSimilarity (see this.Cluster) !!! ***
                //
                //Console.WriteLine(">>> {0} >>> loop: assign items to clusters", stopWatch.TotalMilliseconds);
                stopWatch.Reset();
                double clustQual = 0;
                // update centroids
                foreach (CentroidData centroid in mCentroids)
                {
                    centroid.Update(mDataset);
                    centroid.UpdateCentroidLen();
                }
                //Console.WriteLine(GetQual());
                foreach (CentroidData centroid in mCentroids)
                {
                    foreach (int itemIdx in centroid.CurrentItems)
                    {
                        clustQual += centroid.GetDotProduct(mDataset[itemIdx]);
                    }
                }
                clustQual /= (double)mDataset.Count;
                //Console.WriteLine(">>> {0} >>> loop: update centroids", stopWatch.TotalMilliseconds);
                stopWatch.Reset();
                mLogger.Info("Update", "Quality: {0:0.0000} Diff: {1:0.0000}", clustQual, clustQual - bestClustQual);
                // check if done; written as "not improved by more than mEps" so that a NaN
                // quality (e.g. 0/0 on an empty dataset) ends the loop instead of spinning forever
                if (!(clustQual - bestClustQual > mEps))
                {
                    break;
                }
                bestClustQual = clustQual;
            }
            // save the result
            // NOTE(review): Items is read here right after the last Update() call, while the quality
            // sums above read CurrentItems; confirm Items still holds the final assignment.
            ClusteringResult clustering = new ClusteringResult();

            for (int i = 0; i < mK; i++)
            {
                clustering.AddRoot(new Cluster());
                clustering.Roots.Last.Items.AddRange(mCentroids[i].Items);
            }
            return(clustering);
        }
Example #17
0
        /// <summary>
        /// Fills the queue with 1000 seeded pseudo-random values, then deletes them through their
        /// handles in shuffled order, checking each deleted value and the queue invariant, and
        /// finally that the queue is empty.
        /// </summary>
        public void RandomWithDeleteHandles()
        {
            const int length = 1000;
            var ran = new Random(6754);
            var a = new int[length];
            var shuffle = new ArrayList<int>(length);
            var h = new IPriorityQueueHandle<int>[length];

            for (var i = 0; i < length; i++)
            {
                shuffle.Add(i);
                a[i] = ran.Next();
                queue.Add(ref h[i], a[i]);
                Assert.IsTrue(queue.Check());
            }

            Assert.IsTrue(queue.Check());
            shuffle.Shuffle(ran);

            foreach (var j in shuffle)
            {
                // deleting by handle must hand back the value stored under that handle
                Assert.AreEqual(a[j], queue.Delete(h[j]));
                Assert.IsTrue(queue.Check());
            }

            Assert.IsTrue(queue.IsEmpty);
        }
Example #18
0
        /// <summary>
        /// Clusters <paramref name="dataset"/> into <c>mK</c> clusters using <c>mTrials</c> k-means
        /// restarts, and keeps the centroids, medoids and clustering of the best-scoring trial.
        /// </summary>
        /// <remarks>
        /// Each trial shuffles the item indices three times, takes the first <c>mK</c> items of
        /// each shuffle as seed candidates, and keeps the candidate set with the lowest average
        /// pairwise dot-product similarity. Iteration stops once the quality gain between two
        /// consecutive passes is at most <c>mEps</c> (never before the second pass).
        /// </remarks>
        /// <param name="dataset">
        /// Items to cluster. A null value, or fewer than <c>mK</c> items, is reported through
        /// <c>Utils.ThrowException</c>.
        /// </param>
        /// <returns>The clustering of the best trial, or null when <c>mTrials</c> is less than 1.</returns>
        public ClusteringResult Cluster(IUnlabeledExampleCollection<SparseVector<double>> dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count < mK ? new ArgumentValueException("dataset") : null);
            mDataset = new UnlabeledDataset<SparseVector<double>>(dataset);
            ClusteringResult clustering          = null;
            double           globalBestClustQual = 0;

            for (int trial = 1; trial <= mTrials; trial++)
            {
                mLogger.Info("Cluster", "Clustering trial {0} of {1} ...", trial, mTrials);
                ArrayList<CentroidData> centroids = new ArrayList<CentroidData>(mK);
                ArrayList<int>          bestSeeds = null;
                for (int i = 0; i < mK; i++)
                {
                    centroids.Add(new CentroidData());
                }
                // select seed items
                double         minSim = double.MaxValue;
                ArrayList<int> tmp    = new ArrayList<int>(mDataset.Count);
                for (int i = 0; i < mDataset.Count; i++)
                {
                    tmp.Add(i);
                }
                // number of ordered seed pairs; 0 when mK is 1
                int pairCount = mK * mK - mK;
                for (int k = 0; k < 3; k++)
                {
                    ArrayList<SparseVector<double>> seeds = new ArrayList<SparseVector<double>>(mK);
                    tmp.Shuffle(mRnd);
                    for (int i = 0; i < mK; i++)
                    {
                        seeds.Add(mDataset[tmp[i]]);
                    }
                    // assess quality of seed items
                    double simAvg = 0;
                    foreach (SparseVector<double> seed1 in seeds)
                    {
                        foreach (SparseVector<double> seed2 in seeds)
                        {
                            if (seed1 != seed2)
                            {
                                simAvg += DotProductSimilarity.Instance.GetSimilarity(seed1, seed2);
                            }
                        }
                    }
                    // With a single seed there are no pairs; dividing by zero would turn simAvg
                    // into NaN, no candidate would ever be accepted, and bestSeeds would stay null.
                    if (pairCount != 0)
                    {
                        simAvg /= (double)pairCount;
                    }
                    //Console.WriteLine(simAvg);
                    if (simAvg < minSim)
                    {
                        minSim    = simAvg;
                        bestSeeds = new ArrayList<int>(mK);
                        for (int i = 0; i < mK; i++)
                        {
                            bestSeeds.Add(tmp[i]);
                        }
                    }
                }
                // seed every centroid with one item; medoid keys start at -1 so that any
                // similarity seen during assignment replaces them
                ArrayList<KeyDat<double, int>> medoids = new ArrayList<KeyDat<double, int>>(mK);
                for (int i = 0; i < mK; i++)
                {
                    centroids[i].Items.Add(bestSeeds[i]);
                    centroids[i].Update(mDataset);
                    centroids[i].UpdateCentroidLen();
                    medoids.Add(new KeyDat<double, int>(-1, bestSeeds[i]));
                }
                // item-by-centroid similarity table, reused across iterations of this trial
                double[,] dotProd = new double[mDataset.Count, mK];
                SparseMatrix<double> dsMat = ModelUtils.GetTransposedMatrix(mDataset);
                // main loop
                int    iter          = 0;
                double bestClustQual = 0;
                double clustQual;
                while (true)
                {
                    iter++;
                    mLogger.Info("Cluster", "Iteration {0} ...", iter);
                    clustQual = 0;
                    // assign items to clusters
                    //StopWatch stopWatch = new StopWatch();
                    int j = 0;
                    foreach (CentroidData cen in centroids)
                    {
                        SparseVector<double> cenVec = cen.GetSparseVector();
                        double[] dotProdSimVec      = ModelUtils.GetDotProductSimilarity(dsMat, mDataset.Count, cenVec);
                        for (int i = 0; i < dotProdSimVec.Length; i++)
                        {
                            // Write every cell on every pass. The table outlives the iteration, so
                            // skipping non-positive values would keep the previous pass's similarity
                            // for an item that no longer overlaps this centroid. Non-positive (and
                            // NaN) values are stored as 0, which is what an untouched cell held.
                            double simVal = dotProdSimVec[i];
                            dotProd[i, j] = simVal > 0 ? simVal : 0;
                        }
                        j++;
                    }
                    for (int dsInstIdx = 0; dsInstIdx < mDataset.Count; dsInstIdx++)
                    {
                        double         maxSim     = double.MinValue;
                        ArrayList<int> candidates = new ArrayList<int>();
                        for (int cenIdx = 0; cenIdx < mK; cenIdx++)
                        {
                            double sim = dotProd[dsInstIdx, cenIdx];
                            if (sim > maxSim)
                            {
                                maxSim = sim;
                                candidates.Clear();
                                candidates.Add(cenIdx);
                            }
                            else if (sim == maxSim)
                            {
                                candidates.Add(cenIdx);
                            }
                        }
                        // ties are broken at random
                        if (candidates.Count > 1)
                        {
                            candidates.Shuffle(mRnd);
                        }
                        // non-empty whenever mK >= 1: table entries are never NaN or below 0
                        if (candidates.Count > 0)
                        {
                            centroids[candidates[0]].Items.Add(dsInstIdx);
                            clustQual += maxSim;
                            // track the item most similar to its centroid
                            if (medoids[candidates[0]].Key < maxSim)
                            {
                                medoids[candidates[0]] = new KeyDat<double, int>(maxSim, dsInstIdx);
                            }
                        }
                    }
                    //Console.WriteLine(stopWatch.TotalMilliseconds);
                    clustQual /= (double)mDataset.Count;
                    mLogger.Info("Cluster", "Quality: {0:0.0000}", clustQual);
                    // compute new centroids
                    for (int i = 0; i < mK; i++)
                    {
                        centroids[i].Update(mDataset);
                        centroids[i].UpdateCentroidLen();
                    }
                    // check if done
                    if (iter > 1 && clustQual - bestClustQual <= mEps)
                    {
                        break;
                    }
                    bestClustQual = clustQual;
                    // reset medoid scores for the next pass, keeping the item index
                    for (int i = 0; i < medoids.Count; i++)
                    {
                        medoids[i] = new KeyDat<double, int>(-1, medoids[i].Dat);
                    }
                }
                if (trial == 1 || clustQual > globalBestClustQual)
                {
                    globalBestClustQual = clustQual;
                    mCentroids          = centroids;
                    mMedoids            = medoids;
                    // save the result
                    // NOTE(review): Items is read here right after the last Update() call; confirm
                    // Update() leaves the final assignment in Items.
                    clustering = new ClusteringResult();
                    for (int i = 0; i < mK; i++)
                    {
                        clustering.AddRoot(new Cluster());
                        clustering.Roots.Last.Items.AddRange(centroids[i].Items);
                    }
                }
            }
            return(clustering);
        }
Example #19
0
        /// <summary>
        /// Loads documents from a text file, builds a TF-IDF bag-of-words space over them,
        /// computes a 2D semantic-space layout, and writes three artifacts to disk:
        /// the elevation matrix (<c>c:\elev.txt</c>), an elevation bitmap (<c>c:\elev.bmp</c>)
        /// and the layout coordinates (<c>c:\layout.tsv</c>).
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // load documents
            Utils.VerboseLine("Loading documents ...");
            string[] docs     = File.ReadAllLines("C:\\newwork\\testclustering\\data\\yahoofinance.txt");
            BowSpace bowSpace = new BowSpace();

            bowSpace.StopWords      = StopWords.EnglishStopWords;
            bowSpace.Stemmer        = new PorterStemmer();
            bowSpace.WordWeightType = WordWeightType.TfIdf;
            RegexTokenizer tokenizer = new RegexTokenizer();

            tokenizer.IgnoreUnknownTokens = true;
            bowSpace.Tokenizer            = tokenizer;
            bowSpace.Initialize(docs);
            // compute layout
            SemanticSpaceLayout semSpc = new SemanticSpaceLayout(bowSpace);

            Vector2D[] coords = semSpc.ComputeLayout();
            // build spatial index
            //Utils.VerboseLine("Building spatial index ...");
            //SpatialIndex2D spatIdx = new SpatialIndex2D();
            //spatIdx.BuildIndex(coords);
            //spatIdx.InsertPoint(9000, new Vector2D(1000, 1000));
            //ArrayList<IdxDat<Vector2D>> points = spatIdx.GetPoints(new Vector2D(0.5, 0.5), 0.1);
            //Utils.VerboseLine("Number of retrieved points: {0}.", points.Count);

            // work on a shuffled copy so that coords keeps its original order for the TSV output
            ArrayList <Vector2D> tmp = new ArrayList <Vector2D>(coords);

            tmp.Shuffle();
            //tmp.RemoveRange(1000, tmp.Count - 1000);

            // compute elevation
            LayoutSettings ls = new LayoutSettings(800, 600);

            ls.AdjustmentType = LayoutAdjustmentType.Soft;
            ls.StdDevMult     = 2;
            ls.FitToBounds    = true;
            ls.MarginVert     = 50;
            ls.MarginHoriz    = 50;
            double[,] zMtx    = VisualizationUtils.ComputeLayoutElevation(tmp, ls, 150, 200);
            VisualizationUtils.__DrawElevation__(tmp, ls, 300, 400).Save("c:\\elev.bmp");

            // The writer is opened only once the matrix is available, so a failure in the
            // computation above does not leave a truncated file behind; the using statement
            // guarantees the handle is released even if a write throws.
            using (StreamWriter writer = new StreamWriter("c:\\elev.txt"))
            {
                for (int row = 0; row < zMtx.GetLength(0); row++)
                {
                    for (int col = 0; col < zMtx.GetLength(1); col++)
                    {
                        writer.Write("{0}\t", zMtx[row, col]);
                    }
                    writer.WriteLine();
                }
            }

            // output coordinates (one tab-separated X/Y pair per line, in original order)
            using (StreamWriter tsvWriter = new StreamWriter("c:\\layout.tsv"))
            {
                for (int i = 0; i < coords.Length; i++)
                {
                    //if (i < points.Count)
                    //{
                    //    tsvWriter.WriteLine("{0}\t{1}\t{2}\t{3}", coords[i].X, coords[i].Y, points[i].Dat.X, points[i].Dat.Y);
                    //}
                    //else
                    {
                        tsvWriter.WriteLine("{0}\t{1}", coords[i].X, coords[i].Y);
                    }
                }
            }
            //// get document names
            //int k = 0;
            //ArrayList<Pair<string, Vector2D>> layout = new ArrayList<Pair<string, Vector2D>>();
            //foreach (string doc in docs)
            //{
            //    string[] docInfo = doc.Split(' ');
            //    layout.Add(new Pair<string, Vector2D>(docInfo[0], coords[k++]));
            //}
            //Console.WriteLine(coords.Length);
            //Console.WriteLine(layout.Count);
            //StreamWriter writer = new StreamWriter("c:\\vidCoords.txt");
            //foreach (Pair<string, Vector2D> docPos in layout)
            //{
            //    writer.WriteLine("{0}\t{1}\t{2}", docPos.First, docPos.Second.X, docPos.Second.Y);
            //}
            //writer.Close();
        }
Example #20
0
        /// <summary>
        /// Walks through the main <c>ArrayList&lt;int&gt;</c> operations in sequence: construction,
        /// index lookups and ranges, removals, re-population, sortedness checks, shuffling,
        /// range containment/retention, sorting and copying to an array.
        /// Writes the selected index range (reversed) and the shuffled list to the console.
        /// </summary>
        public void Main()
        {
            // Build the list with a collection initializer
            var primes = new ArrayList <int> {
                2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47
            };

            // Locate an item, then take a four-item index range starting there
            int position = primes.IndexOf(23);
            var slice    = primes.GetIndexRange(position, 4);

            // Print the range back to front
            foreach (var item in slice.Backwards())
            {
                Console.WriteLine(item);
            }

            // Drop three items starting at index 10
            primes.RemoveIndexRange(10, 3);

            // Take out the item at index 1, then the first and the last item
            int removedSecond = primes.RemoveAt(1);
            int removedFirst  = primes.RemoveFirst();
            int removedLast   = primes.RemoveLast();

            // Snapshot the remaining items into an array
            int[] snapshot = primes.ToArray();

            // Empty the list and query its emptiness
            primes.Clear();
            bool empty = primes.IsEmpty;

            // Re-populate: single item, whole enumerable, positional insert, append
            primes.Add(removedFirst);
            primes.AddRange(snapshot);
            primes.Insert(1, removedSecond);
            primes.Add(removedLast);

            // Sortedness under the default ordering
            bool sorted = primes.IsSorted();

            // Flip the list, then test sortedness under a descending comparer
            primes.Reverse();

            var descending = ComparerFactory.CreateComparer <int>((left, right) => right.CompareTo(left));

            sorted = primes.IsSorted(descending);

            // Shuffle with a fixed seed
            Random rng = new Random(0);

            primes.Shuffle(rng);

            // Print every item through the indexer
            int idx = 0;
            while (idx < primes.Count)
            {
                Console.WriteLine("{0,2}: {1,2}", idx, primes[idx]);
                idx++;
            }

            // Does the list contain every item of the snapshot?
            var hasAll = primes.ContainsRange(snapshot);

            // Second list seeded from the snapshot
            var secondary = new ArrayList <int>(snapshot);

            // Append every third item of the first list
            secondary.AddRange(primes.Where((value, n) => n % 3 == 0));

            hasAll = primes.ContainsRange(secondary);

            // Keep only the items that also occur in the first list
            secondary.RetainRange(primes);

            // Strip the snapshot's items from the first list
            primes.RemoveRange(snapshot);

            // Sort what is left
            primes.Sort();

            // Copy into the snapshot array starting at index 2
            primes.CopyTo(snapshot, 2);
        }