/// <summary>
        /// Stores every item of NewsMaterial through Archivist, refreshes the idf
        /// values, and checks that Length() of the tf-idf vector Archivist returns
        /// for one of the stored items matches Length() of a vector computed
        /// locally with TermUtils (tolerance 0.001).
        /// </summary>
        /// <remarks>
        /// Only the Length() values are compared; individual vector components are
        /// not checked, so the component order of the expected vector is irrelevant.
        /// </remarks>
        public void GetTfIdfVectorFourNewsItemsInDatabase()
        {
            // Position in NewsMaterial of the item whose vector is verified. It is
            // used both for the expected tf values and for the database lookup, so
            // the two must stay in sync.
            const int targetIndex = 1;

            // Add categories and news sources.
            foreach (Category c in Categories)
            {
                c.Id = Archivist.AddCategory(c.Name);
            }
            Archivist.AddNewsSources(NewsSources);

            int documentCount = NewsMaterial.Count;

            // Maps each term to its occurrence count per news material item
            // (array position == position in NewsMaterial).
            Dictionary<string, int[]> countsByTerm = new Dictionary<string, int[]>();

            // Add the news material, recording term frequencies along the way.
            for (int i = 0; i < documentCount; i++)
            {
                NewsMaterial n = NewsMaterial[i];
                Dictionary<string, int> termsInText =
                    TermUtils.CalculateTermFrequency(n.Content);

                foreach (KeyValuePair<string, int> term in termsInText)
                {
                    // Single lookup; a new array starts out as all zeros, one slot
                    // per news material item.
                    int[] counts;
                    if (!countsByTerm.TryGetValue(term.Key, out counts))
                    {
                        counts = new int[documentCount];
                        countsByTerm.Add(term.Key, counts);
                    }

                    counts[i] += term.Value;
                }

                // Add to database.
                Archivist.AddNews(n);
            }

            // Update idf values.
            Archivist.UpdateIdfValues();

            // Create expected vector: one component per unique term.
            SparseVector expectedVector = new SparseVector(countsByTerm.Count);
            int index = 0;
            foreach (KeyValuePair<string, int[]> termCount in countsByTerm)
            {
                // Number of items in which the term occurs at least once.
                int docCount = 0;
                foreach (int occurrences in termCount.Value)
                {
                    if (occurrences > 0)
                    {
                        docCount++;
                    }
                }

                double idf = TermUtils.CalculateInverseDocumentFrequency(
                    documentCount,
                    docCount);

                // Term frequency within the item under test.
                int tf = termCount.Value[targetIndex];

                expectedVector[index] = (float)(tf * idf);
                index++;
            }

            // Fetch the stored item by title and get its vector.
            List<NewsItem> news = Archivist.GetNews(new NewsQuery());
            string targetTitle = NewsMaterial[targetIndex].Title;
            NewsItem storedItem = news.Find(item => item.Title.Equals(targetTitle));

            // Fail with a clear message if the lookup found nothing, instead of
            // handing a missing item to GetTfIdfVector.
            Assert.IsNotNull(storedItem, "News item under test was not found in the database.");

            SparseVector vector = Archivist.GetTfIdfVector(storedItem);
            Assert.AreEqual(expectedVector.Length(), vector.Length(), 0.001);
        }
        /// <summary>
        /// Stores a single news material item through Archivist, refreshes the idf
        /// values, and checks that Length() of the tf-idf vector Archivist returns
        /// for that item matches Length() of a vector computed locally with
        /// TermUtils (tolerance 0.001).
        /// </summary>
        /// <remarks>
        /// Only the Length() values are compared; individual vector components are
        /// not checked.
        /// </remarks>
        public void GetTfIdfVectorOneNewsItemInDatabase()
        {
            // Position in NewsMaterial of the only item that is stored and verified.
            const int targetIndex = 1;

            // Add categories and news sources.
            foreach (Category c in Categories)
            {
                c.Id = Archivist.AddCategory(c.Name);
            }
            Archivist.AddNewsSources(NewsSources);

            // Term frequencies of the single item. The dictionary keys are already
            // unique, so no further accumulation step is needed.
            NewsMaterial nItem = NewsMaterial[targetIndex];
            Dictionary<string, int> termsInText =
                TermUtils.CalculateTermFrequency(nItem.Content);

            // Add to database.
            Archivist.AddNews(nItem);

            // Update idf values.
            Archivist.UpdateIdfValues();

            // With one stored item, every term occurs in 1 of 1 items, so the idf
            // is the same for all terms and is computed once.
            double idf = TermUtils.CalculateInverseDocumentFrequency(
                1,
                1);

            // Create expected vector: one component per unique term.
            SparseVector expectedVector = new SparseVector(termsInText.Count);
            int index = 0;
            foreach (KeyValuePair<string, int> termCount in termsInText)
            {
                int tf = termCount.Value;

                expectedVector[index] = (float)(tf * idf);
                index++;
            }

            // Fetch the stored item by title and get its vector.
            List<NewsItem> news = Archivist.GetNews(new NewsQuery());
            string targetTitle = NewsMaterial[targetIndex].Title;
            NewsItem storedItem = news.Find(item => item.Title.Equals(targetTitle));

            // Fail with a clear message if the lookup found nothing, instead of
            // handing a missing item to GetTfIdfVector.
            Assert.IsNotNull(storedItem, "News item under test was not found in the database.");

            SparseVector vector = Archivist.GetTfIdfVector(storedItem);
            Assert.AreEqual(expectedVector.Length(), vector.Length(), 0.001);
        }
        /// <summary>
        /// Checks that Length() of a newly constructed SparseVector(5), with no
        /// element assigned, equals 0 within EPSILON.
        /// </summary>
        /// <remarks>
        /// NOTE(review): since a vector constructed with 5 is expected to report 0,
        /// Length() presumably measures magnitude rather than dimension; confirm
        /// against SparseVector.
        /// </remarks>
        public void LengthTestWhenZero()
        {
            // Arrange: the vector is left exactly as the constructor produced it.
            float zeroLength = 0.0f;
            SparseVector untouched = new SparseVector(5);

            // Act.
            double measured = untouched.Length();

            // Assert.
            Assert.AreEqual(zeroLength, measured, EPSILON);
        }