public void GetTfIdfMatrixOneNewsItemInDatabase()
        {
            // Purpose: store a single news item through Archivist, build the
            // expected tf-idf column vector for it locally, and check that the
            // summed column-vector lengths of the matrix returned by
            // Archivist.GetTfIdfMatrix match the expected ones.

            // Add categories and news sources.
            foreach (Category c in Categories)
            {
                c.Id = Archivist.AddCategory(c.Name);
            }
            Archivist.AddNewsSources(NewsSources);

            // Maps each term to one count slot per fixture news item; only
            // slot 0 is filled here because only one item is stored.
            Dictionary<string, List<int>> terms = new Dictionary<string, List<int>>();

            // The single news item used by this test.
            NewsMaterial n = NewsMaterial[0];
            Dictionary<string, int> termsInText =
                TermUtils.CalculateTermFrequency(n.Content);

            // Register every term of the item and accumulate its count.
            foreach (KeyValuePair<string, int> term in termsInText)
            {
                List<int> counts;
                if (!terms.TryGetValue(term.Key, out counts))
                {
                    counts = new List<int>();

                    // One zero-initialised slot per fixture news item.
                    for (int j = 0; j < NewsMaterial.Count; j++)
                    {
                        counts.Add(0);
                    }

                    terms.Add(term.Key, counts);
                }

                counts[0] += term.Value;
            }

            // Add to database.
            Archivist.AddNews(n);

            // Update idf values.
            Archivist.UpdateIdfValues();

            // Create expected vector: one row per term, a single column.
            SparseMatrix expected = new SparseMatrix(terms.Count, 1);
            int index = 0;
            foreach (KeyValuePair<string, List<int>> termCount in terms)
            {
                // Number of items in which the term occurs at least once.
                int docCount = 0;
                foreach (int count in termCount.Value)
                {
                    if (count > 0)
                    {
                        docCount++;
                    }
                }

                // NOTE(review): the idf total is NewsMaterial.Count although
                // only one item was added to the database - confirm this is
                // the total Archivist.UpdateIdfValues works with.
                double idf = TermUtils.CalculateInverseDocumentFrequency(
                    NewsMaterial.Count,
                    docCount);

                // The stored item's counts live in slot 0 (see above), so the
                // term frequency must be read from that same slot.
                int tf = termCount.Value[0];

                // Set value in vector.
                expected[index, 0] = (float)(tf * idf);

                index++;
            }

            // Get matrix, limited to a single news item.
            NewsQuery query = new NewsQuery();
            query.Limit = 1;
            SparseMatrix matrix = Archivist.GetTfIdfMatrix(query);

            // Compare the summed column-vector lengths of both matrices.
            double sum1 = 0;
            double sum2 = 0;
            for (int i = 0; i < expected.Columns; i++)
            {
                sum1 += expected.ColumnVector(i).Length();
                sum2 += matrix.ColumnVector(i).Length();
            }

            Assert.AreEqual(sum1, sum2, 0.001d);
        }
Example #2
0
        /// <summary>
        /// Multiplies this matrix with a given matrix (this * m).
        /// </summary>
        /// <param name="m">
        /// The right-hand matrix. Its row count must equal the column count
        /// of this matrix.
        /// </param>
        /// <returns>
        /// The matrix product, created with this.Rows rows and m.Columns
        /// columns.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when this.Columns differs from m.Rows.
        /// </exception>
        public SparseMatrix Product(SparseMatrix m)
        {
            // Make sure matrices can be multiplied.
            if (this.Columns != m.Rows)
            {
                // Report the operands in multiplication order: this, then m.
                throw new InvalidOperationException(
                    String.Format("Cannot multiply {0}x{1} matrix with a {2}x{3} matrix",
                        this.Rows, this.Columns, m.Rows, m.Columns));
            }

            // Create result matrix.
            SparseMatrix matrix = new SparseMatrix(this.Rows, m.Columns);

            // Each column of the result is the matrix-vector product of this
            // matrix with the same-numbered column of m, so the loop must run
            // over the columns of m (not over the rows of this matrix).
            for (int column = 0; column < m.Columns; column++)
            {
                SparseVector product = VectorProduct(m.ColumnVector(column));

                // Copy only the non-zero entries into the result column.
                foreach (int row in product.NonZeroIndices)
                {
                    matrix[row, column] = product[row];
                }
            }

            return matrix;
        }