public void GetTfIdfMatrixOneNewsItemInDatabase()
{
    // Checks Archivist.GetTfIdfMatrix when exactly one news item has been
    // added: an expected tf-idf column is built by hand from the item's
    // term frequencies and compared (by column vector length) with the
    // matrix returned by the archivist.

    // Add categories and news sources.
    foreach (Category c in Categories)
    {
        c.Id = Archivist.AddCategory(c.Name);
    }
    Archivist.AddNewsSources(NewsSources);

    // Maps each term to a list of counts with one slot per news material
    // item. Only slot 0 is ever filled, since only the first item is used.
    Dictionary<string, List<int>> terms = new Dictionary<string, List<int>>();

    // The single news item under test.
    NewsMaterial n = NewsMaterial[0];

    // Term frequencies for the news item's content.
    Dictionary<string, int> termsInText = TermUtils.CalculateTermFrequency(n.Content);

    // Find all unique terms in the news item, and increase counts.
    foreach (KeyValuePair<string, int> term in termsInText)
    {
        if (!terms.ContainsKey(term.Key))
        {
            // First sighting of this term: create a zeroed slot for every
            // news material item.
            List<int> counts = new List<int>();
            for (int j = 0; j < NewsMaterial.Count; j++)
            {
                counts.Add(0);
            }
            terms.Add(term.Key, counts);
        }
        terms[term.Key][0] += term.Value;
    }

    // Add to database.
    Archivist.AddNews(n);

    // Update idf values.
    Archivist.UpdateIdfValues();

    // Create expected vector: one row per term, a single column for the
    // single news item.
    SparseMatrix expected = new SparseMatrix(terms.Count, 1);
    int index = 0;
    foreach (KeyValuePair<string, List<int>> termCount in terms)
    {
        // Calculate idf: count the items in which the term occurs.
        int docCount = 0;
        foreach (int count in termCount.Value)
        {
            if (count > 0)
            {
                docCount++;
            }
        }
        double idf = TermUtils.CalculateInverseDocumentFrequency(
            NewsMaterial.Count, docCount);

        // Calculate tf. The counts for the news item were accumulated in
        // slot 0 above, so that is the slot that must be read back.
        int tf = termCount.Value[0];

        // Set value in vector.
        expected[index, 0] = (float)(tf * idf);
        index++;
    }

    // Get matrix.
    NewsQuery query = new NewsQuery();
    query.Limit = 1;
    SparseMatrix matrix = Archivist.GetTfIdfMatrix(query);

    // Compare the summed column vector lengths of both matrices.
    double sum1 = 0;
    double sum2 = 0;
    for (int i = 0; i < expected.Columns; i++)
    {
        sum1 += expected.ColumnVector(i).Length();
        sum2 += matrix.ColumnVector(i).Length();
    }

    Assert.AreEqual(sum1, sum2, 0.001d);
}
/// <summary>
/// Multiplies this matrix by a given matrix.
/// </summary>
/// <param name="m">
/// The matrix to multiply with. Its row count must equal the column
/// count of this matrix.
/// </param>
/// <returns>
/// The matrix product, sized this.Rows x m.Columns.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the column count of this matrix differs from the row
/// count of <paramref name="m"/>.
/// </exception>
public SparseMatrix Product(SparseMatrix m)
{
    // Make sure matrices can be multiplied.
    if (this.Columns != m.Rows)
    {
        // Report the left operand (this) first, then the right operand (m).
        throw new InvalidOperationException(
            String.Format("Cannot multiply {0}x{1} matrix with a {2}x{3} matrix",
                this.Rows, this.Columns, m.Rows, m.Columns));
    }

    // Create result matrix.
    SparseMatrix matrix = new SparseMatrix(this.Rows, m.Columns);

    // Each column of the result is the matrix-vector product of this
    // matrix with the corresponding column of m, so every column index of
    // m has to be visited (not the row keys of this matrix).
    // NOTE(review): assumes VectorProduct(v) computes this * v - confirm.
    for (int column = 0; column < m.Columns; column++)
    {
        SparseVector product = VectorProduct(m.ColumnVector(column));

        // Copy only the non-zero entries into the result column.
        foreach (int row in product.NonZeroIndices)
        {
            matrix[row, column] = product[row];
        }
    }

    return matrix;
}