/// <summary>
        /// Builds a new matrix in which every stored row of this matrix
        /// has been normalized.
        /// </summary>
        /// <returns>
        /// A new matrix with the same dimensions as this one, holding
        /// the normalized rows.
        /// </returns>
        public SparseMatrix NormalizeRows()
        {
            SparseMatrix normalized = new SparseMatrix(Rows, Columns);

            // Only rows that have an entry in RowVectors are visited; each
            // row is normalized as a vector and stored under the same index.
            foreach (int rowIndex in RowVectors.Keys)
            {
                var normalizedRow = this[rowIndex].Normalize();
                normalized.RowVectors[rowIndex] = normalizedRow;
            }

            return normalized;
        }
 public void SparseMatrixTestInitialize()
 {
     // Build the 3x5 fixture locally, then publish it to the shared field.
     SparseMatrix fixture = new SparseMatrix(3, 5);

     // Row 0.
     fixture[0, 0] = 3.0f;
     fixture[0, 2] = 4.4f;

     // Row 1.
     fixture[1, 3] = 2.1f;
     fixture[1, 4] = 1.1f;

     // Row 2.
     fixture[2, 0] = 0.5f;
     fixture[2, 2] = 3.5f;

     TestMatrix = fixture;
 }
        public void TransposeTestTransposesMatrix()
        {
            // Replace the shared fixture with its transpose.
            TestMatrix = TestMatrix.Transpose();

            // The 5x3 matrix the transposed fixture is compared against.
            SparseMatrix transposed = new SparseMatrix(5, 3);
            transposed[0, 0] = 3.0f;
            transposed[2, 0] = 4.4f;
            transposed[3, 1] = 2.1f;
            transposed[4, 1] = 1.1f;
            transposed[0, 2] = 0.5f;
            transposed[2, 2] = 3.5f;

            bool matches = transposed.ApproximatelyEqual(TestMatrix);
            Assert.IsTrue(matches);
        }
        public void ProductTestThrowsExceptionWhenInvalidDimension()
        {
            // A 3x5 right-hand operand, deliberately left untransposed.
            SparseMatrix untransposed = new SparseMatrix(3, 5);
            untransposed[0, 1] = 2.0f;
            untransposed[0, 2] = 3.0f;
            untransposed[1, 3] = 0.1f;
            untransposed[1, 4] = 1.3f;
            untransposed[2, 0] = 0.9f;
            untransposed[2, 1] = 1.5f;

            // NOTE(review): no assertion is visible in this method; presumably
            // an expected-exception attribute sits outside this view - confirm.
            TestMatrix.Product(untransposed);
        }
        public void ScalarMultiplicationTestWhenScalarZero()
        {
            // Scale the shared fixture by zero and keep the result in the field.
            TestMatrix = TestMatrix.ScalarMultiplication(0.0f);

            // A 3x5 matrix with no values assigned is the reference.
            SparseMatrix untouched = new SparseMatrix(3, 5);
            bool matches = untouched.ApproximatelyEqual(TestMatrix);

            Assert.IsTrue(matches);
        }
 public void InitializerTestRowDimensionLessThanZero()
 {
     // Construct with a negative row count; the instance itself is not used.
     // NOTE(review): no assertion is visible here; presumably an
     // expected-exception attribute sits outside this view - confirm.
     new SparseMatrix(-1, 10);
 }
        public void NormalizeRowsTestCorrectly()
        {
            // Normalize the rows of the shared fixture.
            SparseMatrix actual = TestMatrix.NormalizeRows();

            // Reference values for the row-normalized fixture.
            SparseMatrix reference = new SparseMatrix(3, 5);
            reference[0, 0] = 0.5633368f;
            reference[0, 2] = 0.826227367f;
            reference[1, 3] = 0.885831535f;
            reference[1, 4] = 0.46400702f;
            reference[2, 0] = 0.141421363f;
            reference[2, 2] = 0.989949465f;

            bool matches = reference.ApproximatelyEqual(actual);
            Assert.IsTrue(matches);
        }
 public void EqualsTestEquality()
 {
     // Two separately constructed 1x2 matrices with no values assigned.
     SparseMatrix left = new SparseMatrix(1, 2);
     SparseMatrix right = new SparseMatrix(1, 2);

     bool areEqual = left.Equals(right);

     Assert.IsTrue(areEqual);
 }
 public void EqualsTestGetHasCodesEquals()
 {
     // Two separately constructed 1x2 matrices must hash identically.
     SparseMatrix left = new SparseMatrix(1, 2);
     SparseMatrix right = new SparseMatrix(1, 2);

     int leftHash = left.GetHashCode();
     int rightHash = right.GetHashCode();

     Assert.AreEqual(leftHash, rightHash);
 }
        /// <summary>
        /// Gets a tf-idf matrix of the <c>NewsItem</c>s matching the details
        /// as specified by the <c>NewsQuery</c>.
        /// </summary>
        /// <param name="newsQuery">
        /// Contains details of what <c>NewsItem</c>s to create 
        /// a tf-idf matrix of.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when the input is a null reference.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the <c>Archivist</c> is not open upon calling this method.
        /// </exception>
        /// <returns>
        /// An m x n <c>SparseMatrix</c>, or <c>null</c> when the query
        /// returns no rows. m is the <c>t_count</c> value selected by the
        /// query (the highest term id in the database); the tf-idf value of
        /// a term is stored at row (term id - 1). n is the number of news
        /// items read: one column is added each time the news id changes
        /// between consecutive result rows.
        /// </returns>
        public override SparseMatrix GetTfIdfMatrix(NewsQuery newsQuery)
        {
            // Check that the database is open.
            if (!DatabaseConnection.State.Equals(ConnectionState.Open))
            {
                throw new InvalidOperationException(
                    "The Archivist must be open before calling this method. " +
                    "Call the Open() method.");
            }

            // Check input validity.
            if (newsQuery == null)
            {
                throw new ArgumentNullException("newsQuery",
                    "newsQuery cannot be null");
            }

            // The command is disposed even when building or executing
            // the query throws.
            using (SqlCeCommand cmd = DatabaseConnection.CreateCommand())
            {
                // Create the select command. Placeholders are positional
                // ('?'), so parameters must be added in the same order as
                // their placeholders appear in the SQL text.
                StringBuilder sql = new StringBuilder();
                sql.Append("SELECT tc.t_count, n.id, t.id AS term_id, nt.tf, t.idf ");
                sql.Append("FROM ( SELECT id, title, summary, author, publisher_date, ");
                sql.Append("is_read, user_found_interesting, url, news_source_id, ");
                sql.Append("category_id FROM news ");

                sql.Append("ORDER BY publisher_date " +
                    (newsQuery.OrderDateDesc ? "DESC " : "ASC "));

                // NOTE(review): paging happens inside the sub-select, i.e.
                // before the WHERE filters below are applied - confirm this
                // is intended.
                sql.Append("OFFSET ? ROWS FETCH NEXT ? ROWS ONLY ");
                SqlCeParameter offset = new SqlCeParameter("offset", SqlDbType.Int);
                offset.Value = newsQuery.Offset;
                cmd.Parameters.Add(offset);
                SqlCeParameter limit = new SqlCeParameter("limit", SqlDbType.Int);
                limit.Value = newsQuery.Limit;
                cmd.Parameters.Add(limit);

                sql.Append(") n ");
                sql.Append("LEFT OUTER JOIN news_sources ns ON ");
                sql.Append("ns.id = n.news_source_id ");
                sql.Append("LEFT OUTER JOIN categories c ON ");
                sql.Append("c.id = n.category_id ");
                sql.Append("LEFT OUTER JOIN news_term_joins nt ON ");
                sql.Append("nt.news_id = n.id ");
                sql.Append("LEFT OUTER JOIN terms t ON ");
                sql.Append("t.id = nt.term_id ");
                sql.Append("CROSS JOIN ( SELECT MAX( id ) AS t_count FROM terms ) tc ");
                sql.Append("WHERE 1=1 "); // 1=1 to avoid invalid query when no args.

                // Add category id.
                if (newsQuery.CategoryId > -1)
                {
                    sql.Append("AND n.category_id=? ");
                    SqlCeParameter category = new SqlCeParameter("cat", SqlDbType.Int);
                    category.Value = newsQuery.CategoryId;
                    cmd.Parameters.Add(category);
                }

                // Set all excluded news.
                if (newsQuery.ExcludedNews != null &&
                    newsQuery.ExcludedNews.Count > 0)
                {
                    sql.Append("AND n.id NOT IN ( ");
                    for (int i = 0; i < newsQuery.ExcludedNews.Count; i++)
                    {
                        sql.Append("?");

                        // Set the value on the parameter object itself.
                        // Looking the parameter up by name afterwards would
                        // hit the first match again if the same id is listed
                        // twice, leaving the later parameter without a value.
                        SqlCeParameter excluded = new SqlCeParameter(
                            "news_" + newsQuery.ExcludedNews[i].Id.ToString(),
                            SqlDbType.Int);
                        excluded.Value = newsQuery.ExcludedNews[i].Id;
                        cmd.Parameters.Add(excluded);

                        if (i < newsQuery.ExcludedNews.Count - 1)
                        {
                            sql.Append(", ");
                        }
                    }
                    sql.Append(") ");
                }

                // Interest status.
                if (newsQuery.Interest != InterestStatus.Any)
                {
                    sql.Append("AND n.user_found_interesting=? ");
                    SqlCeParameter interesting =
                        new SqlCeParameter("interesting", SqlDbType.Bit);
                    interesting.Value =
                        newsQuery.Interest == InterestStatus.Interesting ? 1 : 0;
                    cmd.Parameters.Add(interesting);
                }

                // Newer than.
                if (newsQuery.NewerThan != DateTime.MinValue)
                {
                    sql.Append("AND n.publisher_date > ? ");
                    SqlCeParameter published =
                        new SqlCeParameter("published", SqlDbType.DateTime);
                    published.Value = newsQuery.NewerThan;
                    cmd.Parameters.Add(published);
                }

                // Read status.
                if (newsQuery.Read != ReadStatus.Any)
                {
                    sql.Append("AND n.is_read=? ");
                    SqlCeParameter read = new SqlCeParameter("read", SqlDbType.Bit);
                    read.Value = newsQuery.Read == ReadStatus.Read ? 1 : 0;
                    cmd.Parameters.Add(read);
                }

                cmd.CommandText = sql.ToString();
                cmd.Prepare();

                SparseMatrix matrix = null;

                int currentColumn = 0;
                int currentId = -1;

                // The reader is closed even if reading a row throws.
                using (SqlCeDataReader rdr = cmd.ExecuteReader())
                {
                    // Read one (news item, term) pair per row.
                    // NOTE(review): the column bookkeeping assumes that all
                    // rows of one news item are adjacent; the outer query has
                    // no ORDER BY that guarantees this - confirm.
                    while (rdr.Read())
                    {
                        // Create matrix instance if null with 1 column.
                        // More columns will be added on the fly.
                        if (matrix == null)
                        {
                            matrix = new SparseMatrix(rdr.GetInt32(0), 1);
                            currentId = rdr.GetInt32(1);
                        }
                        else
                        {
                            // If new news_id, add column.
                            if (currentId != rdr.GetInt32(1))
                            {
                                matrix.AddColumn();
                                currentId = rdr.GetInt32(1);
                                currentColumn++;
                            }
                        }

                        // Calculate tf-idf. The term columns are null for
                        // news items without terms (LEFT OUTER JOIN); such
                        // rows only contribute a column.
                        if (!rdr.IsDBNull(2) && !rdr.IsDBNull(3) && !rdr.IsDBNull(4))
                        {
                            float tfIdf = (float)(rdr.GetInt32(3) * rdr.GetDouble(4));
                            // Term ids start at 1, matrix rows at 0.
                            matrix[rdr.GetInt32(2) - 1, currentColumn] = tfIdf;
                        }
                    }
                }

                return matrix;
            }
        }
        public void GetTfIdfMatrixOneNewsItemInDatabase()
        {
            // Register the categories (storing the ids handed back) and
            // the news sources.
            foreach (Category category in Categories)
            {
                category.Id = Archivist.AddCategory(category.Name);
            }
            Archivist.AddNewsSources(NewsSources);

            // Maps each term to one counter per NewsMaterial entry.
            Dictionary<string, List<int>> termCounts =
                new Dictionary<string, List<int>>();

            // Only the first news material item is used by this test.
            NewsMaterial material = NewsMaterial[0];
            Dictionary<string, int> frequencies =
                TermUtils.CalculateTermFrequency(material.Content);

            // Accumulate the term frequencies of the item into slot 0.
            foreach (KeyValuePair<string, int> frequency in frequencies)
            {
                List<int> perItemCounts;
                if (!termCounts.TryGetValue(frequency.Key, out perItemCounts))
                {
                    // First sighting of the term: one zeroed counter
                    // per news material item.
                    perItemCounts = new List<int>();
                    for (int slot = 0; slot < NewsMaterial.Count; slot++)
                    {
                        perItemCounts.Add(0);
                    }
                    termCounts.Add(frequency.Key, perItemCounts);
                }

                perItemCounts[0] += frequency.Value;
            }

            // Store the item and refresh the idf values in the database.
            Archivist.AddNews(material);
            Archivist.UpdateIdfValues();

            // Build the expected single-column matrix, one row per term.
            SparseMatrix expected = new SparseMatrix(termCounts.Count, 1);
            int rowIndex = 0;
            foreach (KeyValuePair<string, List<int>> entry in termCounts)
            {
                // Number of items in which the term occurs at least once.
                int documentsWithTerm = 0;
                foreach (int count in entry.Value)
                {
                    if (count > 0)
                    {
                        documentsWithTerm++;
                    }
                }

                double idf = TermUtils.CalculateInverseDocumentFrequency(
                    NewsMaterial.Count,
                    documentsWithTerm);

                // NOTE(review): the counts above are accumulated in slot 0,
                // but the term frequency is read from slot 2 - confirm which
                // index is intended before relying on this expectation.
                int tf = entry.Value[2];
                expected[rowIndex, 0] = (float)(tf * idf);

                rowIndex++;
            }

            // Fetch the matrix for a single news item.
            NewsQuery query = new NewsQuery { Limit = 1 };
            SparseMatrix actual = Archivist.GetTfIdfMatrix(query);

            // Compare the summed column vector lengths of both matrices.
            double expectedLengthSum = 0;
            double actualLengthSum = 0;
            for (int column = 0; column < expected.Columns; column++)
            {
                expectedLengthSum += expected.ColumnVector(column).Length();
                actualLengthSum += actual.ColumnVector(column).Length();
            }

            Assert.AreEqual(expectedLengthSum, actualLengthSum, 0.001d);
        }
Exemple #12
0
        /// <summary>
        /// Creates the transpose of this matrix.
        /// </summary>
        /// <returns>
        /// A new matrix with the row and column dimensions swapped, in which
        /// the value at (row, column) of this matrix is stored at
        /// (column, row).
        /// </returns>
        public SparseMatrix Transpose()
        {
            SparseMatrix transposed = new SparseMatrix(Columns, Rows);

            // Visit only the entries listed as non-zero in each stored row.
            foreach (int sourceRow in RowVectors.Keys)
            {
                foreach (int sourceColumn in RowVectors[sourceRow].NonZeroIndices)
                {
                    var value = this[sourceRow, sourceColumn];
                    transposed[sourceColumn, sourceRow] = value;
                }
            }

            return transposed;
        }
Exemple #13
0
        /// <summary>
        /// Multiplies every stored value of this matrix by a scalar.
        /// </summary>
        /// <param name="scalar">
        /// The factor applied to each value.
        /// </param>
        /// <returns>
        /// A new matrix of the same dimensions containing the scaled values.
        /// </returns>
        public SparseMatrix ScalarMultiplication(float scalar)
        {
            SparseMatrix scaled = new SparseMatrix(this.Rows, this.Columns);

            // Visit only the entries listed as non-zero in each stored row.
            foreach (int rowIndex in RowVectors.Keys)
            {
                var row = RowVectors[rowIndex];
                foreach (int columnIndex in row.NonZeroIndices)
                {
                    scaled[rowIndex, columnIndex] = row[columnIndex] * scalar;
                }
            }

            return scaled;
        }
Exemple #14
0
        /// <summary>
        /// Multiplies this matrix (left operand) with a given matrix
        /// (right operand).
        /// </summary>
        /// <param name="m">
        /// The matrix to multiply on. Its number of rows must equal the
        /// number of columns of this matrix.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="m"/> is a null reference.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the number of columns of this matrix differs from
        /// the number of rows of <paramref name="m"/>.
        /// </exception>
        /// <returns>
        /// The matrix product, with as many rows as this matrix and as many
        /// columns as <paramref name="m"/>.
        /// </returns>
        public SparseMatrix Product(SparseMatrix m)
        {
            // Cast to object so the reference check cannot be redirected
            // to an overloaded equality operator.
            if ((object)m == null)
            {
                throw new ArgumentNullException("m", "m cannot be null");
            }

            // Make sure matrices can be multiplied.
            if (this.Columns != m.Rows)
            {
                // Report the left operand (this) first, then the right one.
                throw new InvalidOperationException(
                    String.Format("Cannot multiply {0}x{1} matrix with a {2}x{3} matrix",
                        this.Rows, this.Columns, m.Rows, m.Columns));
            }

            // Create result matrix.
            SparseMatrix matrix = new SparseMatrix(this.Rows, m.Columns);

            // Column j of the result is this matrix multiplied by column j
            // of m, so the loop has to cover every column of m. Iterating
            // the stored row keys of this matrix instead skips columns (or
            // reads past m's columns) whenever this matrix has empty rows
            // or its row count differs from m's column count.
            for (int column = 0; column < m.Columns; column++)
            {
                SparseVector product = VectorProduct(m.ColumnVector(column));
                foreach (int row in product.NonZeroIndices)
                {
                    matrix[row, column] = product[row];
                }
            }

            return matrix;
        }
 public void InitializerTestColumnDimensionZero()
 {
     // Construct with zero columns; the instance itself is not used.
     // NOTE(review): no assertion is visible here; presumably an
     // expected-exception attribute sits outside this view - confirm.
     new SparseMatrix(10, 0);
 }
 public void EqualsTestInequality()
 {
     // Matrices with swapped dimensions (1x2 versus 2x1).
     SparseMatrix wide = new SparseMatrix(1, 2);
     SparseMatrix tall = new SparseMatrix(2, 1);

     bool areEqual = wide.Equals(tall);

     Assert.IsFalse(areEqual);
 }
        public void InitializerTestRowDimensionGreaterThanZero()
        {
            // The row count passed to the constructor must be reported by Rows.
            int expectedRows = 20;
            SparseMatrix created = new SparseMatrix(20, 10);

            Assert.AreEqual(expectedRows, created.Rows);
        }
 public void EqualsTestNotEqualsOperator()
 {
     // Matrices with swapped dimensions (1x2 versus 2x1).
     SparseMatrix wide = new SparseMatrix(1, 2);
     SparseMatrix tall = new SparseMatrix(2, 1);

     bool differ = wide != tall;

     Assert.IsTrue(differ);
 }
 public void InitializerTestRowDimensionZero()
 {
     // Construct with zero rows; the instance itself is not used.
     // NOTE(review): no assertion is visible here; presumably an
     // expected-exception attribute sits outside this view - confirm.
     new SparseMatrix(0, 10);
 }
 public void EqualsTestObjectReferenceEquality()
 {
     // The second matrix is held as object so that the Equals overload
     // taking an object is the one being called.
     SparseMatrix left = new SparseMatrix(1, 2);
     object right = new SparseMatrix(1, 2);

     bool areEqual = left.Equals(right);

     Assert.IsTrue(areEqual);
 }
        public void ProductTestGreaterThanZero()
        {
            // Build a 3x5 matrix and transpose it to obtain the
            // 5x3 right-hand operand.
            SparseMatrix untransposed = new SparseMatrix(3, 5);
            untransposed[0, 1] = 2.0f;
            untransposed[0, 2] = 3.0f;
            untransposed[1, 3] = 0.1f;
            untransposed[1, 4] = 1.3f;
            untransposed[2, 0] = 0.9f;
            untransposed[2, 1] = 1.5f;
            SparseMatrix factor = untransposed.Transpose();

            // Reference values for the 3x3 product.
            SparseMatrix reference = new SparseMatrix(3, 3);
            reference[0, 0] = 13.2000008f;
            reference[0, 2] = 2.69999981f;
            reference[1, 1] = 1.64f;
            reference[2, 0] = 10.5f;
            reference[2, 2] = 0.45f;

            SparseMatrix actual = TestMatrix.Product(factor);

            bool matches = reference.ApproximatelyEqual(actual);
            Assert.IsTrue(matches);
        }
 public void EqualsTestObjectToNullReferenceEqualsOperator()
 {
     // Compare an instance against a null reference typed as SparseMatrix.
     SparseMatrix instance = new SparseMatrix(1, 2);
     SparseMatrix missing = null;

     bool areEqual = instance == missing;

     Assert.IsFalse(areEqual);
 }
        public void ScalarMultiplicationTestWhenScalarLessThanZero()
        {
            // Scale the shared fixture by -3 and keep the result in the field.
            TestMatrix = TestMatrix.ScalarMultiplication(-3.0f);

            // Reference values for the scaled fixture.
            SparseMatrix reference = new SparseMatrix(3, 5);
            reference[0, 0] = -9.0f;
            reference[0, 2] = -13.2000008f;
            reference[1, 3] = -6.29999971f;
            reference[1, 4] = -3.30000019f;
            reference[2, 0] = -1.5f;
            reference[2, 2] = -10.5f;

            bool matches = reference.ApproximatelyEqual(TestMatrix);
            Assert.IsTrue(matches);
        }
 public void EqualsTestReferenceEquals()
 {
     // Both variables refer to the very same instance.
     SparseMatrix instance = new SparseMatrix(1, 2);
     SparseMatrix alias = instance;

     bool areEqual = instance.Equals(alias);

     Assert.IsTrue(areEqual);
 }
 public void SparseMatrixTestCleanup()
 {
     // Drop the reference to the shared fixture matrix.
     TestMatrix = null;
 }
        public void GetTestIndexOutOfRange()
        {
            bool sawOutOfRange = false;

            try
            {
                SparseMatrix matrix = new SparseMatrix(3, 5);
                matrix[0, 0] = 0.5633368f;
                matrix[0, 2] = 0.826227367f;
                matrix[1, 3] = 0.885831535f;
                matrix[1, 4] = 0.46400702f;
                matrix[2, 0] = 0.141421363f;
                matrix[2, 2] = 0.989949465f;

                // (10, 10) lies outside the 3x5 matrix; the value read
                // is irrelevant, only the exception matters.
                float ignored = matrix[10, 10];
            }
            catch (IndexOutOfRangeException)
            {
                sawOutOfRange = true;
            }

            Assert.IsTrue(sawOutOfRange,
                "No IndexOutOfRangeException was thrown.");
        }
        public void ToStringTest()
        {
            // Populate a 3x5 matrix so there is something to render.
            SparseMatrix matrix = new SparseMatrix(3, 5);
            matrix[0, 0] = 0.5633368f;
            matrix[0, 2] = 0.826227367f;
            matrix[1, 3] = 0.885831535f;
            matrix[1, 4] = 0.46400702f;
            matrix[2, 0] = 0.141421363f;
            matrix[2, 2] = 0.989949465f;

            string rendered = matrix.ToString();
            bool isBlank = String.IsNullOrEmpty(rendered);

            Assert.IsFalse(isBlank,
                "ToString() did not produce a string.");
        }
        public void InitializerTestColumnDimensionGreaterThanZero()
        {
            // The column count passed to the constructor must be reported
            // by Columns.
            int expectedColumns = 10;
            SparseMatrix created = new SparseMatrix(2, 10);

            Assert.AreEqual(expectedColumns, created.Columns);
        }
        /// <summary>
        /// Write the cosine similarity of the given <c>NewsItem</c>s to 
        /// the file cosine_similarity_log.txt.
        /// </summary>
        /// <remarks>
        /// The tf-idf vectors of the news items are placed as columns of a
        /// matrix. That matrix is transposed and row-normalized, and then
        /// multiplied with its own transpose, so entry (i, j) of the result
        /// relates news item i to news item j. Note that <paramref name="news"/>
        /// is sorted in place, and that each title is parsed as an integer.
        /// </remarks>
        /// <param name="archivist">
        /// The <c>Archivist</c> for getting database information.
        /// </param>
        /// <param name="news">
        /// The <c>NewsItem</c>s to print the cosine similarity of. Must
        /// contain at least one item; otherwise <c>First()</c> throws an
        /// <c>InvalidOperationException</c>.
        /// </param>
        private static void WriteCosineSimilarity(Archivist archivist, List<NewsItem> news)
        {
            // Sort the news according to title.
            news.Sort((n1, n2) => int.Parse(n1.Title).CompareTo(int.Parse(n2.Title)));

            // Get the vector of all news and add them to a matrix.
            List<SparseVector> vectors = new List<SparseVector>(news.Count);
            foreach (NewsItem n in news)
            {
                vectors.Add(archivist.GetTfIdfVector(n));
            }

            // The dimension of the first vector is used for all of them;
            // read it once instead of on every inner-loop iteration.
            int dimension = vectors.First().Dimension;

            // Create a matrix with one column per news item.
            SparseMatrix matrix = new SparseMatrix(dimension, vectors.Count);
            // Add entries to the matrix.
            for (int i = 0; i < vectors.Count; i++)
            {
                SparseVector vector = vectors[i];
                for (int j = 0; j < dimension; j++)
                {
                    matrix[j, i] = vector[j];
                }
            }

            // Transpose matrix and thus prepare for multiplication.
            SparseMatrix mTransposed = matrix.Transpose().NormalizeRows();

            // Calculate the result. The right-hand operand is the transpose
            // of the matrix computed above, so it is reused rather than
            // transposed and normalized a second time.
            SparseMatrix result = mTransposed.Product(mTransposed.Transpose());

            // Write to file.
            using (StreamWriter file = new StreamWriter("cosine_similarity_log.txt"))
            {
                int rowLength = result.Rows;
                int columnLength = result.Columns;

                // Print header.
                file.Write("      ");
                for (int i = 0; i < columnLength; i++)
                {
                    file.Write("dok" + (i + 1).ToString("0#") + " ");
                }
                file.WriteLine();

                // Print the matrix.
                for (int i = 0; i < rowLength; i++)
                {
                    // Print doc title.
                    file.Write("dok" + (i + 1).ToString("0#") + " ");
                    for (int j = 0; j < columnLength; j++)
                    {
                        file.Write(result[i, j].ToString("0.##0") + " ");
                    }

                    // No line break after the last row.
                    if (i != rowLength - 1)
                    {
                        file.WriteLine();
                    }
                }
            }
        }
Exemple #30
0
        /// <summary>
        /// Determines whether another <c>SparseMatrix</c> is equal to this
        /// <c>SparseMatrix</c>.
        /// </summary>
        /// <param name="other">
        /// The <c>SparseMatrix</c> to compare with this instance.
        /// </param>
        /// <returns>
        /// <c>false</c> if <paramref name="other"/> is null or has different
        /// dimensions; <c>true</c> if it is the same instance or every row
        /// of this matrix equals the corresponding row of
        /// <paramref name="other"/>.
        /// </returns>
        public bool Equals(SparseMatrix other)
        {
            // Plain reference checks first; they do not go through any
            // overloaded equality operator.
            if (ReferenceEquals(other, null))
            {
                return false;
            }
            if (ReferenceEquals(this, other))
            {
                return true;
            }

            // Matrices of different shape can never be equal.
            if (Rows != other.Rows || Columns != other.Columns)
            {
                return false;
            }

            // Compare row by row and stop at the first mismatch.
            for (int rowIndex = 0; rowIndex < Rows; rowIndex++)
            {
                bool rowsMatch = this[rowIndex].Equals(other[rowIndex]);
                if (!rowsMatch)
                {
                    return false;
                }
            }

            return true;
        }