Пример #1
0
        public void StoreDocument()
        {
            // Exercises SqlIndex.StoreDocument against strict mocks: the connector is expected to be
            // asked to delete the document's data and to save the tokenized content, title and
            // keywords, and the value it returns must be passed back to the caller.
            MockRepository repository = new MockRepository();
            IIndexConnector connector = repository.StrictMock<IIndexConnector>();
            ScrewTurn.Wiki.SearchEngine.IDocument document =
                repository.StrictMock<ScrewTurn.Wiki.SearchEngine.IDocument>();

            string stateToken = "state";
            string bodyText   = "This is some test content.";
            string docTitle   = "My Document";

            // Matchers for the word arrays handed to the connector (tokens are expected lower-cased).
            Predicate<ScrewTurn.Wiki.SearchEngine.WordInfo[]> matchesContentWords = words =>
                words.Length == 5 &&
                words[0].Text == "this" &&
                words[1].Text == "is" &&
                words[2].Text == "some" &&
                words[3].Text == "test" &&
                words[4].Text == "content";

            Predicate<ScrewTurn.Wiki.SearchEngine.WordInfo[]> matchesTitleWords = words =>
                words.Length == 2 &&
                words[0].Text == "my" &&
                words[1].Text == "document";

            Predicate<ScrewTurn.Wiki.SearchEngine.WordInfo[]> matchesKeywordWords = words =>
                words.Length == 1 &&
                words[0].Text == "test";

            // Record phase: the document mock delegates tokenization to the real tokenizer.
            Expect.Call(document.Title).Return(docTitle).Repeat.AtLeastOnce();
            Expect.Call(document.Tokenize(bodyText))
                .Return(ScrewTurn.Wiki.SearchEngine.Tools.Tokenize(bodyText, ScrewTurn.Wiki.SearchEngine.WordLocation.Content));
            Expect.Call(document.Tokenize(docTitle))
                .Return(ScrewTurn.Wiki.SearchEngine.Tools.Tokenize(docTitle, ScrewTurn.Wiki.SearchEngine.WordLocation.Title));

            // Record phase: connector calls expected from the index.
            connector.DeleteDataForDocument(document, stateToken);
            LastCall.On(connector);
            Expect.Call(connector.SaveDataForDocument(null, null, null, null, null))
                .IgnoreArguments()
                .Constraints(
                    RC.Is.Same(document),
                    RC.Is.Matching(matchesContentWords),
                    RC.Is.Matching(matchesTitleWords),
                    RC.Is.Matching(matchesKeywordWords),
                    RC.Is.Same(stateToken))
                .Return(8);

            repository.ReplayAll();

            // Act
            SqlIndex index = new SqlIndex(connector);
            var occurrences = index.StoreDocument(document, new[] { "test" }, bodyText, stateToken);

            // Assert
            Assert.AreEqual(8, occurrences, "Wrong occurrence count");

            repository.VerifyAll();
        }
Пример #2
0
        /// <summary>
        ///     Stores a document in the index, after first calling <c>RemoveDocument</c> for it.
        /// </summary>
        /// <param name="document">The document to index.</param>
        /// <param name="keywords">The document keywords, if any; <c>null</c> is treated as an empty list.</param>
        /// <param name="content">The content of the document.</param>
        /// <param name="state">
        ///     A state object that is forwarded to <c>RemoveDocument</c> and to the connector's
        ///     <c>SaveDataForDocument</c> function.
        /// </param>
        /// <returns>
        ///     The value returned by the connector's <c>SaveDataForDocument</c>:
        ///     the number of indexed words (including duplicates).
        /// </returns>
        /// <exception cref="ArgumentNullException">
        ///     If <paramref name="document"/> or <paramref name="content"/> is <c>null</c>.
        /// </exception>
        /// <remarks>
        ///     Indexing the content of the document is <b>O(n)</b>,
        ///     where <b>n</b> is the total number of words in the document.
        /// </remarks>
        public int StoreDocument(IDocument document, IList <string> keywords, string content, object state)
        {
            // Validate the mandatory arguments before touching the index.
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }
            if (content == null)
            {
                throw new ArgumentNullException("content");
            }

            // A missing keyword list is equivalent to "no keywords".
            keywords = keywords ?? new string[0];

            // Remove the document before saving its new data.
            RemoveDocument(document, state);

            keywords = SearchEngine.Tools.CleanupKeywords(keywords);

            // Content: tokenize, then filter out stop words.
            var contentWords = SearchEngine.Tools.RemoveStopWords(document.Tokenize(content), stopWords);

            // Title: tokenize, filter out stop words, then re-tag every word with the Title location.
            var titleWords = SearchEngine.Tools.RemoveStopWords(document.Tokenize(document.Title), stopWords);

            for (var t = 0; t < titleWords.Length; t++)
            {
                var word = titleWords[t];
                titleWords[t] = new WordInfo(word.Text, word.FirstCharIndex, word.WordIndex, WordLocation.Title);
            }

            // Keywords: one WordInfo per keyword. The character offset advances by the keyword's
            // length plus one, as if the keywords were joined by a single separator character.
            var keywordWords = new WordInfo[keywords.Count];
            var offset       = 0;

            for (var k = 0; k < keywordWords.Length; k++)
            {
                var keyword = keywords[k];
                keywordWords[k] = new WordInfo(keyword, (ushort)offset, (ushort)k, WordLocation.Keywords);
                offset += 1 + keyword.Length;
            }

            return connector.SaveDataForDocument(document, contentWords, titleWords, keywordWords, state);
        }