Represents a change that occurred to the index, structured for easy dumping to disk or to a database.
The class is not thread-safe.
        /// <summary>
        /// Initializes a new instance of the <see cref="IndexChangedEventArgs" /> class.
        /// </summary>
        /// <param name="document">The document affected by the change.</param>
        /// <param name="change">The kind of change that was performed.</param>
        /// <param name="changeData">The dumped data describing the change.</param>
        /// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="change"/> is not <see cref="IndexChangeType.IndexCleared"/> and <paramref name="document"/> or <paramref name="changeData"/> are <c>null</c>.</exception>
        public IndexChangedEventArgs(IDocument document, IndexChangeType change, DumpedChange changeData, object state)
        {
            // Only a full index clear may omit the document and the change data.
            bool payloadRequired = change != IndexChangeType.IndexCleared;

            if (payloadRequired && document == null)
            {
                throw new ArgumentNullException("document");
            }
            if (payloadRequired && changeData == null)
            {
                throw new ArgumentNullException("changeData");
            }

            this.document = document;
            this.change = change;
            this.changeData = changeData;
            this.state = state;
        }
        /// <summary>
        /// Removes a document from the index and, when the removal produced change data,
        /// reports it through <c>OnIndexChange</c> as <see cref="IndexChangeType.DocumentRemoved" />.
        /// </summary>
        /// <param name="document">The document to remove.</param>
        /// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="document"/> is <c>null</c>.</exception>
        public void RemoveDocument(IDocument document, object state)
        {
            if (document == null) throw new ArgumentNullException("document");

            DumpedChange removal = RemoveDocumentInternal(document);
            if (removal == null)
            {
                // No change data was produced, so there is nothing to report.
                return;
            }

            OnIndexChange(document, IndexChangeType.DocumentRemoved, removal, state);
        }
Beispiel #3
0
        /// <summary>
        ///     Initializes a new instance of the <see cref="IndexChangedEventArgs" /> class.
        /// </summary>
        /// <param name="document">The document affected by the change.</param>
        /// <param name="change">The kind of change that was performed.</param>
        /// <param name="changeData">The dumped data describing the change.</param>
        /// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
        /// <exception cref="ArgumentNullException">
        ///     If <paramref name="change" /> is not
        ///     <see cref="IndexChangeType.IndexCleared" /> and <paramref name="document" /> or <paramref name="changeData" /> are
        ///     <c>null</c>.
        /// </exception>
        public IndexChangedEventArgs(IDocument document, IndexChangeType change, DumpedChange changeData, object state)
        {
            // Clearing the whole index is the only change that carries no document or change data.
            if (change != IndexChangeType.IndexCleared && document == null)
            {
                throw new ArgumentNullException("document");
            }
            if (change != IndexChangeType.IndexCleared && changeData == null)
            {
                throw new ArgumentNullException("changeData");
            }

            Document = document;
            Change = change;
            ChangeData = changeData;
            State = state;
        }
Beispiel #4
0
 /// <summary>
 ///     Initializes a new instance of the <see cref="IndexChangedEventArgs" /> class that also carries
 ///     a storer result. All other arguments are forwarded to the four-argument constructor.
 /// </summary>
 /// <param name="document">The document affected by the change.</param>
 /// <param name="change">The kind of change that was performed.</param>
 /// <param name="changeData">The dumped data describing the change.</param>
 /// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
 /// <param name="result">The storer result, if any.</param>
 /// <exception cref="ArgumentNullException">
 ///     If <paramref name="change" /> is not
 ///     <see cref="IndexChangeType.IndexCleared" /> and <paramref name="document" /> or <paramref name="changeData" /> are
 ///     <c>null</c>.
 /// </exception>
 public IndexChangedEventArgs(
     IDocument document,
     IndexChangeType change,
     DumpedChange changeData,
     object state,
     IndexStorerResult result)
     : this(document, change, changeData, state)
 {
     this.Result = result;
 }
 /// <summary>
 /// Initializes a new instance of the <see cref="IndexChangedEventArgs" /> class that also carries
 /// a storer result. All other arguments are forwarded to the four-argument constructor.
 /// </summary>
 /// <param name="document">The document affected by the change.</param>
 /// <param name="change">The kind of change that was performed.</param>
 /// <param name="changeData">The dumped data describing the change.</param>
 /// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
 /// <param name="result">The storer result, if any.</param>
 /// <exception cref="ArgumentNullException">If <paramref name="change"/> is not <see cref="IndexChangeType.IndexCleared"/> and <paramref name="document"/> or <paramref name="changeData"/> are <c>null</c>.</exception>
 public IndexChangedEventArgs(
     IDocument document,
     IndexChangeType change,
     DumpedChange changeData,
     object state,
     IndexStorerResult result)
     : this(document, change, changeData, state)
 {
     // The parameter shadows the field, hence the explicit "this." qualifier.
     this.result = result;
 }
Beispiel #6
0
 /// <summary>
 /// Stores new data into the data storage.
 /// </summary>
 /// <param name="data">The data to store.</param>
 /// <param name="state">A state object passed by the index.</param>
 /// <returns>The storer result, if any.</returns>
 /// <remarks>When saving a new document, the document ID in data.Mappings must be
 /// replaced with the correct document ID, generated by the concrete implementation of
 /// this method. data.Words should have IDs numbered from uint.MaxValue downwards.
 /// The method re-numbers the words appropriately.</remarks>
 protected abstract IndexStorerResult SaveData(DumpedChange data, object state);
Beispiel #7
0
 /// <summary>
 /// Deletes data from the data storage.
 /// </summary>
 /// <param name="data">The data to delete (a dumped change, which carries mappings, words and a document).</param>
 /// <param name="state">A state object passed from the index.</param>
 protected abstract void DeleteData(DumpedChange data, object state);
Beispiel #8
0
        /// <summary>
        /// Deletes data from the data storage.
        /// </summary>
        /// <param name="data">The data to delete: its mappings, words and document are removed from the respective files.</param>
        /// <param name="state">A state object passed from the index.</param>
        /// <remarks>Each of the three files (mappings, words, documents) is rewritten into a temporary
        /// file that omits the entries to delete; the temporary file is then copied over the original
        /// and removed. The entry count stored in each file header is updated with the number of
        /// entries actually written.</remarks>
        protected override void DeleteData(DumpedChange data, object state)
        {
            // Files are regenerated in a temp location and copied back
            string tempDocumentsFile = GetTempFile(documentsFile);
            string tempWordsFile = GetTempFile(wordsFile);
            string tempMappingsFile = GetTempFile(mappingsFile);

            // 1. Remove Mappings
            using(FileStream fsi = new FileStream(mappingsFile, FileMode.Open, FileAccess.Read, FileShare.None)) {
                int count = ReadCount(fsi);
                // The count is the 4 bytes just read; remember where it sits so that it can be patched later.
                // NOTE(review): this relies on WriteHeader placing the count at the same offset in the temp file.
                int countLocation = (int)fsi.Position - 4;
                int writeCount = 0;
                BinaryReader reader = new BinaryReader(fsi, Encoding.UTF8);
                using(FileStream fso = new FileStream(tempMappingsFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
                    BinaryWriter writer = new BinaryWriter(fso, Encoding.UTF8);
                    WriteHeader(writer);
                    DumpedWordMapping m;
                    for(int i = 0; i < count; i++) {
                        m = ReadDumpedWordMapping(reader);
                        // If m is not contained in data.Mappings, store it in the temp file
                        if(!Find(m, data.Mappings)) {
                            WriteDumpedWordMapping(writer, m);
                            writeCount++;
                        }
                    }
                    writer.Seek(countLocation, SeekOrigin.Begin);
                    writer.Write(writeCount);
                }
            }
            // Replace the file
            File.Copy(tempMappingsFile, mappingsFile, true);
            File.Delete(tempMappingsFile);

            // 2. Remove Words
            using(FileStream fsi = new FileStream(wordsFile, FileMode.Open, FileAccess.Read, FileShare.None)) {
                int count = ReadCount(fsi);
                int countLocation = (int)fsi.Position - 4;
                int writeCount = 0;
                BinaryReader reader = new BinaryReader(fsi, Encoding.UTF8);
                using(FileStream fso = new FileStream(tempWordsFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
                    BinaryWriter writer = new BinaryWriter(fso, Encoding.UTF8);
                    WriteHeader(writer);
                    DumpedWord w;
                    for(int i = 0; i < count; i++) {
                        w = ReadDumpedWord(reader);
                        // If w is not contained in data.Words, store it in the temp file
                        if(!Find(w, data.Words)) {
                            WriteDumpedWord(writer, w);
                            writeCount++;
                        }
                    }
                    writer.Seek(countLocation, SeekOrigin.Begin);
                    writer.Write(writeCount);
                }
            }
            // Replace the file
            File.Copy(tempWordsFile, wordsFile, true);
            File.Delete(tempWordsFile);

            // 3. Remove Document
            using(FileStream fsi = new FileStream(documentsFile, FileMode.Open, FileAccess.Read, FileShare.None)) {
                int count = ReadCount(fsi);
                int countLocation = (int)fsi.Position - 4;
                int writeCount = 0;
                BinaryReader reader = new BinaryReader(fsi, Encoding.UTF8);
                using(FileStream fso = new FileStream(tempDocumentsFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
                    BinaryWriter writer = new BinaryWriter(fso, Encoding.UTF8);
                    WriteHeader(writer);
                    DumpedDocument d;
                    for(int i = 0; i < count; i++) {
                        d = ReadDumpedDocument(reader);
                        // If d is not equal to data.Document (to be deleted), then copy it to the result file
                        if(!EqualDumpedDocument(d, data.Document)) {
                            WriteDumpedDocument(writer, d);
                            writeCount++;
                        }
                    }
                    writer.Seek(countLocation, SeekOrigin.Begin);
                    // Store the number of documents actually kept: assuming "count - 1" would corrupt
                    // the header when the document is absent from the file or present more than once.
                    writer.Write(writeCount);
                }
            }
            // Replace the file
            File.Copy(tempDocumentsFile, documentsFile, true);
            File.Delete(tempDocumentsFile);
        }
Beispiel #9
0
        /// <summary>
        /// Stores new data into the data storage.
        /// </summary>
        /// <param name="data">The data to store.</param>
        /// <param name="state">A state object passed from the index.</param>
        /// <returns>The storer result, carrying the document ID and the word IDs assigned here.</returns>
        /// <remarks>When saving a new document, the document ID in data.Mappings must be
        /// replaced with the correct document ID, generated by the concrete implementation of
        /// this method. data.Words should have IDs numbered from uint.MaxValue downwards.
        /// The method re-numbers the words appropriately.</remarks>
        protected override IndexStorerResult SaveData(DumpedChange data, object state)
        {
            IndexStorerResult result = new IndexStorerResult(null, null);

            // Step 1: append the document and bump the stored document count.
            using (FileStream documentStream = new FileStream(documentsFile, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
            {
                int storedDocuments = ReadCount(documentStream);
                BinaryWriter documentWriter = new BinaryWriter(documentStream, Encoding.UTF8);

                // Step back over the 4-byte count just read and overwrite it.
                documentStream.Seek(-4, SeekOrigin.Current);
                documentWriter.Write(storedDocuments + 1);
                documentWriter.Seek(0, SeekOrigin.End);

                data.Document.ID = firstFreeDocumentId;
                WriteDumpedDocument(documentWriter, data.Document);

                result.DocumentID = firstFreeDocumentId;
                firstFreeDocumentId++;
            }

            // Step 2: append the words, remembering which temporary ID maps to which final ID.
            Dictionary<uint, WordId> finalIdByTemporaryId = null;
            using (FileStream wordStream = new FileStream(wordsFile, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
            {
                int storedWords = ReadCount(wordStream);
                BinaryWriter wordWriter = new BinaryWriter(wordStream, Encoding.UTF8);

                wordStream.Seek(-4, SeekOrigin.Current);
                wordWriter.Write(storedWords + data.Words.Count);
                wordStream.Seek(0, SeekOrigin.End);

                finalIdByTemporaryId = new Dictionary<uint, WordId>(data.Words.Count);
                foreach (DumpedWord word in data.Words)
                {
                    // Key by the temporary ID before it is overwritten with the final one.
                    finalIdByTemporaryId.Add(word.ID, new WordId(word.Text, firstFreeWordId));
                    word.ID = firstFreeWordId;
                    WriteDumpedWord(wordWriter, word);
                    firstFreeWordId++;
                }
                result.WordIDs = new List<WordId>(finalIdByTemporaryId.Values);
            }

            // Step 3: append the mappings, rewritten with the final word and document IDs.
            using (FileStream mappingStream = new FileStream(mappingsFile, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
            {
                int storedMappings = ReadCount(mappingStream);
                BinaryWriter mappingWriter = new BinaryWriter(mappingStream, Encoding.UTF8);

                mappingStream.Seek(-4, SeekOrigin.Current);
                mappingWriter.Write(storedMappings + data.Mappings.Count);
                mappingStream.Seek(0, SeekOrigin.End);

                foreach (DumpedWordMapping mapping in data.Mappings)
                {
                    // Words are autonumbered from uint.MaxValue downwards by IndexBase so that
                    // IndexStorer can identify the DumpedWordMappings easily and
                    // fix the IDs with the ones actually stored
                    WordId finalWordId;
                    if (finalIdByTemporaryId.TryGetValue(mapping.WordID, out finalWordId))
                    {
                        mapping.WordID = finalWordId.ID;
                    }

                    DumpedWordMapping stored = new DumpedWordMapping(
                        mapping.WordID,
                        result.DocumentID.Value,
                        mapping.FirstCharIndex,
                        mapping.WordIndex,
                        mapping.Location);
                    WriteDumpedWordMapping(mappingWriter, stored);
                }
            }

            return result;
        }
Beispiel #10
0
 /// <summary>
 /// Raises the <see cref="IndexChanged" /> event, if anyone is subscribed to it.
 /// </summary>
 /// <param name="document">The affected document.</param>
 /// <param name="change">The change performed.</param>
 /// <param name="changeData">The dumped change data.</param>
 /// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
 /// <returns>The result found in the event arguments after the handlers ran, or <c>null</c> when there are no subscribers.</returns>
 protected IndexStorerResult OnIndexChange(IDocument document, IndexChangeType change, DumpedChange changeData, object state)
 {
     if (IndexChanged == null)
     {
         return null;
     }

     IndexChangedEventArgs eventArgs = new IndexChangedEventArgs(document, change, changeData, state);
     IndexChanged(this, eventArgs);

     // Handlers hand their outcome back through the event arguments.
     return eventArgs.Result;
 }
Beispiel #11
0
 /// <summary>
 /// Stores new data into the data storage.
 /// </summary>
 /// <param name="data">The data to store.</param>
 /// <param name="state">A state object passed by the index.</param>
 /// <returns>The storer result, if any.</returns>
 /// <remarks>When saving a new document, the document ID in data.Mappings must be
 /// replaced with the correct document ID, generated by the concrete implementation of
 /// this method. data.Words should have IDs numbered from uint.MaxValue downwards.
 /// The method re-numbers the words appropriately.</remarks>
 protected abstract IndexStorerResult SaveData(DumpedChange data, object state);
Beispiel #12
0
 /// <summary>
 /// Deletes data from the data storage.
 /// </summary>
 /// <param name="data">The data to delete (a dumped change, which carries mappings, words and a document).</param>
 /// <param name="state">A state object passed from the index.</param>
 protected abstract void DeleteData(DumpedChange data, object state);
 /// <summary>
 /// Raises the <see cref="IndexChanged" /> event, if anyone is subscribed to it.
 /// </summary>
 /// <param name="document">The affected document.</param>
 /// <param name="change">The change performed.</param>
 /// <param name="changeData">The dumped change data.</param>
 /// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
 /// <returns>The result found in the event arguments after the handlers ran, or <c>null</c> when there are no subscribers.</returns>
 protected IndexStorerResult OnIndexChange(IDocument document, IndexChangeType change, DumpedChange changeData, object state)
 {
     IndexStorerResult outcome = null;

     if (IndexChanged != null)
     {
         IndexChangedEventArgs eventArgs = new IndexChangedEventArgs(document, change, changeData, state);
         IndexChanged(this, eventArgs);
         // Handlers hand their outcome back through the event arguments.
         outcome = eventArgs.Result;
     }

     return outcome;
 }
        /// <summary>
        /// Stores a document in the index.
        /// </summary>
        /// <param name="document">The document.</param>
        /// <param name="keywords">The document keywords, if any, an empty array or <c>null</c> otherwise.</param>
        /// <param name="content">The content of the document.</param>
        /// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
        /// <returns>The number of indexed words (including duplicates) in the document content, title and keywords,
        /// or <c>0</c> when the <c>IndexChanged</c> handlers return no document ID.</returns>
        /// <remarks>Indexing the content of the document is <b>O(n)</b>,
        /// where <b>n</b> is the total number of words in the document.
        /// If the specified document was already in the index, all the old occurrences
        /// are deleted from the index.</remarks>
        /// <exception cref="ArgumentNullException">If <paramref name="document"/> or <paramref name="content"/> are <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">If the storer result contains no ID for a newly added word.</exception>
        public int StoreDocument(IDocument document, string[] keywords, string content, object state)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }
            if (keywords == null)
            {
                keywords = new string[0];
            }
            if (content == null)
            {
                throw new ArgumentNullException("content");
            }

            // Drop any previous occurrences of the document before indexing it again.
            lock (this) {
                DumpedChange removeChange = RemoveDocumentInternal(document);

                if (removeChange != null)
                {
                    OnIndexChange(document, IndexChangeType.DocumentRemoved, removeChange, state);
                }
            }

            keywords = Tools.CleanupKeywords(keywords);

            // When the IndexStorer handles the IndexChanged event and a document is added, the storer generates a new ID and returns it
            // via the event handler, then the in-memory index is updated (the document instance is shared across all words) - the final ID
            // is generated by the actual IndexStorer implementation (SaveData properly populates the Result field in the args)

            List<DumpedWord> dw = new List<DumpedWord>(content.Length / 5);
            List<DumpedWordMapping> dm = new List<DumpedWordMapping>(content.Length / 5);
            List<Word> newWords = new List<Word>(50);
            Word tempWord = null;
            DumpedWord tempDumpedWord = null;
            DumpedWordMapping mapping = null;

            int count = 0;
            // New words get temporary IDs counting down from uint.MaxValue; the storer replaces them later.
            uint sequentialWordId = uint.MaxValue;

            // Store content words
            WordInfo[] words = document.Tokenize(content);
            words = Tools.RemoveStopWords(words, stopWords);

            foreach (WordInfo info in words)
            {
                mapping = StoreWord(info.Text, document, info.FirstCharIndex, info.WordIndex, WordLocation.Content, out tempWord, out tempDumpedWord);
                dm.Add(mapping);
                RegisterNewWord(mapping, tempWord, tempDumpedWord, ref sequentialWordId, dw, newWords);
            }
            count += words.Length;

            // Store title words
            words = document.Tokenize(document.Title);
            words = Tools.RemoveStopWords(words, stopWords);

            foreach (WordInfo info in words)
            {
                mapping = StoreWord(info.Text, document, info.FirstCharIndex, info.WordIndex, WordLocation.Title, out tempWord, out tempDumpedWord);
                dm.Add(mapping);
                RegisterNewWord(mapping, tempWord, tempDumpedWord, ref sequentialWordId, dw, newWords);
            }
            count += words.Length;

            // Running character offset of each keyword (keyword lengths plus one separator each).
            ushort tempCount = 0;

            // Store keywords
            for (ushort i = 0; i < (ushort)keywords.Length; i++)
            {
                mapping = StoreWord(keywords[i], document, tempCount, i, WordLocation.Keywords, out tempWord, out tempDumpedWord);
                dm.Add(mapping);
                RegisterNewWord(mapping, tempWord, tempDumpedWord, ref sequentialWordId, dw, newWords);
                tempCount += (ushort)(1 + keywords[i].Length);
            }
            count += keywords.Length;

            IndexStorerResult result = OnIndexChange(document, IndexChangeType.DocumentAdded,
                                                     new DumpedChange(new DumpedDocument(document), dw, dm), state);

            if (result == null || !result.DocumentID.HasValue)
            {
                // HACK: result is null -> index is corrupted, silently return
                return 0;
            }

            // Update document ID
            document.ID = result.DocumentID.Value;

            // Update word IDs in newWords. The returned IDs are indexed by text once, so the update
            // is linear instead of scanning the whole result list for every new word.
            if (newWords.Count > 0)
            {
                Dictionary<string, WordId> idsByText = new Dictionary<string, WordId>(newWords.Count);
                foreach (WordId id in result.WordIDs)
                {
                    // Keep the first entry for each text, matching a first-match-wins linear scan.
                    if (id.Text != null && !idsByText.ContainsKey(id.Text))
                    {
                        idsByText.Add(id.Text, id);
                    }
                }

                foreach (Word word in newWords)
                {
                    WordId assigned;
                    if (word.Text == null || !idsByText.TryGetValue(word.Text, out assigned))
                    {
                        throw new InvalidOperationException("No ID for new word");
                    }
                    word.ID = assigned.ID;
                }
            }

            return count;
        }

        /// <summary>
        /// Gives a newly created word its temporary sequential ID and records it for dumping.
        /// Does nothing when either <paramref name="word"/> or <paramref name="dumpedWord"/> is <c>null</c>.
        /// </summary>
        /// <param name="mapping">The mapping just produced for the word; receives the temporary ID.</param>
        /// <param name="word">The in-memory word, or <c>null</c>.</param>
        /// <param name="dumpedWord">The dumped form of the word, or <c>null</c>.</param>
        /// <param name="sequentialWordId">The next temporary ID; decremented when the word is registered.</param>
        /// <param name="dumpedWords">The list collecting dumped words for the change.</param>
        /// <param name="newWords">The list collecting new in-memory words whose IDs are fixed up later.</param>
        private static void RegisterNewWord(DumpedWordMapping mapping, Word word, DumpedWord dumpedWord,
                                            ref uint sequentialWordId, List<DumpedWord> dumpedWords, List<Word> newWords)
        {
            if (dumpedWord == null || word == null)
            {
                return;
            }

            mapping.WordID = sequentialWordId;
            dumpedWord.ID = sequentialWordId;
            dumpedWords.Add(dumpedWord);
            word.ID = sequentialWordId;
            newWords.Add(word);
            sequentialWordId--;
        }