/// <summary>
/// Reacts to <see cref="IInMemoryIndex.IndexChanged" /> notifications by applying the
/// reported change to the underlying data store.
/// </summary>
/// <param name="sender">The object that raised the event (not used).</param>
/// <param name="e">
/// The event arguments describing the change. When a document is added and the data is
/// not flagged as corrupted, <c>e.Result</c> receives the value returned by <c>SaveData</c>.
/// </param>
/// <exception cref="NotSupportedException">If <c>e.Change</c> is not one of the handled change types.</exception>
protected void IndexChangedHandler(object sender, IndexChangedEventArgs e)
{
    lock (this)
    {
        // Once disposed, every notification is ignored.
        if (!disposed)
        {
            IndexChangeType kind = e.Change;

            if (kind == IndexChangeType.IndexCleared)
            {
                InitDataStore(e.State);
            }
            else if (kind == IndexChangeType.DocumentAdded)
            {
                // Additions are skipped while the data is flagged as corrupted.
                if (!dataCorrupted)
                {
                    e.Result = SaveData(e.ChangeData, e.State);
                }
            }
            else if (kind == IndexChangeType.DocumentRemoved)
            {
                // Removals are skipped while the data is flagged as corrupted.
                if (!dataCorrupted)
                {
                    DeleteData(e.ChangeData, e.State);
                }
            }
            else
            {
                throw new NotSupportedException("Invalid Change Type");
            }
        }
    }
}
/// <summary>
/// Creates a new <see cref="IndexChangedEventArgs" /> that also carries a storer result.
/// </summary>
/// <param name="document">The affected document.</param>
/// <param name="change">The change performed.</param>
/// <param name="changeData">The dumped change data.</param>
/// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
/// <param name="result">The storer result, if any.</param>
/// <remarks>
/// All arguments except <paramref name="result" /> are forwarded to the four-argument
/// constructor; <paramref name="result" /> is then assigned to <c>Result</c>.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// If <paramref name="change" /> is not <see cref="IndexChangeType.IndexCleared" /> and
/// <paramref name="document" /> or <paramref name="changeData" /> are <c>null</c>
/// (presumably raised by the chained constructor, which is not visible here).
/// </exception>
public IndexChangedEventArgs(
    IDocument document,
    IndexChangeType change,
    DumpedChange changeData,
    object state,
    IndexStorerResult result)
    : this(document, change, changeData, state)
{
    this.Result = result;
}
/// <summary>
/// Creates a new <see cref="IndexChangedEventArgs" /> that also carries a storer result.
/// </summary>
/// <param name="document">The affected document.</param>
/// <param name="change">The change performed.</param>
/// <param name="changeData">The dumped change data.</param>
/// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
/// <param name="result">The storer result, if any.</param>
/// <remarks>
/// All arguments except <paramref name="result" /> are forwarded to the four-argument
/// constructor; <paramref name="result" /> is then stored in the <c>result</c> field.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// If <paramref name="change" /> is not <see cref="IndexChangeType.IndexCleared" /> and
/// <paramref name="document" /> or <paramref name="changeData" /> are <c>null</c>
/// (presumably raised by the chained constructor, which is not visible here).
/// </exception>
public IndexChangedEventArgs(
    IDocument document,
    IndexChangeType change,
    DumpedChange changeData,
    object state,
    IndexStorerResult result)
    : this(document, change, changeData, state)
{
    this.result = result;
}
/// <summary>
/// Appends a dumped change (document, words and word mappings) to the data files.
/// </summary>
/// <param name="data">The data to store.</param>
/// <param name="state">A state object passed from the index (not used by this implementation).</param>
/// <returns>
/// A result carrying the ID assigned to the document and the list of IDs assigned to the new words.
/// </returns>
/// <remarks>
/// When saving a new document, the document ID in data.Mappings must be replaced with the
/// correct document ID, generated by the concrete implementation of this method.
/// data.Words should have IDs numbered from uint.MaxValue downwards; this method re-numbers
/// the words starting from the first free word ID. The IDs of <c>data.Document</c>, of every
/// entry of <c>data.Words</c> and of the matching entries of <c>data.Mappings</c> are updated in place.
/// </remarks>
protected override IndexStorerResult SaveData(DumpedChange data, object state)
{
    IndexStorerResult outcome = new IndexStorerResult(null, null);

    // Step 1: documents file
    using (FileStream docStream = new FileStream(documentsFile, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
    {
        int storedDocuments = ReadCount(docStream);

        // Step back over the count (presumably the 4 bytes ReadCount just consumed - confirm),
        // overwrite it with the new total, then jump to the end to append the document
        BinaryWriter docWriter = new BinaryWriter(docStream, Encoding.UTF8);
        docStream.Seek(-4, SeekOrigin.Current);
        docWriter.Write(storedDocuments + 1);
        docWriter.Seek(0, SeekOrigin.End);

        data.Document.ID = firstFreeDocumentId;
        WriteDumpedDocument(docWriter, data.Document);

        // Report the ID just used and advance the counter for the next document
        outcome.DocumentID = firstFreeDocumentId++;
    }

    // Step 2: words file
    Dictionary<uint, WordId> assignedWordIds = null;
    using (FileStream wordStream = new FileStream(wordsFile, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
    {
        int storedWords = ReadCount(wordStream);

        // Same header update as above, then append the words
        BinaryWriter wordWriter = new BinaryWriter(wordStream, Encoding.UTF8);
        wordStream.Seek(-4, SeekOrigin.Current);
        wordWriter.Write(storedWords + data.Words.Count);
        wordStream.Seek(0, SeekOrigin.End);

        // Maps each temporary word ID to the definitive one
        assignedWordIds = new Dictionary<uint, WordId>(data.Words.Count);
        foreach (DumpedWord word in data.Words)
        {
            assignedWordIds.Add(word.ID, new WordId(word.Text, firstFreeWordId));
            word.ID = firstFreeWordId;
            WriteDumpedWord(wordWriter, word);
            firstFreeWordId++;
        }

        outcome.WordIDs = new List<WordId>(assignedWordIds.Values);
    }

    // Step 3: mappings file
    using (FileStream mappingStream = new FileStream(mappingsFile, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
    {
        int storedMappings = ReadCount(mappingStream);

        // Same header update as above, then append the mappings
        BinaryWriter mappingWriter = new BinaryWriter(mappingStream, Encoding.UTF8);
        mappingStream.Seek(-4, SeekOrigin.Current);
        mappingWriter.Write(storedMappings + data.Mappings.Count);
        mappingStream.Seek(0, SeekOrigin.End);

        foreach (DumpedWordMapping mapping in data.Mappings)
        {
            // Words are autonumbered from uint.MaxValue downwards by IndexBase so that
            // IndexStorer can identify the DumpedWordMappings easily and
            // fix the IDs with the ones actually stored
            WordId definitiveId;
            if (assignedWordIds != null && assignedWordIds.TryGetValue(mapping.WordID, out definitiveId))
            {
                mapping.WordID = definitiveId.ID;
            }

            // The stored mapping always carries the document ID assigned in step 1
            DumpedWordMapping toStore = new DumpedWordMapping(
                mapping.WordID,
                outcome.DocumentID.Value,
                mapping.FirstCharIndex,
                mapping.WordIndex,
                mapping.Location);
            WriteDumpedWordMapping(mappingWriter, toStore);
        }
    }

    return outcome;
}
/// <summary>
/// Stores a document in the index.
/// </summary>
/// <param name="document">The document.</param>
/// <param name="keywords">The document keywords, if any, an empty array or <c>null</c> otherwise.</param>
/// <param name="content">The content of the document.</param>
/// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
/// <returns>
/// The number of words stored for the document content, title and keywords (including duplicates,
/// after stop-word removal for content and title), or <c>0</c> if the change handler did not
/// return a document ID.
/// </returns>
/// <remarks>
/// If the specified document was already in the index, all the old occurrences
/// are deleted from the index before the new ones are stored.
/// New words receive temporary IDs counted down from <c>uint.MaxValue</c>; once the
/// <c>DocumentAdded</c> change has been handled, the document ID and the IDs of the new
/// words are replaced with the ones returned in the result.
/// </remarks>
/// <exception cref="ArgumentNullException">If <paramref name="document"/> or <paramref name="content"/> are <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">If the result contains no ID for one of the new words.</exception>
public int StoreDocument(IDocument document, string[] keywords, string content, object state)
{
    if (document == null)
    {
        throw new ArgumentNullException("document");
    }
    if (keywords == null)
    {
        keywords = new string[0];
    }
    if (content == null)
    {
        throw new ArgumentNullException("content");
    }

    // Drop any previous version of the document and notify listeners about the removal
    lock (this)
    {
        DumpedChange removeChange = RemoveDocumentInternal(document);
        if (removeChange != null)
        {
            OnIndexChange(document, IndexChangeType.DocumentRemoved, removeChange, state);
        }
    }

    keywords = Tools.CleanupKeywords(keywords);

    // When the IndexStorer handles the IndexChanged event and a document is added, the storer generates a new ID and returns it
    // via the event handler, then the in-memory index is updated (the document instance is shared across all words) - the final ID
    // is generated by the actual IndexStorer implementation (SaveData properly populates the Result field in the args)

    List<DumpedWord> dw = new List<DumpedWord>(content.Length / 5);
    List<DumpedWordMapping> dm = new List<DumpedWordMapping>(content.Length / 5);
    Word tempWord = null;
    List<Word> newWords = new List<Word>(50);
    DumpedWord tempDumpedWord = null;

    int count = 0;

    // Temporary IDs for words that are new to the index, counted downwards
    uint sequentialWordId = uint.MaxValue;

    // Store content words
    WordInfo[] words = document.Tokenize(content);
    words = Tools.RemoveStopWords(words, stopWords);

    foreach (WordInfo info in words)
    {
        dm.Add(StoreWord(info.Text, document, info.FirstCharIndex, info.WordIndex, WordLocation.Content, out tempWord, out tempDumpedWord));
        // Both out values are non-null only when the word must be tracked as new
        if (tempDumpedWord != null && tempWord != null)
        {
            dm[dm.Count - 1].WordID = sequentialWordId;
            tempDumpedWord.ID = sequentialWordId;
            dw.Add(tempDumpedWord);
            tempWord.ID = sequentialWordId;
            newWords.Add(tempWord);
            sequentialWordId--;
        }
    }
    count += words.Length;

    // Store title words
    words = document.Tokenize(document.Title);
    words = Tools.RemoveStopWords(words, stopWords);

    foreach (WordInfo info in words)
    {
        dm.Add(StoreWord(info.Text, document, info.FirstCharIndex, info.WordIndex, WordLocation.Title, out tempWord, out tempDumpedWord));
        if (tempDumpedWord != null && tempWord != null)
        {
            dm[dm.Count - 1].WordID = sequentialWordId;
            tempDumpedWord.ID = sequentialWordId;
            dw.Add(tempDumpedWord);
            tempWord.ID = sequentialWordId;
            newWords.Add(tempWord);
            sequentialWordId--;
        }
    }
    count += words.Length;

    // Running character offset of the current keyword (each keyword length plus one)
    ushort tempCount = 0;

    // Store keywords
    for (ushort i = 0; i < (ushort)keywords.Length; i++)
    {
        dm.Add(StoreWord(keywords[i], document, tempCount, i, WordLocation.Keywords, out tempWord, out tempDumpedWord));
        if (tempDumpedWord != null && tempWord != null)
        {
            dm[dm.Count - 1].WordID = sequentialWordId;
            tempDumpedWord.ID = sequentialWordId;
            dw.Add(tempDumpedWord);
            tempWord.ID = sequentialWordId;
            newWords.Add(tempWord);
            sequentialWordId--;
        }
        tempCount += (ushort)(1 + keywords[i].Length);
    }
    count += keywords.Length;

    IndexStorerResult result = OnIndexChange(document, IndexChangeType.DocumentAdded,
        new DumpedChange(new DumpedDocument(document), dw, dm), state);

    // Update document ID
    if (result != null && result.DocumentID.HasValue)
    {
        document.ID = result.DocumentID.Value;
    }
    else
    {
        // HACK: result is null -> index is corrupted, silently return
        return 0;
    }

    // Update word IDs in newWords.
    // The returned IDs are indexed by text once, instead of being rescanned for every new word.
    if (newWords.Count > 0)
    {
        Dictionary<string, WordId> idsByText = new Dictionary<string, WordId>();
        foreach (WordId id in result.WordIDs)
        {
            // Keep the first entry for a given text, as a linear scan would
            if (id.Text != null && !idsByText.ContainsKey(id.Text))
            {
                idsByText.Add(id.Text, id);
            }
        }

        foreach (Word word in newWords)
        {
            WordId match;
            if (word.Text == null || !idsByText.TryGetValue(word.Text, out match))
            {
                throw new InvalidOperationException("No ID for new word");
            }
            word.ID = match.ID;
        }
    }

    return count;
}