/// <summary>
/// Flushes and purges the in-memory words, serializes the document and word headers
/// to a temporary file, moves that file over the master list and finally writes the
/// index version file.
/// </summary>
/// <remarks>
/// Flushing, purging and serialization happen under <c>locker</c>; the move of the
/// temporary file and the version write happen after the lock has been released.
/// Exceptions are not swallowed here; they propagate to the caller.
/// </remarks>
public void WriteMasterList()
{
    Logging.Info("+WriteMasterList");

    // Path.GetTempFileName() creates the file on disk, so we own its cleanup from here on.
    string filename_temp = Path.GetTempFileName();

    try
    {
        Utilities.LockPerfTimer l1_clk = Utilities.LockPerfChecker.Start();
        lock (locker)
        {
            l1_clk.LockPerfTimerStop();

            FlushAllWords_LOCK();
            PurgeAllWords_LOCK();

            using (FileStream fs = File.Open(filename_temp, FileMode.Create))
            {
                Headers headers = new Headers();

                // First the documents
                headers.documents = new List<DocumentMapHeader>();
                foreach (var pair in fingerprint_to_document_ids)
                {
                    DocumentMapHeader header = new DocumentMapHeader
                    {
                        Fingerprint = pair.Key,
                        DocumentId = pair.Value
                    };
                    headers.documents.Add(header);
                }

                // Then the words
                headers.words = new List<WordMapHeader>();
                foreach (WordInWordIndex word_in_word_index in word_in_word_indexes)
                {
                    WordMapHeader header = new WordMapHeader
                    {
                        Word = word_in_word_index.Word,
                        WordId = word_in_word_index.WordId,
                        DocCount = word_in_word_index.DocumentCount
                    };
                    headers.words.Add(header);
                }

                Serializer.Serialize<Headers>(fs, headers);
            }
        }

        Logging.Info("-WriteMasterList");

        // Move the temp file over the library filename
        string filename_master_list = GetFilename_MasterList();
        Directory.CreateDirectory(Path.GetDirectoryName(filename_master_list));
        FileTools.MoveSafelyWithOverwriting(filename_temp, filename_master_list);

        // Write the version of the index
        File.WriteAllText(VersionFilename, INDEX_VERSION);
    }
    finally
    {
        // After a successful move the temp file no longer exists and this is a no-op.
        // If anything above threw, this stops temp files piling up in the temp folder.
        // Cleanup is best-effort: it must never mask the exception that is in flight.
        try
        {
            if (File.Exists(filename_temp))
            {
                File.Delete(filename_temp);
            }
        }
        catch (Exception ex)
        {
            Logging.Warn(ex, "Unable to delete temporary master list file '{0}'", filename_temp);
        }
    }
}
/// <summary>
/// Purges words from memory if there are too many of them or if they have been untouched/idle for too long.
/// </summary>
/// <remarks>
/// Loaded words are grouped into gangs by <c>GangStart(WordId)</c>. A gang's last-accessed
/// time is the most recent <c>last_accessed</c> of its loaded words, and gangs are visited
/// least-recently-used first. A gang is purged when more than the allowed number of gangs
/// would otherwise remain loaded, or when it has been idle longer than the allowed time.
/// </remarks>
private void PurgeAllWords_LOCK()
{
    const int MAXIMUM_GANGS_IN_MEMORY = 10;
    const int MAXIMUM_GANGS_IDLE_TIME_IN_MEMORY_IN_SECONDS = 30;

    Logging.Info("+PurgeAllWords_LOCK");

    // Materialize once: the query is deferred, and leaving it lazy would run the whole
    // group/sort pipeline twice (once for the count, once for the loop below).
    //
    // The gang is identified by the grouping key (its start index), not by the smallest
    // *loaded* WordId: if the first words of a gang are not loaded, anchoring the purge
    // at the smallest loaded id would shift the purge range into the following gang.
    var ordered_results =
        (from wiwi in word_in_word_indexes
         where wiwi.IsLoaded
         group wiwi by GangStart(wiwi.WordId) into g
         let last_accessed = g.Max(w => w.last_accessed)
         orderby last_accessed ascending
         select new { Gang = g.Key, LastAccessed = last_accessed }).ToList();

    int total_gangs = ordered_results.Count;
    Logging.Info("We have {0} loaded gangs", total_gangs);

    int total_purged = 0;
    foreach (var result in ordered_results)
    {
        // Is it a bad gang?
        bool too_many_gangs = total_gangs - total_purged > MAXIMUM_GANGS_IN_MEMORY;
        bool idle_too_long = DateTime.Now.Subtract(result.LastAccessed).TotalSeconds > MAXIMUM_GANGS_IDLE_TIME_IN_MEMORY_IN_SECONDS;

        if (too_many_gangs || idle_too_long)
        {
            for (int i = 0; i < GANG_SIZE; ++i)
            {
                // The last gang may be only partially populated.
                if (word_in_word_indexes.Count <= result.Gang + i)
                {
                    break;
                }

                word_in_word_indexes[result.Gang + i].Purge();
            }

            ++total_purged;
        }
    }

    Logging.Info("Purged {0} out of {1} gangs", total_purged, total_gangs);
    Logging.Info("-PurgeAllWords_LOCK");
}
/// <summary>
/// Ensures the document ids for <paramref name="wiwi"/> are loaded, by reading the gang
/// file that contains its word and populating every word entry stored in that file.
/// </summary>
/// <param name="wiwi">The word whose document ids are required.</param>
/// <remarks>
/// Never throws for load problems: any exception is logged and <paramref name="wiwi"/>
/// is given empty document id lists instead.
/// </remarks>
void LoadWord_LOCK(WordInWordIndex wiwi)
{
    // If the word is already loaded, nothing to do...
    if (wiwi.IsLoaded)
    {
        return;
    }

    try
    {
        string filename = Filename_GangList(wiwi.WordId);
        using (FileStream fs = File.OpenRead(filename))
        {
            bool gang_has_corrupted_word_counts = false;

            List<WordEntry> word_entrys = Serializer.Deserialize<List<WordEntry>>(fs);
            int gang_start = GangStart(wiwi.WordId);

            for (int i = 0; i < word_entrys.Count; ++i)
            {
                WordInWordIndex wiwi_just_loaded = word_in_word_indexes[gang_start + i];
                WordEntry word_entry = word_entrys[i];

                // Words are index keys, so compare them ordinally: a culture-sensitive
                // comparison can report distinct strings as equal and hide a mismatch.
                if (!String.Equals(wiwi_just_loaded.Word, word_entry.Word, StringComparison.Ordinal))
                {
                    throw new Exception("The ordering of the word index is corrupt: words don't match");
                }

                if (null != wiwi_just_loaded.DocumentIds)
                {
                    Logging.Warn("The ordering of the word index is corrupt: document_ids should be null");
                }

                bool corruption_detected = wiwi_just_loaded.SetDocumentIds(word_entry.DocumentIds, word_entry.DocumentIdsCount);
                if (corruption_detected)
                {
                    gang_has_corrupted_word_counts = true;
                }
            }

            // Report once per gang rather than once per word.
            if (gang_has_corrupted_word_counts)
            {
                Logging.Warn("The ordering of a word index in the gang is corrupt: doc counts don't match (the user probably exited before the gang was saved...)");
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "There was a problem loading the word document list for word {0}:{1}. Assuming it was empty.", wiwi.WordId, wiwi.Word);

        // Fall back to an empty word; the return value is irrelevant here.
        wiwi.SetDocumentIds(new List<int>(), new List<int>());
    }
}
/// <summary>
/// Reads the master list file and rebuilds the in-memory document maps
/// (fingerprint to document id and back) and the word list and word lookup from it.
/// </summary>
/// <remarks>
/// Runs under <c>locker</c>. Failures are logged as a warning and not rethrown; in that
/// case every map touched here is cleared so that the index really does start from
/// scratch rather than continuing with a half-loaded master list.
/// </remarks>
private void ReadMasterList()
{
    Logging.Info("+ReadMasterList");

    Utilities.LockPerfTimer l1_clk = Utilities.LockPerfChecker.Start();
    lock (locker)
    {
        l1_clk.LockPerfTimerStop();

        try
        {
            using (FileStream fs = File.OpenRead(GetFilename_MasterList()))
            {
                Headers headers = Serializer.Deserialize<Headers>(fs);

                // First the documents
                // NOTE(review): the null guards assume the serializer may hand back null
                // for a list that was empty when written - confirm against the serializer in use.
                if (null != headers.documents)
                {
                    foreach (var header in headers.documents)
                    {
                        fingerprint_to_document_ids[header.Fingerprint] = header.DocumentId;
                        document_id_to_fingerprints[header.DocumentId] = header.Fingerprint;
                    }
                }

                // Then the words
                if (null != headers.words)
                {
                    foreach (var header in headers.words)
                    {
                        WordInWordIndex wiwi = new WordInWordIndex(header.Word, header.WordId, header.DocCount);

                        // Sanity check that they are in the right order:
                        // a word's id must equal its position in the list.
                        if (wiwi.WordId != word_in_word_indexes.Count)
                        {
                            throw new Exception("The ordering of the word index is corrupt");
                        }

                        // Add to our maps
                        word_in_word_indexes.Add(wiwi);
                        word_in_word_index_lookups[wiwi.Word] = wiwi;
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Logging.Warn(ex, "Unable to load index master list, so starting from scratch");

            // The failure may have happened part-way through: drop whatever was loaded
            // so the documents and words cannot end up inconsistent with each other.
            fingerprint_to_document_ids.Clear();
            document_id_to_fingerprints.Clear();
            word_in_word_indexes.Clear();
            word_in_word_index_lookups.Clear();
        }
    }

    Logging.Info("-ReadMasterList");
}
/// <summary>
/// Walks every word in the word lookup and hands each one whose
/// <c>needs_flushing</c> flag is set to <c>FlushKeyword_LOCK</c>,
/// logging how many were flushed.
/// </summary>
private void FlushAllWords_LOCK()
{
    Logging.Info("Starting to flush out word document indices");

    int flushed_count = 0;
    foreach (WordInWordIndex candidate in word_in_word_index_lookups.Values)
    {
        // Clean words are skipped.
        if (!candidate.needs_flushing)
        {
            continue;
        }

        FlushKeyword_LOCK(candidate);
        flushed_count++;
    }

    Logging.Info("Flushed out {0} word document indices", flushed_count);
}
/// <summary>
/// Endless manual stress loop: repeatedly adds a batch of random document/word pairs
/// to an index at a fixed local path and writes the master list after every batch.
/// This method never returns.
/// </summary>
public static void Test()
{
    const int WORDS_PER_ITERATION = 1000;

    WordIndex wim = new WordIndex(@"C:\Temp\QiqqaTestIndex\", true);

    for (; ; )
    {
        Logging.Info("+Indexing {0} words", WORDS_PER_ITERATION);

        int words_remaining = WORDS_PER_ITERATION;
        while (words_remaining > 0)
        {
            // Draw the document first, then the word, so the random sequence is consumed in a fixed order.
            string random_document = "DOC" + RandomAugmented.Instance.NextIntExclusive(1000);
            string random_word = "WORD" + RandomAugmented.Instance.NextIntExclusive(200 * 1000);
            wim.AddDocumentWord(random_document, random_word);

            --words_remaining;
        }

        Logging.Info("-Indexing {0} words", WORDS_PER_ITERATION);

        wim.WriteMasterList();
    }
}
/// <summary>
/// Reads the first line of the version file and deletes the whole index when it
/// does not match <c>INDEX_VERSION</c>.
/// </summary>
/// <remarks>
/// A missing, empty or unreadable version file is treated as a version mismatch,
/// so the index is deleted in those cases too. Read errors are logged, not rethrown.
/// </remarks>
private void CheckIndexVersion()
{
    string version = null;

    try
    {
        if (File.Exists(VersionFilename))
        {
            string[] index_version_lines = File.ReadAllLines(VersionFilename);

            // An empty file yields no lines: leave the version as null instead of
            // relying on an out-of-range exception to reach the same outcome.
            if (index_version_lines.Length > 0)
            {
                version = index_version_lines[0];
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "There was a problem while trying to check the index version");
    }

    // The version is a machine-readable token, so compare it ordinally.
    if (!String.Equals(version, INDEX_VERSION, StringComparison.Ordinal))
    {
        Logging.Warn("This index is out of date (it's version is {0}), so deleting the index.", version);
        DeleteIndex();
    }
}
/// <summary>
/// Logs the index base path and then deletes that directory via
/// <c>DirectoryTools.DeleteDirectory</c>.
/// </summary>
private void DeleteIndex()
{
    var index_path = LIBRARY_INDEX_BASE_PATH;

    Logging.Info("Deleting the index at path '{0}'", index_path);

    // NOTE(review): the 'true' flag presumably requests a recursive delete - confirm against DirectoryTools.
    DirectoryTools.DeleteDirectory(index_path, true);
}