/// <summary>
/// Builds a small in-memory <c>IndexDatabase</c> (one search pattern mapped to two
/// index records) and passes it to <c>IndexDatabaseWriter.Persist</c>.
/// </summary>
/// <remarks>
/// The method makes no assertions; it only succeeds or fails on whether
/// constructing and persisting the database throws.
/// </remarks>
public void TestIndexDatabaseSerialization()
{
    // Two sample records, both filed under the same search pattern.
    IndexRecord firstRecord = new IndexRecord(@"C:\DummyFile.txt", new List<int>() { 1, 2, 3, 4, 5 });
    IndexRecord secondRecord = new IndexRecord(@"C:\DummyFile1.txt", new List<int>() { 1, 2, 3 });

    Dictionary<string, List<IndexRecord>> recordsByPattern = new Dictionary<string, List<IndexRecord>>()
    {
        { "Test", new List<IndexRecord>() { firstRecord, secondRecord } }
    };

    // The database is stamped with the current local time.
    IndexDatabase database = new IndexDatabase(recordsByPattern, DateTime.Now);

    // Serialize the index database.
    new IndexDatabaseWriter().Persist(database);
}
/// <summary>
/// Indexes the words of every text file in <paramref name="files"/> into
/// <c>_indexDatabaseEntries</c>, keyed by word.
/// </summary>
/// <remarks>
/// <para>
/// The list is sorted in place (case-insensitive ordinal) before processing.
/// The log file and any file rejected by <c>Utilities.IsTextFile</c> are skipped.
/// </para>
/// <para>
/// Any exception raised while indexing is logged and swallowed; entries added
/// before the failure are left in place. <c>LogWriter.Dispose()</c> is always
/// called on exit.
/// </para>
/// </remarks>
/// <param name="files">Paths of the candidate files; reordered by this call.</param>
private void IndexInternal(List<string> files)
{
    files.Sort(StringComparer.OrdinalIgnoreCase);

    try
    {
        foreach (string file in files)
        {
            // Skip the file used for logging.
            if (string.Equals(file, LogWriter.LogFileName, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            if (!Utilities.IsTextFile(file))
            {
                continue;
            }

            // Read the text in the text file.
            string fileText = Utilities.ReadTextFromFiles(file);

            // We are not going to filter out any words as of the current release.
            // This may result in large search times, but we are OK with it for now.
            List<string> wordsInFile = Utilities.ExtractWordsInText(fileText, null);

            // NOTE(review): if ExtractWordsInText can return the same word more than
            // once, this loop will add duplicate records for the file — confirm.
            foreach (string word in wordsInFile)
            {
                if (IsBareWhitespaceToken(word))
                {
                    continue;
                }

                // Each word is used as a search pattern against its own file so that
                // all of its positions can be recorded.
                // NOTE(review): the meaning of the two 'true' flags is defined by
                // KMPEngine.Find, which is not visible here.
                KMPEngine kmpEngine = new KMPEngine(fileText, word);
                List<int> occurences = kmpEngine.Find(true, true);

                // The word came from this file, so a match is expected; still, only
                // create a record when the search actually reported one.
                if (occurences.Count == 0)
                {
                    continue;
                }

                AppendIndexRecord(word, new IndexRecord(file, occurences), file);
            }
        }

        LogWriter.WriteToLogFile(string.Format("Created index database with {0} index entries", _indexDatabaseEntries.Count));
    }
    catch (Exception ex)
    {
        // Best effort: a failure is logged, not propagated to the caller.
        LogWriter.WriteToLogFile(string.Format("Exception ex:{0}", ex.ToString()));
    }
    finally
    {
        LogWriter.Dispose();
    }
}

/// <summary>
/// Returns true when <paramref name="word"/> is exactly one of the bare whitespace
/// tokens (space, CR, LF, tab, or the platform newline) that are never indexed.
/// </summary>
/// <remarks>
/// Uses ordinal equality: culture-sensitive comparison may ignore or re-weight
/// control characters, which would make this filter match unintended words.
/// </remarks>
private static bool IsBareWhitespaceToken(string word)
{
    return string.Equals(word, " ", StringComparison.Ordinal)
        || string.Equals(word, "\r", StringComparison.Ordinal)
        || string.Equals(word, "\n", StringComparison.Ordinal)
        || string.Equals(word, "\t", StringComparison.Ordinal)
        || string.Equals(word, Environment.NewLine, StringComparison.Ordinal);
}

/// <summary>
/// Files <paramref name="indexRecord"/> under <paramref name="word"/> in
/// <c>_indexDatabaseEntries</c>, creating the word's record list on first use,
/// and logs which of the two happened.
/// </summary>
/// <param name="word">The word acting as the dictionary key.</param>
/// <param name="indexRecord">The record to store for that word.</param>
/// <param name="file">Path of the file the record belongs to; used only in the log message.</param>
private void AppendIndexRecord(string word, IndexRecord indexRecord, string file)
{
    List<IndexRecord> indexRecords;

    // Single lookup instead of ContainsKey followed by the indexer.
    if (_indexDatabaseEntries.TryGetValue(word, out indexRecords))
    {
        indexRecords.Add(indexRecord);
        LogWriter.WriteToLogFile(string.Format("Appended Index record for file {0} containing word {1}", file, word));
    }
    else
    {
        indexRecords = new List<IndexRecord>();
        indexRecords.Add(indexRecord);
        _indexDatabaseEntries.Add(word, indexRecords);
        LogWriter.WriteToLogFile(string.Format("Created first Index record for file {0} containing word {1}", file, word));
    }
}