// Smoke test: builds a small IndexDatabase in memory and hands it to
// IndexDatabaseWriter.Persist. There are no assertions, so the test passes
// as long as persisting does not throw.
public void TestIndexDatabaseSerialization()
        {
            // two index records, each pairing a file path with a list of positions
            var firstRecord = new IndexRecord(@"C:\DummyFile.txt", new List<int> { 1, 2, 3, 4, 5 });
            var secondRecord = new IndexRecord(@"C:\DummyFile1.txt", new List<int> { 1, 2, 3 });

            // both records are registered under the single search pattern "Test"
            var patternToRecords = new Dictionary<string, List<IndexRecord>>
            {
                { "Test", new List<IndexRecord> { firstRecord, secondRecord } }
            };

            var database = new IndexDatabase(patternToRecords, DateTime.Now);

            // write the index database out through the writer
            var databaseWriter = new IndexDatabaseWriter();
            databaseWriter.Persist(database);
        }
Пример #2
0
        /// <summary>
        /// Builds the in-memory index for the given files. For every distinct word found in
        /// each text file, the positions reported by <c>KMPEngine.Find</c> are wrapped in an
        /// <c>IndexRecord</c> and stored in <c>_indexDatabaseEntries</c> under that word.
        /// </summary>
        /// <param name="files">
        /// Paths of the candidate files. The list is sorted in place (ordinal, ignoring case)
        /// before processing. The log file and files rejected by
        /// <c>Utilities.IsTextFile</c> are skipped.
        /// </param>
        /// <remarks>
        /// Any exception raised while indexing is written to the log and not rethrown, so
        /// indexing stops at the first failure. <c>LogWriter.Dispose()</c> is always called
        /// when processing ends.
        /// </remarks>
        private void IndexInternal(List<string> files)
        {
            files.Sort(StringComparer.OrdinalIgnoreCase);

            try
            {
                foreach (string file in files)
                {
                    //skip the file used for logging
                    if (string.Equals(file, LogWriter.LogFileName, StringComparison.OrdinalIgnoreCase))
                        continue;

                    if (!Utilities.IsTextFile(file))
                        continue;

                    //read the text in the text file
                    string fileText = Utilities.ReadTextFromFiles(file);

                    //we are not going to filter out any words as of the current release
                    //this may result in large search times, but we are OK with it for now.
                    List<string> wordsInFile = Utilities.ExtractWordsInText(fileText, null);

                    //a single search already returns every position of a word in this file,
                    //so a word that appears several times in the list must be processed only
                    //once; otherwise the same record would be appended again and again.
                    HashSet<string> wordsAlreadyIndexed = new HashSet<string>(StringComparer.Ordinal);

                    //send the words in the file to the file search engine as a pattern.
                    //since we need to identify all the possible positions of the current word
                    //we directly invoke the KMPEngine functionality on the current file.
                    foreach (string word in wordsInFile)
                    {
                        //a null or empty word is neither a usable search pattern nor a usable key
                        if (string.IsNullOrEmpty(word) || IsWhitespaceSeparatorToken(word))
                            continue;

                        //Add returns false when the word was already handled for this file
                        if (!wordsAlreadyIndexed.Add(word))
                            continue;

                        KMPEngine kmpEngine = new KMPEngine(fileText, word);

                        List<int> occurences = kmpEngine.Find(true, true);

                        //the word came from this file, so at least one occurence is expected;
                        //still, a record is only added when the search actually found some.
                        if (occurences.Count == 0)
                            continue;

                        IndexRecord indexRecord = new IndexRecord(file, occurences);

                        //single lookup instead of ContainsKey followed by the indexer
                        List<IndexRecord> recordsForWord;
                        if (_indexDatabaseEntries.TryGetValue(word, out recordsForWord))
                        {
                            recordsForWord.Add(indexRecord);
                            LogWriter.WriteToLogFile(string.Format("Appended Index record for file {0} containing word {1}", file, word));
                        }
                        else
                        {
                            recordsForWord = new List<IndexRecord>();
                            recordsForWord.Add(indexRecord);
                            _indexDatabaseEntries.Add(word, recordsForWord);
                            LogWriter.WriteToLogFile(string.Format("Created  first Index record for file {0} containing word {1}", file, word));
                        }
                    }
                }

                LogWriter.WriteToLogFile(string.Format("Created index database with {0} index entries", _indexDatabaseEntries.Count));
            }
            catch (Exception ex)
            {
                //best effort: the failure is logged and indexing ends without rethrowing
                LogWriter.WriteToLogFile(string.Format("Exception ex:{0}", ex.ToString()));
            }
            finally
            {
                LogWriter.Dispose();
            }
        }

        /// <summary>
        /// Returns true when <paramref name="word"/> is exactly one of the whitespace tokens
        /// that must not be indexed: a space, carriage return, line feed, tab, or
        /// <c>Environment.NewLine</c>. The comparison is ordinal so the result does not
        /// depend on the current culture.
        /// </summary>
        private static bool IsWhitespaceSeparatorToken(string word)
        {
            return string.Equals(word, " ", StringComparison.Ordinal)
                || string.Equals(word, "\r", StringComparison.Ordinal)
                || string.Equals(word, "\n", StringComparison.Ordinal)
                || string.Equals(word, "\t", StringComparison.Ordinal)
                || string.Equals(word, Environment.NewLine, StringComparison.Ordinal);
        }