/// <summary>
/// Helper for CreateIndex: reads and indexes the given files in parallel.
/// For each file name the method:
/// 1. reads the file into a string,
/// 2. turns the text into an IRDocument via GetNewDoc,
/// 3. adds the IRDocument to the index through the shared writer.
/// Files for which GetNewDoc returns null are reported on the console and skipped.
/// As a side effect, maxResults is set to the number of documents that were processed.
/// </summary>
/// <param name="fileNames">Names of the files to read.</param>
/// <returns>A new IRCollection holding the processed documents, in the same order as fileNames.</returns>
private IRCollection ReadAndProcessFiles(List<string> fileNames)
{
    // List<T> is not safe for concurrent writes, so every worker writes only
    // to its own slot of a pre-sized array. Unlike an unordered bag, this
    // keeps the documents in input order regardless of thread scheduling.
    IRDocument[] docArray = new IRDocument[fileNames.Count];

    Parallel.For(0, fileNames.Count, i =>
    {
        string fn = fileNames[i];
        string docText = FileHandling.ReadTextFile(fn);
        IRDocument doc = GetNewDoc(docText);

        if (doc != null)
        {
            docArray[i] = doc;
            // NOTE(review): writer is shared by all worker threads; this
            // relies on it being safe for concurrent use - confirm.
            doc.AddToIndex(writer);
        }
        else
        {
            Console.WriteLine("Error with file: " + fn);
        }
    });

    // Slots of files that failed to produce a document are still null; drop them.
    List<IRDocument> docs = docArray.Where(d => d != null).ToList();

    // Add documents to the collection object and set maxResults.
    IRCollection collection = new IRCollection();
    collection.AddDocs(docs);
    maxResults = docs.Count;

    return collection;
}
/// <summary>
/// Builds the index for the files found under collectionPath and stores the
/// elapsed time of the whole operation, in seconds, in indexTime.
/// </summary>
/// <param name="collectionPath">Path handed to FileHandling.GetFileNames to list the collection's files.</param>
/// <param name="indexPath">Path handed to InitIndex to initialise the index.</param>
/// <returns>The number of documents in the built collection (myCollection.Length()).</returns>
public int CreateIndex(string collectionPath, string indexPath)
{
    // Stopwatch is a monotonic timer, unaffected by clock or DST adjustments.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    // Get all of the file names in the collection path.
    // NOTE(review): the meaning of the 'false' flag is defined by FileHandling.GetFileNames.
    List<string> filenames = FileHandling.GetFileNames(collectionPath, false);

    // Initialise the index.
    InitIndex(indexPath);
    try
    {
        // Build the index; this call processes the files in parallel.
        myCollection = ReadAndProcessFiles(filenames);
    }
    finally
    {
        // Always close the index, even when indexing fails part-way.
        CleanUpIndex();
    }

    timer.Stop();

    // TotalSeconds covers the whole duration; TimeSpan.Seconds is only the
    // 0-59 seconds component and drops whole minutes on long runs.
    indexTime = (float)timer.Elapsed.TotalSeconds;

    return myCollection.Length();
}
/// <summary>
/// Builds an IRCollection from the current search results (searchResults)
/// and stores it in resultsCollection. This is used to display the search results.
/// </summary>
/// <returns>The number of results in the built collection.</returns>
public int BuildResults()
{
    CreateSearcher();

    IRCollection resultDocs;
    try
    {
        resultDocs = new IRCollection(myCollection, searcher, searchResults);
    }
    finally
    {
        // Release the searcher even if building the result collection throws.
        CleanUpSearcher();
    }

    resultsCollection = resultDocs;
    return resultDocs.Length();
}
/// <summary>
/// Builds an IRCollection of search results from an original IRCollection
/// and a set of search hits. Each returned hit is resolved to its stored
/// "docID" field and matched against the documents of origCollection.
/// The matched IRDocument instances are the same objects held by
/// origCollection; their Rank and Score are overwritten here.
/// </summary>
/// <param name="origCollection">Collection whose documents are looked up by docID.</param>
/// <param name="searcher">Searcher used to load the stored document of each hit.</param>
/// <param name="results">Hits to convert; only the entries in results.ScoreDocs are used.</param>
public IRCollection(IRCollection origCollection, IndexSearcher searcher, TopDocs results)
{
    // Iterate over the hits actually returned. TotalHits counts every match
    // and can exceed ScoreDocs.Length, which would index past the array.
    var hits = results.ScoreDocs;
    List<IRDocument> resultCollection = new List<IRDocument>(hits.Length);

    for (int i = 0; i < hits.Length; i++)
    {
        Document doc = searcher.Doc(hits[i].Doc);
        string docID = doc.Get("docID");
        IRDocument newDoc = origCollection.collectionDocs.Find(x => x.GetDocID() == docID);

        if (newDoc == null)
        {
            // The hit has no counterpart in the original collection; skip it
            // rather than fail with a NullReferenceException.
            continue;
        }

        newDoc.Rank = i + 1; // ranks are 1-based positions in the hit list
        newDoc.Score = hits[i].Score;
        resultCollection.Add(newDoc);
    }

    // Assigned once, after the loop, so the list is set even for zero hits.
    collectionDocs = resultCollection;
}