/// Helper function for CreateIndex.
/// This function uses threading to improve indexing speed.
/// For each filename the method:
/// 1. Reads the file and stores its contents in a string.
/// 2. Turns the text into an IRDocument object and adds it to the collection.
/// 3. Adds the IRDocument to the index.
private IRCollection ReadAndProcessFiles(List<string> fileNames)
{
    IRCollection collection = new IRCollection();

    // Lists are not thread safe, so:
    // 1. create a ConcurrentBag<IRDocument>
    // 2. add docs to it from inside the parallel loop
    // 3. after all docs are added, convert the bag to a list
    var conDocs = new ConcurrentBag<IRDocument>();

    Parallel.ForEach(fileNames, fn =>
    {
        string docText = FileHandling.ReadTextFile(fn);
        IRDocument doc = GetNewDoc(docText);
        if (doc != null)
        {
            conDocs.Add(doc);
            doc.AddToIndex(writer);
        }
        else
        {
            Console.WriteLine("Error with file: " + fn);
        }
    });

    // add documents to the collection object and set maxResults
    collection.AddDocs(conDocs.ToList());
    maxResults = conDocs.Count;

    return collection;
}
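// A minimal, self-contained sketch of the same Parallel.ForEach + ConcurrentBag
// pattern, using plain strings instead of IRDocument so the thread-safety point
// is easier to see. File.ReadAllText here is only an assumed stand-in for
// FileHandling.ReadTextFile + GetNewDoc; the method name is hypothetical.
private static List<string> ParallelReadSketch(List<string> fileNames)
{
    // ConcurrentBag<T> can be Add()-ed to from many threads at once;
    // a plain List<T> would need a lock around every Add.
    var results = new ConcurrentBag<string>();

    Parallel.ForEach(fileNames, fn =>
    {
        string text = System.IO.File.ReadAllText(fn);
        if (!string.IsNullOrEmpty(text))
        {
            results.Add(text);
        }
        else
        {
            Console.WriteLine("Error with file: " + fn);
        }
    });

    // Convert to an ordinary list only after all parallel work has finished.
    return results.ToList();
}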
// Parses a standard information needs file
public static Dictionary<string, string> GetInfoNeeds(string fileName)
{
    Dictionary<string, string> iNeeds = new Dictionary<string, string>();

    // open the file and dump its contents into a string
    string document = FileHandling.ReadTextFile(fileName);

    // split the string on the ".I" and ".D" delimiters
    string[] delims = { ".I", ".D" };
    string[] docParts = document.Split(delims, StringSplitOptions.RemoveEmptyEntries);

    if (docParts.Length >= 2)
    {
        // build the dictionary from the string array:
        // parts alternate between an ID and its description,
        // so step through them two at a time
        for (int i = 0; i + 1 < docParts.Length; i += 2)
        {
            iNeeds.Add(docParts[i].Trim(), docParts[i + 1].Trim());
        }
        return iNeeds;
    }
    else
    {
        return null;
    }
}
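// Hypothetical usage sketch. The file name "queries.txt" and the sample layout
// below are assumptions, not part of the original code; the file is expected to
// alternate ".I <id>" and ".D <description>" blocks, e.g.:
//
//   .I 1
//   .D What articles exist which deal with parallel indexing?
//
public static void PrintInfoNeeds()
{
    Dictionary<string, string> needs = GetInfoNeeds("queries.txt");
    if (needs == null)
    {
        Console.WriteLine("No information needs could be parsed.");
        return;
    }

    foreach (KeyValuePair<string, string> pair in needs)
    {
        Console.WriteLine($"Query {pair.Key}: {pair.Value}");
    }
}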