Exemplo n.º 1
0
        /// <summary>
        /// Helper function for CreateIndex.
        /// Processes the given files in parallel to improve indexing speed.
        /// For each file name the method:
        /// 1. Reads the file into a string.
        /// 2. Turns the text into an IRDocument via GetNewDoc.
        /// 3. Adds the IRDocument to the index through writer.
        /// Files for which GetNewDoc returns null are reported on the console and skipped.
        /// Also sets maxResults to the number of documents that were created.
        /// </summary>
        /// <param name="fileNames">Names of the files to read and index.</param>
        /// <returns>
        /// A collection holding every successfully created document. The documents
        /// are gathered from parallel workers through an unordered bag, so their
        /// order is not guaranteed to match the order of <paramref name="fileNames"/>.
        /// </returns>
        private IRCollection ReadAndProcessFiles(List <string> fileNames)
        {
            IRCollection collection = new IRCollection();

            // List<T> is not thread safe, so the parallel workers collect their
            // documents in a ConcurrentBag; it is copied into a list once all
            // workers have finished.
            var conDocs = new ConcurrentBag <IRDocument>();

            Parallel.ForEach(fileNames, fn =>
            {
                string docText = FileHandling.ReadTextFile(fn);
                IRDocument doc = GetNewDoc(docText);
                if (doc != null)
                {
                    conDocs.Add(doc);

                    // NOTE(review): writer is used by every worker at once; this assumes
                    // AddToIndex is safe to call concurrently — confirm.
                    doc.AddToIndex(writer);
                }
                else
                {
                    Console.WriteLine("Error with file: " + fn);
                }
            });

            // add documents to collection object and set maxResults
            collection.AddDocs(conDocs.ToList());
            maxResults = conDocs.Count;

            return(collection);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Parses a standard information needs file into a dictionary.
        /// The file text is split on the ".I" and ".D" delimiters and the resulting
        /// fragments are consumed in pairs: the first (trimmed) fragment of each
        /// pair becomes the key and the second (trimmed) fragment the value.
        /// A trailing fragment without a partner is ignored.
        /// </summary>
        /// <param name="fileName">Name of the information needs file to read.</param>
        /// <returns>
        /// The dictionary built from the fragment pairs, or null when the file
        /// yields two or fewer fragments.
        /// </returns>
        /// <exception cref="ArgumentException">Two pairs produce the same trimmed key.</exception>
        public static Dictionary <string, string> GetInfoNeeds(string fileName)
        {
            // open file and dump into a string
            string document = FileHandling.ReadTextFile(fileName);

            // split string based on ".I" and ".D" delimiters
            string[] delims   = { ".I", ".D" };
            string[] docParts = document.Split(delims, StringSplitOptions.RemoveEmptyEntries);

            // NOTE(review): a file with exactly one pair (two fragments) is rejected
            // here as well — confirm that this threshold is intended.
            if (docParts.Length <= 2)
            {
                return(null);
            }

            Dictionary <string, string> iNeeds = new Dictionary <string, string>();

            // Build the dictionary from the string array two fragments at a time.
            // The bound checks i + 1 so that an odd fragment count (a trailing key
            // with no value) is skipped instead of indexing past the end of the array.
            for (int i = 0; i + 1 < docParts.Length; i += 2)
            {
                iNeeds.Add(docParts[i].Trim(), docParts[i + 1].Trim());
            }

            return(iNeeds);
        }