示例#1
0
        /// <summary>
        /// Helper function for CreateIndex.
        /// Processes the files in parallel to improve indexing speed. For each file name it:
        /// 1. reads the file into a string,
        /// 2. turns the text into an IRDocument via GetNewDoc,
        /// 3. adds the IRDocument to the index through <c>writer</c> and records it for the
        ///    returned collection.
        /// Files for which GetNewDoc returns null are reported on the console and skipped.
        /// As a side effect, <c>maxResults</c> is set to the number of documents created.
        /// </summary>
        /// <param name="fileNames">Names of the files to read and index.</param>
        /// <returns>A new IRCollection holding every document that was created.</returns>
        private IRCollection ReadAndProcessFiles(List <string> fileNames)
        {
            IRCollection collection = new IRCollection();

            // List<T> is not safe for concurrent writes, so the parallel loop collects the
            // documents in a ConcurrentBag; the bag is copied into a list after the loop.
            // The bag is unordered, so document order in the collection is not guaranteed
            // to follow the order of fileNames.
            var conDocs = new ConcurrentBag <IRDocument>();

            // NOTE(review): AddToIndex is invoked concurrently with the same writer; this
            // relies on the writer being safe for concurrent use - confirm.
            Parallel.ForEach(fileNames, fn =>
            {
                string docText = FileHandling.ReadTextFile(fn);
                IRDocument doc = GetNewDoc(docText);
                if (doc != null)
                {
                    conDocs.Add(doc);
                    doc.AddToIndex(writer);
                }
                else
                {
                    Console.WriteLine("Error with file: " + fn);
                }
            });

            // add documents to collection object and set maxResults
            collection.AddDocs(conDocs.ToList());
            maxResults = conDocs.Count;

            return(collection);
        }
示例#2
0
        /// <summary>
        /// Builds the index: lists the files under <paramref name="collectionPath"/>,
        /// initialises the index at <paramref name="indexPath"/>, reads and indexes the
        /// files, then closes the index. The elapsed wall-clock time, in seconds, is
        /// stored in <c>indexTime</c> and the processed documents in <c>myCollection</c>.
        /// </summary>
        /// <param name="collectionPath">Path passed to FileHandling.GetFileNames to list the source files.</param>
        /// <param name="indexPath">Path passed to InitIndex.</param>
        /// <returns>The value of <c>myCollection.Length()</c> after indexing.</returns>
        public int CreateIndex(string collectionPath, string indexPath)
        {
            // A Stopwatch measures elapsed time independently of system clock changes.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            // get all of the files names in the collection path
            List <string> filenames = FileHandling.GetFileNames(collectionPath, false);

            // initialise the index
            InitIndex(indexPath);

            try
            {
                // build the index
                // this method call does lots of things in parallel
                myCollection = ReadAndProcessFiles(filenames);
            }
            finally
            {
                // close the index even when processing fails, so it is not left open
                CleanUpIndex();
            }

            timer.Stop();

            // TotalSeconds covers the whole duration; the Seconds and Milliseconds
            // components alone wrap around once a run lasts a minute or longer.
            indexTime = (float)timer.Elapsed.TotalSeconds;

            return(myCollection.Length());
        }
示例#3
0
        /// <summary>
        /// Builds an IRCollection from the search results and stores it in
        /// <c>resultsCollection</c>. This is used to display the search results.
        /// </summary>
        /// <returns>The number of results, as reported by the new collection's Length().</returns>
        public int BuildResults()
        {
            CreateSearcher();

            IRCollection resultDocs;
            try
            {
                resultDocs = new IRCollection(myCollection, searcher, searchResults);
            }
            finally
            {
                // clean up the searcher even if building the result collection throws
                CleanUpSearcher();
            }

            resultsCollection = resultDocs;

            return(resultDocs.Length());
        }
示例#4
0
        /// <summary>
        /// Builds an IRCollection from an original IRCollection and a set of search results.
        /// Each returned hit is resolved to the stored document's "docID" field, matched
        /// against the original collection, and given its 1-based rank and its score.
        /// </summary>
        /// <remarks>
        /// The matched IRDocument instances are the same objects held by
        /// <paramref name="origCollection"/>; their Rank and Score are overwritten here.
        /// </remarks>
        /// <param name="origCollection">Collection whose documents are looked up by doc ID.</param>
        /// <param name="searcher">Searcher used to load the stored document for each hit.</param>
        /// <param name="results">Search results to convert.</param>
        public IRCollection(IRCollection origCollection, IndexSearcher searcher, TopDocs results)
        {
            // Iterate over the hits actually returned. TotalHits counts every match in the
            // index and can exceed ScoreDocs.Length, which would index past the array's end.
            var hits = results.ScoreDocs;

            List <IRDocument> resultCollection = new List <IRDocument>(hits.Length);

            for (int i = 0; i < hits.Length; i++)
            {
                Document   doc    = searcher.Doc(hits[i].Doc);
                string     docID  = doc.Get("docID");
                IRDocument newDoc = origCollection.collectionDocs.Find(x => x.GetDocID() == docID);

                // A hit without a matching document in the original collection is skipped
                // rather than dereferenced as null.
                if (newDoc == null)
                {
                    continue;
                }

                newDoc.Rank  = i + 1;
                newDoc.Score = hits[i].Score;
                resultCollection.Add(newDoc);
            }

            // Assigned after the loop so an empty result set still yields an empty list.
            collectionDocs = resultCollection;
        }