Example #1
0
        /// <summary>
        /// Helper function for CreateIndex.
        /// Processes the files in parallel to improve indexing speed.
        /// For each file name the method:
        /// 1. Reads the file into a string.
        /// 2. Turns the text into an IRDocument via GetNewDoc.
        /// 3. Stores the document and adds it to the index through the writer field.
        /// A file for which GetNewDoc returns null is reported on the console and skipped.
        /// As a side effect maxResults is set to the number of documents created.
        /// </summary>
        /// <param name="fileNames">Names of the files to read and index.</param>
        /// <returns>A collection holding every document that was created.</returns>
        private IRCollection ReadAndProcessFiles(List <string> fileNames)
        {
            // List<T> is not safe for concurrent writes, so the parallel loop gathers
            // the documents in a ConcurrentBag; the bag is copied to a list afterwards.
            // A ConcurrentBag is unordered, so the documents do not keep the file order.
            var conDocs = new ConcurrentBag <IRDocument>();

            Parallel.ForEach(fileNames, fn =>
            {
                string docText = FileHandling.ReadTextFile(fn);
                IRDocument doc = GetNewDoc(docText);
                if (doc != null)
                {
                    conDocs.Add(doc);

                    // NOTE(review): writer is shared by every worker thread; this relies on
                    // AddToIndex being safe to call concurrently - confirm.
                    doc.AddToIndex(writer);
                }
                else
                {
                    Console.WriteLine("Error with file: " + fn);
                }
            });

            // add documents to collection object and set maxResults
            List <IRDocument> docs = conDocs.ToList();
            IRCollection collection = new IRCollection();
            collection.AddDocs(docs);
            maxResults = docs.Count;

            return(collection);
        }
Example #2
0
        /// <summary>
        /// Builds the index for a document collection.
        /// Gets the file names under the collection path, initialises the index,
        /// reads and indexes the files (in parallel, see ReadAndProcessFiles) and
        /// then closes the index. The resulting collection is stored in myCollection
        /// and the elapsed time, in seconds, in indexTime.
        /// </summary>
        /// <param name="collectionPath">Path passed to FileHandling.GetFileNames to list the source files.</param>
        /// <param name="indexPath">Path passed to InitIndex.</param>
        /// <returns>The value of myCollection.Length() after indexing.</returns>
        public int CreateIndex(string collectionPath, string indexPath)
        {
            // Stopwatch measures elapsed time directly and is not affected by
            // wall-clock adjustments the way DateTime.Now differences are.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            // get all of the files names in the collection path
            List <string> filenames = FileHandling.GetFileNames(collectionPath, false);

            // initialise the index
            InitIndex(indexPath);

            // build the index
            // this method call does lots of things in parallel
            myCollection = ReadAndProcessFiles(filenames);

            // close the index
            CleanUpIndex();

            timer.Stop();

            // TotalSeconds is the whole elapsed time as fractional seconds. The
            // Seconds/Milliseconds components alone drop minutes and hours, so a
            // 65 second run would have been reported as 5 seconds.
            indexTime = (float)timer.Elapsed.TotalSeconds;

            return(myCollection.Length());
        }
Example #3
0
        /// <summary>
        /// Parses a standard information needs file.
        /// The file text is split on the ".I" and ".D" delimiters and the resulting
        /// parts are read as consecutive (key, value) pairs, each trimmed of
        /// surrounding whitespace. A trailing part without a partner is ignored.
        /// </summary>
        /// <param name="fileName">Name of the file, passed to FileHandling.ReadTextFile.</param>
        /// <returns>
        /// A dictionary built from the pairs, or null when the text is null or does
        /// not contain at least one complete pair.
        /// </returns>
        /// <exception cref="ArgumentException">The same key appears more than once in the file.</exception>
        public static Dictionary <string, string> GetInfoNeeds(string fileName)
        {
            // open file and dump into a string
            string document = FileHandling.ReadTextFile(fileName);

            // guard in case ReadTextFile yields null; Split would otherwise throw
            if (document == null)
            {
                return(null);
            }

            // split string based on ".I" and ".D" delimiters
            string[] delims   = { ".I", ".D" };
            string[] docParts = document.Split(delims, StringSplitOptions.RemoveEmptyEntries);

            // one complete pair is already a valid file
            if (docParts.Length < 2)
            {
                return(null);
            }

            Dictionary <string, string> iNeeds = new Dictionary <string, string>();

            // step two parts at a time; the "i + 1" bound keeps an odd number of
            // parts from reading past the end of the array
            for (int i = 0; i + 1 < docParts.Length; i += 2)
            {
                iNeeds.Add(docParts[i].Trim(), docParts[i + 1].Trim());
            }

            return(iNeeds);
        }
Example #4
0
        /// <summary>
        /// Writes a trec evaluation file from the current search results
        /// (resultsCollection), one line per document in the form
        /// TopicID, Q0, DocID, rank, score, group - separated by tabs.
        /// If the file already exists the user is asked whether to append and,
        /// if not, whether to overwrite.
        /// The original note states that '000' is used as the topicID when the
        /// query is not a standard one; that value is supplied by the caller.
        /// </summary>
        /// <param name="fileName">File to write to.</param>
        /// <param name="topicID">Topic identifier placed at the start of every line.</param>
        /// <returns>Always 0.</returns>
        public int WriteEvalFile(string fileName, string topicID)
        {
            // Appending is the default and is only switched off by an explicit
            // "Yes" to the overwrite confirmation.
            bool appendFlag = true;

            if (File.Exists(fileName))
            {
                DialogResult appendAnswer = MessageBox.Show("Do you want to append to the existing file?",
                                                            "Confirm",
                                                            MessageBoxButtons.YesNo);

                if (appendAnswer != DialogResult.Yes)
                {
                    DialogResult overwriteAnswer = MessageBox.Show("Are you sure you want to overwrite the file?",
                                                                   "Confirm",
                                                                   MessageBoxButtons.YesNo);

                    // declining the overwrite falls back to appending
                    appendFlag = overwriteAnswer != DialogResult.Yes;
                }
            }

            // this is fixed
            string groupName = "09648500_NathanOnly";

            List <string> evalList = new List <string>();

            // structure TopicID QO DocID rank score group
            for (int row = 0; row < resultsCollection.Length(); row++)
            {
                IRDocument hit = resultsCollection.GetIRDocument(row);

                evalList.Add(topicID + "\tQ0\t"
                             + hit.GetDocID() + "\t"
                             + hit.Rank + "\t"
                             + hit.Score + "\t"
                             + groupName + "\n");
            }

            // write file
            FileHandling.WriteTextFile(evalList, fileName, appendFlag);

            return(0);
        }
Example #5
0
        /// <summary>
        /// Test-only helper: runs every query, writes all results to one trec
        /// evaluation file, moves that file into the results directory and runs
        /// trec_eval on it, echoing the tool's output to the console.
        /// </summary>
        /// <param name="filename">File the results are first written to; it is then moved to the results directory under the same file name.</param>
        /// <param name="queries">Queries to run, keyed by topic ID with the query text as value.</param>
        /// <param name="preproc">Passed through to RunQuery.</param>
        public void AutoResults(string filename, Dictionary <string, string> queries, bool preproc)
        {
            // this is fixed
            const string groupName = "09648500_NathanOnly";

            // RunQuery's out value is not needed here
            string dontcare;

            // the first query overwrites the file, every later one appends
            bool appendFlag = false;

            foreach (KeyValuePair <string, string> q in queries)
            {
                // execute query
                string topicID = q.Key;
                RunQuery(q.Value, preproc, out dontcare);

                // get results
                int numResults = BuildResults();

                List <string> evalList = new List <string>();

                // structure TopicID QO DocID rank score group
                for (int i = 0; i < numResults; i++)
                {
                    IRDocument doc = resultsCollection.GetIRDocument(i);

                    evalList.Add(topicID + "\tQ0\t"
                                 + doc.GetDocID() + "\t"
                                 + doc.Rank + "\t"
                                 + doc.Score + "\t"
                                 + groupName + "\n");
                }

                // write file
                FileHandling.WriteTextFile(evalList, filename, appendFlag);

                appendFlag = true;
            }

            string trecpath    = "../../../../results/";
            string resultsFile = trecpath + Path.GetFileName(filename);

            // File.Move does not overwrite, so clear out any earlier copy first
            if (File.Exists(resultsFile))
            {
                File.Delete(resultsFile);
            }

            File.Move(filename, resultsFile);

            // from MSDN
            using (Process p = new Process())
            {
                p.StartInfo.UseShellExecute        = false;
                p.StartInfo.RedirectStandardOutput = true;
                p.StartInfo.FileName = trecpath + "trec_eval";

                // evaluate the file that was just moved rather than a fixed name;
                // it is quoted because the caller-supplied name may contain spaces
                p.StartInfo.Arguments = "-q " + trecpath + "cranqrel.txt \"" + resultsFile + "\"";
                p.Start();

                // read the redirected output before waiting so a full pipe cannot
                // block the child process
                string output = p.StandardOutput.ReadToEnd();

                p.WaitForExit();
                Console.WriteLine(output);
            }
        }