예제 #1
0
        /// <summary>
        /// Helper function for CreateIndex.
        /// Processes the given files in parallel to improve indexing speed.
        /// For each file name the method:
        /// 1. Reads the file into a string.
        /// 2. Turns the text into an IRDocument via GetNewDoc.
        /// 3. Adds the IRDocument to a thread-safe bag and to the index via writer.
        /// Files for which GetNewDoc returns null are reported on the console and skipped.
        /// As a side effect, maxResults is set to the number of documents created.
        /// </summary>
        /// <param name="fileNames">Names of the files to read and index.</param>
        /// <returns>
        /// A new IRCollection holding every document that was created. The documents are
        /// gathered in a ConcurrentBag, so their order is not guaranteed to follow the
        /// order of <paramref name="fileNames"/>.
        /// </returns>
        private IRCollection ReadAndProcessFiles(List <string> fileNames)
        {
            IRCollection collection = new IRCollection();

            // List<T> is not thread safe, so documents are gathered in a ConcurrentBag
            // while the parallel loop runs and copied into a list once it has finished.
            var conDocs = new ConcurrentBag <IRDocument>();

            Parallel.ForEach(fileNames, fn =>
            {
                string docText = FileHandling.ReadTextFile(fn);
                IRDocument doc = GetNewDoc(docText);
                if (doc != null)
                {
                    conDocs.Add(doc);
                    // NOTE(review): called concurrently from several worker threads;
                    // this assumes writer is safe for concurrent use - confirm.
                    doc.AddToIndex(writer);
                }
                else
                {
                    Console.WriteLine("Error with file: " + fn);
                }
            });

            // Snapshot the bag once and use the same list for both the collection
            // contents and the maxResults count.
            List <IRDocument> docs = conDocs.ToList();
            collection.AddDocs(docs);
            maxResults = docs.Count;

            return(collection);
        }
예제 #2
0
        /// <summary>
        /// Writes a TREC evaluation file from the current search results
        /// (one line per document in resultsCollection).
        /// The topic ID is written exactly as given; per the original note, '000'
        /// is the value used when the query is not a standard one.
        /// </summary>
        /// <remarks>
        /// If the file already exists the user is asked whether to append. If they
        /// decline, they are asked to confirm overwriting. Declining both prompts
        /// leaves the non-destructive default in place, i.e. the results are appended.
        /// Each line has the structure: TopicID Q0 DocID rank score group.
        /// </remarks>
        /// <param name="fileName">Path of the evaluation file to write.</param>
        /// <param name="topicID">Topic identifier written in the first column of every line.</param>
        /// <returns>Always 0.</returns>
        public int WriteEvalFile(string fileName, string topicID)
        {
            // Appending is the default; it is only switched off by an explicit
            // overwrite confirmation below.
            bool appendFlag = true;

            // check if the file exists
            if (File.Exists(fileName))
            {
                // prompt for append
                DialogResult append = MessageBox.Show("Do you want to append to the existing file?",
                                                      "Confirm",
                                                      MessageBoxButtons.YesNo);

                if (append == DialogResult.Yes)
                {
                    appendFlag = true;
                }
                else
                {
                    // if overwrite confirm
                    DialogResult ruSure = MessageBox.Show("Are you sure you want to overwrite the file?",
                                                          "Confirm",
                                                          MessageBoxButtons.YesNo);
                    if (ruSure == DialogResult.Yes)
                    {
                        appendFlag = false;
                    }
                }
            }

            // this is fixed
            string groupName = "09648500_NathanOnly";

            List <string> evalList = new List <string>();

            for (int i = 0; i < resultsCollection.Length(); i++)
            {
                IRDocument doc = resultsCollection.GetIRDocument(i);

                // The file is machine-readable, so rank and score are formatted with the
                // invariant culture; the current culture could emit a decimal comma.
                // The trailing "\n" is kept from the original line format.
                evalList.Add(string.Format(
                                 System.Globalization.CultureInfo.InvariantCulture,
                                 "{0}\tQ0\t{1}\t{2}\t{3}\t{4}\n",
                                 topicID,
                                 doc.GetDocID(),
                                 doc.Rank,
                                 doc.Score,
                                 groupName));
            }

            // write file
            FileHandling.WriteTextFile(evalList, fileName, appendFlag);

            return(0);
        }
예제 #3
0
        /// <summary>
        /// Displays the details of an IRDocument.
        /// This form is specific to the JournalAbstract document type, so this
        /// constructor will need to be updated if the IRDocument type is changed
        /// for a different application.
        /// </summary>
        /// <param name="doc">The document to display; must be a JournalAbstract.</param>
        /// <exception cref="System.ArgumentNullException">doc is null.</exception>
        /// <exception cref="System.ArgumentException">doc is not a JournalAbstract.</exception>
        public frmDetail(IRDocument doc)
        {
            // Validate before any controls are created so a bad argument fails with
            // a clear message instead of a NullReferenceException further down.
            if (doc == null)
            {
                throw new System.ArgumentNullException("doc");
            }

            JournalAbstract JAdoc = doc as JournalAbstract;
            if (JAdoc == null)
            {
                throw new System.ArgumentException(
                          "frmDetail can only display JournalAbstract documents.", "doc");
            }

            InitializeComponent();

            tbAbstract.Text = JAdoc.Words;
            lblBib.Text     = JAdoc.BiblioInfo;
            lblAuthor.Text  = JAdoc.Author;
            lblTitle.Text   = JAdoc.Title;

            // action for escape key
            this.CancelButton = btnOK;
        }
예제 #4
0
        /// <summary>
        /// Builds an IRCollection from an original IRCollection and a set of search results.
        /// For every returned hit, the stored "docID" field is used to look up the matching
        /// document in <paramref name="origCollection"/>; that document's Rank (1-based
        /// position in the results) and Score are set and it is added to this collection.
        /// </summary>
        /// <remarks>
        /// The documents are shared with the original collection, not copied, so Rank and
        /// Score are updated on the original collection's objects as well. Hits whose docID
        /// has no match in the original collection are skipped.
        /// </remarks>
        /// <param name="origCollection">Collection the documents are looked up in.</param>
        /// <param name="searcher">Searcher used to load the stored document for each hit.</param>
        /// <param name="results">Search results to convert.</param>
        public IRCollection(IRCollection origCollection, IndexSearcher searcher, TopDocs results)
        {
            List <IRDocument> resultCollection = new List <IRDocument>();

            // TotalHits counts every match for the query, while ScoreDocs only holds the
            // hits that were actually returned. Bounding the loop by TotalHits can index
            // past the end of ScoreDocs, so use the length of ScoreDocs instead.
            int returnedHits = results.ScoreDocs.Length;

            for (int i = 0; i < returnedHits; i++)
            {
                Document doc   = searcher.Doc(results.ScoreDocs[i].Doc);
                string   docID = doc.Get("docID");

                IRDocument newDoc = origCollection.collectionDocs.Find(x => x.GetDocID() == docID);
                if (newDoc == null)
                {
                    // The index returned a docID that the original collection does not
                    // contain; there is nothing to rank, so skip this hit.
                    continue;
                }

                newDoc.Rank  = i + 1;
                newDoc.Score = results.ScoreDocs[i].Score;
                resultCollection.Add(newDoc);
            }

            // Assigned after the loop so the collection is also initialised
            // when there are no hits at all.
            collectionDocs = resultCollection;
        }
예제 #5
0
        /// <summary>
        /// For testing only. Runs every query, writes all results into a single TREC
        /// results file, moves that file into the results directory, runs trec_eval
        /// against it and prints the tool's output to the console.
        /// </summary>
        /// <remarks>
        /// The first query overwrites <paramref name="filename"/>; later queries append
        /// to it. Each line has the structure: TopicID Q0 DocID rank score group.
        /// </remarks>
        /// <param name="filename">Path of the results file to write before it is moved.</param>
        /// <param name="queries">Queries to run; the key is the topic ID, the value the query text.</param>
        /// <param name="preproc">Passed through to RunQuery.</param>
        public void AutoResults(string filename, Dictionary <string, string> queries, bool preproc)
        {
            // RunQuery requires an out argument; its value is not used here.
            string dontcare = "";

            string groupName = "09648500_NathanOnly";

            bool appendFlag = false;

            foreach (KeyValuePair <string, string> q in queries)
            {
                // execute query
                string topicID = q.Key;
                RunQuery(q.Value, preproc, out dontcare);

                // get results (BuildResults presumably returns how many documents
                // resultsCollection now holds - confirm)
                int numResults = BuildResults();

                List <string> evalList = new List <string>();

                for (int i = 0; i < numResults; i++)
                {
                    IRDocument doc = resultsCollection.GetIRDocument(i);

                    // Machine-readable output: format rank and score with the invariant
                    // culture so a decimal-comma locale cannot corrupt the file.
                    evalList.Add(string.Format(
                                     System.Globalization.CultureInfo.InvariantCulture,
                                     "{0}\tQ0\t{1}\t{2}\t{3}\t{4}\n",
                                     topicID,
                                     doc.GetDocID(),
                                     doc.Rank,
                                     doc.Score,
                                     groupName));
                }

                // write file
                FileHandling.WriteTextFile(evalList, filename, appendFlag);

                appendFlag = true;
            }

            string trecpath  = "../../../../results/";
            string movedFile = trecpath + Path.GetFileName(filename);

            // File.Move fails if the destination exists, so clear it first.
            if (File.Exists(movedFile))
            {
                File.Delete(movedFile);
            }

            File.Move(filename, movedFile);

            // from MSDN; the using block releases the process handle when done
            using (Process p = new Process())
            {
                p.StartInfo.UseShellExecute        = false;
                p.StartInfo.RedirectStandardOutput = true;
                p.StartInfo.FileName  = trecpath + "trec_eval";
                // Evaluate the file that was just moved (quoted in case its name
                // contains spaces) rather than a hard-coded results file name.
                p.StartInfo.Arguments = "-q " + trecpath + "cranqrel.txt \"" + movedFile + "\"";
                p.Start();

                // Read the redirected output before waiting for the process to exit.
                string output = p.StandardOutput.ReadToEnd();

                p.WaitForExit();
                Console.WriteLine(output);
            }
        }
예제 #6
0
 /// <summary>
 /// Appends a single IRDocument to this collection's document list.
 /// </summary>
 /// <param name="doc">The document to append.</param>
 public void AddDoc(IRDocument doc)
 {
     this.collectionDocs.Add(doc);
 }