/// <summary>
/// Helper for CreateIndex: reads the given files in parallel, turns each one
/// into an IRDocument and adds it to both the returned collection and the index.
/// </summary>
/// <remarks>
/// For each file name the method:
/// 1. reads the file text with FileHandling.ReadTextFile,
/// 2. converts the text to an IRDocument with GetNewDoc,
/// 3. stores the document and adds it to the index through <c>writer</c>.
/// Files for which GetNewDoc returns null are reported on the console and skipped.
/// Documents are gathered in an unordered concurrent bag, so their order in the
/// returned collection is not guaranteed to match the order of <paramref name="fileNames"/>.
/// As a side effect, <c>maxResults</c> is set to the number of documents created.
/// </remarks>
/// <param name="fileNames">Paths of the files to read and index.</param>
/// <returns>A collection holding every document that was created successfully.</returns>
private IRCollection ReadAndProcessFiles(List<string> fileNames)
{
    IRCollection collection = new IRCollection();

    // A plain list is not safe for concurrent writes, so the worker threads
    // add to a ConcurrentBag that is copied into a list once they all finish.
    var conDocs = new ConcurrentBag<IRDocument>();

    Parallel.ForEach(fileNames, fn =>
    {
        string docText = FileHandling.ReadTextFile(fn);
        IRDocument doc = GetNewDoc(docText);
        if (doc != null)
        {
            conDocs.Add(doc);
            // NOTE(review): writer is shared by all worker threads; this assumes
            // AddToIndex is safe to call concurrently - confirm.
            doc.AddToIndex(writer);
        }
        else
        {
            Console.WriteLine("Error with file: " + fn);
        }
    });

    // add documents to collection object and set maxResults
    collection.AddDocs(conDocs.ToList());
    maxResults = conDocs.Count;
    return collection;
}
/// <summary>
/// Writes a TREC evaluation file from the current search results.
/// </summary>
/// <remarks>
/// One line is produced per document in <c>resultsCollection</c>, with the
/// tab-separated structure: TopicID, Q0, DocID, rank, score, group.
/// If the target file already exists the user is asked whether to append; if
/// they decline, they are asked to confirm an overwrite. Declining both prompts
/// leaves the append default in place, so the results are appended.
/// Numbers are formatted with the invariant culture so the file stays
/// machine-readable regardless of the user's regional settings.
/// </remarks>
/// <param name="fileName">Path of the evaluation file to write.</param>
/// <param name="topicID">
/// Topic identifier written in the first column (per the original note, '000'
/// is used when the query is not a standard one).
/// </param>
/// <returns>Always 0.</returns>
public int WriteEvalFile(string fileName, string topicID)
{
    List<string> evalList = new List<string>();

    // Appending is the default; it is only switched off when the user
    // explicitly confirms that an existing file should be overwritten.
    bool appendFlag = true;

    if (File.Exists(fileName))
    {
        // prompt for append
        DialogResult append = MessageBox.Show("Do you want to append to the existing file?", "Confirm", MessageBoxButtons.YesNo);
        if (append != DialogResult.Yes)
        {
            // if overwrite confirm
            DialogResult ruSure = MessageBox.Show("Are you sure you want to overwrite the file?", "Confirm", MessageBoxButtons.YesNo);
            if (ruSure == DialogResult.Yes)
            {
                appendFlag = false;
            }
        }
    }

    // this is fixed
    const string groupName = "09648500_NathanOnly";

    // structure TopicID QO DocID rank score group
    for (int i = 0; i < resultsCollection.Length(); i++)
    {
        IRDocument doc = resultsCollection.GetIRDocument(i);

        // Invariant culture keeps the decimal separator a '.', which a
        // culture-sensitive concatenation would not guarantee.
        string line = string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "{0}\tQ0\t{1}\t{2}\t{3}\t{4}\n",
            topicID,
            doc.GetDocID(),
            doc.Rank,
            doc.Score,
            groupName);
        evalList.Add(line);
    }

    // write file
    FileHandling.WriteTextFile(evalList, fileName, appendFlag);
    return 0;
}
/// <summary>
/// Creates the detail form and fills its controls from the given document.
/// </summary>
/// <remarks>
/// The form reads the Words, BiblioInfo, Author and Title members of
/// JournalAbstract, so it is specific to that document type; it has to be
/// updated if a different IRDocument type is used in another application.
/// </remarks>
/// <param name="doc">The document to display; it must be a JournalAbstract.</param>
/// <exception cref="System.ArgumentException">
/// Thrown when <paramref name="doc"/> is null or is not a JournalAbstract.
/// </exception>
public frmDetail(IRDocument doc)
{
    // Validate before building the UI so a bad argument fails with a clear
    // message instead of a NullReferenceException on the first property read.
    JournalAbstract JAdoc = doc as JournalAbstract;
    if (JAdoc == null)
    {
        throw new System.ArgumentException("frmDetail can only display JournalAbstract documents.", "doc");
    }

    InitializeComponent();

    tbAbstract.Text = JAdoc.Words;
    lblBib.Text = JAdoc.BiblioInfo;
    lblAuthor.Text = JAdoc.Author;
    lblTitle.Text = JAdoc.Title;

    // action for escape key
    this.CancelButton = btnOK;
}
/// <summary>
/// Builds a results collection from an original collection and a set of
/// search results.
/// </summary>
/// <remarks>
/// Each returned hit is mapped back, via its stored "docID" field, to the
/// matching document in <paramref name="origCollection"/>. That document
/// instance (not a copy) receives the hit's 1-based rank and score and is
/// added to this collection in rank order. Hits whose docID has no match in
/// the original collection are skipped.
/// </remarks>
/// <param name="origCollection">Collection holding the full set of documents.</param>
/// <param name="searcher">Searcher used to load the stored fields of each hit.</param>
/// <param name="results">Search results to convert.</param>
public IRCollection(IRCollection origCollection, IndexSearcher searcher, TopDocs results)
{
    List<IRDocument> resultCollection = new List<IRDocument>();

    // TotalHits counts every match in the index, whereas ScoreDocs only holds
    // the hits that were actually returned, so the array length is the safe bound.
    int hitCount = results.ScoreDocs.Length;

    for (int i = 0; i < hitCount; i++)
    {
        var hit = results.ScoreDocs[i];
        Document doc = searcher.Doc(hit.Doc);
        string docID = doc.Get("docID");

        IRDocument newDoc = origCollection.collectionDocs.Find(x => x.GetDocID() == docID);
        if (newDoc == null)
        {
            // The index refers to a document the original collection does not
            // contain; skip it rather than fail on a null reference.
            continue;
        }

        newDoc.Rank = i + 1;
        newDoc.Score = hit.Score;
        resultCollection.Add(newDoc);
    }

    // Assigned once, after the loop, so the field is also set for zero hits.
    collectionDocs = resultCollection;
}
/// <summary>
/// Testing helper: runs every query, writes all results to one TREC
/// evaluation file, moves that file to the results directory and runs
/// trec_eval on it, printing the tool's output to the console.
/// </summary>
/// <remarks>
/// The first query overwrites <paramref name="filename"/>; later queries
/// append to it. Each result line has the tab-separated structure:
/// TopicID, Q0, DocID, rank, score, group. Numbers are formatted with the
/// invariant culture so trec_eval can parse them under any regional settings.
/// trec_eval is expected in the hard-coded relative results directory, next to
/// cranqrel.txt.
/// </remarks>
/// <param name="filename">Path of the evaluation file to write before it is moved.</param>
/// <param name="queries">Queries to run, keyed by topic ID.</param>
/// <param name="preproc">Passed through to RunQuery for every query.</param>
public void AutoResults(string filename, Dictionary<string, string> queries, bool preproc)
{
    const string groupName = "09648500_NathanOnly";

    string dontcare = "";
    bool appendFlag = false;

    foreach (KeyValuePair<string, string> q in queries)
    {
        // execute query
        string topicID = q.Key;
        RunQuery(q.Value, preproc, out dontcare);

        // get results
        int numResults = BuildResults();

        // structure TopicID QO DocID rank score group
        List<string> evalList = new List<string>();
        for (int i = 0; i < numResults; i++)
        {
            IRDocument doc = resultsCollection.GetIRDocument(i);
            string line = string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "{0}\tQ0\t{1}\t{2}\t{3}\t{4}\n",
                topicID,
                doc.GetDocID(),
                doc.Rank,
                doc.Score,
                groupName);
            evalList.Add(line);
        }

        // write file; everything after the first query is appended
        FileHandling.WriteTextFile(evalList, filename, appendFlag);
        appendFlag = true;
    }

    // Move the results next to trec_eval, replacing any previous run.
    string trecpath = "../../../../results/";
    string movedFile = trecpath + Path.GetFileName(filename);
    if (File.Exists(movedFile))
    {
        File.Delete(movedFile);
    }
    File.Move(filename, movedFile);

    // from MSDN
    using (Process p = new Process())
    {
        p.StartInfo.UseShellExecute = false;
        p.StartInfo.RedirectStandardOutput = true;
        p.StartInfo.FileName = trecpath + "trec_eval";
        // Evaluate the file that was just moved rather than a fixed file name.
        p.StartInfo.Arguments = "-q " + trecpath + "cranqrel.txt " + movedFile;
        p.Start();

        // Read the output before waiting so a full pipe cannot block the child.
        string output = p.StandardOutput.ReadToEnd();
        p.WaitForExit();
        Console.WriteLine(output);
    }
}
/// <summary>
/// Appends a single document to this collection.
/// </summary>
/// <param name="doc">The document to append.</param>
public void AddDoc(IRDocument doc)
{
    this.collectionDocs.Add(doc);
}