/// <summary>
/// Tokenizes everything read from <paramref name="reader"/> with a
/// <c>SimpleAnalyzer</c> and prints timing statistics to the console.
/// </summary>
/// <param name="reader">Source of the text to tokenize.</param>
/// <param name="verbose">When true, prints each token's text and its start/end offsets.</param>
/// <param name="bytes">Input size in bytes; only used for the megabytes/hour figure.</param>
internal static void Test(System.IO.TextReader reader, bool verbose, long bytes)
{
    Analyzer analyzer = new SimpleAnalyzer();
    TokenStream stream = analyzer.TokenStream(null, reader);

    System.DateTime start = System.DateTime.Now;

    int count = 0;
    for (Token t = stream.Next(); t != null; t = stream.Next())
    {
        if (verbose)
        {
            System.Console.Out.WriteLine("Text=" + t.TermText() + " start=" + t.StartOffset() + " end=" + t.EndOffset());
        }
        count++;
    }

    System.DateTime end = System.DateTime.Now;

    // DateTime.Ticks counts 100-nanosecond intervals, so the raw tick difference
    // must be scaled down before it can be reported (and used below) as milliseconds.
    long time = (end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond;

    System.Console.Out.WriteLine(time + " milliseconds to extract " + count + " tokens");
    // The divisions below are floating-point, so a zero count or a zero elapsed
    // time prints Infinity/NaN rather than throwing.
    System.Console.Out.WriteLine((time * 1000.0) / count + " microseconds/token");
    System.Console.Out.WriteLine((bytes * 1000.0 * 60.0 * 60.0) / (time * 1000000.0) + " megabytes/hour");
}
/// <summary>
/// Builds an in-memory index containing NUM_DOCS documents, each with
/// NUM_FIELDS stored and analyzed fields named "f1", "f2", ...
/// Every field value is an entry of <c>data</c> chosen by field number, a '#',
/// and a randomly chosen entry of <c>data</c>.
/// </summary>
/// <returns>The directory holding the freshly written index.</returns>
/// <exception cref="System.SystemException">Wraps any exception raised while indexing.</exception>
private Directory MakeIndex()
{
    Directory ramDir = new RAMDirectory();
    try
    {
        System.Random rnd = NewRandom();
        Analyzer simpleAnalyzer = new SimpleAnalyzer();
        IndexWriter indexWriter = new IndexWriter(ramDir, simpleAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);

        // Keep each index file separate instead of packing them into a compound file.
        indexWriter.UseCompoundFile = false;

        int docNo = 1;
        while (docNo <= NUM_DOCS)
        {
            Document document = new Document();

            int fieldNo = 1;
            while (fieldNo <= NUM_FIELDS)
            {
                System.String fieldName = "f" + fieldNo;
                System.String fieldValue = data[fieldNo % data.Length] + '#' + data[rnd.Next(data.Length)];
                document.Add(new Field(fieldName, fieldValue, Field.Store.YES, Field.Index.ANALYZED));
                fieldNo++;
            }

            indexWriter.AddDocument(document);
            docNo++;
        }

        indexWriter.Close();
    }
    catch (System.Exception ex)
    {
        throw new System.SystemException("", ex);
    }

    return ramDir;
}
/// <summary>
/// Creates the analyzer that corresponds to the given class name.
/// </summary>
/// <param name="analyzerName">
/// One of "SimpleAnalyzer", "StandardAnalyzer", "KeywordAnalyzer",
/// "StopAnalyzer" or "WhitespaceAnalyzer".
/// </param>
/// <returns>
/// A new analyzer instance; any unrecognized name (including null) yields a
/// <c>SimpleAnalyzer</c>.
/// </returns>
public static Lucene.Net.Analysis.Analyzer GetAnalyzerByName(string analyzerName)
{
    // Version handed to the analyzers whose constructors require one.
    Lucene.Net.Util.Version luceneVersion = Lucene.Net.Util.Version.LUCENE_30;

    switch (analyzerName)
    {
        case "StandardAnalyzer":
            return new Lucene.Net.Analysis.Standard.StandardAnalyzer(luceneVersion);

        case "KeywordAnalyzer":
            return new Lucene.Net.Analysis.KeywordAnalyzer();

        case "StopAnalyzer":
            return new Lucene.Net.Analysis.StopAnalyzer(luceneVersion);

        case "WhitespaceAnalyzer":
            return new Lucene.Net.Analysis.WhitespaceAnalyzer();

        case "SimpleAnalyzer":
        default:
            return new Lucene.Net.Analysis.SimpleAnalyzer();
    }
}
/// <summary>
/// Creates a RAM-based index of NUM_DOCS documents. Each document carries
/// NUM_FIELDS stored, analyzed fields ("f1", "f2", ...) whose values combine
/// an entry of <c>data</c> selected by field number with a randomly selected one,
/// separated by '#'.
/// </summary>
/// <returns>The populated directory.</returns>
/// <exception cref="System.SystemException">Wraps any exception raised while indexing.</exception>
private Directory MakeIndex()
{
    Directory result = new RAMDirectory();
    try
    {
        System.Random random = NewRandom();
        Analyzer simple = new SimpleAnalyzer();
        IndexWriter w = new IndexWriter(result, simple, true, IndexWriter.MaxFieldLength.LIMITED);

        // Write individual index files rather than a single compound file.
        w.SetUseCompoundFile(false);

        int docIndex = 1;
        while (docIndex <= NUM_DOCS)
        {
            Document current = new Document();

            int fieldIndex = 1;
            while (fieldIndex <= NUM_FIELDS)
            {
                System.String name = "f" + fieldIndex;
                System.String text = data[fieldIndex % data.Length] + '#' + data[random.Next(data.Length)];
                current.Add(new Field(name, text, Field.Store.YES, Field.Index.ANALYZED));
                fieldIndex++;
            }

            w.AddDocument(current);
            docIndex++;
        }

        w.Close();
    }
    catch (System.Exception cause)
    {
        throw new System.SystemException("", cause);
    }

    return result;
}
/// <summary>
/// Creates a RAM-based index of NUM_DOCS documents, each holding NUM_FIELDS
/// stored, tokenized fields named "f1", "f2", ... The random part of every
/// field value comes from a generator seeded with BASE_SEED + 42.
/// </summary>
/// <returns>The populated directory.</returns>
/// <exception cref="System.SystemException">Wraps any exception raised while indexing.</exception>
private static Directory MakeIndex()
{
    Directory index = new RAMDirectory();
    try
    {
        System.Random seeded = new System.Random((System.Int32) (BASE_SEED + 42));
        Analyzer simple = new SimpleAnalyzer();
        IndexWriter indexWriter = new IndexWriter(index, simple, true);

        // Write individual index files rather than a single compound file.
        indexWriter.SetUseCompoundFile(false);

        int docNo = 1;
        while (docNo <= NUM_DOCS)
        {
            Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();

            int fieldNo = 1;
            while (fieldNo <= NUM_FIELDS)
            {
                System.String fieldName = "f" + fieldNo;
                System.String fieldValue = data[fieldNo % data.Length] + '#' + data[seeded.Next(data.Length)];
                document.Add(new Field(fieldName, fieldValue, Field.Store.YES, Field.Index.TOKENIZED));
                fieldNo++;
            }

            indexWriter.AddDocument(document);
            docNo++;
        }

        indexWriter.Close();
    }
    catch (System.Exception failure)
    {
        throw new System.SystemException("", failure);
    }

    return index;
}
/// <summary>
/// Creates a query parser over the field "Field" with the operator set to
/// DEFAULT_OPERATOR_OR.
/// </summary>
/// <param name="a">Analyzer to parse with; a <c>SimpleAnalyzer</c> is used when null.</param>
/// <returns>The configured parser.</returns>
public virtual QueryParsers.QueryParser GetParser(Analyzer a)
{
    Analyzer effectiveAnalyzer = a ?? new SimpleAnalyzer();

    QueryParsers.QueryParser parser = new QueryParsers.QueryParser("Field", effectiveAnalyzer);
    parser.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_OR);
    return parser;
}
/// <summary>
/// Parses <paramref name="query"/> against the field "Field" with the operator
/// set to DEFAULT_OPERATOR_AND.
/// </summary>
/// <param name="query">Query text to parse.</param>
/// <param name="a">Analyzer to parse with; a <c>SimpleAnalyzer</c> is used when null.</param>
/// <returns>The parsed query.</returns>
public virtual Query GetQueryDOA(System.String query, Analyzer a)
{
    Analyzer effectiveAnalyzer = a ?? new SimpleAnalyzer();

    QueryParsers.QueryParser parser = new QueryParsers.QueryParser("Field", effectiveAnalyzer);
    parser.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_AND);

    Query parsed = parser.Parse(query);
    return parsed;
}
/// <summary>
/// Turns the file <paramref name="fileName"/> into a document and writes it as
/// segment <paramref name="segment"/> into the "test" directory.
/// </summary>
/// <param name="segment">Name of the segment to write.</param>
/// <param name="fileName">Path of the file to index.</param>
public static void IndexDoc(System.String segment, System.String fileName)
{
    Directory directory = FSDirectory.GetDirectory("test", false);
    try
    {
        Analyzer analyzer = new SimpleAnalyzer();
        DocumentWriter writer = new DocumentWriter(directory, analyzer, Similarity.GetDefault(), 1000);

        System.IO.FileInfo file = new System.IO.FileInfo(fileName);
        Document doc = Lucene.Net.Demo.FileDocument.Document(file);

        writer.AddDocument(segment, doc);
    }
    finally
    {
        // Close the directory even when building or writing the document fails,
        // so a failed call does not leave it open.
        directory.Close();
    }
}
/// <summary>
/// Turns the file <paramref name="fileName"/> (located under <c>workDir</c>)
/// into a document and writes it as segment <paramref name="segment"/> into
/// the index at <c>indexDir</c>.
/// </summary>
/// <param name="segment">Name of the segment to write.</param>
/// <param name="fileName">File name relative to <c>workDir</c>.</param>
private void IndexDoc(System.String segment, System.String fileName)
{
    Directory directory = FSDirectory.GetDirectory(indexDir, false);
    try
    {
        Analyzer analyzer = new SimpleAnalyzer();
        DocumentWriter writer = new DocumentWriter(directory, analyzer, Similarity.GetDefault(), 1000);

        // Path.Combine inserts the platform's separator instead of a hard-coded
        // backslash and avoids a doubled separator when workDir already ends with one.
        System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, fileName));
        Document doc = FileDocument.Document(file);

        writer.AddDocument(segment, doc);
    }
    finally
    {
        // Close the directory even when building or writing the document fails,
        // so a failed call does not leave it open.
        directory.Close();
    }
}
/// <summary>
/// Builds a query parser for the field "Field" whose operator is
/// DEFAULT_OPERATOR_OR.
/// </summary>
/// <param name="a">Analyzer to use; when null a <c>SimpleAnalyzer</c> is substituted.</param>
/// <returns>The configured parser.</returns>
public virtual QueryParsers.QueryParser GetParser(Analyzer a)
{
    Analyzer chosen = (a != null) ? a : new SimpleAnalyzer();

    QueryParsers.QueryParser result = new QueryParsers.QueryParser("Field", chosen);
    result.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_OR);

    return result;
}
/// <summary>
/// Parses <paramref name="query"/> for the field "Field" using
/// DEFAULT_OPERATOR_AND as the parser's operator.
/// </summary>
/// <param name="query">Query text to parse.</param>
/// <param name="a">Analyzer to use; when null a <c>SimpleAnalyzer</c> is substituted.</param>
/// <returns>The parsed query.</returns>
public virtual Query GetQueryDOA(System.String query, Analyzer a)
{
    Analyzer chosen = (a != null) ? a : new SimpleAnalyzer();

    QueryParsers.QueryParser andParser = new QueryParsers.QueryParser("Field", chosen);
    andParser.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_AND);

    return andParser.Parse(query);
}
/// <summary>
/// Creates a LUCENE_CURRENT query parser for the field "field" whose default
/// operator is OR.
/// </summary>
/// <param name="a">Analyzer to use; when null a <c>SimpleAnalyzer</c> is substituted.</param>
/// <returns>The configured parser.</returns>
public virtual QueryParser GetParser(Analyzer a)
{
    Analyzer effectiveAnalyzer = a ?? new SimpleAnalyzer();

    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "field", effectiveAnalyzer);
    parser.DefaultOperator = QueryParser.OR_OPERATOR;

    return parser;
}
/// <summary>
/// Parses <paramref name="query"/> for the field "field" with AND as the
/// parser's default operator.
/// </summary>
/// <param name="query">Query text to parse.</param>
/// <param name="a">Analyzer to use; when null a <c>SimpleAnalyzer</c> is substituted.</param>
/// <returns>The parsed query.</returns>
public virtual Query GetQueryDOA(System.String query, Analyzer a)
{
    Analyzer chosen = (a != null) ? a : new SimpleAnalyzer();

    QueryParser andParser = new QueryParser("field", chosen);
    andParser.SetDefaultOperator(QueryParser.AND_OPERATOR);

    Query parsed = andParser.Parse(query);
    return parsed;
}
/// <summary>
/// Parses <paramref name="query"/> for the field "field" with a LUCENE_CURRENT
/// parser whose default operator is AND.
/// </summary>
/// <param name="query">Query text to parse.</param>
/// <param name="a">Analyzer to use; when null a <c>SimpleAnalyzer</c> is substituted.</param>
/// <returns>The parsed query.</returns>
public virtual Query GetQueryDOA(System.String query, Analyzer a)
{
    Analyzer effectiveAnalyzer = a ?? new SimpleAnalyzer();

    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "field", effectiveAnalyzer);
    parser.DefaultOperator = QueryParser.AND_OPERATOR;

    Query parsed = parser.Parse(query);
    return parsed;
}
/// <summary>
/// Creates a query parser for the field "field" whose default operator is OR.
/// </summary>
/// <param name="a">Analyzer to use; when null a <c>SimpleAnalyzer</c> is substituted.</param>
/// <returns>The configured parser.</returns>
public virtual QueryParser GetParser(Analyzer a)
{
    Analyzer chosen = (a != null) ? a : new SimpleAnalyzer();

    QueryParser orParser = new QueryParser("field", chosen);
    orParser.SetDefaultOperator(QueryParser.OR_OPERATOR);

    return orParser;
}
/// <summary>
/// Handles the search click: validates the output location, runs the query
/// typed into <c>txt_InformationNeeds</c> against the index, shows the ranked
/// results and the elapsed time, and appends the ranking to the results file.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void lblSearch_Click(object sender, EventArgs e)
{
    // Check the saving path.
    if (String.IsNullOrEmpty(txt_Saving.Text))
    {
        MessageBox.Show("Please choose a path to save results.");
        return;
    }

    if (String.IsNullOrEmpty(txt_FileName.Text))
    {
        MessageBox.Show("Please input a file name.");
        return;
    }

    // Check whether the target is an existing file or a new one.
    String filePath = String.Format("{0}\\{1}.txt", txt_Saving.Text, txt_FileName.Text);
    if (File.Exists(filePath))
    {
        if (MessageBox.Show("The file is existing, do you want to append text into the file? (Yes: append it, No: Set a new file name.)", "Confirm", MessageBoxButtons.YesNo) != DialogResult.Yes)
        {
            return;
        }

        // The user wants to append, so continue numbering after the topic id
        // found at the start of the file's last line. The file is enumerated
        // only once; an empty file falls back to "000".
        var lastLine = File.ReadLines(filePath).LastOrDefault() ?? "000";
        if (!String.IsNullOrEmpty(lastLine))
        {
            var firstWord = lastLine.Split(' ')[0];

            // TryParse rejects malformed and out-of-range ids without relying
            // on a catch-all exception handler.
            int lastTopicId;
            if (!Int32.TryParse(firstWord, out lastTopicId))
            {
                MessageBox.Show("Fail appending! The format in the file is wrong. \nPlease set a new file name.");
                return;
            }

            TopicID = lastTopicId + 1;
        }
    }

    // Lock the output settings once a search has been started.
    btn_SaveBrowse.Visible = false;
    txt_Saving.ReadOnly = true;
    txt_FileName.ReadOnly = true;

    // Pick the analyzer: SimpleAnalyzer when the checkbox is ticked, otherwise
    // the one held by the generator.
    Lucene.Net.Analysis.Analyzer analyzer;
    if (ckb_Analzer.Checked)
    {
        analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    }
    else
    {
        analyzer = generator.analyzer;
    }

    // Measure the processing time of the search itself.
    Stopwatch stopWatch = Stopwatch.StartNew();

    // NOTE(review): SelectedItem is dereferenced without a null check; this
    // throws if no type/field is selected — confirm the UI always has a selection.
    Searcher searcher = new Searcher(txt_IndexPath.Text, analyzer, generator.writer);
    searcher.CreateSearcher();
    searcher.CreateParser(ddl_Type.SelectedItem.ToString(), ddl_Fields.SelectedItem.ToString());

    // Search and build the result list.
    Dictionary<string, float> resultList = searcher.DisplayResults(searcher.SearchIndex(txt_InformationNeeds.Text), generator.collectionList);
    lbl_QueryText.Text = searcher.query.ToString();

    // Attach each hit's score to its collection entry, best score first.
    ResultCollectionList = (from c in generator.collectionList
                            join r in resultList on c.DocID equals r.Key
                            select new Collection
                            {
                                DocID = c.DocID,
                                Title = c.Title,
                                Author = c.Author,
                                Bibliographic = c.Bibliographic,
                                Words = c.Words,
                                Score = r.Value
                            }).OrderByDescending(n => n.Score).ToList();

    stopWatch.Stop();
    long ts = stopWatch.ElapsedMilliseconds;
    lbl_SearchingTime.Text = ts.ToString() + " ms";

    // Set columns to the list view.
    GenerateListView();

    // Set the total number of documents.
    total = resultList.Count;
    lblTotalDocs.Text = total + " docs";

    // Save the ranking, one line per document, appending to the file.
    using (System.IO.StreamWriter file = new System.IO.StreamWriter(filePath, true))
    {
        // Both values are constant for the whole run, so set them once.
        file.NewLine = "\n";

        // The invariant culture keeps the number formatting (notably the
        // score's decimal separator) independent of the machine's regional settings.
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
        string topicIdString = string.Format(invariant, "{0:000}", TopicID);

        int rank = 1;
        foreach (Collection c in ResultCollectionList)
        {
            file.WriteLine(String.Format(invariant, "{0} {1} {2} {3} {4} {5}", topicIdString, "Q0", c.DocID, rank, c.Score, "9913661_9913351_10032711_RetrievalHero"));
            rank = rank + 1;
        }

        // The next search is written under the following topic id.
        TopicID++;
    }
}