Exemple #1
0
 /// <summary>
 /// Creates the analyzer that corresponds to a numeric selector.
 /// </summary>
 /// <param name="type">
 /// 1 = StandardAnalyzer, 2 = SimpleAnalyzer, 3 = StopAnalyzer, 4 = KeywordAnalyzer,
 /// 5 = WhitespaceAnalyzer. 0 and every other value select StockFooAnalyzer.
 /// </param>
 /// <returns>A newly constructed analyzer.</returns>
 private Analyzer GetAnalyer(int type)
 {
     switch (type)
     {
         case 1:
             return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
         case 2:
             return new SimpleAnalyzer();
         case 3:
             return new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
         case 4:
             return new KeywordAnalyzer();
         case 5:
             return new WhitespaceAnalyzer();
         case 0:
         default:
         {
             // Only StockFooAnalyzer takes the "Data" app setting, so it is read here rather
             // than up front: a missing setting no longer breaks the other analyzers.
             string path = System.Configuration.ConfigurationSettings.AppSettings["Data"].ToString();
             return new StockFooAnalyzer(path);
         }
     }
 }
        /// <summary>
        /// Checks that the analyzer assigned to the Date property through AnalyzeWith
        /// is the very same instance exposed by the mapping info for "Date".
        /// </summary>
        public void SpecifyAnalyzer()
        {
            var expected = new SimpleAnalyzer();
            map.Property(x => x.Date).AnalyzeWith(expected);

            var dateMapping = GetMappingInfo("Date");
            Assert.That(dateMapping.Analyzer, Is.SameAs(expected));
        }
		/// <summary>
		/// Runs SimpleAnalyzer over a fixed set of inputs and checks the produced tokens.
		/// </summary>
		public virtual void  TestSimple()
		{
			Analyzer analyzer = new SimpleAnalyzer();

			// inputs[i] is expected to analyze to expectedTokens[i].
			string[] inputs = new string[]
			{
				"foo bar FOO BAR",
				"foo      bar .  FOO <> BAR",
				"foo.bar.FOO.BAR",
				"U.S.A.",
				"C++",
				"B2B",
				"2B",
				"\"QUOTED\" word"
			};
			string[][] expectedTokens = new string[][]
			{
				new string[] { "foo", "bar", "foo", "bar" },
				new string[] { "foo", "bar", "foo", "bar" },
				new string[] { "foo", "bar", "foo", "bar" },
				new string[] { "u", "s", "a" },
				new string[] { "c" },
				new string[] { "b", "b" },
				new string[] { "b" },
				new string[] { "quoted", "word" }
			};

			for (int i = 0; i < inputs.Length; i++)
			{
				AssertAnalyzesTo(analyzer, inputs[i], expectedTokens[i]);
			}
		}
 /// <summary>
 /// Tokenizes a mixed Chinese/English sentence with SimpleAnalyzer's reusable token stream
 /// and writes each token's text to the console.
 /// </summary>
 public void ReusabeleTokenStreamTest()
 {
     string testwords = "我是中国人,I can speak chinese!";
     SimpleAnalyzer sanalyzer = new SimpleAnalyzer();
     TokenStream ts = sanalyzer.ReusableTokenStream("", new StringReader(testwords));
     try
     {
         Token token;
         while ((token = ts.Next()) != null)
         {
             Console.WriteLine(token.TermText());
         }
     }
     finally
     {
         // Close the stream even when reading or printing a token throws.
         ts.Close();
     }
 }
		/// <summary>
		/// Demo entry point: indexes 225 documents into a RAMDirectory, then runs two queries
		/// (HIGH_PRIORITY alone, and HIGH_PRIORITY OR MED_PRIORITY) and prints their hits.
		/// Any exception is reported on the console instead of propagating.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		public static void  Main(System.String[] args)
		{
			try
			{
				Directory directory = new RAMDirectory();
				Analyzer analyzer = new SimpleAnalyzer();
				int MAX_DOCS = 225;

				// Build the index: every document has the same priority and a running id.
				IndexWriter writer = new IndexWriter(directory, analyzer, true);
				for (int docNumber = 0; docNumber < MAX_DOCS; docNumber++)
				{
					Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
					document.Add(new Field(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.TOKENIZED));
					document.Add(new Field(ID_FIELD, System.Convert.ToString(docNumber), Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(document);
				}
				writer.Close();

				// First a search without OR, then one with OR; each uses a fresh searcher and parser.
				System.String[] queryTexts = new System.String[] { HIGH_PRIORITY, HIGH_PRIORITY + " OR " + MED_PRIORITY };
				foreach (System.String queryText in queryTexts)
				{
					Searcher searcher = new IndexSearcher(directory);
					Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);

					Query query = parser.Parse(queryText);
					System.Console.Out.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));

					Hits hits = searcher.Search(query);
					PrintHits(hits);

					searcher.Close();
				}
			}
			catch (System.Exception e)
			{
				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
			}
		}
Exemple #6
0
        /// <summary>
        /// Checks the tokens SimpleAnalyzer produces for a series of sample inputs.
        /// </summary>
        public virtual void  TestSimple()
        {
            Analyzer simple = new SimpleAnalyzer();

            // Separators of various kinds between words.
            AssertAnalyzesTo(simple, "foo bar FOO BAR", new string[] { "foo", "bar", "foo", "bar" });
            AssertAnalyzesTo(simple, "foo      bar .  FOO <> BAR", new string[] { "foo", "bar", "foo", "bar" });
            AssertAnalyzesTo(simple, "foo.bar.FOO.BAR", new string[] { "foo", "bar", "foo", "bar" });
            AssertAnalyzesTo(simple, "U.S.A.", new string[] { "u", "s", "a" });

            // Inputs containing symbols and digits.
            AssertAnalyzesTo(simple, "C++", new string[] { "c" });
            AssertAnalyzesTo(simple, "B2B", new string[] { "b", "b" });
            AssertAnalyzesTo(simple, "2B", new string[] { "b" });

            // Quoted input.
            AssertAnalyzesTo(simple, "\"QUOTED\" word", new string[] { "quoted", "word" });
        }
 // Writes to the console the highlighted fragments of `text` for `searchTerm`.
 // The term is looked up in the "mainText" field and the text is tokenized with
 // SimpleAnalyzer; at most 5 fragments are requested from the highlighter.
 public static void RealHighlighter(string searchTerm, string text)
 {
     Term term = new Term("mainText", searchTerm);
     Lucene.Net.Search.Highlight.IScorer termScorer = new QueryScorer(new TermQuery(term));
     Highlighter fragmentHighlighter = new Highlighter(termScorer);

     TokenStream tokens = new SimpleAnalyzer().TokenStream("mainText", new System.IO.StringReader(text));
     String[] fragments = fragmentHighlighter.GetBestFragments(tokens, text, 5);

     for (int i = 0; i < fragments.Length; i++)
     {
         Console.Write(fragments[i]);
     }
 }
Exemple #8
0
		/// <summary>
		/// Demo entry point: indexes a handful of short "contents" strings into a RAMDirectory,
		/// runs a phrase query with a slop of 4 and prints up to ten scored hits.
		/// Any exception is reported on the console instead of propagating.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		public static void  Main(System.String[] args)
		{
			try
			{
				Directory directory = new RAMDirectory();
				Analyzer analyzer = new SimpleAnalyzer();

				// Index one document per sample string.
				System.String[] docs = new System.String[]{"a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c"};
				IndexWriter writer = new IndexWriter(directory, analyzer, true);
				foreach (System.String contents in docs)
				{
					Document document = new Document();
					document.Add(Field.Text("contents", contents));
					writer.AddDocument(document);
				}
				writer.Close();

				Searcher searcher = new IndexSearcher(directory);

				System.String[] queries = new System.String[]{"\"a c e\""};

				QueryParsers.QueryParser parser = new QueryParsers.QueryParser("contents", analyzer);
				parser.SetPhraseSlop(4);
				foreach (System.String queryText in queries)
				{
					Query query = parser.Parse(queryText);
					System.Console.Out.WriteLine("Query: " + query.ToString("contents"));

					Hits hits = searcher.Search(query);
					System.Console.Out.WriteLine(hits.Length() + " total results");

					// Print at most the first ten hits.
					int shown = System.Math.Min(hits.Length(), 10);
					for (int rank = 0; rank < shown; rank++)
					{
						Document hitDocument = hits.Doc(rank);
						System.Console.Out.WriteLine(rank + " " + hits.Score(rank) + " " + hitDocument.Get("contents"));
					}
				}
				searcher.Close();
			}
			catch (System.Exception e)
			{
				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
			}
		}
 // Test method for highlighting: highlights the term "hell" in a fixed sentence
 // and prints the single best fragment returned by the highlighter.
 public static void Highlighter()
 {
     string sentence = "I am a man that follows hell.";

     Term term = new Term("", "hell");
     Lucene.Net.Search.Highlight.IScorer termScorer = new QueryScorer(new TermQuery(term));
     Highlighter fragmentHighlighter = new Highlighter(termScorer);

     TokenStream tokens = new SimpleAnalyzer().TokenStream("field", new System.IO.StringReader(sentence));
     String[] fragments = fragmentHighlighter.GetBestFragments(tokens, sentence, 1); // request at most one fragment

     for (int i = 0; i < fragments.Length; i++)
     {
         Console.WriteLine(fragments[i]);
     }
 }
Exemple #10
0
        /// <summary>
        /// Sample walkthrough: builds the stock analyzers, wraps the standard analyzer in a
        /// per-field wrapper, opens a writer/reader/searcher over <c>_directory</c>, runs a
        /// boosted multi-field query and projects the hits to Book objects, then optimizes,
        /// commits and issues DeleteAll on the writer.
        /// </summary>
        public void Code()
        {
            // One instance of each analyzer; only the keyword and standard ones are used below.
            var keywordAnalyzer    = new KeywordAnalyzer();
            var simpleAnalyzer     = new Lucene.Net.Analysis.SimpleAnalyzer();
            var stopAnalyzer       = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            var whitespaceAnalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            var standardAnalyzer   = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            // Standard analysis by default, keyword analysis for the two name fields.
            var perFieldAnalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(standardAnalyzer);
            perFieldAnalyzer.AddAnalyzer("firstname", keywordAnalyzer);
            perFieldAnalyzer.AddAnalyzer("lastname", keywordAnalyzer);

            // NOTE(review): writer, reader and searcher are never closed/disposed in this method.
            var writer   = new IndexWriter(_directory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            var reader   = writer.GetReader();
            var searcher = new IndexSearcher(reader);

            // NOTE(review): "author" is searched but has no entry in the boost map — confirm the
            // MultiFieldQueryParser version in use tolerates a missing key.
            var searchFields = new[] { "text", "title", "author" };
            var fieldBoosts  = new Dictionary<string, float>
            {
                { "text", 2.0f },
                { "title", 1.5f }
            };
            QueryParser multiFieldParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, searchFields, standardAnalyzer, fieldBoosts);
            Query parsedQuery = multiFieldParser.Parse("lucene is great");

            TopDocs topDocs = searcher.Search(parsedQuery, 1000);

            // Both projections are deferred LINQ queries.
            // NOTE(review): `books` is never enumerated here, so no Book is actually created.
            IEnumerable<Document> documents =
                from scoreDoc in topDocs.ScoreDocs
                select searcher.Doc(scoreDoc.Doc);

            var books =
                from document in documents
                select new Book()
                {
                    Text   = document.Get("text"),
                    Title  = document.Get("title"),
                    Author = document.Get("author"),
                    Length = Int32.Parse(document.Get("length"))
                };

            // NOTE(review): DeleteAll is issued after the commit and is not committed in this method.
            writer.Optimize();
            writer.Commit();
            writer.DeleteAll();
        }
		/// <summary>
		/// Indexes 225 documents into a RAMDirectory, then runs two queries (HIGH_PRIORITY alone and
		/// HIGH_PRIORITY OR MED_PRIORITY), printing and checking the hits of each.
		/// </summary>
		/// <param name="out_Renamed">Writer that receives the query text and the printed hits.</param>
		/// <param name="useCompoundFiles">Passed to the index writer's compound-file setting.</param>
		private void  DoTest(System.IO.StringWriter out_Renamed, bool useCompoundFiles)
		{
			Directory directory = new RAMDirectory();
			Analyzer analyzer = new SimpleAnalyzer();
			int MAX_DOCS = 225;

			IndexWriter writer = new IndexWriter(directory, analyzer, true);
			try
			{
				writer.SetUseCompoundFile(useCompoundFiles);

				for (int j = 0; j < MAX_DOCS; j++)
				{
					Document d = new Document();
					d.Add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY));
					d.Add(Field.Text(ID_FIELD, System.Convert.ToString(j)));
					writer.AddDocument(d);
				}
			}
			finally
			{
				// Close the writer even when indexing fails part-way.
				writer.Close();
			}

			// First a search without OR, then one with OR; each uses a fresh searcher and parser.
			System.String[] queryTexts = new System.String[] { HIGH_PRIORITY, HIGH_PRIORITY + " OR " + MED_PRIORITY };
			foreach (System.String queryText in queryTexts)
			{
				Searcher searcher = new IndexSearcher(directory);
				try
				{
					QueryParsers.QueryParser parser = new QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);

					Query query = parser.Parse(queryText);
					out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));

					Hits hits = searcher.Search(query);
					PrintHits(out_Renamed, hits);
					CheckHits(hits, MAX_DOCS);
				}
				finally
				{
					// Close the searcher even when PrintHits/CheckHits throws.
					searcher.Close();
				}
			}
		}
Exemple #12
0
		/// <summary>
		/// Indexes a handful of short "contents" strings into a RAMDirectory, runs a series of
		/// term and phrase queries (phrase slop 4) and writes up to ten scored hits per query.
		/// </summary>
		/// <param name="out_Renamed">Writer that receives the query text and result lines.</param>
		/// <param name="useCompoundFile">Passed to the index writer's compound-file setting.</param>
		private void  DoTestSearch(System.IO.StreamWriter out_Renamed, bool useCompoundFile)
		{
			Directory directory = new RAMDirectory();
			Analyzer analyzer = new SimpleAnalyzer();

			// Index one document per sample string.
			System.String[] docs = new System.String[]{"a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c"};
			IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetUseCompoundFile(useCompoundFile);
			foreach (System.String contents in docs)
			{
				Document document = new Document();
				document.Add(new Field("contents", contents, Field.Store.YES, Field.Index.ANALYZED));
				writer.AddDocument(document);
			}
			writer.Close();

			Searcher searcher = new IndexSearcher(directory);

			System.String[] queries = new System.String[]{"a b", "\"a b\"", "\"a b c\"", "a c", "\"a c\"", "\"a c e\""};

			QueryParser parser = new QueryParser("contents", analyzer);
			parser.SetPhraseSlop(4);
			foreach (System.String queryText in queries)
			{
				Query query = parser.Parse(queryText);
				out_Renamed.WriteLine("Query: " + query.ToString("contents"));

				ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
				out_Renamed.WriteLine(hits.Length + " total results");

				// Write at most the first ten hits.
				int shown = System.Math.Min(hits.Length, 10);
				for (int rank = 0; rank < shown; rank++)
				{
					Document hitDocument = searcher.Doc(hits[rank].doc);
					out_Renamed.WriteLine(rank + " " + hits[rank].score + " " + hitDocument.Get("contents"));
				}
			}
			searcher.Close();
		}
        /// <summary>
        /// Writes one document through a LuceneCodecDirectory created with no codecs and
        /// verifies that a term query for it returns exactly one hit.
        /// </summary>
        public void WithoutGettingErrors()
        {
            using (var directory = new LuceneCodecDirectory(Path, Enumerable.Empty<AbstractIndexCodec>()))
            using (var analyzer = new SimpleAnalyzer())
            {
                var document = new Lucene.Net.Documents.Document();
                document.Add(new Field("test", "value", Field.Store.YES, Field.Index.ANALYZED));

                // The writer is disposed before the searcher is opened.
                using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    writer.AddDocument(document);
                }

                using (var searcher = new IndexSearcher(directory))
                {
                    var hits = searcher.Search(new TermQuery(new Term("test", "value")), 10);
                    Assert.Equal(1, hits.TotalHits);
                }
            }
        }
    // Prints the highlighted fragments of a hit document's "mainText" field for the
    // given search term, followed by a separator line, then waits for a key press.
    // NOTE: "mainText" must be a stored field of the indexed document, because its
    // value is read back from the document here. The same applies to any other field
    // you want to display this way.
    private static void DisplayMessage(Document d, string searchTerm)
    {
        // Alternative output used with the database index:
        //Console.WriteLine("id: " + d.Get("id") + "\n" + "messageBox: " + d.Get("messageBox") + "\n" + "incoming: " + d.Get("incoming") + "\n" + "date: " + d.Get("date") + "\n" + "mainText: " + d.Get("mainText"));

        // Alternative output used with the test files:
        //Console.WriteLine("id: " + d.Get("id") + "\n" + "mainText: " + d.Get("mainText"));

        string mainText = d.Get("mainText");

        Term term = new Term("mainText", searchTerm);
        Lucene.Net.Search.Highlight.IScorer termScorer = new QueryScorer(new TermQuery(term));
        Highlighter fragmentHighlighter = new Highlighter(termScorer);

        TokenStream tokens = new SimpleAnalyzer().TokenStream("mainText", new System.IO.StringReader(mainText));
        String[] fragments = fragmentHighlighter.GetBestFragments(tokens, mainText, 5); // request at most five fragments

        for (int i = 0; i < fragments.Length; i++)
        {
            Console.Write(fragments[i]);
        }

        Console.WriteLine("=====================");
        Console.ReadKey();
    }
Exemple #15
0
 /// <summary>
 /// Sets up the analyzer and the query parser: a SimpleAnalyzer, and a LUCENE_30
 /// QueryParser over the TEXT_FN field that uses that same analyzer.
 /// </summary>
 private void InitLucene()
 {
     var simpleAnalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();

     analyzer = simpleAnalyzer;
     parser   = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, simpleAnalyzer);
 }
 /// <summary>
 /// Creates a new analyzer for the requested analyzer type.
 /// </summary>
 /// <param name="oAnalyzerType">Which analyzer to create.</param>
 /// <returns>
 /// A SimpleAnalyzer, StopAnalyzer or WhitespaceAnalyzer for the matching value;
 /// a StandardAnalyzer for AnalyzerType.StandardAnalyzer and for any other value.
 /// </returns>
 public static Analyzer GetAnalyzer(AnalyzerType oAnalyzerType)
 {
     switch (oAnalyzerType)
     {
         case AnalyzerType.SimpleAnalyzer:
             return new SimpleAnalyzer();

         case AnalyzerType.StopAnalyzer:
             return new StopAnalyzer();

         case AnalyzerType.WhitespaceAnalyzer:
             return new WhitespaceAnalyzer();

         case AnalyzerType.StandardAnalyzer:
         default:
             return new StandardAnalyzer();
     }
 }
 /// <summary>
 /// Indexes 225 documents into a RAMDirectory, then runs two queries (HIGH_PRIORITY alone and
 /// HIGH_PRIORITY OR MED_PRIORITY), printing and checking the hits of each.
 /// </summary>
 /// <param name="out_Renamed">Writer that receives the query text and the printed hits.</param>
 /// <param name="useCompoundFiles">Assigned to the index writer's UseCompoundFile property.</param>
 private void  DoTest(System.IO.StreamWriter out_Renamed, bool useCompoundFiles)
 {
     Directory directory = new RAMDirectory();
     Analyzer analyzer = new SimpleAnalyzer();
     int MAX_DOCS = 225;

     IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     try
     {
         writer.UseCompoundFile = useCompoundFiles;

         for (int j = 0; j < MAX_DOCS; j++)
         {
             Document d = new Document();
             d.Add(new Field(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.ANALYZED));
             d.Add(new Field(ID_FIELD, System.Convert.ToString(j), Field.Store.YES, Field.Index.ANALYZED));
             writer.AddDocument(d);
         }
     }
     finally
     {
         // Close the writer even when indexing fails part-way.
         writer.Close();
     }

     // First a search without OR, then one with OR; each uses a fresh searcher and parser.
     System.String[] queryTexts = new System.String[] { HIGH_PRIORITY, HIGH_PRIORITY + " OR " + MED_PRIORITY };
     foreach (System.String queryText in queryTexts)
     {
         Searcher searcher = new IndexSearcher(directory, true);
         try
         {
             QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, PRIORITY_FIELD, analyzer);

             Query query = parser.Parse(queryText);
             out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));

             ScoreDoc[] hits = searcher.Search(query, null, MAX_DOCS).ScoreDocs;
             PrintHits(out_Renamed, hits, searcher);
             CheckHits(hits, MAX_DOCS, searcher);
         }
         finally
         {
             // Close the searcher even when PrintHits/CheckHits throws.
             searcher.Close();
         }
     }
 }
Exemple #18
0
        /// <summary>
        /// Splits text into terms by running it through Lucene.Net's SimpleAnalyzer.
        /// </summary>
        /// <param name="content">Text to tokenize; null or empty yields an empty array.</param>
        /// <returns>The terms produced by the analyzer, in stream order.</returns>
        public static string[] SplitWords2(string content)
        {
            // StringReader rejects null, and an empty string has no tokens anyway.
            if (string.IsNullOrEmpty(content))
            {
                return new string[0];
            }

            List<string> strList = new List<string>();

            // SimpleAnalyzer is what is actually used here; the original comment mentioned the
            // PanGu analyzer (PanGuAnalyzer), which this method does not use.
            Analyzer analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();

            using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content)))
            {
                // The attribute instance is shared by the stream and updated in place on each
                // IncrementToken call, so it only needs to be looked up once.
                Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
                    tokenStream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();

                while (tokenStream.IncrementToken())
                {
                    strList.Add(ita.Term);
                }
            }

            return strList.ToArray();
        }
Exemple #19
0
        /// <summary>
        /// Rebuilds the index in c:\TEMP_LUCENE2 from lucene-documents.json, then runs every query
        /// from lucene-queries.json three ways (title + description, title only, description only)
        /// and writes one JSON line per query to the console. Each list holds the 1-based ids of
        /// at most 20 hits. Any exception is shown in a message box instead of propagating.
        /// </summary>
        private void queryRhino_Lucene()
        {
            try
            {
                // To run against the Rhino corpus instead, use rhino-documents.json,
                // rhino-queries.json and the c:\TEMP_RHINO2 index directory.
                String indexLuceneFile = System.IO.Directory.GetCurrentDirectory() + @"\lucene-documents.json";
                String queryLuceneFile = System.IO.Directory.GetCurrentDirectory() + @"\lucene-queries.json";

                List<indexDocument> results = new List<indexDocument>();
                List<indexQuery> queries = new List<indexQuery>();

                // Both input files hold one JSON object per line.
                String line;
                using (System.IO.StreamReader indexFile = new System.IO.StreamReader(indexLuceneFile))
                {
                    while ((line = indexFile.ReadLine()) != null)
                    {
                        results.Add(JsonConvert.DeserializeObject<indexDocument>(line));
                    }
                }

                using (System.IO.StreamReader queryFile = new System.IO.StreamReader(queryLuceneFile))
                {
                    while ((line = queryFile.ReadLine()) != null)
                    {
                        queries.Add(JsonConvert.DeserializeObject<indexQuery>(line));
                    }
                }

                Lucene.Net.Store.Directory indexDirectory = FSDirectory.Open(@"c:\TEMP_LUCENE2");

                // Alternatives tried here: StopAnalyzer, WhitespaceAnalyzer, StandardAnalyzer (LUCENE_30).
                Analyzer analyzer = new SimpleAnalyzer();

                // CREATE INDEX: one Lucene document per input line. The writer is released even on failure.
                using (IndexWriter writer = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    foreach (indexDocument d in results)
                    {
                        writer.AddDocument(d.ToDocument());
                    }

                    writer.Optimize();
                    writer.Commit();
                }

                // SEARCH: the index no longer changes, so a single reader/searcher serves every
                // query and is disposed afterwards (previously one was opened per query and leaked).
                using (IndexReader reader = DirectoryReader.Open(indexDirectory, true))
                using (IndexSearcher searcher = new IndexSearcher(reader))
                {
                    const int hitsPerPage = 20;

                    foreach (indexQuery q in queries)
                    {
                        Document query = q.ToDocument();

                        String title_query = ConcatFieldValues(query.GetFields("title_tokens"));
                        String description_query = ConcatFieldValues(query.GetFields("description_tokens"));

                        // All three variants are parsed before any search is run.
                        QueryParser queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "tokens", analyzer);
                        Query query1Parsed = queryParser.Parse(title_query + " " + description_query);
                        Query query2Parsed = queryParser.Parse(title_query);
                        Query query3Parsed = queryParser.Parse(description_query);

                        String jsonResult = "{\"query_id\": " + query.Get("id") + ", "
                            + "\"title_and_description\": [" + FormatHitIds(searcher, query1Parsed, hitsPerPage) + "], "
                            + "\"title_only\": [" + FormatHitIds(searcher, query2Parsed, hitsPerPage) + "], "
                            + "\"description_only\": [" + FormatHitIds(searcher, query3Parsed, hitsPerPage) + "]}";

                        Console.WriteLine(jsonResult);
                    }
                }
            }
            catch (Exception ex) { MessageBox.Show(ex.ToString()); }
        }

        // Concatenates the string values of the given fields, each preceded by a single space.
        private static String ConcatFieldValues(Field[] fields)
        {
            System.Text.StringBuilder joined = new System.Text.StringBuilder();
            foreach (Field f in fields)
            {
                joined.Append(" ").Append(f.StringValue);
            }
            return joined.ToString();
        }

        // Runs the query and returns the hit document ids, shifted to 1-based, joined with ", ".
        private static String FormatHitIds(IndexSearcher searcher, Query query, int maxHits)
        {
            ScoreDoc[] hits = searcher.Search(query, maxHits).ScoreDocs;
            String[] ids = new String[hits.Length];
            for (int i = 0; i < hits.Length; i++)
            {
                ids[i] = (hits[i].Doc + 1).ToString();
            }
            return String.Join(", ", ids);
        }
		/// <summary>
		/// Creates a query parser over "field" whose default operator is OR.
		/// </summary>
		/// <param name="a">Analyzer to use; a SimpleAnalyzer is created when this is null.</param>
		/// <returns>The configured query parser.</returns>
		public virtual Lucene.Net.QueryParsers.QueryParser GetParser(Analyzer a)
		{
			Analyzer effectiveAnalyzer = (a != null) ? a : new SimpleAnalyzer();

			Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("field", effectiveAnalyzer);
			parser.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.OR_OPERATOR);
			return parser;
		}
		/// <summary>
		/// Parses a query string against "field" with AND as the default operator.
		/// </summary>
		/// <param name="query">Query text to parse.</param>
		/// <param name="a">Analyzer to use; a SimpleAnalyzer is created when this is null.</param>
		/// <returns>The parsed query.</returns>
		public virtual Query GetQueryDOA(System.String query, Analyzer a)
		{
			Analyzer effectiveAnalyzer = (a != null) ? a : new SimpleAnalyzer();

			Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("field", effectiveAnalyzer);
			parser.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.AND_OPERATOR);
			return parser.Parse(query);
		}
        /// <summary>
        /// Searches for the phrase "very long" and, for each of the top ten hits, prints the
        /// best highlighted fragment of the stored field using a QueryScorer-based highlighter
        /// with a span fragmenter.
        /// </summary>
        public void TestQueryScorerHits()
        {
            Analyzer simpleAnalyzer = new SimpleAnalyzer();
            QueryParser phraseParser = new QueryParser(TEST_VERSION, FIELD_NAME, simpleAnalyzer);
            query = phraseParser.Parse("\"very long\"");
            searcher = new IndexSearcher(ramDir, true);
            TopDocs topDocs = searcher.Search(query, 10);

            QueryScorer queryScorer = new QueryScorer(query, FIELD_NAME);
            Highlighter highlighter = new Highlighter(queryScorer);

            foreach (var scoreDoc in topDocs.ScoreDocs)
            {
                Document hitDocument = searcher.Doc(scoreDoc.Doc);
                String storedText = hitDocument.Get(FIELD_NAME);

                TokenStream tokens = TokenSources.GetAnyTokenStream(
                    searcher.IndexReader, scoreDoc.Doc, FIELD_NAME, hitDocument, simpleAnalyzer);

                // A fresh fragmenter is installed for every hit.
                highlighter.TextFragmenter = new SimpleSpanFragmenter(queryScorer);

                Console.WriteLine(highlighter.GetBestFragment(tokens, storedText));
            }
        }
Exemple #23
0
        /// <summary>
        /// Creates the index writer over the directory at Config.IndexPath, using a
        /// SimpleAnalyzer and unlimited field length, and sets its max-buffered-docs
        /// value to IndexWriter.DISABLE_AUTO_FLUSH.
        /// </summary>
        private void OpenIndexWriter()
        {
            // Arguments: index directory, text analyzer, the boolean third argument (true), field-length limit.
            _indexWriter = new IndexWriter(
                FSDirectory.Open(new DirectoryInfo(Config.IndexPath)),
                new SimpleAnalyzer(),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);

            _indexWriter.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
        }
Exemple #24
0
        /// <summary>
        /// Rebuilds the index in c:\temp_corpus from the Derby and OFBiz issue files
        /// (jira\derby_issues_filtered.json and jira\ofbiz_issues_filtered.json under the
        /// application base directory; one JSON object per line).
        /// </summary>
        public void addToDictionary()
        {
            String derbyJson = AppDomain.CurrentDomain.BaseDirectory + @"jira\derby_issues_filtered.json";
            String ofbizJson = AppDomain.CurrentDomain.BaseDirectory + @"jira\ofbiz_issues_filtered.json";

            List<Issue> issuesDerby = ReadIssuesFromJsonLines(derbyJson);
            List<Issue> issuesOfbiz = ReadIssuesFromJsonLines(ofbizJson);

            Lucene.Net.Store.Directory indexDirectory = FSDirectory.Open(@"c:\temp_corpus");

            // Alternatives tried here: StopAnalyzer, WhitespaceAnalyzer, StandardAnalyzer (LUCENE_30).
            Analyzer analyzer = new SimpleAnalyzer();

            // CREATE INDEX. Both issue sets go through the same open writer: previously the writer
            // was disposed after the Derby issues and then reused for the OFBiz issues.
            using (IndexWriter writer = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (Issue d in issuesDerby)
                {
                    writer.AddDocument(d.ToDocument());
                }

                foreach (Issue d in issuesOfbiz)
                {
                    writer.AddDocument(d.ToDocument());
                }

                writer.Optimize();
                writer.Commit();
            }
        }

        // Reads a file containing one JSON-serialized Issue per line.
        private static List<Issue> ReadIssuesFromJsonLines(String path)
        {
            List<Issue> issues = new List<Issue>();
            using (System.IO.StreamReader file = new System.IO.StreamReader(path))
            {
                String line;
                while ((line = file.ReadLine()) != null)
                {
                    issues.Add(JsonConvert.DeserializeObject<Issue>(line));
                }
            }
            return issues;
        }