Exemplo n.º 1
0
        public virtual void  TestSetBufferSize()
        {
            System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
            MockFSDirectory    dir      = new MockFSDirectory(indexDir, NewRandom());

            try
            {
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                writer.SetUseCompoundFile(false);
                for (int i = 0; i < 37; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc);
                }
                writer.Close();

                dir.allIndexInputs.Clear();

                IndexReader reader = IndexReader.Open(dir);
                Term        aaa    = new Term("content", "aaa");
                Term        bbb    = new Term("content", "bbb");
                Term        ccc    = new Term("content", "ccc");
                Assert.AreEqual(37, reader.DocFreq(ccc));
                reader.DeleteDocument(0);
                Assert.AreEqual(37, reader.DocFreq(aaa));
                dir.tweakBufferSizes();
                reader.DeleteDocument(4);
                Assert.AreEqual(reader.DocFreq(bbb), 37);
                dir.tweakBufferSizes();

                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[]    hits     = searcher.Search(new TermQuery(bbb), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                dir.tweakBufferSizes();
                hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(1, hits.Length);
                hits = searcher.Search(new TermQuery(aaa), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                searcher.Close();
                reader.Close();
            }
            finally
            {
                _TestUtil.RmDir(indexDir);
            }
        }
Exemplo n.º 2
0
 public bool CheckDocExist(OfficeData officeData)
 {
     Lucene.Net.Search.Query        query1 = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("FileName", officeData.FileName));
     Lucene.Net.Search.Query        query2 = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("LastWriteTime", officeData.LastWriteTime));
     Lucene.Net.Search.BooleanQuery query3 = new Lucene.Net.Search.BooleanQuery();
     query3.Add(query1, Lucene.Net.Search.Occur.MUST);
     query3.Add(query2, Lucene.Net.Search.Occur.MUST);
     Lucene.Net.Search.TopDocs topDocs = searcher.Search(query3, 2);
     if (topDocs.TotalHits == 0)
     {
         return(false);
     }
     return(true);
 }
Exemplo n.º 3
0
		public virtual void  TestDemo_Renamed_Method()
		{
			
			Analyzer analyzer = new StandardAnalyzer();
			
			// Store the index in memory:
			Directory directory = new RAMDirectory();
			// To store an index on disk, use this instead (note that the 
			// parameter true will overwrite the index in that directory
			// if one exists):
			//Directory directory = FSDirectory.getDirectory("/tmp/testindex", true);
			IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
			iwriter.SetMaxFieldLength(25000);
			Document doc = new Document();
			System.String text = "This is the text to be indexed.";
			doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED));
			iwriter.AddDocument(doc);
			iwriter.Close();
			
			// Now search the index:
			IndexSearcher isearcher = new IndexSearcher(directory);
			// Parse a simple query that searches for "text":
			Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer);
			Query query = parser.Parse("text");
			Hits hits = isearcher.Search(query);
			Assert.AreEqual(1, hits.Length());
			// Iterate through the results:
			for (int i = 0; i < hits.Length(); i++)
			{
				Document hitDoc = hits.Doc(i);
				Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname"));
			}
			isearcher.Close();
			directory.Close();
		}
Exemplo n.º 4
0
        public string Search(string strQuery)
        {
            string result = string.Empty;

            Lucene.Net.Index.IndexReader        reader           = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
            Lucene.Net.QueryParsers.QueryParser parser           = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
            Lucene.Net.Search.Query             query            = parser.Parse(strQuery);
            Lucene.Net.Search.IndexSearcher     searcher         = new Lucene.Net.Search.IndexSearcher(reader);
            Lucene.Net.Search.Hits                   hits        = searcher.Search(query);
            Lucene.Net.Highlight.QueryScorer         score       = new Lucene.Net.Highlight.QueryScorer(query);
            Lucene.Net.Highlight.SimpleHTMLFormatter formater    = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
            Lucene.Net.Highlight.Highlighter         highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);
            result += "<div align='right' style='background-color:#F0F7F9; padding-right:15px' height='30px'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #005482; FONT-FAMILY: arial'>Kết quả tìm thấy : " + hits.Length() + "  </font></div>";
            result += "<div style='padding: 10px 10px 10px 10px;'>";
            for (int i = 0; i < hits.Length(); i++)
            {
                string id     = hits.Doc(i).Get("ArticleId");
                string title  = hits.Doc(i).Get("ArticleTitle");
                string detail = hits.Doc(i).Get("ArticleDetail");
                Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail));
                result += string.Format("<div align='left'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #5b5b5b; FONT-FAMILY: arial'><a href='/?ArticleId={0}'>{1}</a></font>", id, title);
                result += string.Format("<div align='left'><font style='FONT-SIZE: 9pt' face='Arial' color='#005482'>...{0}...</font></div></div></br>", highlighter.GetBestFragment(ts, detail));
            }
            result += "</div>";
            reader.Close();
            return(result);
        }
Exemplo n.º 5
0
        public void TestBooleanQuerySerialization()
        {
            Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery();

            lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Lucene.Net.Search.BooleanClause.Occur.MUST);

            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            System.IO.MemoryStream ms = new System.IO.MemoryStream();
            bf.Serialize(ms, lucQuery);
            ms.Seek(0, System.IO.SeekOrigin.Begin);
            Lucene.Net.Search.BooleanQuery lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms);
            ms.Close();

            Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization");

            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);

            int hitCount = searcher.Search(lucQuery, 20).totalHits;

            searcher.Close();
            searcher = new Lucene.Net.Search.IndexSearcher(dir, true);

            int hitCount2 = searcher.Search(lucQuery2, 20).totalHits;

            Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts");
        }
Exemplo n.º 6
0
        /// <summary>
        /// Search for files.
        /// </summary>
        /// <param name="queryText">The query text.</param>
        /// <returns>The files that match the query text.</returns>
        public SourceFile[] Search(string queryText)
        {
            Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(
                Lucene.Net.Util.Version.LUCENE_30,
                "body",
                _analyzer);

            Lucene.Net.Search.Query query = parser.Parse(queryText);

            using (Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(_directory, true))
            {
                Lucene.Net.Search.TopDocs result = searcher.Search(query, int.MaxValue);

                List <SourceFile> files = new List <SourceFile>();
                foreach (Lucene.Net.Search.ScoreDoc d in result.ScoreDocs)
                {
                    Lucene.Net.Documents.Document doc = searcher.Doc(d.Doc);
                    files.Add(new SourceFile(
                                  doc.Get("id"),
                                  doc.Get("type"),
                                  doc.Get("name"),
                                  doc.Get("fileName"),
                                  null));
                }

                return(files.ToArray());
            }
        }
Exemplo n.º 7
0
        public virtual void  TestDemo_Renamed()
        {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

            // Store the index in memory:
            Directory directory = new RAMDirectory();
            // To store an index on disk, use this instead:
            //Directory directory = FSDirectory.open("/tmp/testindex");
            IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
            Document    doc     = new Document();

            System.String text = "This is the text to be indexed.";
            doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
            iwriter.AddDocument(doc);
            iwriter.Close();

            // Now search the index:
            IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
            // Parse a simple query that searches for "text":
            QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer);
            Query       query  = parser.Parse("text");

            ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            // Iterate through the results:
            for (int i = 0; i < hits.Length; i++)
            {
                Document hitDoc = isearcher.Doc(hits[i].Doc);
                Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
            }
            isearcher.Close();
            directory.Close();
        }
Exemplo n.º 8
0
        public void Run()
        {
            Lucene.Net.Index.IndexReader indexReader = Lucene.Net.Index.IndexReader.Open(_directory, true);
            Lucene.Net.Search.Searcher   indexSearch = new Lucene.Net.Search.IndexSearcher(indexReader);

            var queryParser = new QueryParser(luceneVersion, "TAGS", _analyzer);

            Console.Write("검색어를 입력해주세요 :");
            string q = Console.ReadLine();

            var query = queryParser.Parse(q);

            Console.WriteLine("[검색어] {0}", q);

            Lucene.Net.Search.TopDocs resultDocs = indexSearch.Search(query, indexReader.MaxDoc);

            var hits = resultDocs.ScoreDocs;

            int currentRow = 0;

            foreach (var hit in hits)
            {
                var documentFromSearch = indexSearch.Doc(hit.Doc);
                Console.WriteLine("* Result {0}", ++currentRow);
                Console.WriteLine("\t-제목 : {0}", documentFromSearch.Get("TITLE"));
                Console.WriteLine("\t-내용 : {0}", documentFromSearch.Get("SUMMARY"));
                Console.WriteLine("\t-태그 : {0}", documentFromSearch.Get("TAGS"));
            }

            Console.WriteLine();
        }
Exemplo n.º 9
0
        public virtual void  TestNPESpanQuery()
        {
            Directory   dir    = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(new System.Collections.Hashtable(0)), IndexWriter.MaxFieldLength.LIMITED);

            // Add documents
            AddDoc(writer, "1", "the big dogs went running to the market");
            AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

            // Commit
            writer.Close();

            // Get searcher
            IndexReader   reader   = IndexReader.Open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Control (make sure docs indexed)
            Assert.AreEqual(2, HitCount(searcher, "the"));
            Assert.AreEqual(1, HitCount(searcher, "cat"));
            Assert.AreEqual(1, HitCount(searcher, "dogs"));
            Assert.AreEqual(0, HitCount(searcher, "rabbit"));

            // This throws exception (it shouldn't)
            Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10).TotalHits);
            reader.Close();
            dir.Close();
        }
Exemplo n.º 10
0
        public virtual void TestDifferentNumResults()
        {
            // test the collector w/ FacetRequests and different numResults
            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);

            FacetsCollector sfc = new FacetsCollector();
            TermQuery q = new TermQuery(A);
            searcher.Search(q, sfc);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);
            FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
            Assert.AreEqual(-1, (int)result.Value);
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(termExpectedCounts[CP_A + "/" + labelValue.label], labelValue.value);
            }
            result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
            Assert.AreEqual(termExpectedCounts[CP_B], result.Value);
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(termExpectedCounts[CP_B + "/" + labelValue.label], labelValue.value);
            }

            IOUtils.Close(indexReader, taxoReader);
        }
Exemplo n.º 11
0
        public virtual void TestNoParents()
        {
            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);

            var sfc = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), sfc);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);

            FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_C, CP_C);
            Assert.AreEqual(allExpectedCounts[CP_C], result.Value);
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(allExpectedCounts[CP_C + "/" + labelValue.label], labelValue.value);
            }
            result = facets.GetTopChildren(NUM_CHILDREN_CP_D, CP_D);
            Assert.AreEqual(allExpectedCounts[CP_C], result.Value);
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(allExpectedCounts[CP_D + "/" + labelValue.label], labelValue.value);
            }

            IOUtils.Close(indexReader, taxoReader);
        }
Exemplo n.º 12
0
        public virtual void  TestGetValuesForIndexedDocument()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new StandardAnalyzer(), true);

            writer.AddDocument(MakeDocumentWithFields());
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            // search for something that does exists
            Query query = new TermQuery(new Term("keyword", "test1"));

            // ensure that queries return expected results without DateFilter first
            Hits hits = searcher.Search(query);

            Assert.AreEqual(1, hits.Length());

            try
            {
                DoAssert(hits.Doc(0), true);
            }
            catch (System.Exception e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                System.Console.Error.Write("\n");
            }
            finally
            {
                searcher.Close();
            }
        }
Exemplo n.º 13
0
        public virtual void TestAllCounts()
        {
            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);

            FacetsCollector sfc = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), sfc);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);

            FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
            Assert.AreEqual(-1, (int)result.Value);
            int prevValue = int.MaxValue;
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(allExpectedCounts[CP_A + "/" + labelValue.label], labelValue.value);
                Assert.True((int)labelValue.value <= prevValue, "wrong sort order of sub results: labelValue.value=" + labelValue.value + " prevValue=" + prevValue);
                prevValue = (int)labelValue.value;
            }

            result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
            Assert.AreEqual(allExpectedCounts[CP_B], result.Value);
            prevValue = int.MaxValue;
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(allExpectedCounts[CP_B + "/" + labelValue.label], labelValue.value);
                Assert.True((int)labelValue.value <= prevValue, "wrong sort order of sub results: labelValue.value=" + labelValue.value + " prevValue=" + prevValue);
                prevValue = (int)labelValue.value;
            }

            IOUtils.Close(indexReader, taxoReader);
        }
Exemplo n.º 14
0
        public Task <IEnumerable <ISearchItem> > Search(string pattern, int page)
        {
            using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
                using (Lucene.Net.Store.Directory index = new SimpleFSDirectory(Path.ChangeExtension(_bookFile.FullName,
                                                                                                     Convert.ToInt32(LuceneVersion.LUCENE_48).ToString())))
                    using (IndexReader reader = DirectoryReader.Open(index))
                    {
                        Lucene.Net.Search.Query query =
                            new QueryParser(LuceneVersion.LUCENE_48, nameof(TabHtmlText.Html), analyzer).Parse(pattern);
                        Lucene.Net.Search.TopScoreDocCollector collector =
                            Lucene.Net.Search.TopScoreDocCollector.Create(512, true);
                        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
                        searcher.Search(query, collector);
                        Lucene.Net.Search.TopDocs docs = collector.GetTopDocs(page * PageSize, PageSize);

                        QueryScorer scorer      = new QueryScorer(query);
                        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer) // SpanGradientFormatter
                        {
                            TextFragmenter = new SimpleSpanFragmenter(scorer, 30)
                        };

                        IEnumerable <ISearchItem> items = docs.ScoreDocs.Select(scoreDoc =>
                        {
                            Document doc       = searcher.Doc(scoreDoc.Doc);
                            string html        = doc.Get(nameof(TabHtmlText.Html));
                            string[] fragments = highlighter.GetBestFragments(new HTMLStripCharAnalyzer(),
                                                                              nameof(TabHtmlText.Html), html, 3);
                            return(new SearchItem(int.Parse(doc.Get(nameof(TabHtmlText.NumId))), string.Join("\n", fragments)));
                        });

                        return(Task.FromResult(items.ToList().AsEnumerable()));
                    }
        }
Exemplo n.º 15
0
        } // End Sub BuildIndex

        // https://lucenenet.apache.org/
        // https://www.codeproject.com/Articles/609980/Small-Lucene-NET-Demo-App
        // https://stackoverflow.com/questions/12600196/lucene-how-to-index-file-names
        private static void SearchPath(string phrase, string indexPath)
        {
            Lucene.Net.Util.LuceneVersion version = Lucene.Net.Util.LuceneVersion.LUCENE_48;
            Lucene.Net.Store.Directory    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);

            Lucene.Net.Index.IndexReader r = Lucene.Net.Index.DirectoryReader.Open(luceneIndexDirectory);

            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(r);
            Lucene.Net.Analysis.Analyzer    analyzer = GetWrappedAnalyzer();

            Lucene.Net.QueryParsers.Classic.QueryParser parser = new Lucene.Net.QueryParsers.Classic.QueryParser(version, "file_name", analyzer);

            // https://stackoverflow.com/questions/15170097/how-to-search-across-all-the-fields
            // Lucene.Net.QueryParsers.Classic.MultiFieldQueryParser parser = new Lucene.Net.QueryParsers.Classic.MultiFieldQueryParser(version, GetFields(r), analyzer);


            Lucene.Net.Search.Query query = parser.Parse(Lucene.Net.QueryParsers.Classic.QueryParser.Escape(phrase));

            Lucene.Net.Search.ScoreDoc[] hits = searcher.Search(query, 10).ScoreDocs;
            foreach (Lucene.Net.Search.ScoreDoc hit in hits)
            {
                Lucene.Net.Documents.Document foundDoc = searcher.Doc(hit.Doc);
                System.Console.WriteLine(hit.Score);
                string full_name = foundDoc.Get("full_name");
                System.Console.WriteLine(full_name);
                // string favoritePhrase = foundDoc.Get("favoritePhrase");
                // System.Console.WriteLine(favoritePhrase);
            } // Next hit
        }     // End Sub SearchPath
            public override void Warm(IndexReader r)
            {
                IndexSearcher s = new IndexSearcher(r);

                Lucene.Net.Search.TopDocs hits = s.Search(new TermQuery(new Term("foo", "bar")), 10);
                Assert.AreEqual(20, hits.TotalHits);
            }
Exemplo n.º 17
0
		public virtual void  TestDemo_Renamed()
		{
			
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
			
			// Store the index in memory:
			Directory directory = new RAMDirectory();
			// To store an index on disk, use this instead:
			//Directory directory = FSDirectory.open("/tmp/testindex");
			IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
			Document doc = new Document();
			System.String text = "This is the text to be indexed.";
			doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
			iwriter.AddDocument(doc);
			iwriter.Close();
			
			// Now search the index:
			IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
			// Parse a simple query that searches for "text":
			QueryParser parser = new QueryParser("fieldname", analyzer);
			Query query = parser.Parse("text");
			ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(1, hits.Length);
			// Iterate through the results:
			for (int i = 0; i < hits.Length; i++)
			{
				Document hitDoc = isearcher.Doc(hits[i].doc);
				Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
			}
			isearcher.Close();
			directory.Close();
		}
Exemplo n.º 18
0
 public IEnumerable <int> Get(string search, int max = 100, int minScore = 1)
 {
     if (!built)
     {
         BuildIndexes();
         built = true;
     }
     try
     {
         var dir = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\lucene"));
         Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
         Lucene.Net.QueryParsers.QueryParser           parser   = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "body", analyzer);
         Lucene.Net.Search.Query query = null;
         query = parser.Parse(search);
         var searcher = new Lucene.Net.Search.IndexSearcher(dir);
         var hits     = searcher.Search(query, max);
         var doc      = searcher.Doc(hits.ScoreDocs[0].Doc);
         var result   = hits.ScoreDocs.Where(s => s.Score > minScore).Select(h => int.Parse(searcher.Doc(h.Doc).GetField("id").StringValue));
         return(result);
     }
     catch (Exception e)
     {
         throw;
     }
 }
Exemplo n.º 19
0
        static void Main(string[] args)
        {
            var    directory           = LuceneIndex();
            var    luceneIndexSearcher = new Lucene.Net.Search.IndexSearcher(directory);
            string input;

            Console.WriteLine(luceneIndexSearcher.MaxDoc);
            Console.WriteLine(luceneIndexSearcher.Doc(0));
            do
            {
                input = Console.ReadLine();
                //build a query object
                var searchTerm = new Lucene.Net.Index.Term("Data", input);
                var query      = new Lucene.Net.Search.FuzzyQuery(searchTerm, 0.05f);

                //execute the query
                var hits = luceneIndexSearcher.Search(query, 10);
                //iterate over the results.
                for (int i = 0; i < Math.Min(10, hits.TotalHits); i++)
                {
                    var    doc          = hits.ScoreDocs[i];
                    string contentValue = luceneIndexSearcher.Doc(doc.Doc).Get("Data");

                    Console.WriteLine(contentValue);
                }
                if (input == "all")
                {
                    for (int i = 0; i < luceneIndexSearcher.MaxDoc; i++)
                    {
                        Console.WriteLine(luceneIndexSearcher.Doc(i));
                    }
                }
            }while (input != "exit");
            directory.Dispose();
        }
Exemplo n.º 20
0
        // Test that queries based on reverse/ordFieldScore returns docs with expected score.
        private void  DoTestExactScore(System.String field, bool inOrder)
        {
            IndexSearcher s = new IndexSearcher(dir, true);
            ValueSource   vs;

            if (inOrder)
            {
                vs = new OrdFieldSource(field);
            }
            else
            {
                vs = new ReverseOrdFieldSource(field);
            }
            Query   q  = new ValueSourceQuery(vs);
            TopDocs td = s.Search(q, null, 1000);

            Assert.AreEqual(N_DOCS, td.TotalHits, "All docs should be matched!");
            ScoreDoc[] sd = td.ScoreDocs;
            for (int i = 0; i < sd.Length; i++)
            {
                float         score = sd[i].Score;
                System.String id    = s.IndexReader.Document(sd[i].Doc).Get(ID_FIELD);
                Log("-------- " + i + ". Explain doc " + id);
                Log(s.Explain(q, sd[i].Doc));
                float expectedScore = N_DOCS - i;
                Assert.AreEqual(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA, "score of result " + i + " shuould be " + expectedScore + " != " + score);
                System.String expectedId = inOrder?Id2String(N_DOCS - i):Id2String(i + 1);                 // reverse  ==> smaller values first
                Assert.IsTrue(expectedId.Equals(id), "id of result " + i + " shuould be " + expectedId + " != " + score);
            }
        }
Exemplo n.º 21
0
        private int GetHitCount(Directory dir, Term term)
        {
            IndexSearcher searcher = new IndexSearcher(dir, true, null);
            int           hitCount = searcher.Search(new TermQuery(term), null, 1000, null).TotalHits;

            searcher.Close();
            return(hitCount);
        }
Exemplo n.º 22
0
        public List <LuceneData> MemberSearch(string searchTerm)
        {
            var searchData = new List <LuceneData>();

            try
            {
                Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(_indexFileLocation);


                //create an analyzer to process the text
                Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();

                //create the query parser, with the default search feild set to "content"
                Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("SearchContent", analyzer);

                //parse the query string into a Query object
                Lucene.Net.Search.Query query = queryParser.Parse(searchTerm);


                //create an index searcher that will perform the search
                //Lucene.Net.Index.IndexReader indexReader = Lucene.Net.Index.IndexReader.Open(dir, true);
                Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);

                ////build a query object
                //Lucene.Net.Index.Term luceneSearchTerm = new Lucene.Net.Index.Term("searchContent", searchTerm);
                //Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(luceneSearchTerm);

                //execute the query
                Lucene.Net.Search.Hits hits = searcher.Search(query);

                //int resultCount = hits.Length();
                //if (resultCount > 1000){
                //    resultCount = 1000;
                //}

                //iterate over the results.
                for (int i = 0; i < hits.Length(); i++)
                {
                    Lucene.Net.Documents.Document doc = hits.Doc(i);
                    searchData.Add(new LuceneData
                    {
                        MemberID    = Convert.ToInt32(doc.Get("MemberID")),
                        FirstName   = doc.Get("FirstName"),
                        LastName    = doc.Get("LastName"),
                        CompanyName = doc.Get("CompanyName"),
                        City        = doc.Get("City"),
                        State       = doc.Get("State"),
                        PostalCode  = doc.Get("PostalCode")
                    });
                }
            }
            catch (Exception ex)
            {
            }

            return(searchData);
        }
Exemplo n.º 23
0
        public string SearchAndPaging(string strQuery, string index)
        {
            string result = string.Empty;

            try
            {
                List <SearchArticle>            searchArticleList = new List <SearchArticle>();
                PSCPortal.CMS.ArticleCollection ArticleList       = ArticleCollection.GetArticleCollectionPublish();
                string         nameSub       = Libs.Ultility.GetSubDomain() == string.Empty ? "HomePage" : Libs.Ultility.GetSubDomain();
                SubDomain      subDomain     = PSCPortal.Engine.SubDomain.GetSubByName(nameSub);
                PageCollection pagesBelongTo = subDomain.GetPagesBelongTo();
                string         strId         = string.Empty;
                foreach (var page in pagesBelongTo)
                {
                    foreach (var ar in ArticleList.Where(ar => ar.PageId == page.Id))
                    {
                        strId += ar.Id + " OR ";
                    }
                    if (strId.Length > 0)
                    {
                        strId = strId.Remove(strId.Length - 3, 3);
                    }
                }
                int    pageIndex = Int32.Parse(index);
                string strSearch = " ArticleDetail:(" + strQuery + ") AND ArticleId:" + "( " + strId + " )";
                Lucene.Net.Index.IndexReader        reader           = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
                Lucene.Net.QueryParsers.QueryParser parser           = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
                Lucene.Net.Search.Query             query            = parser.Parse(strSearch);
                Lucene.Net.Search.IndexSearcher     searcher         = new Lucene.Net.Search.IndexSearcher(reader);
                Lucene.Net.Search.Hits                   hits        = searcher.Search(query);
                Lucene.Net.Highlight.QueryScorer         score       = new Lucene.Net.Highlight.QueryScorer(query);
                Lucene.Net.Highlight.SimpleHTMLFormatter formater    = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
                Lucene.Net.Highlight.Highlighter         highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);
                result += hits.Length() + "_" + "<div class='blog_news'><div class='topic_news_title1'><div class='topic_news_title'><a href='#'>Kết quả tìm thấy: " + hits.Length() + "</a></div></div>";
                result += "<div class='ct_topic_l'><div class='ct_topic_r1'>";
                for (int i = pageIndex * 20 - 20; i < pageIndex * 20 && i < hits.Length(); i++)
                {
                    string detail = hits.Doc(i).Get("ArticleDetail");
                    Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail));
                    SearchArticle searchArticle        = new SearchArticle();
                    searchArticle.Id        = hits.Doc(i).Get("ArticleId");;
                    searchArticle.Title     = hits.Doc(i).Get("ArticleTitle");
                    searchArticle.Highligth = highlighter.GetBestFragment(ts, detail);
                    searchArticleList.Add(searchArticle);
                }
                reader.Close();
                JavaScriptSerializer        serializer = new JavaScriptSerializer();
                Dictionary <string, object> resultDic  = new Dictionary <string, object>();
                resultDic["Count"] = hits.Length();
                resultDic["Data"]  = searchArticleList;
                result             = serializer.Serialize(resultDic);
            }
            catch (Exception e)
            {
            }
            return(result);
        }
Exemplo n.º 24
0
        public List <indexVideo> searchArticles(string queryString, int numberOfResults, bool komin)
        {
            List <indexVideo> resultsList = new List <indexVideo>();

            Lucene.Net.Store.Directory index = publicIndex;
            if (komin)
            {
                index = kominIndex;
            }

            if (!string.IsNullOrEmpty(queryString))
            {
                Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new[] { "title", "shortDescription" }, analyser);
                try
                {
                    Lucene.Net.Search.Query query = parser.Parse(queryString + "~");

                    Lucene.Net.Search.Searcher searcher = new Lucene.Net.Search.IndexSearcher(index, true);

                    Lucene.Net.Search.TopScoreDocCollector collector = Lucene.Net.Search.TopScoreDocCollector.Create(numberOfResults, true);

                    searcher.Search(query, collector);

                    Lucene.Net.Search.ScoreDoc[] hits = collector.TopDocs().ScoreDocs;

                    if (hits.Length >= 1)
                    {
                        for (int i = 0; i < hits.Length; i++)
                        {
                            indexVideo video = new indexVideo();

                            int   docId = hits[i].Doc;
                            float score = hits[i].Score;

                            Lucene.Net.Documents.Document doc = searcher.Doc(docId);

                            video.bctid            = doc.Get("bctid");
                            video.title            = doc.Get("title");
                            video.score            = score;
                            video.shortDescription = doc.Get("shortDescription");
                            video.imageURL         = doc.Get("imageURL");

                            resultsList.Add(video);
                        }
                    }
                }
                catch (Exception e)
                {
                }
            }
            return(resultsList);
        }
Exemplo n.º 25
0
        public virtual void  TestFieldSetValue()
        {
            Field    field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
            Document doc   = new Document();

            doc.Add(field);
            doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));

            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            field.SetValue("id2");
            writer.AddDocument(doc);
            field.SetValue("id3");
            writer.AddDocument(doc);
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            Query query = new TermQuery(new Term("keyword", "test"));

            // ensure that queries return expected results without DateFilter first
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            int result = 0;

            for (int i = 0; i < 3; i++)
            {
                Document doc2 = searcher.Doc(hits[i].doc);
                Field    f    = doc2.GetField("id");
                if (f.StringValue().Equals("id1"))
                {
                    result |= 1;
                }
                else if (f.StringValue().Equals("id2"))
                {
                    result |= 2;
                }
                else if (f.StringValue().Equals("id3"))
                {
                    result |= 4;
                }
                else
                {
                    Assert.Fail("unexpected id field");
                }
            }
            searcher.Close();
            dir.Close();
            Assert.AreEqual(7, result, "did not see all IDs");
        }
Exemplo n.º 26
0
        public virtual void  TestPerFieldAnalyzer()
        {
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());

            analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

            QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, "description", analyzer);
            Query       query       = queryParser.Parse("partnum:Q36 AND SPACE");

            ScoreDoc[] hits = searcher.Search(query, null, 1000, null).ScoreDocs;
            Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
            Assert.AreEqual(1, hits.Length, "doc found!");
        }
Exemplo n.º 27
0
        /* Open pre-lockless index, add docs, do a delete &
         * setNorm, and search */
        public virtual void  ChangeIndexNoAdds(System.String dirName)
        {
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // make sure searching sees right # hits
            IndexSearcher searcher = new IndexSearcher(dir, true);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length, "wrong number of hits");
            Document d = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d.Get("id"), "wrong first document");
            searcher.Close();

            // make sure we can do a delete & setNorm against this
            // pre-lockless segment:
            IndexReader reader     = IndexReader.Open(dir, false);
            Term        searchTerm = new Term("id", "6");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // make sure they "took":
            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.IndexReader);
            searcher.Close();

            // optimize
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.IndexReader);
            searcher.Close();

            dir.Close();
        }
Exemplo n.º 28
0
        public virtual void  TestIterator()
        {
            RAMDirectory directory = new RAMDirectory();

            IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Document    doc    = new Document();

            doc.Add(new Field("field", "iterator test doc 1", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("field", "iterator test doc 2", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Close();

            _TestUtil.CheckIndex(directory);

            IndexSearcher searcher = new IndexSearcher(directory);
            Hits          hits     = searcher.Search(new TermQuery(new Term("field", "iterator")));

            HitIterator iterator = (HitIterator)hits.Iterator();

            Assert.AreEqual(2, iterator.Length());
            Assert.IsTrue(iterator.MoveNext());
            Hit hit = (Hit)iterator.Current;

            Assert.AreEqual("iterator test doc 1", hit.Get("field"));

            Assert.IsTrue(iterator.MoveNext());
            hit = (Hit)iterator.Current;
            Assert.AreEqual("iterator test doc 2", hit.GetDocument().Get("field"));

            Assert.IsFalse(iterator.MoveNext());

            bool caughtException = false;

            try
            {
                System.Object generatedAux = iterator.Current;
            }
            catch (System.ArgumentOutOfRangeException e)
            {
                Assert.IsTrue(true);
                caughtException = true;
            }

            Assert.IsTrue(caughtException);
        }
Exemplo n.º 29
0
            override public void  Run()
            {
                IndexSearcher searcher = null;
                Query         query    = new TermQuery(new Term("content", "aaa"));

                for (int i = 0; i < this.numIteration; i++)
                {
                    try
                    {
                        searcher = new IndexSearcher(dir);
                    }
                    catch (System.Exception e)
                    {
                        hitException = true;
                        System.Console.Out.WriteLine("Stress Test Index Searcher: create hit unexpected exception: " + e.ToString());
                        System.Console.Out.WriteLine(e.StackTrace);
                        break;
                    }
                    if (searcher != null)
                    {
                        ScoreDoc[] hits = null;
                        try
                        {
                            hits = searcher.Search(query, null, 1000).scoreDocs;
                        }
                        catch (System.IO.IOException e)
                        {
                            hitException = true;
                            System.Console.Out.WriteLine("Stress Test Index Searcher: search hit unexpected exception: " + e.ToString());
                            System.Console.Out.WriteLine(e.StackTrace);
                            break;
                        }
                        // System.out.println(hits.length() + " total results");
                        try
                        {
                            searcher.Close();
                        }
                        catch (System.IO.IOException e)
                        {
                            hitException = true;
                            System.Console.Out.WriteLine("Stress Test Index Searcher: close hit unexpected exception: " + e.ToString());
                            System.Console.Out.WriteLine(e.StackTrace);
                            break;
                        }
                        searcher = null;
                    }
                }
            }
Exemplo n.º 30
0
        // Test that FieldScoreQuery returns docs with expected score.
        private void  DoTestExactScore(System.String field, FieldScoreQuery.Type tp)
        {
            IndexSearcher s  = new IndexSearcher(dir, true);
            Query         q  = new FieldScoreQuery(field, tp);
            TopDocs       td = s.Search(q, null, 1000);

            Assert.AreEqual(N_DOCS, td.TotalHits, "All docs should be matched!");
            ScoreDoc[] sd = td.ScoreDocs;
            for (int i = 0; i < sd.Length; i++)
            {
                float score = sd[i].Score;
                Log(s.Explain(q, sd[i].Doc));
                System.String id            = s.IndexReader.Document(sd[i].Doc).Get(ID_FIELD);
                float         expectedScore = ExpectedFieldScore(id);         // "ID7" --> 7.0
                Assert.AreEqual(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA, "score of " + id + " shuould be " + expectedScore + " != " + score);
            }
        }
Exemplo n.º 31
0
        public virtual void TestShrinkToAfterShortestMatch3()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new TestPayloadAnalyzer(this), IndexWriter.MaxFieldLength.LIMITED);
            Document     doc       = new Document();

            doc.Add(new Field("content", new System.IO.StreamReader(new System.IO.MemoryStream(System.Text.Encoding.ASCII.GetBytes("j k a l f k k p a t a k l k t a")))));
            writer.AddDocument(doc);
            writer.Close();

            IndexSearcher is_Renamed = new IndexSearcher(directory, true);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));

            SpanQuery[]   sqs   = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq   = new SpanNearQuery(sqs, 0, true);
            Spans         spans = snq.GetSpans(is_Renamed.IndexReader);

            TopDocs topDocs = is_Renamed.Search(snq, 1);

            System.Collections.Hashtable payloadSet = new System.Collections.Hashtable();
            for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
            {
                while (spans.Next())
                {
                    System.Collections.Generic.ICollection <byte[]> payloads = spans.GetPayload();

                    for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext();)
                    {
                        CollectionsHelper.AddIfNotContains(payloadSet, new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[])it.Current)));
                    }
                }
            }
            Assert.AreEqual(2, payloadSet.Count);
            if (DEBUG)
            {
                System.Collections.IEnumerator pit = payloadSet.GetEnumerator();
                while (pit.MoveNext())
                {
                    System.Console.Out.WriteLine("match:" + pit.Current);
                }
            }
            Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
            Assert.IsTrue(payloadSet.Contains("k:Noise:11"));
        }
Exemplo n.º 32
0
        private bool videoExistsInIndex(string id, Lucene.Net.Store.Directory index)
        {
            bool exist = false;

            Lucene.Net.Search.TermQuery termQuery    = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("bctid", id));
            Lucene.Net.Search.Searcher  termSearcher = new Lucene.Net.Search.IndexSearcher(index, true);

            Lucene.Net.Search.TopScoreDocCollector termCollector = Lucene.Net.Search.TopScoreDocCollector.Create(1, true);
            termSearcher.Search(termQuery, termCollector);
            int termResults = termCollector.TopDocs().TotalHits;

            if (termResults > 0)
            {
                exist = true;
            }
            return(exist);
        }
Exemplo n.º 33
0
		public virtual void  TestIterator()
		{
			RAMDirectory directory = new RAMDirectory();
			
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Document doc = new Document();
			doc.Add(new Field("field", "iterator test doc 1", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("field", "iterator test doc 2", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			writer.Close();
			
			_TestUtil.CheckIndex(directory);
			
			IndexSearcher searcher = new IndexSearcher(directory);
			Hits hits = searcher.Search(new TermQuery(new Term("field", "iterator")));
			
			HitIterator iterator = (HitIterator) hits.Iterator();
			Assert.AreEqual(2, iterator.Length());
			Assert.IsTrue(iterator.MoveNext());
			Hit hit = (Hit) iterator.Current;
			Assert.AreEqual("iterator test doc 1", hit.Get("field"));
			
			Assert.IsTrue(iterator.MoveNext());
			hit = (Hit) iterator.Current;
			Assert.AreEqual("iterator test doc 2", hit.GetDocument().Get("field"));
			
			Assert.IsFalse(iterator.MoveNext());
			
			bool caughtException = false;
			try
			{
				System.Object generatedAux = iterator.Current;
			}
			catch (System.ArgumentOutOfRangeException e)
			{
				Assert.IsTrue(true);
				caughtException = true;
			}
			
			Assert.IsTrue(caughtException);
		}
Exemplo n.º 34
0
        public static bool PreviouslyIndexed(string url)
        {
            string indexFileLocation = indexDir;
            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
            Lucene.Net.Search.Hits hits = null;
            try
            {
                Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", url));

                hits = searcher.Search(query);

            }
            catch { }
            finally
            {
                searcher.Close();
            }
            return hits.Length() > 0;
        }
Exemplo n.º 35
0
        public static List<IndexedItem> SearchProjects(string s)
        {
            List<IndexedItem> retVal = new List<IndexedItem>();

            string indexFileLocation = indexDir;
            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);

            try
            {
                Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("content", s));
                query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", fromUrl)) });
                query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("title", s)) });

                //execute the query
                Lucene.Net.Search.Hits hits = searcher.Search(query);

                //iterate over the results.
                for (int i = 0; i < hits.Length(); i++)
                {
                    Lucene.Net.Documents.Document doc = hits.Doc(i);
                    string article = doc.Get("content");
                    string title = doc.Get("title");
                    string url = doc.Get("url");
                    retVal.Add(new IndexedItem { Article = article, Href = url, Title = title });
                }
                foreach (IndexedItem ind in retVal)
                {
                    Console.WriteLine(ind.Href);
                }

                retVal = retVal.Distinct().ToList();
            }
            catch { }
            finally
            {
                searcher.Close();
            }
            return retVal;
        }
Exemplo n.º 36
0
		// Test that queries based on reverse/ordFieldScore scores correctly
		private void  DoTestRank(System.String field, bool inOrder)
		{
			IndexSearcher s = new IndexSearcher(dir, true);
			ValueSource vs;
			if (inOrder)
			{
				vs = new OrdFieldSource(field);
			}
			else
			{
				vs = new ReverseOrdFieldSource(field);
			}
			
			Query q = new ValueSourceQuery(vs);
			Log("test: " + q);
			QueryUtils.Check(q, s);
			ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
			Assert.AreEqual(N_DOCS, h.Length, "All docs should be matched!");
			System.String prevID = inOrder?"IE":"IC"; // smaller than all ids of docs in this test ("ID0001", etc.)
			
			for (int i = 0; i < h.Length; i++)
			{
				System.String resID = s.Doc(h[i].Doc).Get(ID_FIELD);
				Log(i + ".   score=" + h[i].Score + "  -  " + resID);
				Log(s.Explain(q, h[i].Doc));
				if (inOrder)
				{
					Assert.IsTrue(String.CompareOrdinal(resID, prevID) < 0, "res id " + resID + " should be < prev res id " + prevID);
				}
				else
				{
					Assert.IsTrue(String.CompareOrdinal(resID, prevID) > 0, "res id " + resID + " should be > prev res id " + prevID);
				}
				prevID = resID;
			}
		}
Exemplo n.º 37
0
		public virtual void  TestFieldSetValue()
		{
			
			Field field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
			Document doc = new Document();
			doc.Add(field);
			doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));
			
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(doc);
			field.SetValue("id2");
			writer.AddDocument(doc);
			field.SetValue("id3");
			writer.AddDocument(doc);
			writer.Close();
			
			Searcher searcher = new IndexSearcher(dir);
			
			Query query = new TermQuery(new Term("keyword", "test"));
			
			// ensure that queries return expected results without DateFilter first
			ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(3, hits.Length);
			int result = 0;
			for (int i = 0; i < 3; i++)
			{
				Document doc2 = searcher.Doc(hits[i].doc);
				Field f = doc2.GetField("id");
				if (f.StringValue().Equals("id1"))
					result |= 1;
				else if (f.StringValue().Equals("id2"))
					result |= 2;
				else if (f.StringValue().Equals("id3"))
					result |= 4;
				else
					Assert.Fail("unexpected id field");
			}
			searcher.Close();
			dir.Close();
			Assert.AreEqual(7, result, "did not see all IDs");
		}
Exemplo n.º 38
0
		public virtual void  TestDiverseDocs()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetRAMBufferSizeMB(0.5);
			System.Random rand = new System.Random((System.Int32) 31415);
			for (int i = 0; i < 3; i++)
			{
				// First, docs where every term is unique (heavy on
				// Posting instances)
				for (int j = 0; j < 100; j++)
				{
					Document doc = new Document();
					for (int k = 0; k < 100; k++)
					{
						doc.Add(new Field("field", System.Convert.ToString(rand.Next()), Field.Store.YES, Field.Index.TOKENIZED));
					}
					writer.AddDocument(doc);
				}
				
				// Next, many single term docs where only one term
				// occurs (heavy on byte blocks)
				for (int j = 0; j < 100; j++)
				{
					Document doc = new Document();
					doc.Add(new Field("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(doc);
				}
				
				// Next, many single term docs where only one term
				// occurs but the terms are very long (heavy on
				// char[] arrays)
				for (int j = 0; j < 100; j++)
				{
					System.Text.StringBuilder b = new System.Text.StringBuilder();
					System.String x = System.Convert.ToString(j) + ".";
					for (int k = 0; k < 1000; k++)
						b.Append(x);
					System.String longTerm = b.ToString();
					
					Document doc = new Document();
					doc.Add(new Field("field", longTerm, Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(doc);
				}
			}
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(new Term("field", "aaa")));
			Assert.AreEqual(300, hits.Length());
			searcher.Close();
			
			dir.Close();
		}
Exemplo n.º 39
0
		public virtual void  TestGetValuesForIndexedDocument()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(MakeDocumentWithFields());
			writer.Close();
			
			Searcher searcher = new IndexSearcher(dir);
			
			// search for something that does exists
			Query query = new TermQuery(new Term("keyword", "test1"));
			
			// ensure that queries return expected results without DateFilter first
			ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(1, hits.Length);
			
			DoAssert(searcher.Doc(hits[0].doc), true);
			searcher.Close();
		}
		public virtual void  TestStopWordSearching()
		{
			Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
			Directory ramDir = new RAMDirectory();
			var iw = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            var doc = new Document();
			doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
			iw.AddDocument(doc);
			iw.Close();

            var mfqp = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, analyzer);
			mfqp.DefaultOperator = QueryParser.Operator.AND;
            var q = mfqp.Parse("the footest");
            var is_Renamed = new IndexSearcher(ramDir, true);
            var hits = is_Renamed.Search(q, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			is_Renamed.Close();
		}
Exemplo n.º 41
0
		public virtual void  TestKeepLastNDeletionPolicyWithCreates()
		{
			
			int N = 10;
			
			for (int pass = 0; pass < 4; pass++)
			{
				
				bool autoCommit = pass < 2;
				bool useCompoundFile = (pass % 2) > 0;
				
				KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
				
				Directory dir = new RAMDirectory();
				IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
				writer.SetMaxBufferedDocs(10);
				writer.SetUseCompoundFile(useCompoundFile);
				writer.Close();
				Term searchTerm = new Term("content", "aaa");
				Query query = new TermQuery(searchTerm);
				
				for (int i = 0; i < N + 1; i++)
				{
					
					writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
					writer.SetMaxBufferedDocs(10);
					writer.SetUseCompoundFile(useCompoundFile);
					for (int j = 0; j < 17; j++)
					{
						AddDoc(writer);
					}
					// this is a commit when autoCommit=false:
					writer.Close();
					IndexReader reader = IndexReader.Open(dir, policy);
					reader.DeleteDocument(3);
					reader.SetNorm(5, "content", 2.0F);
					IndexSearcher searcher = new IndexSearcher(reader);
					Hits hits = searcher.Search(query);
					Assert.AreEqual(16, hits.Length());
					// this is a commit when autoCommit=false:
					reader.Close();
					searcher.Close();
					
					writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
					// This will not commit: there are no changes
					// pending because we opened for "create":
					writer.Close();
				}
				
				Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
				if (autoCommit)
				{
					Assert.IsTrue(policy.numOnCommit > 3 * (N + 1) - 1);
				}
				else
				{
					Assert.AreEqual(2 * (N + 1), policy.numOnCommit);
				}
				
				IndexSearcher searcher2 = new IndexSearcher(dir);
				Hits hits2 = searcher2.Search(query);
				Assert.AreEqual(0, hits2.Length());
				
				// Simplistic check: just verify only the past N segments_N's still
				// exist, and, I can open a reader on each:
				long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
				
				dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
				int expectedCount = 0;
				
				for (int i = 0; i < N + 1; i++)
				{
					try
					{
						IndexReader reader = IndexReader.Open(dir);
						
						// Work backwards in commits on what the expected
						// count should be.  Only check this in the
						// autoCommit false case:
						if (!autoCommit)
						{
							searcher2 = new IndexSearcher(reader);
							hits2 = searcher2.Search(query);
							Assert.AreEqual(expectedCount, hits2.Length());
							searcher2.Close();
							if (expectedCount == 0)
							{
								expectedCount = 16;
							}
							else if (expectedCount == 16)
							{
								expectedCount = 17;
							}
							else if (expectedCount == 17)
							{
								expectedCount = 0;
							}
						}
						reader.Close();
						if (i == N)
						{
							Assert.Fail("should have failed on commits before last " + N);
						}
					}
					catch (System.IO.IOException e)
					{
						if (i != N)
						{
							throw e;
						}
					}
					if (i < N)
					{
						dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
					}
					gen--;
				}
				
				dir.Close();
			}
		}
Exemplo n.º 42
0
		public virtual void  TestCommitOnCloseAbort()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			for (int i = 0; i < 14; i++)
			{
				AddDoc(writer);
			}
			writer.Close();
			
			Term searchTerm = new Term("content", "aaa");
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(14, hits.Length(), "first number of hits");
			searcher.Close();
			
			writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
			writer.SetMaxBufferedDocs(10);
			for (int j = 0; j < 17; j++)
			{
				AddDoc(writer);
			}
			// Delete all docs:
			writer.DeleteDocuments(searchTerm);
			
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled");
			searcher.Close();
			
			// Now, close the writer:
			writer.Abort();
			
			AssertNoUnreferencedFiles(dir, "unreferenced files remain after abort()");
			
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(14, hits.Length(), "saw changes after writer.abort");
			searcher.Close();
			
			// Now make sure we can re-open the index, add docs,
			// and all is good:
			writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
			writer.SetMaxBufferedDocs(10);
			for (int i = 0; i < 12; i++)
			{
				for (int j = 0; j < 17; j++)
				{
					AddDoc(writer);
				}
				searcher = new IndexSearcher(dir);
				hits = searcher.Search(new TermQuery(searchTerm));
				Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled");
				searcher.Close();
			}
			
			writer.Close();
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(218, hits.Length(), "didn't see changes after close");
			searcher.Close();
			
			dir.Close();
		}
Exemplo n.º 43
0
		public static void  Main(System.String[] args)
		{
			try
			{
				Searcher searcher = new IndexSearcher(@"index");
				Analyzer analyzer = new StandardAnalyzer();
				
				System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding);
				while (true)
				{
					System.Console.Out.Write("Query: ");
					System.String line = in_Renamed.ReadLine();
					
					if (line.Length == - 1)
						break;
					
					Query query = QueryParser.Parse(line, "contents", analyzer);
					System.Console.Out.WriteLine("Searching for: " + query.ToString("contents"));
					
					Hits hits = searcher.Search(query);
					System.Console.Out.WriteLine(hits.Length() + " total matching documents");
					
					int HITS_PER_PAGE = 10;
					for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
					{
						int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
						for (int i = start; i < end; i++)
						{
							Document doc = hits.Doc(i);
							System.String path = doc.Get("path");
							if (path != null)
							{
								System.Console.Out.WriteLine(i + ". " + path);
							}
							else
							{
								System.String url = doc.Get("url");
								if (url != null)
								{
									System.Console.Out.WriteLine(i + ". " + url);
									System.Console.Out.WriteLine("   - " + doc.Get("title"));
								}
								else
								{
									System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document");
								}
							}
						}
						
						if (hits.Length() > end)
						{
							System.Console.Out.Write("more (y/n) ? ");
							line = in_Renamed.ReadLine();
							if (line.Length == 0 || line[0] == 'n')
								break;
						}
					}
				}
				searcher.Close();
			}
			catch (System.Exception e)
			{
				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
			}
		}
Exemplo n.º 44
0
		// Test that queries based on reverse/ordFieldScore returns docs with expected score.
		private void  DoTestExactScore(System.String field, bool inOrder)
		{
			IndexSearcher s = new IndexSearcher(dir, true);
			ValueSource vs;
			if (inOrder)
			{
				vs = new OrdFieldSource(field);
			}
			else
			{
				vs = new ReverseOrdFieldSource(field);
			}
			Query q = new ValueSourceQuery(vs);
			TopDocs td = s.Search(q, null, 1000);
			Assert.AreEqual(N_DOCS, td.TotalHits, "All docs should be matched!");
			ScoreDoc[] sd = td.ScoreDocs;
			for (int i = 0; i < sd.Length; i++)
			{
				float score = sd[i].Score;
				System.String id = s.IndexReader.Document(sd[i].Doc).Get(ID_FIELD);
				Log("-------- " + i + ". Explain doc " + id);
				Log(s.Explain(q, sd[i].Doc));
				float expectedScore = N_DOCS - i;
				Assert.AreEqual(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA, "score of result " + i + " shuould be " + expectedScore + " != " + score);
				System.String expectedId = inOrder?Id2String(N_DOCS - i):Id2String(i + 1); // reverse  ==> smaller values first 
				Assert.IsTrue(expectedId.Equals(id), "id of result " + i + " shuould be " + expectedId + " != " + score);
			}
		}
Exemplo n.º 45
0
		// Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
		private void  DoTestCaching(System.String field, bool inOrder)
		{
			IndexSearcher s = new IndexSearcher(dir, true);
			System.Object innerArray = null;
			
			bool warned = false; // print warning once
			
			for (int i = 0; i < 10; i++)
			{
				ValueSource vs;
				if (inOrder)
				{
					vs = new OrdFieldSource(field);
				}
				else
				{
					vs = new ReverseOrdFieldSource(field);
				}
				ValueSourceQuery q = new ValueSourceQuery(vs);
				ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
				try
				{
					Assert.AreEqual(N_DOCS, h.Length, "All docs should be matched!");
					IndexReader[] readers = s.IndexReader.GetSequentialSubReaders();
					
					for (int j = 0; j < readers.Length; j++)
					{
						IndexReader reader = readers[j];
						if (i == 0)
						{
                            innerArray = q.valSrc.GetValues(reader).InnerArray;
						}
						else
						{
                            Log(i + ".  compare: " + innerArray + " to " + q.valSrc.GetValues(reader).InnerArray);
                            Assert.AreSame(innerArray, q.valSrc.GetValues(reader).InnerArray, "field values should be cached and reused!");
						}
					}
				}
				catch (System.NotSupportedException)
				{
					if (!warned)
					{
						System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q);
						warned = true;
					}
				}
			}
			
			ValueSource vs2;
			ValueSourceQuery q2;
			ScoreDoc[] h2;
			
			// verify that different values are loaded for a different field
			System.String field2 = INT_FIELD;
			Assert.IsFalse(field.Equals(field2)); // otherwise this test is meaningless.
			if (inOrder)
			{
				vs2 = new OrdFieldSource(field2);
			}
			else
			{
				vs2 = new ReverseOrdFieldSource(field2);
			}
			q2 = new ValueSourceQuery(vs2);
			h2 = s.Search(q2, null, 1000).ScoreDocs;
			Assert.AreEqual(N_DOCS, h2.Length, "All docs should be matched!");
			IndexReader[] readers2 = s.IndexReader.GetSequentialSubReaders();
			
			for (int j = 0; j < readers2.Length; j++)
			{
				IndexReader reader = readers2[j];
				try
				{
                    Log("compare (should differ): " + innerArray + " to " + q2.valSrc.GetValues(reader).InnerArray);
                    Assert.AreNotSame(innerArray, q2.valSrc.GetValues(reader).InnerArray, "different values shuold be loaded for a different field!");
				}
				catch (System.NotSupportedException)
				{
					if (!warned)
					{
						System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q2);
						warned = true;
					}
				}
			}
			
			// verify new values are reloaded (not reused) for a new reader
			s = new IndexSearcher(dir, true);
			if (inOrder)
			{
				vs2 = new OrdFieldSource(field);
			}
			else
			{
				vs2 = new ReverseOrdFieldSource(field);
			}
			q2 = new ValueSourceQuery(vs2);
			h2 = s.Search(q2, null, 1000).ScoreDocs;
			Assert.AreEqual(N_DOCS, h2.Length, "All docs should be matched!");
			readers2 = s.IndexReader.GetSequentialSubReaders();
			
			for (int j = 0; j < readers2.Length; j++)
			{
				IndexReader reader = readers2[j];
				try
				{
                    Log("compare (should differ): " + innerArray + " to " + q2.valSrc.GetValues(reader).InnerArray);
                    Assert.AreNotSame(innerArray, q2.valSrc.GetValues(reader).InnerArray, "cached field values should not be reused if reader as changed!");
				}
				catch (System.NotSupportedException)
				{
					if (!warned)
					{
						System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q2);
						warned = true;
					}
				}
			}
		}
Exemplo n.º 46
0
		public virtual void  TestNullLockFactory()
		{
			
			
			Directory dir = new MyRAMDirectory(this);
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			for (int i = 0; i < 100; i++)
			{
				AddDoc(writer);
			}
			writer.Close();
			Term searchTerm = new Term("content", "aaa");
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(100, hits.Length(), "did not get right number of hits");
			writer.Close();
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.Close();
			
			dir.Close();
		}
Exemplo n.º 47
0
		public virtual void  TestEnablingNorms()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			// Enable norms for only 1 doc, pre flush
			for (int j = 0; j < 10; j++)
			{
				Document doc = new Document();
				Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
				if (j != 8)
				{
					f.SetOmitNorms(true);
				}
				doc.Add(f);
				writer.AddDocument(doc);
			}
			writer.Close();
			
			Term searchTerm = new Term("field", "aaa");
			
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(10, hits.Length());
			searcher.Close();
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			// Enable norms for only 1 doc, post flush
			for (int j = 0; j < 27; j++)
			{
				Document doc = new Document();
				Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
				if (j != 26)
				{
					f.SetOmitNorms(true);
				}
				doc.Add(f);
				writer.AddDocument(doc);
			}
			writer.Close();
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(27, hits.Length());
			searcher.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			reader.Close();
			
			dir.Close();
		}
Exemplo n.º 48
0
 public void TestNegativePositions()
 {
     SinkTokenizer tokens = new SinkTokenizer();
     Token t = new Token();
     t.SetTermText("a");
     t.SetPositionIncrement(0);
     tokens.Add(t);
     t.SetTermText("b");
     t.SetPositionIncrement(1);
     tokens.Add(t);
     t.SetTermText("c");
     tokens.Add(t);
     MockRAMDirectory dir = new MockRAMDirectory();
     IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
     Document doc = new Document();
     doc.Add(new Field("field", tokens));
     w.AddDocument(doc);
     w.Close();
     IndexSearcher s = new IndexSearcher(dir);
     PhraseQuery pq = new PhraseQuery();
     pq.Add(new Term("field", "a"));
     pq.Add(new Term("field", "b"));
     pq.Add(new Term("field", "c"));
     Hits hits = s.Search(pq);
     Assert.AreEqual(1, hits.Length());
     Query q = new SpanTermQuery(new Term("field", "a"));
     hits = s.Search(q);
     Assert.AreEqual(1, hits.Length());
     TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a"));
     Assert.IsTrue(tps.Next());
     Assert.AreEqual(1, tps.Freq());
     Assert.AreEqual(-1, tps.NextPosition());
     Assert.IsTrue(_TestUtil.CheckIndex(dir));
     s.Close();
     dir.Close();
 }
Exemplo n.º 49
0
		public virtual void  TestAddIndexOnDiskFull()
		{
			int START_COUNT = 57;
			int NUM_DIR = 50;
			int END_COUNT = START_COUNT + NUM_DIR * 25;
			
			bool debug = false;
			
			// Build up a bunch of dirs that have indexes which we
			// will then merge together by calling addIndexes(*):
			Directory[] dirs = new Directory[NUM_DIR];
			long inputDiskUsage = 0;
			for (int i = 0; i < NUM_DIR; i++)
			{
				dirs[i] = new RAMDirectory();
				IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true);
				for (int j = 0; j < 25; j++)
				{
					AddDocWithIndex(writer, 25 * i + j);
				}
				writer.Close();
				System.String[] files = dirs[i].List();
				for (int j = 0; j < files.Length; j++)
				{
					inputDiskUsage += dirs[i].FileLength(files[j]);
				}
			}
			
			// Now, build a starting index that has START_COUNT docs.  We
			// will then try to addIndexes into a copy of this:
			RAMDirectory startDir = new RAMDirectory();
			IndexWriter writer2 = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
			for (int j = 0; j < START_COUNT; j++)
			{
				AddDocWithIndex(writer2, j);
			}
			writer2.Close();
			
			// Make sure starting index seems to be working properly:
			Term searchTerm = new Term("content", "aaa");
			IndexReader reader = IndexReader.Open(startDir);
			Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq");
			
			IndexSearcher searcher = new IndexSearcher(reader);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(57, hits.Length(), "first number of hits");
			searcher.Close();
			reader.Close();
			
			// Iterate with larger and larger amounts of free
			// disk space.  With little free disk space,
			// addIndexes will certainly run out of space &
			// fail.  Verify that when this happens, index is
			// not corrupt and index in fact has added no
			// documents.  Then, we increase disk space by 2000
			// bytes each iteration.  At some point there is
			// enough free disk space and addIndexes should
			// succeed and index should show all documents were
			// added.
			
			// String[] files = startDir.list();
			long diskUsage = startDir.SizeInBytes();
			
			long startDiskUsage = 0;
			System.String[] files2 = startDir.List();
			for (int i = 0; i < files2.Length; i++)
			{
				startDiskUsage += startDir.FileLength(files2[i]);
			}
			
			for (int iter = 0; iter < 6; iter++)
			{
				
				if (debug)
					System.Console.Out.WriteLine("TEST: iter=" + iter);
				
				// Start with 100 bytes more than we are currently using:
				long diskFree = diskUsage + 100;
				
				bool autoCommit = iter % 2 == 0;
				int method = iter / 2;
				
				bool success = false;
				bool done = false;
				
				System.String methodName;
				if (0 == method)
				{
					methodName = "addIndexes(Directory[])";
				}
				else if (1 == method)
				{
					methodName = "addIndexes(IndexReader[])";
				}
				else
				{
					methodName = "addIndexesNoOptimize(Directory[])";
				}
				
				while (!done)
				{
					
					// Make a new dir that will enforce disk usage:
					MockRAMDirectory dir = new MockRAMDirectory(startDir);
					writer2 = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
					System.IO.IOException err = null;
					
					MergeScheduler ms = writer2.GetMergeScheduler();
					for (int x = 0; x < 2; x++)
					{
						if (ms is ConcurrentMergeScheduler)
						// This test intentionally produces exceptions
						// in the threads that CMS launches; we don't
						// want to pollute test output with these.
							if (0 == x)
								((ConcurrentMergeScheduler)ms).SetSuppressExceptions_ForNUnitTest();
							else
								((ConcurrentMergeScheduler) ms).ClearSuppressExceptions_ForNUnitTest();
						
						// Two loops: first time, limit disk space &
						// throw random IOExceptions; second time, no
						// disk space limit:
						
						double rate = 0.05;
						double diskRatio = ((double) diskFree) / diskUsage;
						long thisDiskFree;
						
						System.String testName = null;
						
						if (0 == x)
						{
							thisDiskFree = diskFree;
							if (diskRatio >= 2.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 4.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 6.0)
							{
								rate = 0.0;
							}
							if (debug)
								testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes autoCommit=" + autoCommit;
						}
						else
						{
							thisDiskFree = 0;
							rate = 0.0;
							if (debug)
								testName = "disk full test " + methodName + " with unlimited disk space autoCommit=" + autoCommit;
						}
						
						if (debug)
							System.Console.Out.WriteLine("\ncycle: " + testName);
						
						dir.SetMaxSizeInBytes(thisDiskFree);
						dir.SetRandomIOExceptionRate(rate, diskFree);
						
						try
						{
							
							if (0 == method)
							{
								writer2.AddIndexes(dirs);
							}
							else if (1 == method)
							{
								IndexReader[] readers = new IndexReader[dirs.Length];
								for (int i = 0; i < dirs.Length; i++)
								{
									readers[i] = IndexReader.Open(dirs[i]);
								}
								try
								{
									writer2.AddIndexes(readers);
								}
								finally
								{
									for (int i = 0; i < dirs.Length; i++)
									{
										readers[i].Close();
									}
								}
							}
							else
							{
								writer2.AddIndexesNoOptimize(dirs);
							}
							
							success = true;
							if (debug)
							{
								System.Console.Out.WriteLine("  success!");
							}
							
							if (0 == x)
							{
								done = true;
							}
						}
						catch (System.IO.IOException e)
						{
							success = false;
							err = e;
							if (debug)
							{
								System.Console.Out.WriteLine("  hit IOException: " + e);
								System.Console.Out.WriteLine(e.StackTrace);
							}
							
							if (1 == x)
							{
								System.Console.Out.WriteLine(e.StackTrace);
								Assert.Fail(methodName + " hit IOException after disk space was freed up");
							}
						}
						
						// Make sure all threads from
						// ConcurrentMergeScheduler are done
						_TestUtil.SyncConcurrentMerges(writer2);
						
						if (autoCommit)
						{
							
							// Whether we succeeded or failed, check that
							// all un-referenced files were in fact
							// deleted (ie, we did not create garbage).
							// Only check this when autoCommit is true:
							// when it's false, it's expected that there
							// are unreferenced files (ie they won't be
							// referenced until the "commit on close").
							// Just create a new IndexFileDeleter, have it
							// delete unreferenced files, then verify that
							// in fact no files were deleted:
							
							System.String successStr;
							if (success)
							{
								successStr = "success";
							}
							else
							{
								successStr = "IOException";
							}
							System.String message = methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes)";
							AssertNoUnreferencedFiles(dir, message);
						}
						
						if (debug)
						{
							System.Console.Out.WriteLine("  now test readers");
						}
						
						// Finally, verify index is not corrupt, and, if
						// we succeeded, we see all docs added, and if we
						// failed, we see either all docs or no docs added
						// (transactional semantics):
						try
						{
							reader = IndexReader.Open(dir);
						}
						catch (System.IO.IOException e)
						{
							System.Console.Out.WriteLine(e.StackTrace);
							Assert.Fail(testName + ": exception when creating IndexReader: " + e);
						}
						int result = reader.DocFreq(searchTerm);
						if (success)
						{
							if (autoCommit && result != END_COUNT)
							{
								Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
							}
							else if (!autoCommit && result != START_COUNT)
							{
								Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " [autoCommit = false]");
							}
						}
						else
						{
							// On hitting exception we still may have added
							// all docs:
							if (result != START_COUNT && result != END_COUNT)
							{
								System.Console.Out.WriteLine(err.StackTrace);
								Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
							}
						}
						
						searcher = new IndexSearcher(reader);
						try
						{
							hits = searcher.Search(new TermQuery(searchTerm));
						}
						catch (System.IO.IOException e)
						{
							System.Console.Out.WriteLine(e.StackTrace);
							Assert.Fail(testName + ": exception when searching: " + e);
						}
						int result2 = hits.Length();
						if (success)
						{
							if (result2 != result)
							{
								Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
							}
						}
						else
						{
							// On hitting exception we still may have added
							// all docs:
							if (result2 != result)
							{
								System.Console.Out.WriteLine(err.StackTrace);
								Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
							}
						}
						
						searcher.Close();
						reader.Close();
						if (debug)
						{
							System.Console.Out.WriteLine("  count is " + result);
						}
						
						if (done || result == END_COUNT)
						{
							break;
						}
					}
					
					if (debug)
					{
						System.Console.Out.WriteLine("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.GetMaxUsedSizeInBytes());
					}
					
					if (done)
					{
						// Javadocs state that temp free Directory space
						// required is at most 2X total input size of
						// indices so let's make sure:
						Assert.IsTrue(
							(dir.GetMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage),
							"max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.GetMaxUsedSizeInBytes() - startDiskUsage) + " bytes; " + "starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes"
						);
					}
					
					writer2.Close();
					
					// Wait for all BG threads to finish else
					// dir.close() will throw IOException because
					// there are still open files
					_TestUtil.SyncConcurrentMerges(ms);
					
					dir.Close();
					
					// Try again with 2000 more bytes of free space:
					diskFree += 2000;
				}
			}
			
			startDir.Close();
		}
		public virtual void  TestSetBufferSize()
		{
			System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
			MockFSDirectory dir = new MockFSDirectory(indexDir);
			try
			{
				IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
				writer.SetUseCompoundFile(false);
				for (int i = 0; i < 37; i++)
				{
					Document doc = new Document();
					doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.TOKENIZED));
					doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(doc);
				}
				writer.Close();
				
				dir.allIndexInputs.Clear();
				
				IndexReader reader = IndexReader.Open(dir);
				Term aaa = new Term("content", "aaa");
				Term bbb = new Term("content", "bbb");
				Term ccc = new Term("content", "ccc");
				Assert.AreEqual(reader.DocFreq(ccc), 37);
				reader.DeleteDocument(0);
				Assert.AreEqual(reader.DocFreq(aaa), 37);
				dir.TweakBufferSizes();
				reader.DeleteDocument(4);
				Assert.AreEqual(reader.DocFreq(bbb), 37);
				dir.TweakBufferSizes();
				
				IndexSearcher searcher = new IndexSearcher(reader);
				Hits hits = searcher.Search(new TermQuery(bbb));
				dir.TweakBufferSizes();
				Assert.AreEqual(35, hits.Length());
				dir.TweakBufferSizes();
				hits = searcher.Search(new TermQuery(new Term("id", "33")));
				dir.TweakBufferSizes();
				Assert.AreEqual(1, hits.Length());
				hits = searcher.Search(new TermQuery(aaa));
				dir.TweakBufferSizes();
				Assert.AreEqual(35, hits.Length());
				searcher.Close();
				reader.Close();
			}
			finally
			{
				_TestUtil.RmDir(indexDir);
			}
		}
Exemplo n.º 51
0
		public virtual void  TestNPESpanQuery()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(new System.Collections.Hashtable(0)), IndexWriter.MaxFieldLength.LIMITED);
			
			// Add documents
			AddDoc(writer, "1", "the big dogs went running to the market");
			AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");
			
			// Commit
			writer.Close();
			
			// Get searcher
			IndexReader reader = IndexReader.Open(dir);
			IndexSearcher searcher = new IndexSearcher(reader);
			
			// Control (make sure docs indexed)
			Assert.AreEqual(2, HitCount(searcher, "the"));
			Assert.AreEqual(1, HitCount(searcher, "cat"));
			Assert.AreEqual(1, HitCount(searcher, "dogs"));
			Assert.AreEqual(0, HitCount(searcher, "rabbit"));
			
			// This throws exception (it shouldn't)
			Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[]{CreateSpan(4, false, "chased", "cat"), CreateSpan("ate")}), 10).totalHits);
			reader.Close();
			dir.Close();
		}
Exemplo n.º 52
0
		private int GetHitCount(Directory dir, Term term)
		{
			IndexSearcher searcher = new IndexSearcher(dir);
			int hitCount = searcher.Search(new TermQuery(term), null, 1000).TotalHits;
			searcher.Close();
			return hitCount;
		}
Exemplo n.º 53
0
        public static void  Main(System.String[] a)
        {
            System.String indexName = "localhost_index";
            System.String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
            System.Uri url = null;
            for (int i = 0; i < a.Length; i++)
            {
                if (a[i].Equals("-i"))
                {
                    indexName = a[++i];
                }
                else if (a[i].Equals("-f"))
                {
                    fn = a[++i];
                }
                else if (a[i].Equals("-url"))
                {
                    url = new System.Uri(a[++i]);
                }
            }
			
            System.IO.StreamWriter temp_writer;
            temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
            temp_writer.AutoFlush = true;
            System.IO.StreamWriter o = temp_writer;
            IndexReader r = IndexReader.Open(indexName);
            o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");
			
            MoreLikeThis mlt = new MoreLikeThis(r);
			
            o.WriteLine("Query generation parameters:");
            o.WriteLine(mlt.DescribeParams());
            o.WriteLine();
			
            Query query = null;
            if (url != null)
            {
                o.WriteLine("Parsing URL: " + url);
                query = mlt.Like(url);
            }
            else if (fn != null)
            {
                o.WriteLine("Parsing file: " + fn);
                query = mlt.Like(new System.IO.FileInfo(fn));
            }
			
            o.WriteLine("q: " + query);
            o.WriteLine();
            IndexSearcher searcher = new IndexSearcher(indexName);
			
            Hits hits = searcher.Search(query);
            int len = hits.Length();
            o.WriteLine("found: " + len + " documents matching");
            o.WriteLine();
            for (int i = 0; i < System.Math.Min(25, len); i++)
            {
                Document d = hits.Doc(i);
                System.String summary = d.Get("summary");
                o.WriteLine("score  : " + hits.Score(i));
                o.WriteLine("url    : " + d.Get("url"));
                o.WriteLine("\ttitle  : " + d.Get("title"));
                if (summary != null)
                    o.WriteLine("\tsummary: " + d.Get("summary"));
                o.WriteLine();
            }
        }
Exemplo n.º 54
0
 // Test that FieldScoreQuery returns docs with expected score.
 private void  DoTestExactScore(System.String field, FieldScoreQuery.Type tp)
 {
     IndexSearcher s = new IndexSearcher(dir, true);
     Query q = new FieldScoreQuery(field, tp);
     TopDocs td = s.Search(q, null, 1000);
     Assert.AreEqual(N_DOCS, td.TotalHits, "All docs should be matched!");
     ScoreDoc[] sd = td.ScoreDocs;
     for (int i = 0; i < sd.Length; i++)
     {
         float score = sd[i].Score;
         Log(s.Explain(q, sd[i].Doc));
         System.String id = s.IndexReader.Document(sd[i].Doc).Get(ID_FIELD);
         float expectedScore = ExpectedFieldScore(id); // "ID7" --> 7.0
         Assert.AreEqual(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA, "score of " + id + " shuould be " + expectedScore + " != " + score);
     }
 }
Exemplo n.º 55
0
        private void Search()
        {
            try
            {
                SearchProgressBar.Maximum = 11;
                ProgressLabel.Text = "Progress: Initialize Search ...";
                Searcher searcher = new IndexSearcher(@"Canon\index");
                Analyzer analyzer = new StandardAnalyzer();
                ArrayList resultList = new ArrayList();

                System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding);

                String line = QueryInputBox.Text;
                if (line.Length == - 1)
                    return;
                ProgressLabel.Text = "Progress: Parsing Query ...";
                Query query = QueryParser.Parse(line, "contents", analyzer);
                //int[] ix = qtm.GetTermFrequencies();

                Hits hits = searcher.Search(query);
                SearchProgressBar.Increment(1);
                ProgressLabel.Text = "Progress: Searched. Analyzing results ...";

                //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(query, new WhitespaceAnalyzer(), "<B>", "</B>");
                Highlighter highlighter = new Highlighter(new QueryScorer(query));
                highlighter.SetTextFragmenter(new SimpleFragmenter(80));
                int maxNumFragmentsRequired = 1;

                    //int HITS_PER_PAGE = 10;
                    for (int i = 0; i < 10; i++)
                    {
                            SearchProgressBar.Increment(1);
                            ProgressLabel.Text = "Progress: Analyzing hit " + (i+1).ToString();
                            // get the document from index
                            Document doc = hits.Doc(i);
                            //SegmentReader ir = new SegmentReader();
                            //Lucene.Net.Index.TermFreqVector tfv =
                            //tfv.GetTermFrequencies
                            string score = hits.Score(i).ToString();
                            //Box += "Hit no. " + i + " scored: " + score + " occ: " + /*highlighter.tokenFrequency */ " best fragment: \n";
                            ResultSet a = new ResultSet();
                            a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\","");
                            a.Score = hits.Score(i);
                            a.numberOfHits = hits.Length();

                            // get the document filename
                            // we can't get the text from the index
                            //because we didn't store it there
                            //so get it from archive
                            string path = doc.Get("path");
                            string name = GetInternalName(path);
                            PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
                            path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";
                            string plainText = "";
                            //load text from zip archive temporarily
                            using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
                            {
                                plainText = parseHtml(sr.ReadToEnd());
                            }
            //-------------------------------Highlighter Code 1.4
                            TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
                            a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");
                            if(File.Exists(path))
                                File.Delete(path);
            //-------------------------------
                            resultList.Add(a);
                        }
                SearchProgressBar.Value = 0;
                searcher.Close();
                ssr = new ShowSearchResults(/*Box*/resultList);
                //this.Hide();
                ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
                ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
                this.Hide();
                ssr.ShowDialog();

            }
            catch (System.Exception e)
            {
                MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
Exemplo n.º 56
0
 // Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
 private void  DoTestCaching(System.String field, FieldScoreQuery.Type tp)
 {
     // prepare expected array types for comparison
     System.Collections.Hashtable expectedArrayTypes = new System.Collections.Hashtable();
     expectedArrayTypes[FieldScoreQuery.Type.BYTE] = new sbyte[0];
     expectedArrayTypes[FieldScoreQuery.Type.SHORT] = new short[0];
     expectedArrayTypes[FieldScoreQuery.Type.INT] = new int[0];
     expectedArrayTypes[FieldScoreQuery.Type.FLOAT] = new float[0];
     
     IndexSearcher s = new IndexSearcher(dir, true);
     System.Object[] innerArray = new Object[s.IndexReader.GetSequentialSubReaders().Length];
     
     bool warned = false; // print warning once.
     for (int i = 0; i < 10; i++)
     {
         FieldScoreQuery q = new FieldScoreQuery(field, tp);
         ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
         Assert.AreEqual(N_DOCS, h.Length, "All docs should be matched!");
         IndexReader[] readers = s.IndexReader.GetSequentialSubReaders();
         for (int j = 0; j < readers.Length; j++)
         {
             IndexReader reader = readers[j];
             try
             {
                 if (i == 0)
                 {
                     innerArray[j] = q.valSrc.GetValues(reader).InnerArray;
                     Log(i + ".  compare: " + innerArray[j].GetType() + " to " + expectedArrayTypes[tp].GetType());
                     Assert.AreEqual(innerArray[j].GetType(), expectedArrayTypes[tp].GetType(), "field values should be cached in the correct array type!");
                 }
                 else
                 {
                     Log(i + ".  compare: " + innerArray[j] + " to " + q.valSrc.GetValues(reader).InnerArray);
                     Assert.AreSame(innerArray[j], q.valSrc.GetValues(reader).InnerArray, "field values should be cached and reused!");
                 }
             }
             catch (System.NotSupportedException)
             {
                 if (!warned)
                 {
                     System.Console.Error.WriteLine("WARNING: " +  TestName() + " cannot fully test values of " + q);
                     warned = true;
                 }
             }
         }
     }
     
     // verify new values are reloaded (not reused) for a new reader
     s = new IndexSearcher(dir, true);
     FieldScoreQuery q2 = new FieldScoreQuery(field, tp);
     ScoreDoc[] h2 = s.Search(q2, null, 1000).ScoreDocs;
     Assert.AreEqual(N_DOCS, h2.Length, "All docs should be matched!");
     IndexReader[] readers2 = s.IndexReader.GetSequentialSubReaders();
     for (int j = 0; j < readers2.Length; j++)
     {
         IndexReader reader = readers2[j];
         try
         {
             Log("compare: " + innerArray + " to " + q2.valSrc.GetValues(reader).InnerArray);
             Assert.AreNotSame(innerArray, q2.valSrc.GetValues(reader).InnerArray, "cached field values should not be reused if reader as changed!");
         }
         catch (System.NotSupportedException)
         {
             if (!warned)
             {
                 System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q2);
                 warned = true;
             }
         }
     }
 }
		// Test that FieldScoreQuery returns docs with expected score.
		private void  DoTestCustomScore(System.String field, FieldScoreQuery.Type tp, double dboost)
		{
			float boost = (float) dboost;
			IndexSearcher s = new IndexSearcher(dir);
			FieldScoreQuery qValSrc = new FieldScoreQuery(field, tp); // a query that would score by the field
			Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser(TEXT_FIELD, anlzr);
			System.String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
			
			// regular (boolean) query.
			Query q1 = qp.Parse(qtxt);
			Log(q1);
			
			// custom query, that should score the same as q1.
			CustomScoreQuery q2CustomNeutral = new CustomScoreQuery(q1);
			q2CustomNeutral.SetBoost(boost);
			Log(q2CustomNeutral);
			
			// custom query, that should (by default) multiply the scores of q1 by that of the field
			CustomScoreQuery q3CustomMul = new CustomScoreQuery(q1, qValSrc);
			q3CustomMul.SetStrict(true);
			q3CustomMul.SetBoost(boost);
			Log(q3CustomMul);
			
			// custom query, that should add the scores of q1 to that of the field
			CustomScoreQuery q4CustomAdd = new CustomAddQuery(q1, qValSrc);
			q4CustomAdd.SetStrict(true);
			q4CustomAdd.SetBoost(boost);
			Log(q4CustomAdd);
			
			// custom query, that multiplies and adds the field score to that of q1
			CustomScoreQuery q5CustomMulAdd = new CustomMulAddQuery(q1, qValSrc, qValSrc);
			q5CustomMulAdd.SetStrict(true);
			q5CustomMulAdd.SetBoost(boost);
			Log(q5CustomMulAdd);
			
			// do al the searches 
			TopDocs td1 = s.Search(q1, null, 1000);
			TopDocs td2CustomNeutral = s.Search(q2CustomNeutral, null, 1000);
			TopDocs td3CustomMul = s.Search(q3CustomMul, null, 1000);
			TopDocs td4CustomAdd = s.Search(q4CustomAdd, null, 1000);
			TopDocs td5CustomMulAdd = s.Search(q5CustomMulAdd, null, 1000);
			
			// put results in map so we can verify the scores although they have changed
			System.Collections.Hashtable h1 = TopDocsToMap(td1);
			System.Collections.Hashtable h2CustomNeutral = TopDocsToMap(td2CustomNeutral);
			System.Collections.Hashtable h3CustomMul = TopDocsToMap(td3CustomMul);
			System.Collections.Hashtable h4CustomAdd = TopDocsToMap(td4CustomAdd);
			System.Collections.Hashtable h5CustomMulAdd = TopDocsToMap(td5CustomMulAdd);
			
			VerifyResults(boost, s, h1, h2CustomNeutral, h3CustomMul, h4CustomAdd, h5CustomMulAdd, q1, q2CustomNeutral, q3CustomMul, q4CustomAdd, q5CustomMulAdd);
		}
Exemplo n.º 58
0
 // Test that FieldScoreQuery returns docs in expected order.
 private void  DoTestRank(System.String field, FieldScoreQuery.Type tp)
 {
     IndexSearcher s = new IndexSearcher(dir, true);
     Query q = new FieldScoreQuery(field, tp);
     Log("test: " + q);
     QueryUtils.Check(q, s);
     ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
     Assert.AreEqual(N_DOCS, h.Length, "All docs should be matched!");
     System.String prevID = "ID" + (N_DOCS + 1); // greater than all ids of docs in this test
     for (int i = 0; i < h.Length; i++)
     {
         System.String resID = s.Doc(h[i].Doc).Get(ID_FIELD);
         Log(i + ".   score=" + h[i].Score + "  -  " + resID);
         Log(s.Explain(q, h[i].Doc));
         Assert.IsTrue(String.CompareOrdinal(resID, prevID) < 0, "res id " + resID + " should be < prev res id " + prevID);
         prevID = resID;
     }
 }
Exemplo n.º 59
0
		public virtual void  TestSetBufferSize()
		{
			System.IO.DirectoryInfo indexDir = new System.IO.DirectoryInfo(System.IO.Path.Combine(AppSettings.Get("tempDir", ""), "testSetBufferSize"));
			MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom());
			try
			{
				IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
				writer.UseCompoundFile = false;
				for (int i = 0; i < 37; i++)
				{
					Document doc = new Document();
					doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
					doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
					writer.AddDocument(doc);
				}
				writer.Close();
				
				dir.allIndexInputs.Clear();
				
				IndexReader reader = IndexReader.Open(dir, false);
				Term aaa = new Term("content", "aaa");
				Term bbb = new Term("content", "bbb");
				Term ccc = new Term("content", "ccc");
				Assert.AreEqual(37, reader.DocFreq(ccc));
				reader.DeleteDocument(0);
				Assert.AreEqual(37, reader.DocFreq(aaa));
				dir.tweakBufferSizes();
				reader.DeleteDocument(4);
				Assert.AreEqual(reader.DocFreq(bbb), 37);
				dir.tweakBufferSizes();
				
				IndexSearcher searcher = new IndexSearcher(reader);
				ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000).ScoreDocs;
				dir.tweakBufferSizes();
				Assert.AreEqual(35, hits.Length);
				dir.tweakBufferSizes();
				hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).ScoreDocs;
				dir.tweakBufferSizes();
				Assert.AreEqual(1, hits.Length);
				hits = searcher.Search(new TermQuery(aaa), null, 1000).ScoreDocs;
				dir.tweakBufferSizes();
				Assert.AreEqual(35, hits.Length);
				searcher.Close();
				reader.Close();
			}
			finally
			{
				_TestUtil.RmDir(indexDir);
			}
		}
Exemplo n.º 60
0
		/// <summary> Make sure if modifier tries to commit but hits disk full that modifier
		/// remains consistent and usable. Similar to TestIndexReader.testDiskFull().
		/// </summary>
		private void  TestOperationsOnDiskFull(bool updates)
		{
			
			bool debug = false;
			Term searchTerm = new Term("content", "aaa");
			int START_COUNT = 157;
			int END_COUNT = 144;
			
			for (int pass = 0; pass < 2; pass++)
			{
				bool autoCommit = (0 == pass);
				
				// First build up a starting index:
				MockRAMDirectory startDir = new MockRAMDirectory();
				IndexWriter writer = new IndexWriter(startDir, autoCommit, new WhitespaceAnalyzer(), true);
				for (int i = 0; i < 157; i++)
				{
					Document d = new Document();
					d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
					d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED));
					writer.AddDocument(d);
				}
				writer.Close();
				
				long diskUsage = startDir.SizeInBytes();
				long diskFree = diskUsage + 10;
				
				System.IO.IOException err = null;
				
				bool done = false;
				
				// Iterate w/ ever increasing free disk space:
				while (!done)
				{
					MockRAMDirectory dir = new MockRAMDirectory(startDir);
					dir.SetPreventDoubleWrite(false);
					IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
					
					modifier.SetMaxBufferedDocs(1000); // use flush or close
					modifier.SetMaxBufferedDeleteTerms(1000); // use flush or close
					
					// For each disk size, first try to commit against
					// dir that will hit random IOExceptions & disk
					// full; after, give it infinite disk space & turn
					// off random IOExceptions & retry w/ same reader:
					bool success = false;
					
					for (int x = 0; x < 2; x++)
					{
						
						double rate = 0.1;
						double diskRatio = ((double) diskFree) / diskUsage;
						long thisDiskFree;
						System.String testName;
						
						if (0 == x)
						{
							thisDiskFree = diskFree;
							if (diskRatio >= 2.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 4.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 6.0)
							{
								rate = 0.0;
							}
							if (debug)
							{
								System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes");
							}
							testName = "disk full during reader.close() @ " + thisDiskFree + " bytes";
						}
						else
						{
							thisDiskFree = 0;
							rate = 0.0;
							if (debug)
							{
								System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space");
							}
							testName = "reader re-use after disk full";
						}
						
						dir.SetMaxSizeInBytes(thisDiskFree);
						dir.SetRandomIOExceptionRate(rate, diskFree);
						
						try
						{
							if (0 == x)
							{
								int docId = 12;
								for (int i = 0; i < 13; i++)
								{
									if (updates)
									{
										Document d = new Document();
										d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
										d.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
										modifier.UpdateDocument(new Term("id", System.Convert.ToString(docId)), d);
									}
									else
									{
										// deletes
										modifier.DeleteDocuments(new Term("id", System.Convert.ToString(docId)));
										// modifier.setNorm(docId, "contents", (float)2.0);
									}
									docId += 12;
								}
							}
							modifier.Close();
							success = true;
							if (0 == x)
							{
								done = true;
							}
						}
						catch (System.IO.IOException e)
						{
							if (debug)
							{
								System.Console.Out.WriteLine("  hit IOException: " + e);
								System.Console.Out.WriteLine(e.StackTrace);
							}
							err = e;
							if (1 == x)
							{
								System.Console.Error.WriteLine(e.StackTrace);
								Assert.Fail(testName + " hit IOException after disk space was freed up");
							}
						}
						
						// If the close() succeeded, make sure there are
						// no unreferenced files.
                        if (success)
                        {
                            Lucene.Net.Util._TestUtil.CheckIndex(dir);
                            TestIndexWriter.AssertNoUnreferencedFiles(dir, "after writer.close");
                        }
						
						// Finally, verify index is not corrupt, and, if
						// we succeeded, we see all docs changed, and if
						// we failed, we see either all docs or no docs
						// changed (transactional semantics):
						IndexReader newReader = null;
						try
						{
							newReader = IndexReader.Open(dir);
						}
						catch (System.IO.IOException e)
						{
							System.Console.Error.WriteLine(e.StackTrace);
							Assert.Fail(testName + ":exception when creating IndexReader after disk full during close: " + e);
						}
						
						IndexSearcher searcher = new IndexSearcher(newReader);
						ScoreDoc[] hits = null;
						try
						{
							hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs;
						}
						catch (System.IO.IOException e)
						{
							System.Console.Error.WriteLine(e.StackTrace);
							Assert.Fail(testName + ": exception when searching: " + e);
						}
						int result2 = hits.Length;
						if (success)
						{
							if (x == 0 && result2 != END_COUNT)
							{
								Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT);
							}
							else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT)
							{
								// It's possible that the first exception was
								// "recoverable" wrt pending deletes, in which
								// case the pending deletes are retained and
								// then re-flushing (with plenty of disk
								// space) will succeed in flushing the
								// deletes:
								Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
							}
						}
						else
						{
							// On hitting exception we still may have added
							// all docs:
							if (result2 != START_COUNT && result2 != END_COUNT)
							{
								System.Console.Error.WriteLine(err.StackTrace);
								Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
							}
						}
						
						searcher.Close();
						newReader.Close();
						
						if (result2 == END_COUNT)
						{
							break;
						}
					}
					
					dir.Close();
					
					// Try again with 10 more bytes of free space:
					diskFree += 10;
				}
			}
		}