Example #1
0
        public string Search(string strQuery)
        {
            string result = string.Empty;

            Lucene.Net.Index.IndexReader        reader           = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
            Lucene.Net.QueryParsers.QueryParser parser           = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
            Lucene.Net.Search.Query             query            = parser.Parse(strQuery);
            Lucene.Net.Search.IndexSearcher     searcher         = new Lucene.Net.Search.IndexSearcher(reader);
            Lucene.Net.Search.Hits                   hits        = searcher.Search(query);
            Lucene.Net.Highlight.QueryScorer         score       = new Lucene.Net.Highlight.QueryScorer(query);
            Lucene.Net.Highlight.SimpleHTMLFormatter formater    = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
            Lucene.Net.Highlight.Highlighter         highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);
            result += "<div align='right' style='background-color:#F0F7F9; padding-right:15px' height='30px'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #005482; FONT-FAMILY: arial'>Kết quả tìm thấy : " + hits.Length() + "  </font></div>";
            result += "<div style='padding: 10px 10px 10px 10px;'>";
            for (int i = 0; i < hits.Length(); i++)
            {
                string id     = hits.Doc(i).Get("ArticleId");
                string title  = hits.Doc(i).Get("ArticleTitle");
                string detail = hits.Doc(i).Get("ArticleDetail");
                Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail));
                result += string.Format("<div align='left'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #5b5b5b; FONT-FAMILY: arial'><a href='/?ArticleId={0}'>{1}</a></font>", id, title);
                result += string.Format("<div align='left'><font style='FONT-SIZE: 9pt' face='Arial' color='#005482'>...{0}...</font></div></div></br>", highlighter.GetBestFragment(ts, detail));
            }
            result += "</div>";
            reader.Close();
            return(result);
        }
Example #2
0
 /// <summary> Note that the underlying IndexReader is not closed, if
 /// IndexSearcher was constructed with IndexSearcher(IndexReader r).
 /// If the IndexReader was supplied implicitly by specifying a directory, then
 /// the IndexReader gets closed.
 /// </summary>
 public override void  Close()
 {
     if (closeReader)
     {
         reader.Close();
     }
 }
Example #3
0
        public static void  Main(System.String[] args)
        {
            try
            {
                Directory   directory = FSDirectory.GetDirectory("demo index", false);
                IndexReader reader    = IndexReader.Open(directory);

                //       Term term = new Term("path", "pizza");
                //       int deleted = reader.delete(term);

                //       System.out.println("deleted " + deleted +
                //           " documents containing " + term);

                for (int i = 0; i < reader.MaxDoc(); i++)
                {
                    reader.Delete(i);
                }

                reader.Close();
                directory.Close();
            }
            catch (System.Exception e)
            {
                System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
Example #4
0
        public virtual void  TestVerifyIndex()
        {
            IndexReader reader = IndexReader.Open(mDirectory);

            Assert.AreEqual(8, reader.NumDocs());
            reader.Close();
        }
Example #5
0
        public static void  Main(System.String[] args)
        {
            System.String usage = typeof(DeleteFiles) + " <unique_term>";
            if (args.Length == 0)
            {
                System.Console.Error.WriteLine("Usage: " + usage);
                System.Environment.Exit(1);
            }
            try
            {
                Directory   directory = FSDirectory.Open(new System.IO.FileInfo("index"));
                IndexReader reader    = IndexReader.Open(directory, false);              // we don't want read-only because we are about to delete

                Term term    = new Term("path", args[0]);
                int  deleted = reader.DeleteDocuments(term);

                System.Console.Out.WriteLine("deleted " + deleted + " documents containing " + term);

                // one can also delete documents by their internal id:

                /*
                 * for (int i = 0; i < reader.maxDoc(); i++) {
                 * System.out.println("Deleting document with id " + i);
                 * reader.delete(i);
                 * }*/

                reader.Close();
                directory.Close();
            }
            catch (System.Exception e)
            {
                System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
 public override void  TearDown()
 {
     base.TearDown();
     readerA.Close();
     readerB.Close();
     readerX.Close();
 }
Example #7
0
        /* Walk directory hierarchy in uid order, while keeping uid iterator from
         * /* existing index in sync.  Mismatches indicate one of: (a) old documents to
         * /* be deleted; (b) unchanged documents, to be left alone; or (c) new
         * /* documents, to be indexed.
         */

        private static void  IndexDocs(System.IO.FileInfo file, System.IO.FileInfo index, bool create)
        {
            if (!create)
            {
                // incrementally update

                reader  = IndexReader.Open(FSDirectory.Open(index), false);  // open existing index
                uidIter = reader.Terms(new Term("uid", ""));                 // init uid iterator

                IndexDocs(file);

                if (deleting)
                {
                    // delete rest of stale docs
                    while (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid")
                    {
                        System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
                        reader.DeleteDocuments(uidIter.Term());
                        uidIter.Next();
                    }
                    deleting = false;
                }

                uidIter.Close();                // close uid iterator
                reader.Close();                 // close existing index
            }
            // don't have exisiting
            else
            {
                IndexDocs(file);
            }
        }
Example #8
0
        public virtual void  TestNPESpanQuery()
        {
            Directory   dir    = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(new System.Collections.Hashtable(0)), IndexWriter.MaxFieldLength.LIMITED);

            // Add documents
            AddDoc(writer, "1", "the big dogs went running to the market");
            AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

            // Commit
            writer.Close();

            // Get searcher
            IndexReader   reader   = IndexReader.Open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Control (make sure docs indexed)
            Assert.AreEqual(2, HitCount(searcher, "the"));
            Assert.AreEqual(1, HitCount(searcher, "cat"));
            Assert.AreEqual(1, HitCount(searcher, "dogs"));
            Assert.AreEqual(0, HitCount(searcher, "rabbit"));

            // This throws exception (it shouldn't)
            Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10).TotalHits);
            reader.Close();
            dir.Close();
        }
        public virtual void  TestCachingWorks()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.Close();

            IndexReader reader = IndexReader.Open(dir);

            MockFilter           filter = new MockFilter();
            CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

            // first time, nested filter is called
            cacher.GetDocIdSet(reader);
            Assert.IsTrue(filter.WasCalled(), "first time");

            // make sure no exception if cache is holding the wrong bitset
            cacher.Bits(reader);
            cacher.GetDocIdSet(reader);

            // second time, nested filter should not be called
            filter.Clear();
            cacher.GetDocIdSet(reader);
            Assert.IsFalse(filter.WasCalled(), "second time");

            reader.Close();
        }
Example #10
0
        public virtual void  TestCompressionTools()
        {
            Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
            Fieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);

            Document doc = new Document();

            doc.Add(binaryFldCompressed);
            doc.Add(stringFldCompressed);

            /** add the doc to a ram index */
            MockRAMDirectory dir    = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            writer.Close();

            /** open a reader and fetch the document */
            IndexReader reader        = IndexReader.Open(dir);
            Document    docFromReader = reader.Document(0);

            Assert.IsTrue(docFromReader != null);

            /** fetch the binary compressed field and compare it's content with the original one */
            System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed"))));
            Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));
            Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed")).Equals(binaryValCompressed));

            reader.Close();
            dir.Close();
        }
Example #11
0
        private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
        {
            RAMDirectory d = new RAMDirectory();
            IndexWriter  w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED);

            for (int i = 0; i < numDeletedDocs; i++)
            {
                w.AddDocument(new Document());
            }
            w.Commit();
            w.DeleteDocuments(new MatchAllDocsQuery());
            w.Commit();

            if (0 < numDeletedDocs)
            {
                Assert.IsTrue(w.HasDeletions(), "writer has no deletions");
            }

            Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
            Assert.AreEqual(0, w.NumDocs(), "writer has non-deleted docs");
            w.Close();
            IndexReader r = IndexReader.Open(d, true);

            Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
            r.Close();
            return(d);
        }
Example #12
0
        public virtual void  TestRAMDirectoryString()
        {
            MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.FullName);

            // Check size
            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            // open reader to test document count
            IndexReader reader = IndexReader.Open(ramDir);

            Assert.AreEqual(docsToAdd, reader.NumDocs());

            // open search zo check if all doc's are there
            IndexSearcher searcher = new IndexSearcher(reader);

            // search for all documents
            for (int i = 0; i < docsToAdd; i++)
            {
                Document doc = searcher.Doc(i);
                Assert.IsTrue(doc.GetField("content") != null);
            }

            // cleanup
            reader.Close();
            searcher.Close();
        }
        public virtual void  Test()
        {
            IndexReader reader = null;

            try
            {
                reader = IndexReader.Open(directory);
                for (int i = 1; i <= numThreads; i++)
                {
                    TestTermPositionVectors(reader, i);
                }
            }
            catch (System.IO.IOException ioe)
            {
                Assert.Fail(ioe.Message);
            }
            finally
            {
                if (reader != null)
                {
                    try
                    {
                        /** close the opened reader */
                        reader.Close();
                    }
                    catch (System.IO.IOException ioe)
                    {
                        System.Console.Error.WriteLine(ioe.StackTrace);
                    }
                }
            }
        }
Example #14
0
        public string SearchAndPaging(string strQuery, string index)
        {
            string result = string.Empty;

            try
            {
                List <SearchArticle>            searchArticleList = new List <SearchArticle>();
                PSCPortal.CMS.ArticleCollection ArticleList       = ArticleCollection.GetArticleCollectionPublish();
                string         nameSub       = Libs.Ultility.GetSubDomain() == string.Empty ? "HomePage" : Libs.Ultility.GetSubDomain();
                SubDomain      subDomain     = PSCPortal.Engine.SubDomain.GetSubByName(nameSub);
                PageCollection pagesBelongTo = subDomain.GetPagesBelongTo();
                string         strId         = string.Empty;
                foreach (var page in pagesBelongTo)
                {
                    foreach (var ar in ArticleList.Where(ar => ar.PageId == page.Id))
                    {
                        strId += ar.Id + " OR ";
                    }
                    if (strId.Length > 0)
                    {
                        strId = strId.Remove(strId.Length - 3, 3);
                    }
                }
                int    pageIndex = Int32.Parse(index);
                string strSearch = " ArticleDetail:(" + strQuery + ") AND ArticleId:" + "( " + strId + " )";
                Lucene.Net.Index.IndexReader        reader           = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
                Lucene.Net.QueryParsers.QueryParser parser           = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
                Lucene.Net.Search.Query             query            = parser.Parse(strSearch);
                Lucene.Net.Search.IndexSearcher     searcher         = new Lucene.Net.Search.IndexSearcher(reader);
                Lucene.Net.Search.Hits                   hits        = searcher.Search(query);
                Lucene.Net.Highlight.QueryScorer         score       = new Lucene.Net.Highlight.QueryScorer(query);
                Lucene.Net.Highlight.SimpleHTMLFormatter formater    = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
                Lucene.Net.Highlight.Highlighter         highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);
                result += hits.Length() + "_" + "<div class='blog_news'><div class='topic_news_title1'><div class='topic_news_title'><a href='#'>Kết quả tìm thấy: " + hits.Length() + "</a></div></div>";
                result += "<div class='ct_topic_l'><div class='ct_topic_r1'>";
                for (int i = pageIndex * 20 - 20; i < pageIndex * 20 && i < hits.Length(); i++)
                {
                    string detail = hits.Doc(i).Get("ArticleDetail");
                    Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail));
                    SearchArticle searchArticle        = new SearchArticle();
                    searchArticle.Id        = hits.Doc(i).Get("ArticleId");;
                    searchArticle.Title     = hits.Doc(i).Get("ArticleTitle");
                    searchArticle.Highligth = highlighter.GetBestFragment(ts, detail);
                    searchArticleList.Add(searchArticle);
                }
                reader.Close();
                JavaScriptSerializer        serializer = new JavaScriptSerializer();
                Dictionary <string, object> resultDic  = new Dictionary <string, object>();
                resultDic["Count"] = hits.Length();
                resultDic["Data"]  = searchArticleList;
                result             = serializer.Serialize(resultDic);
            }
            catch (Exception e)
            {
            }
            return(result);
        }
Example #15
0
        private int Numdoc()
        {
            IndexReader rs  = IndexReader.Open(spellindex);
            int         num = rs.NumDocs();

            Assert.IsTrue(num != 0);
            //System.out.println("num docs: " + num);
            rs.Close();
            return(num);
        }
Example #16
0
        /// <summary> Note that the underlying IndexReader is not closed, if
        /// IndexSearcher was constructed with IndexSearcher(IndexReader r).
        /// If the IndexReader was supplied implicitly by specifying a directory, then
        /// the IndexReader gets closed.
        /// </summary>
        public override void  Close()
        {
            if (closeReader)
            {
                FieldSortedHitQueue.Close(reader);
                Lucene.Net.Search.FieldCache_Fields.DEFAULT.Close(reader);

                reader.Close();
            }
        }
Example #17
0
        public virtual void  TestSetBufferSize()
        {
            System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
            MockFSDirectory    dir      = new MockFSDirectory(indexDir, NewRandom());

            try
            {
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                writer.SetUseCompoundFile(false);
                for (int i = 0; i < 37; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc);
                }
                writer.Close();

                dir.allIndexInputs.Clear();

                IndexReader reader = IndexReader.Open(dir);
                Term        aaa    = new Term("content", "aaa");
                Term        bbb    = new Term("content", "bbb");
                Term        ccc    = new Term("content", "ccc");
                Assert.AreEqual(37, reader.DocFreq(ccc));
                reader.DeleteDocument(0);
                Assert.AreEqual(37, reader.DocFreq(aaa));
                dir.tweakBufferSizes();
                reader.DeleteDocument(4);
                Assert.AreEqual(reader.DocFreq(bbb), 37);
                dir.tweakBufferSizes();

                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[]    hits     = searcher.Search(new TermQuery(bbb), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                dir.tweakBufferSizes();
                hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(1, hits.Length);
                hits = searcher.Search(new TermQuery(aaa), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                searcher.Close();
                reader.Close();
            }
            finally
            {
                _TestUtil.RmDir(indexDir);
            }
        }
        public virtual void  TestBinaryFieldInIndex()
        {
            IFieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES);
            IFieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);

            // binary fields with store off are not allowed
            Assert.Throws <ArgumentException>(
                () => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO));

            Document doc = new Document();

            doc.Add(binaryFldStored);

            doc.Add(stringFldStored);

            /* test for field count */
            Assert.AreEqual(2, doc.fields_ForNUnit.Count);

            /* add the doc to a ram index */
            MockRAMDirectory dir    = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            writer.Close();

            /* open a reader and fetch the document */
            IndexReader reader        = IndexReader.Open(dir, false);
            Document    docFromReader = reader.Document(0);

            Assert.IsTrue(docFromReader != null);

            /* fetch the binary stored field and compare it's content with the original one */
            System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored")));
            Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored));

            /* fetch the string field and compare it's content with the original one */
            System.String stringFldStoredTest = docFromReader.Get("stringStored");
            Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored));

            /* delete the document from index */
            reader.DeleteDocument(0);
            Assert.AreEqual(0, reader.NumDocs());

            reader.Close();
            dir.Close();
        }
        public virtual void  TestBasic()
        {
            HashSet <string> fileExtensions = new HashSet <string>();

            fileExtensions.Add("fdt");
            fileExtensions.Add("fdx");

            Directory    primaryDir   = new MockRAMDirectory();
            RAMDirectory secondaryDir = new MockRAMDirectory();

            FileSwitchDirectory fsd    = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);
            IndexWriter         writer = new IndexWriter(fsd, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);

            writer.UseCompoundFile = false;
            TestIndexWriterReader.CreateIndexNoClose(true, "ram", writer);
            IndexReader reader = writer.GetReader(null);

            Assert.AreEqual(100, reader.MaxDoc);
            writer.Commit(null);
            // we should see only fdx,fdt files here
            System.String[] files = primaryDir.ListAll(null);
            Assert.IsTrue(files.Length > 0);
            for (int x = 0; x < files.Length; x++)
            {
                System.String ext = FileSwitchDirectory.GetExtension(files[x]);
                Assert.IsTrue(fileExtensions.Contains(ext));
            }
            files = secondaryDir.ListAll(null);
            Assert.IsTrue(files.Length > 0);
            // we should not see fdx,fdt files here
            for (int x = 0; x < files.Length; x++)
            {
                System.String ext = FileSwitchDirectory.GetExtension(files[x]);
                Assert.IsFalse(fileExtensions.Contains(ext));
            }
            reader.Close();
            writer.Close();

            files = fsd.ListAll(null);
            for (int i = 0; i < files.Length; i++)
            {
                Assert.IsNotNull(files[i]);
            }
            fsd.Close();
        }
        public virtual void  TestCaching()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            Document    doc    = new Document();
            TokenStream stream = new AnonymousClassTokenStream(this);

            stream = new CachingTokenFilter(stream);

            doc.Add(new Field("preanalyzed", stream, TermVector.NO));

            // 1) we consume all tokens twice before we add the doc to the index
            checkTokens(stream);
            stream.Reset();
            checkTokens(stream);

            // 2) now add the document to the index and verify if all tokens are indexed
            //    don't reset the stream here, the DocumentWriter should do that implicitly
            writer.AddDocument(doc);
            writer.Close();

            IndexReader   reader        = IndexReader.Open(dir);
            TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));

            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(0, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term2"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(2, termPositions.Freq());
            Assert.AreEqual(1, termPositions.NextPosition());
            Assert.AreEqual(3, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term3"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(2, termPositions.NextPosition());
            reader.Close();

            // 3) reset stream and consume tokens again
            stream.Reset();
            checkTokens(stream);
        }
Example #21
0
        public virtual void  TestMissingTerms()
        {
            System.String    fieldName = "field1";
            MockRAMDirectory rd        = new MockRAMDirectory();
            IndexWriter      w         = new IndexWriter(rd, new KeywordAnalyzer(), MaxFieldLength.UNLIMITED);

            for (int i = 0; i < 100; i++)
            {
                Document doc  = new Document();
                int      term = i * 10;            //terms are units of 10;
                doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED));
                w.AddDocument(doc);
            }
            w.Close();

            IndexReader   reader   = IndexReader.Open(rd, true);
            IndexSearcher searcher = new IndexSearcher(reader);
            int           numDocs  = reader.NumDocs();

            ScoreDoc[]        results;
            MatchAllDocsQuery q = new MatchAllDocsQuery();

            System.Collections.ArrayList terms = new System.Collections.ArrayList();
            terms.Add("5");
            results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs).ScoreDocs;
            Assert.AreEqual(0, results.Length, "Must match nothing");

            terms = new System.Collections.ArrayList();
            terms.Add("10");
            results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs).ScoreDocs;
            Assert.AreEqual(1, results.Length, "Must match 1");

            terms = new System.Collections.ArrayList();
            terms.Add("10");
            terms.Add("20");
            results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs).ScoreDocs;
            Assert.AreEqual(2, results.Length, "Must match 2");

            reader.Close();
            rd.Close();
        }
        public virtual void  TestFilterWorks()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            for (int i = 0; i < 500; i++)
            {
                Document document = new Document();
                document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(document);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, true);

            SpanTermQuery    query    = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
            SpanQueryFilter  filter   = new SpanQueryFilter(query);
            SpanFilterResult result   = filter.BitSpans(reader);
            DocIdSet         docIdSet = result.DocIdSet;

            Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
            AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
            var spans = result.Positions;

            Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
            int size = GetDocIdSetSize(docIdSet);

            Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
            for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();)
            {
                SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo)iterator.Current;
                Assert.IsTrue(info != null, "info is null and it shouldn't be");
                //The doc should indicate the bit is on
                AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
                //There should be two positions in each
                Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
            }
            reader.Close();
        }
Example #23
0
        public virtual void  TestSetNorm_Renamed()
        {
            RAMDirectory store  = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            // add the same document four times
            IFieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED);
            Document   d1 = new Document();

            d1.Add(f1);
            writer.AddDocument(d1);
            writer.AddDocument(d1);
            writer.AddDocument(d1);
            writer.AddDocument(d1);
            writer.Close();

            // reset the boost of each instance of this document
            IndexReader reader = IndexReader.Open(store, false);

            reader.SetNorm(0, "field", 1.0f);
            reader.SetNorm(1, "field", 2.0f);
            reader.SetNorm(2, "field", 4.0f);
            reader.SetNorm(3, "field", 16.0f);
            reader.Close();

            // check that searches are ordered by this boost
            float[] scores = new float[4];

            new IndexSearcher(store, true).Search(new TermQuery(new Term("field", "word")), new AnonymousClassCollector(scores, this));

            float lastScore = 0.0f;

            for (int i = 0; i < 4; i++)
            {
                Assert.IsTrue(scores[i] > lastScore);
                lastScore = scores[i];
            }
        }
Example #24
0
        public virtual void  TestGiga()
        {
            StandardAnalyzer analyzer = new StandardAnalyzer();

            Directory   index = new MockRAMDirectory();
            IndexWriter w     = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

            AddDoc("Lucene in Action", w);
            AddDoc("Lucene for Dummies", w);

            // addDoc("Giga", w);
            AddDoc("Giga byte", w);

            AddDoc("ManagingGigabytesManagingGigabyte", w);
            AddDoc("ManagingGigabytesManagingGigabytes", w);

            AddDoc("The Art of Computer Science", w);
            AddDoc("J. K. Rowling", w);
            AddDoc("JK Rowling", w);
            AddDoc("Joanne K Roling", w);
            AddDoc("Bruce Willis", w);
            AddDoc("Willis bruce", w);
            AddDoc("Brute willis", w);
            AddDoc("B. willis", w);
            IndexReader r = w.GetReader();

            w.Close();

            Query q = new QueryParser("field", analyzer).Parse("giga~0.9");

            // 3. search
            IndexSearcher searcher = new IndexSearcher(r);

            ScoreDoc[] hits = searcher.Search(q, 10).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].doc).Get("field"), "Giga byte");
            r.Close();
        }
Example #25
0
        public virtual void  TestNullOrSubScorer()
        {
            Directory   dir = new MockRAMDirectory();
            IndexWriter w   = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            Document    doc = new Document();

            doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
            w.AddDocument(doc);
            IndexReader   r = w.GetReader();
            IndexSearcher s = new IndexSearcher(r);
            BooleanQuery  q = new BooleanQuery();

            q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);

            // PhraseQuery w/ no terms added returns a null scorer
            PhraseQuery pq = new PhraseQuery();

            q.Add(pq, BooleanClause.Occur.SHOULD);
            Assert.AreEqual(1, s.Search(q, 10).totalHits);

            // A required clause which returns null scorer should return null scorer to
            // IndexSearcher.
            q  = new BooleanQuery();
            pq = new PhraseQuery();
            q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
            q.Add(pq, BooleanClause.Occur.MUST);
            Assert.AreEqual(0, s.Search(q, 10).totalHits);

            DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);

            dmq.Add(new TermQuery(new Term("field", "a")));
            dmq.Add(pq);
            Assert.AreEqual(1, s.Search(dmq, 10).totalHits);

            r.Close();
            w.Close();
            dir.Close();
        }
Example #26
0
        public virtual void  TestCachingWorks()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);

            writer.Close();

            IndexReader reader = IndexReader.Open(dir);

            MockFilter           filter = new MockFilter();
            CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

            // first time, nested filter is called
            cacher.Bits(reader);
            Assert.IsTrue(filter.WasCalled(), "first time");

            // second time, nested filter should not be called
            filter.Clear();
            cacher.Bits(reader);
            Assert.IsFalse(filter.WasCalled(), "second time");

            reader.Close();
        }
Example #27
0
        /// <summary> Index a Dictionary</summary>
        /// <param name="dict">the dictionary to index
        /// </param>
        /// <throws>  IOException </throws>
        public virtual void  IndexDictionary(Dictionary dict)
        {
            IndexReader.Unlock(spellindex);
            IndexWriter writer = new IndexWriter(spellindex, new WhitespaceAnalyzer(), !IndexReader.IndexExists(spellindex));

            writer.SetMergeFactor(300);
            writer.SetMaxBufferedDocs(150);

            System.Collections.IEnumerator iter = dict.GetWordsIterator();
            while (iter.MoveNext())
            {
                System.String word = (System.String)iter.Current;

                int len = word.Length;
                if (len < 3)
                {
                    continue; // too short we bail but "too long" is fine...
                }

                if (this.Exist(word))
                {
                    // if the word already exist in the gramindex
                    continue;
                }

                // ok index the word
                Document doc = CreateDocument(word, GetMin(len), GetMax(len));
                writer.AddDocument(doc);
            }
            // close writer
            writer.Optimize();
            writer.Close();

            // close reader
            reader.Close();
            reader = null;
        }
Example #28
0
        public virtual void  TestBinaryFieldInIndex()
        {
            Fieldable binaryFldStored     = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES);
            Fieldable binaryFldCompressed = new Field("binaryCompressed", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed), Field.Store.COMPRESS);
            Fieldable stringFldStored     = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
            Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);

            try
            {
                // binary fields with store off are not allowed
                new Field("fail", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed), Field.Store.NO);
                Assert.Fail();
            }
            catch (System.ArgumentException iae)
            {
                ;
            }

            Document doc = new Document();

            doc.Add(binaryFldStored);
            doc.Add(binaryFldCompressed);

            doc.Add(stringFldStored);
            doc.Add(stringFldCompressed);

            /** test for field count */
            Assert.AreEqual(4, doc.fields_ForNUnit.Count);

            /** add the doc to a ram index */
            MockRAMDirectory dir    = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            writer.Close();

            /** open a reader and fetch the document */
            IndexReader reader        = IndexReader.Open(dir);
            Document    docFromReader = reader.Document(0);

            Assert.IsTrue(docFromReader != null);

            /** fetch the binary stored field and compare it's content with the original one */
            System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored")));
            Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored));

            /** fetch the binary compressed field and compare it's content with the original one */
            System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryCompressed")));
            Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));

            /** fetch the string field and compare it's content with the original one */
            System.String stringFldStoredTest = docFromReader.Get("stringStored");
            Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored));

            /** fetch the compressed string field and compare it's content with the original one */
            System.String stringFldCompressedTest = docFromReader.Get("stringCompressed");
            Assert.IsTrue(stringFldCompressedTest.Equals(binaryValCompressed));

            /** delete the document from index */
            reader.DeleteDocument(0);
            Assert.AreEqual(0, reader.NumDocs());

            reader.Close();
            dir.Close();
        }
Example #29
0
 public static void DeleteArticleIndexing(Guid articleId)
 {
     Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(HttpContext.Current.Server.MapPath(ConfigurationManager.AppSettings["IndexingArticle"]));
     reader.DeleteDocuments(new Lucene.Net.Index.Term("ArticleId", articleId.ToString()));
     reader.Close();
 }
        public virtual void  TestEmptyIndex()
        {
            // creating two directories for indices
            Directory indexStoreA = new MockRAMDirectory();
            Directory indexStoreB = new MockRAMDirectory();

            // creating a document to store
            Document lDoc = new Document();

            lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating a document to store
            Document lDoc2 = new Document();

            lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating a document to store
            Document lDoc3 = new Document();

            lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating an index writer for the first index
            IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
            // creating an index writer for the second index, but writing nothing
            IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

            //--------------------------------------------------------------------
            // scenario 1
            //--------------------------------------------------------------------

            // writing the documents to the first index
            writerA.AddDocument(lDoc);
            writerA.AddDocument(lDoc2);
            writerA.AddDocument(lDoc3);
            writerA.Optimize();
            writerA.Close();

            // closing the second index
            writerB.Close();

            // creating the query
            QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT));
            Query       query  = parser.Parse("handle:1");

            // building the searchables
            Searcher[] searchers = new Searcher[2];
            // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
            searchers[0] = new IndexSearcher(indexStoreB, true);
            searchers[1] = new IndexSearcher(indexStoreA, true);
            // creating the multiSearcher
            Searcher mSearcher = GetMultiSearcherInstance(searchers);

            // performing the search
            ScoreDoc[] hits = mSearcher.Search(query, null, 1000).ScoreDocs;

            Assert.AreEqual(3, hits.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits.Length; i++)
            {
                mSearcher.Doc(hits[i].Doc);
            }
            mSearcher.Close();


            //--------------------------------------------------------------------
            // scenario 2
            //--------------------------------------------------------------------

            // adding one document to the empty index
            writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
            writerB.AddDocument(lDoc);
            writerB.Optimize();
            writerB.Close();

            // building the searchables
            Searcher[] searchers2 = new Searcher[2];
            // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
            searchers2[0] = new IndexSearcher(indexStoreB, true);
            searchers2[1] = new IndexSearcher(indexStoreA, true);
            // creating the mulitSearcher
            MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2);

            // performing the same search
            ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000).ScoreDocs;

            Assert.AreEqual(4, hits2.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits2.Length; i++)
            {
                // no exception should happen at this point
                mSearcher2.Doc(hits2[i].Doc);
            }

            // test the subSearcher() method:
            Query subSearcherQuery = parser.Parse("id:doc1");

            hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits2.Length);
            Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[0]
            Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].Doc)); // hit from searchers2[1]
            subSearcherQuery = parser.Parse("id:doc2");
            hits2            = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits2.Length);
            Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[1]
            mSearcher2.Close();

            //--------------------------------------------------------------------
            // scenario 3
            //--------------------------------------------------------------------

            // deleting the document just added, this will cause a different exception to take place
            Term        term    = new Term("id", "doc1");
            IndexReader readerB = IndexReader.Open(indexStoreB, false);

            readerB.DeleteDocuments(term);
            readerB.Close();

            // optimizing the index with the writer
            writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
            writerB.Optimize();
            writerB.Close();

            // building the searchables
            Searcher[] searchers3 = new Searcher[2];

            searchers3[0] = new IndexSearcher(indexStoreB, true);
            searchers3[1] = new IndexSearcher(indexStoreA, true);
            // creating the mulitSearcher
            Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);

            // performing the same search
            ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000).ScoreDocs;

            Assert.AreEqual(3, hits3.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits3.Length; i++)
            {
                mSearcher3.Doc(hits3[i].Doc);
            }
            mSearcher3.Close();
            indexStoreA.Close();
            indexStoreB.Close();
        }
Example #31
0
		/* Walk directory hierarchy in uid order, while keeping uid iterator from
		/* existing index in sync.  Mismatches indicate one of: (a) old documents to
		/* be deleted; (b) unchanged documents, to be left alone; or (c) new
		/* documents, to be indexed.
		*/

        private static void IndexDocs(System.IO.DirectoryInfo file, System.IO.DirectoryInfo index, bool create)
		{
			if (!create)
			{
				// incrementally update
				
				reader = IndexReader.Open(FSDirectory.Open(index), false); // open existing index
				uidIter = reader.Terms(new Term("uid", "")); // init uid iterator
				
				IndexDocs(file);
				
				if (deleting)
				{
					// delete rest of stale docs
					while (uidIter.Term() != null && (System.Object) uidIter.Term().Field == (System.Object) "uid")
					{
						System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text));
						reader.DeleteDocuments(uidIter.Term());
						uidIter.Next();
					}
					deleting = false;
				}
				
				uidIter.Close(); // close uid iterator
				reader.Close(); // close existing index
			}
			// don't have exisiting
			else
				IndexDocs(file);
		}