Example #1
		public virtual void  TestDemo_Renamed()
		{
			
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
			
			// Store the index in memory:
			Directory directory = new RAMDirectory();
			// To store an index on disk, use this instead:
			//Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/testindex"));
			IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
			Document doc = new Document();
			System.String text = "This is the text to be indexed.";
			doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
			iwriter.AddDocument(doc);
			iwriter.Close();
			
			// Now search the index:
			IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
			// Parse a simple query that searches for "text":
			QueryParser parser = new QueryParser("fieldname", analyzer);
			Query query = parser.Parse("text");
			ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(1, hits.Length);
			// Iterate through the results:
			for (int i = 0; i < hits.Length; i++)
			{
				Document hitDoc = isearcher.Doc(hits[i].doc);
				Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
			}
			isearcher.Close();
			directory.Close();
		}
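The commented-out FSDirectory line above shows the on-disk alternative. A minimal sketch of that variant (assuming the same Lucene.Net 2.9-style API used in this example and a hypothetical index path), with try/finally so Close runs even if an operation throws:

		public virtual void IndexAndSearchOnDisk()
		{
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
			// Store the index on disk (hypothetical path):
			Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo(@"c:\tmp\testindex"));
			IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
			try
			{
				Document doc = new Document();
				doc.Add(new Field("fieldname", "This is the text to be indexed.", Field.Store.YES, Field.Index.ANALYZED));
				iwriter.AddDocument(doc);
			}
			finally
			{
				// commits and releases the write lock even if AddDocument throws
				iwriter.Close();
			}

			IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
			try
			{
				ScoreDoc[] hits = isearcher.Search(new TermQuery(new Term("fieldname", "text")), null, 1000).ScoreDocs;
				Assert.AreEqual(1, hits.Length);
			}
			finally
			{
				isearcher.Close();
				directory.Close();
			}
		}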
Example #2
        public virtual void  TestDemo_Renamed()
        {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

            // Store the index in memory:
            Directory directory = new RAMDirectory();
            // To store an index on disk, use this instead:
            //Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/testindex"));
            IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
            Document    doc     = new Document();

            System.String text = "This is the text to be indexed.";
            doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
            iwriter.AddDocument(doc);
            iwriter.Close();

            // Now search the index:
            IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
            // Parse a simple query that searches for "text":
            QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer);
            Query       query  = parser.Parse("text");

            ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            // Iterate through the results:
            for (int i = 0; i < hits.Length; i++)
            {
                Document hitDoc = isearcher.Doc(hits[i].Doc);
                Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
            }
            isearcher.Close();
            directory.Close();
        }
Example #3
        public virtual void  TestRAMDirectoryString()
        {
            MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.FullName);

            // Check size
            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            // open reader to test document count
            IndexReader reader = IndexReader.Open(ramDir);

            Assert.AreEqual(docsToAdd, reader.NumDocs());

            // open searcher to check if all docs are there
            IndexSearcher searcher = new IndexSearcher(reader);

            // search for all documents
            for (int i = 0; i < docsToAdd; i++)
            {
                Document doc = searcher.Doc(i);
                Assert.IsTrue(doc.GetField("content") != null);
            }

            // cleanup
            reader.Close();
            searcher.Close();
        }
Example #4
        public virtual void  TestMmapIndex()
        {
            Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway");

            FSDirectory storeDirectory;

            storeDirectory = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null);

            // plan to add a set of useful stopwords, consider changing some of the
            // interior filters.
            StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>());
            // TODO: something about lock timeouts and leftover locks.
            IndexWriter   writer   = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            IndexSearcher searcher = new IndexSearcher(storeDirectory, true, null);

            for (int dx = 0; dx < 1000; dx++)
            {
                System.String f   = RandomField();
                Document      doc = new Document();
                doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc, null);
            }

            searcher.Close();
            writer.Close();
            RmDir(new System.IO.FileInfo(storePathname));
        }
Example #5
        public virtual void  TestGetValuesForIndexedDocument()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new StandardAnalyzer(), true);

            writer.AddDocument(MakeDocumentWithFields());
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            // search for something that does exist
            Query query = new TermQuery(new Term("keyword", "test1"));

            // ensure that queries return expected results without DateFilter first
            Hits hits = searcher.Search(query);

            Assert.AreEqual(1, hits.Length());

            try
            {
                DoAssert(hits.Doc(0), true);
            }
            catch (System.Exception e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                System.Console.Error.Write("\n");
            }
            finally
            {
                searcher.Close();
            }
        }
Example #6
        public virtual void  TestMmapIndex()
        {
            FSDirectory storeDirectory;

            storeDirectory = new MMapDirectory(new System.IO.FileInfo(storePathname), null);

            // plan to add a set of useful stopwords, consider changing some of the
            // interior filters.
            StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable());
            // TODO: something about lock timeouts and leftover locks.
            IndexWriter   writer   = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            IndexSearcher searcher = new IndexSearcher(storePathname);

            for (int dx = 0; dx < 1000; dx++)
            {
                System.String f   = RandomField();
                Document      doc = new Document();
                doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }

            searcher.Close();
            writer.Close();
            RmDir(new System.IO.FileInfo(storePathname));
        }
Example #7
		public virtual void  TestDemo_Renamed_Method()
		{
			
			Analyzer analyzer = new StandardAnalyzer();
			
			// Store the index in memory:
			Directory directory = new RAMDirectory();
			// To store an index on disk, use this instead (note that the 
			// parameter true will overwrite the index in that directory
			// if one exists):
			//Directory directory = FSDirectory.GetDirectory("/tmp/testindex", true);
			IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
			iwriter.SetMaxFieldLength(25000);
			Document doc = new Document();
			System.String text = "This is the text to be indexed.";
			doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED));
			iwriter.AddDocument(doc);
			iwriter.Close();
			
			// Now search the index:
			IndexSearcher isearcher = new IndexSearcher(directory);
			// Parse a simple query that searches for "text":
			Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer);
			Query query = parser.Parse("text");
			Hits hits = isearcher.Search(query);
			Assert.AreEqual(1, hits.Length());
			// Iterate through the results:
			for (int i = 0; i < hits.Length(); i++)
			{
				Document hitDoc = hits.Doc(i);
				Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname"));
			}
			isearcher.Close();
			directory.Close();
		}
Example #8
        public void TestBooleanQuerySerialization()
        {
            Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery();

            lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Lucene.Net.Search.BooleanClause.Occur.MUST);

            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            System.IO.MemoryStream ms = new System.IO.MemoryStream();
            bf.Serialize(ms, lucQuery);
            ms.Seek(0, System.IO.SeekOrigin.Begin);
            Lucene.Net.Search.BooleanQuery lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms);
            ms.Close();

            Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization");

            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);

            int hitCount = searcher.Search(lucQuery, 20).totalHits;

            searcher.Close();
            searcher = new Lucene.Net.Search.IndexSearcher(dir, true);

            int hitCount2 = searcher.Search(lucQuery2, 20).totalHits;

            Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts");
        }
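The serialize/rewind/deserialize steps above generalize to any [Serializable] query; a small helper sketch using the same BinaryFormatter approach (RoundTrip is a hypothetical name, not part of the test):

        private static T RoundTrip<T>(T obj)
        {
            var bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            using (var ms = new System.IO.MemoryStream())
            {
                bf.Serialize(ms, obj);                  // write the object graph
                ms.Seek(0, System.IO.SeekOrigin.Begin); // rewind before reading it back
                return (T)bf.Deserialize(ms);           // materialize an independent copy
            }
        }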
Example #9
        private int GetHitCount(Directory dir, Term term)
        {
            IndexSearcher searcher = new IndexSearcher(dir, true, null);
            int           hitCount = searcher.Search(new TermQuery(term), null, 1000, null).TotalHits;

            searcher.Close();
            return(hitCount);
        }
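A usage sketch for the helper above (the directory setup is hypothetical; the trailing null arguments inside GetHitCount follow the state-passing overloads this snippet targets):

            Directory dir = new RAMDirectory();
            // ... populate dir with an IndexWriter as in the other examples ...
            int count = GetHitCount(dir, new Term("content", "aaa")); // opens a searcher, reads TotalHits, closes it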
Example #10
        public virtual void  TestFieldSetValue()
        {
            Field    field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
            Document doc   = new Document();

            doc.Add(field);
            doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));

            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            field.SetValue("id2");
            writer.AddDocument(doc);
            field.SetValue("id3");
            writer.AddDocument(doc);
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            Query query = new TermQuery(new Term("keyword", "test"));

            // ensure that queries return expected results without DateFilter first
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            int result = 0;

            for (int i = 0; i < 3; i++)
            {
                Document doc2 = searcher.Doc(hits[i].doc);
                Field    f    = doc2.GetField("id");
                if (f.StringValue().Equals("id1"))
                {
                    result |= 1;
                }
                else if (f.StringValue().Equals("id2"))
                {
                    result |= 2;
                }
                else if (f.StringValue().Equals("id3"))
                {
                    result |= 4;
                }
                else
                {
                    Assert.Fail("unexpected id field");
                }
            }
            searcher.Close();
            dir.Close();
            Assert.AreEqual(7, result, "did not see all IDs");
        }
Example #11
        /* Open pre-lockless index, do a delete &
         * setNorm, and search (no adds) */
        public virtual void  ChangeIndexNoAdds(System.String dirName)
        {
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // make sure searching sees right # hits
            IndexSearcher searcher = new IndexSearcher(dir, true);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length, "wrong number of hits");
            Document d = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d.Get("id"), "wrong first document");
            searcher.Close();

            // make sure we can do a delete & setNorm against this
            // pre-lockless segment:
            IndexReader reader     = IndexReader.Open(dir, false);
            Term        searchTerm = new Term("id", "6");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // make sure they "took":
            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.IndexReader);
            searcher.Close();

            // optimize
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.IndexReader);
            searcher.Close();

            dir.Close();
        }
Example #12
        public virtual void  TestSetBufferSize()
        {
            System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
            MockFSDirectory    dir      = new MockFSDirectory(indexDir, NewRandom());

            try
            {
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                writer.SetUseCompoundFile(false);
                for (int i = 0; i < 37; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc);
                }
                writer.Close();

                dir.allIndexInputs.Clear();

                IndexReader reader = IndexReader.Open(dir);
                Term        aaa    = new Term("content", "aaa");
                Term        bbb    = new Term("content", "bbb");
                Term        ccc    = new Term("content", "ccc");
                Assert.AreEqual(37, reader.DocFreq(ccc));
                reader.DeleteDocument(0);
                Assert.AreEqual(37, reader.DocFreq(aaa));
                dir.tweakBufferSizes();
                reader.DeleteDocument(4);
                Assert.AreEqual(reader.DocFreq(bbb), 37);
                dir.tweakBufferSizes();

                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[]    hits     = searcher.Search(new TermQuery(bbb), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                dir.tweakBufferSizes();
                hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(1, hits.Length);
                hits = searcher.Search(new TermQuery(aaa), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                searcher.Close();
                reader.Close();
            }
            finally
            {
                _TestUtil.RmDir(indexDir);
            }
        }
Example #13
        public int GetDocumentCount()
        {
            int           num;
            IndexSearcher searcher = this.CreateSearcher(true);

            try
            {
                num = searcher.Reader.NumDocs();
            }
            finally
            {
                searcher.Close();
            }
            return(num);
        }
Example #14
            override public void  Run()
            {
                IndexSearcher searcher = null;
                Query         query    = new TermQuery(new Term("content", "aaa"));

                for (int i = 0; i < this.numIteration; i++)
                {
                    try
                    {
                        searcher = new IndexSearcher(dir);
                    }
                    catch (System.Exception e)
                    {
                        hitException = true;
                        System.Console.Out.WriteLine("Stress Test Index Searcher: create hit unexpected exception: " + e.ToString());
                        System.Console.Out.WriteLine(e.StackTrace);
                        break;
                    }
                    if (searcher != null)
                    {
                        ScoreDoc[] hits = null;
                        try
                        {
                            hits = searcher.Search(query, null, 1000).scoreDocs;
                        }
                        catch (System.IO.IOException e)
                        {
                            hitException = true;
                            System.Console.Out.WriteLine("Stress Test Index Searcher: search hit unexpected exception: " + e.ToString());
                            System.Console.Out.WriteLine(e.StackTrace);
                            break;
                        }
                        // System.out.println(hits.length() + " total results");
                        try
                        {
                            searcher.Close();
                        }
                        catch (System.IO.IOException e)
                        {
                            hitException = true;
                            System.Console.Out.WriteLine("Stress Test Index Searcher: close hit unexpected exception: " + e.ToString());
                            System.Console.Out.WriteLine(e.StackTrace);
                            break;
                        }
                        searcher = null;
                    }
                }
            }
Example #15
        public static void Main(System.String[] args)
        {
            System.String index = @"c:\EmailTest\LuceneDB";
            IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo(index)), true); // only searching, so read-only=true
            Searcher searcher = new IndexSearcher(reader);

            if (Stopwatch.IsHighResolution) {
                System.Console.WriteLine("We have a high resolution timer with an frequency of {0} ticks/ms", Stopwatch.Frequency/1000);
            }

            searchFor(searcher, "jeske AND neotonic");
            searchFor(searcher, "noticed AND problems");
            searchFor(searcher, "data AND returned");
            searchFor(searcher, "scott AND hassan");

            searcher.Close();
            reader.Close();
            System.Console.WriteLine("done");
        }
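The searchFor helper is not shown in this snippet. A hypothetical sketch of what it might look like, assuming the no-Version QueryParser constructor seen in the other examples and a guessed "contents" field name:

        private static void searchFor(Searcher searcher, string queryString)
        {
            QueryParser parser = new QueryParser("contents", new StandardAnalyzer()); // field name is a guess
            Query query = parser.Parse(queryString);

            Stopwatch timer = Stopwatch.StartNew();
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            timer.Stop();

            System.Console.WriteLine("{0} hits for \"{1}\" in {2} ms", hits.Length, queryString, timer.ElapsedMilliseconds);
        }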
Example #16
        public static bool PreviouslyIndexed(string url)
        {
            string indexFileLocation = indexDir;
            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
            Lucene.Net.Search.Hits hits = null;
            try
            {
                Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", url));

                hits = searcher.Search(query);

            }
            catch { }
            finally
            {
                searcher.Close();
            }
            return hits != null && hits.Length() > 0;
        }
Example #17
        public virtual void  TestGetValuesForIndexedDocument()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(MakeDocumentWithFields());
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            // search for something that does exist
            Query query = new TermQuery(new Term("keyword", "test1"));

            // ensure that queries return expected results without DateFilter first
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            DoAssert(searcher.Doc(hits[0].doc), true);
            searcher.Close();
        }
Example #18
        public virtual void  TestStopWordSearching()
        {
            Analyzer  analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
            Directory ramDir   = new RAMDirectory();
            var       iw       = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            var       doc      = new Document();

            doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
            iw.AddDocument(doc);
            iw.Close();

            var mfqp = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, analyzer);

            mfqp.DefaultOperator = QueryParser.Operator.AND;
            var q          = mfqp.Parse("the footest");
            var is_Renamed = new IndexSearcher(ramDir, true);
            var hits       = is_Renamed.Search(q, null, 1000).ScoreDocs;

            Assert.AreEqual(1, hits.Length);
            is_Renamed.Close();
        }
Example #19
        public virtual void  TestStopWordSearching()
        {
            Analyzer    analyzer = new StandardAnalyzer();
            Directory   ramDir   = new RAMDirectory();
            IndexWriter iw       = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            Document    doc      = new Document();

            doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
            iw.AddDocument(doc);
            iw.Close();

            MultiFieldQueryParser mfqp = new MultiFieldQueryParser(new System.String[] { "body" }, analyzer);

            mfqp.SetDefaultOperator(QueryParser.Operator.AND);
            Query         q          = mfqp.Parse("the footest");
            IndexSearcher is_Renamed = new IndexSearcher(ramDir);

            ScoreDoc[] hits = is_Renamed.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            is_Renamed.Close();
        }
Example #20
        public static List<IndexedItem> SearchProjects(string s)
        {
            List<IndexedItem> retVal = new List<IndexedItem>();

            string indexFileLocation = indexDir;
            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);

            try
            {
                Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("content", s));
                query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", fromUrl)) });
                query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("title", s)) });

                //execute the query
                Lucene.Net.Search.Hits hits = searcher.Search(query);

                //iterate over the results.
                for (int i = 0; i < hits.Length(); i++)
                {
                    Lucene.Net.Documents.Document doc = hits.Doc(i);
                    string article = doc.Get("content");
                    string title = doc.Get("title");
                    string url = doc.Get("url");
                    retVal.Add(new IndexedItem { Article = article, Href = url, Title = title });
                }
                foreach (IndexedItem ind in retVal)
                {
                    Console.WriteLine(ind.Href);
                }

                retVal = retVal.Distinct().ToList();
            }
            catch { }
            finally
            {
                searcher.Close();
            }
            return retVal;
        }
Example #21
        public void Test_Store_RAMDirectory()
        {
            Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

            //Index 1 Doc
            Lucene.Net.Index.IndexWriter  wr  = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true);
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
            wr.AddDocument(doc);
            wr.Close();

            //now serialize it
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
            serializer.Serialize(memoryStream, ramDIR);

            //Close DIR
            ramDIR.Close();
            ramDIR = null;

            //now deserialize
            memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
            Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream);

            //Add 1 more doc
            wr  = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false);
            doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
            wr.AddDocument(doc);
            wr.Close();

            //Search
            Lucene.Net.Search.IndexSearcher     s       = new Lucene.Net.Search.IndexSearcher(ramDIR2);
            Lucene.Net.QueryParsers.QueryParser qp      = new Lucene.Net.QueryParsers.QueryParser("field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
            Lucene.Net.Search.Query             q       = qp.Parse("value1");
            Lucene.Net.Search.TopDocs           topDocs = s.Search(q, 100);
            s.Close();

            Assert.AreEqual(topDocs.totalHits, 2, "See the issue: LUCENENET-174");
        }
Example #22
		public virtual void  TestGetValuesForIndexedDocument()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(MakeDocumentWithFields());
			writer.Close();
			
			Searcher searcher = new IndexSearcher(dir);
			
			// search for something that does exist
			Query query = new TermQuery(new Term("keyword", "test1"));
			
			// ensure that queries return expected results without DateFilter first
			ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(1, hits.Length);
			
			DoAssert(searcher.Doc(hits[0].doc), true);
			searcher.Close();
		}
Example #23
        private static void terminology(string languageModelFile, string inputFile, bool keepIntermedyaryFiles, string lang, bool alreadyProcessed)
        {
            List <string> terms = new List <string>();

            //HashSet<string> generalTerms = new HashSet<string>();
            //if (lang == "ro")
            //{
            //    generalTerms = DataStructReader.readHashSet("gtRo.txt", Encoding.UTF8, 0, '\t', true, null);
            //}
            //else if (lang == "en")
            //{
            //    generalTerms = DataStructReader.readHashSet("gtEn.txt", Encoding.UTF8, 0, '\t', true, null);
            //}

            Dictionary <string, double> ncounts = DataStructReader.readDictionaryD(languageModelFile, Encoding.UTF8, 0, 1, '\t', false, null, null);

            if (ncounts.Count == 0)
            {
                Console.WriteLine("Language Model Missing... Press key for aborting!");
                Console.ReadLine();
            }
            else
            {
                Dictionary <string, double> userCounts = new Dictionary <string, double>();
                double total = 0;

                if (!File.Exists(inputFile))
                {
                    Console.WriteLine("Input File doesn't exist... Press key for aborting!");
                    Console.ReadLine();
                }
                else
                {
                    Dictionary <string, string> fileCorrespondences = new Dictionary <string, string>();
                    string       line = "";
                    StreamReader rdr  = new StreamReader(inputFile, Encoding.UTF8);
                    while ((line = rdr.ReadLine()) != null)
                    {
                        string[] parts = line.Trim().Split('\t');
                        if (!fileCorrespondences.ContainsKey(parts[0]))
                        {
                            fileCorrespondences.Add(parts[0], parts[1]);
                        }
                    }

                    string[] files = fileCorrespondences.Keys.ToArray();
                    Dictionary <string, Dictionary <string, int> > singleOccurencesFirst = new Dictionary <string, Dictionary <string, int> >();
                    StreamWriter wrtProcessed = new StreamWriter("_preprocessed", false, Encoding.UTF8);
                    wrtProcessed.AutoFlush = true;

                    foreach (string file in files)
                    {
                        if (alreadyProcessed)
                        {
                            Console.Write("\nReading file: {0}", file);
                        }
                        else
                        {
                            Console.WriteLine("\nProcessing file: {0}", file);
                        }
                        getUserCounts(ref userCounts, ref singleOccurencesFirst, file, wrtProcessed, ref total, lang, alreadyProcessed);
                        //Console.WriteLine(" ... done!");
                    }
                    wrtProcessed.Close();

                    Console.Write("Extracting single word terms");

                    foreach (string key in userCounts.Keys.ToArray())
                    {
                        if (userCounts[key] < 2 /*|| generalTerms.Contains(key)*/)
                        {
                            userCounts.Remove(key);
                        }
                        else
                        {
                            userCounts[key] = userCounts[key] / total;
                        }
                    }

                    Dictionary <string, List <string> > singleOccurences = getSingle(singleOccurencesFirst);

                    Dictionary <string, double> results = new Dictionary <string, double>();
                    foreach (string word in userCounts.Keys)
                    {
                        double newVal = 0;

                        if (ncounts.ContainsKey(word))
                        {
                            newVal = userCounts[word] / ncounts[word];
                        }
                        else
                        {
                            newVal = userCounts[word] / ncounts["_dummy_"];
                        }

                        results.Add(word, newVal);
                    }

                    string[] keys   = results.Keys.ToArray();
                    double[] values = results.Values.ToArray();

                    Array.Sort(values, keys);

                    StreamWriter wrt = new StreamWriter("_monoTerms", false, Encoding.UTF8);
                    wrt.AutoFlush = true;
                    for (int i = keys.Length - 1; i >= 0; i--)
                    {
                        wrt.WriteLine("{0}\t{1}", keys[i], values[i]);
                    }
                    wrt.Close();

                    Console.WriteLine(" ... done!");

                    Console.Write("Extracting multi word terms");

                    ColocationExtractor ce = new ColocationExtractor();
                    Dictionary <string, List <string> > multiOccurences = new Dictionary <string, List <string> >();

                    if (ce.extractCollocations("_preprocessed", "_multiTerms"))
                    {
                        Console.WriteLine(" ... done!");

                        Console.Write("Create index for extracting exact occurences");

                        if (!Directory.Exists("_index"))
                        {
                            Directory.CreateDirectory("_index");
                        }
                        ce.indexText("_preprocessed", "_index");
                        Console.WriteLine(" ... done!");

                        Console.Write("Search for exact occurences");
                        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher("_index");
                        multiOccurences = retriveOccurences(ce, searcher, "_multiTerms");
                        Console.WriteLine(" ... done!");

                        searcher.Close();
                        string[] filesToDel = Directory.GetFiles("_index");
                        foreach (string f in filesToDel)
                        {
                            File.Delete(f);
                        }
                        Directory.Delete("_index");
                    }
                    else
                    {
                        Console.WriteLine(" ... done! - no multi word terms found!");
                    }


                    Console.Write("Retrieving terminology");

                    terms = extractTerminology("_monoTerms", "_multiTerms", singleOccurences, multiOccurences);

                    if (keepIntermedyaryFiles)
                    {
                        StreamWriter wrtT = new StreamWriter("_terminology", false, Encoding.UTF8);
                        wrtT.AutoFlush = true;
                        foreach (string term in terms)
                        {
                            wrtT.WriteLine(term);
                        }
                        wrtT.Close();
                    }

                    Console.WriteLine(" ... done!");

                    HashSet <string> mono = new HashSet <string>();
                    Dictionary <string, HashSet <string> > multi = new Dictionary <string, HashSet <string> >();
                    HashSet <string> multiOrg = new HashSet <string>();

                    getTerms(terms, ref mono, ref multi, ref multiOrg);
                    markTerms(lang, fileCorrespondences, mono, multi, multiOrg, alreadyProcessed);

                    if (!keepIntermedyaryFiles)
                    {
                        File.Delete("_preprocessed");
                        File.Delete("_monoTerms");
                        File.Delete("_multiTerms");
                    }
                }
            }
        }
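One detail worth flagging in the snippet above: Array.Sort(values, keys) sorts both parallel arrays by the first array, so walking keys backwards (as the writer loop does) emits terms in descending score order. A tiny standalone illustration:

            double[] scores = { 0.3, 0.9, 0.1 };
            string[] terms  = { "b", "a", "c" };
            Array.Sort(scores, terms);           // both arrays reordered by scores: {0.1, 0.3, 0.9} / {"c", "b", "a"}
            for (int i = terms.Length - 1; i >= 0; i--)
            {
                Console.WriteLine("{0}\t{1}", terms[i], scores[i]); // highest score first
            }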
Example #24
        public static void  Main(System.String[] args)
        {
            try
            {
                Searcher searcher = new IndexSearcher(@"index");
                Analyzer analyzer = new StandardAnalyzer();

                System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding);
                while (true)
                {
                    System.Console.Out.Write("Query: ");
                    System.String line = in_Renamed.ReadLine();

                    if (line == null)
                    {
                        break;
                    }

                    Query query = QueryParser.Parse(line, "contents", analyzer);
                    System.Console.Out.WriteLine("Searching for: " + query.ToString("contents"));

                    Hits hits = searcher.Search(query);
                    System.Console.Out.WriteLine(hits.Length() + " total matching documents");

                    int HITS_PER_PAGE = 10;
                    for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
                    {
                        int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
                        for (int i = start; i < end; i++)
                        {
                            Document      doc  = hits.Doc(i);
                            System.String path = doc.Get("path");
                            if (path != null)
                            {
                                System.Console.Out.WriteLine(i + ". " + path);
                            }
                            else
                            {
                                System.String url = doc.Get("url");
                                if (url != null)
                                {
                                    System.Console.Out.WriteLine(i + ". " + url);
                                    System.Console.Out.WriteLine("   - " + doc.Get("title"));
                                }
                                else
                                {
                                    System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document");
                                }
                            }
                        }

                        if (hits.Length() > end)
                        {
                            System.Console.Out.Write("more (y/n) ? ");
                            line = in_Renamed.ReadLine();
                            if (line.Length == 0 || line[0] == 'n')
                            {
                                break;
                            }
                        }
                    }
                }
                searcher.Close();
            }
            catch (System.Exception e)
            {
                System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
Example #25
        public virtual void  TestMmapIndex()
        {
            Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway");

            FSDirectory storeDirectory;
            storeDirectory = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null);
            
            // plan to add a set of useful stopwords, consider changing some of the
            // interior filters.
            StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>());
            // TODO: something about lock timeouts and leftover locks.
            IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            IndexSearcher searcher = new IndexSearcher(storeDirectory, true);
            
            for (int dx = 0; dx < 1000; dx++)
            {
                System.String f = RandomField();
                Document doc = new Document();
                doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            
            searcher.Close();
            writer.Close();
            RmDir(new System.IO.FileInfo(storePathname));
        }
Example #26
		public virtual void  TestCommitOnClose()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			for (int i = 0; i < 14; i++)
			{
				AddDoc(writer);
			}
			writer.Close();
			
			Term searchTerm = new Term("content", "aaa");
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(14, hits.Length(), "first number of hits");
			searcher.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			
			writer = new IndexWriter(dir, false, new WhitespaceAnalyzer());
			for (int i = 0; i < 3; i++)
			{
				for (int j = 0; j < 11; j++)
				{
					AddDoc(writer);
				}
				searcher = new IndexSearcher(dir);
				hits = searcher.Search(new TermQuery(searchTerm));
				Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled");
				searcher.Close();
				Assert.IsTrue(reader.IsCurrent(), "reader should have still been current");
			}
			
			// Now, close the writer:
			writer.Close();
			Assert.IsFalse(reader.IsCurrent(), "reader should not be current now");
			
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(47, hits.Length(), "reader did not see changes after writer was closed");
			searcher.Close();
		}
Example #27
		public virtual void  TestStopWordSearching()
		{
			Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
			Directory ramDir = new RAMDirectory();
			var iw = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            var doc = new Document();
			doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
			iw.AddDocument(doc);
			iw.Close();

            var mfqp = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, analyzer);
			mfqp.DefaultOperator = QueryParser.Operator.AND;
            var q = mfqp.Parse("the footest");
            var is_Renamed = new IndexSearcher(ramDir, true);
            var hits = is_Renamed.Search(q, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			is_Renamed.Close();
		}
Example #28
0
        public virtual void  searchIndex(System.String dirName, System.String oldName)
        {
            //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
            //Query query = parser.parse("handle:1");

            dirName = FullDir(dirName);

            Directory     dir      = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
            IndexSearcher searcher = new IndexSearcher(dir, true);
            IndexReader   reader   = searcher.IndexReader;

            _TestUtil.CheckIndex(dir);

            for (int i = 0; i < 35; i++)
            {
                if (!reader.IsDeleted(i))
                {
                    Document d      = reader.Document(i);
                    var      fields = d.GetFields();
                    if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
                    {
                        if (d.GetField("content3") == null)
                        {
                            int numFields = oldName.StartsWith("29.") ? 7 : 5;
                            Assert.AreEqual(numFields, fields.Count);
                            Field f = d.GetField("id");
                            Assert.AreEqual("" + i, f.StringValue);

                            f = (Field)d.GetField("utf8");
                            Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                            f = (Field)d.GetField("autf8");
                            Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                            f = (Field)d.GetField("content2");
                            Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                            f = (Field)d.GetField("fie\u2C77ld");
                            Assert.AreEqual("field with non-ascii name", f.StringValue);
                        }
                    }
                }
                // Only ID 7 is deleted
                else
                {
                    Assert.AreEqual(7, i);
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #21 since its norm was
            // increased:
            Document d2 = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first");

            TestHits(hits, 34, searcher.IndexReader);

            if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
            {
                // Test on indices >= 2.3
                hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
            }

            searcher.Close();
            dir.Close();
        }
Example #29
 public void TestNegativePositions()
 {
     SinkTokenizer tokens = new SinkTokenizer();
     Token t = new Token();
     t.SetTermText("a");
     t.SetPositionIncrement(0);
     tokens.Add(t);
     t.SetTermText("b");
     t.SetPositionIncrement(1);
     tokens.Add(t);
     t.SetTermText("c");
     tokens.Add(t);
     MockRAMDirectory dir = new MockRAMDirectory();
     IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
     Document doc = new Document();
     doc.Add(new Field("field", tokens));
     w.AddDocument(doc);
     w.Close();
     IndexSearcher s = new IndexSearcher(dir);
     PhraseQuery pq = new PhraseQuery();
     pq.Add(new Term("field", "a"));
     pq.Add(new Term("field", "b"));
     pq.Add(new Term("field", "c"));
     Hits hits = s.Search(pq);
     Assert.AreEqual(1, hits.Length());
     Query q = new SpanTermQuery(new Term("field", "a"));
     hits = s.Search(q);
     Assert.AreEqual(1, hits.Length());
     TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a"));
     Assert.IsTrue(tps.Next());
     Assert.AreEqual(1, tps.Freq());
     Assert.AreEqual(-1, tps.NextPosition());
     Assert.IsTrue(_TestUtil.CheckIndex(dir));
     s.Close();
     dir.Close();
 }
Example #30
		public virtual void  TestAddIndexOnDiskFull()
		{
			int START_COUNT = 57;
			int NUM_DIR = 50;
			int END_COUNT = START_COUNT + NUM_DIR * 25;
			
			bool debug = false;
			
			// Build up a bunch of dirs that have indexes which we
			// will then merge together by calling addIndexes(*):
			Directory[] dirs = new Directory[NUM_DIR];
			long inputDiskUsage = 0;
			for (int i = 0; i < NUM_DIR; i++)
			{
				dirs[i] = new RAMDirectory();
				IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true);
				for (int j = 0; j < 25; j++)
				{
					AddDocWithIndex(writer, 25 * i + j);
				}
				writer.Close();
				System.String[] files = dirs[i].List();
				for (int j = 0; j < files.Length; j++)
				{
					inputDiskUsage += dirs[i].FileLength(files[j]);
				}
			}
			
			// Now, build a starting index that has START_COUNT docs.  We
			// will then try to addIndexes into a copy of this:
			RAMDirectory startDir = new RAMDirectory();
			IndexWriter writer2 = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
			for (int j = 0; j < START_COUNT; j++)
			{
				AddDocWithIndex(writer2, j);
			}
			writer2.Close();
			
			// Make sure starting index seems to be working properly:
			Term searchTerm = new Term("content", "aaa");
			IndexReader reader = IndexReader.Open(startDir);
			Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq");
			
			IndexSearcher searcher = new IndexSearcher(reader);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(57, hits.Length(), "first number of hits");
			searcher.Close();
			reader.Close();
			
			// Iterate with larger and larger amounts of free
			// disk space.  With little free disk space,
			// addIndexes will certainly run out of space &
			// fail.  Verify that when this happens, index is
			// not corrupt and index in fact has added no
			// documents.  Then, we increase disk space by 2000
			// bytes each iteration.  At some point there is
			// enough free disk space and addIndexes should
			// succeed and index should show all documents were
			// added.
			
			// String[] files = startDir.list();
			long diskUsage = startDir.SizeInBytes();
			
			long startDiskUsage = 0;
			System.String[] files2 = startDir.List();
			for (int i = 0; i < files2.Length; i++)
			{
				startDiskUsage += startDir.FileLength(files2[i]);
			}
			
			for (int iter = 0; iter < 6; iter++)
			{
				
				if (debug)
					System.Console.Out.WriteLine("TEST: iter=" + iter);
				
				// Start with 100 bytes more than we are currently using:
				long diskFree = diskUsage + 100;
				
				bool autoCommit = iter % 2 == 0;
				int method = iter / 2;
				
				bool success = false;
				bool done = false;
				
				System.String methodName;
				if (0 == method)
				{
					methodName = "addIndexes(Directory[])";
				}
				else if (1 == method)
				{
					methodName = "addIndexes(IndexReader[])";
				}
				else
				{
					methodName = "addIndexesNoOptimize(Directory[])";
				}
				
				while (!done)
				{
					
					// Make a new dir that will enforce disk usage:
					MockRAMDirectory dir = new MockRAMDirectory(startDir);
					writer2 = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
					System.IO.IOException err = null;
					
					MergeScheduler ms = writer2.GetMergeScheduler();
					for (int x = 0; x < 2; x++)
					{
						if (ms is ConcurrentMergeScheduler)
						// This test intentionally produces exceptions
						// in the threads that CMS launches; we don't
						// want to pollute test output with these.
							if (0 == x)
								((ConcurrentMergeScheduler)ms).SetSuppressExceptions_ForNUnitTest();
							else
								((ConcurrentMergeScheduler) ms).ClearSuppressExceptions_ForNUnitTest();
						
						// Two loops: first time, limit disk space &
						// throw random IOExceptions; second time, no
						// disk space limit:
						
						double rate = 0.05;
						double diskRatio = ((double) diskFree) / diskUsage;
						long thisDiskFree;
						
						System.String testName = null;
						
						if (0 == x)
						{
							thisDiskFree = diskFree;
							if (diskRatio >= 2.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 4.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 6.0)
							{
								rate = 0.0;
							}
							if (debug)
								testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes autoCommit=" + autoCommit;
						}
						else
						{
							thisDiskFree = 0;
							rate = 0.0;
							if (debug)
								testName = "disk full test " + methodName + " with unlimited disk space autoCommit=" + autoCommit;
						}
						
						if (debug)
							System.Console.Out.WriteLine("\ncycle: " + testName);
						
						dir.SetMaxSizeInBytes(thisDiskFree);
						dir.SetRandomIOExceptionRate(rate, diskFree);
						
						try
						{
							
							if (0 == method)
							{
								writer2.AddIndexes(dirs);
							}
							else if (1 == method)
							{
								IndexReader[] readers = new IndexReader[dirs.Length];
								for (int i = 0; i < dirs.Length; i++)
								{
									readers[i] = IndexReader.Open(dirs[i]);
								}
								try
								{
									writer2.AddIndexes(readers);
								}
								finally
								{
									for (int i = 0; i < dirs.Length; i++)
									{
										readers[i].Close();
									}
								}
							}
							else
							{
								writer2.AddIndexesNoOptimize(dirs);
							}
							
							success = true;
							if (debug)
							{
								System.Console.Out.WriteLine("  success!");
							}
							
							if (0 == x)
							{
								done = true;
							}
						}
						catch (System.IO.IOException e)
						{
							success = false;
							err = e;
							if (debug)
							{
								System.Console.Out.WriteLine("  hit IOException: " + e);
								System.Console.Out.WriteLine(e.StackTrace);
							}
							
							if (1 == x)
							{
								System.Console.Out.WriteLine(e.StackTrace);
								Assert.Fail(methodName + " hit IOException after disk space was freed up");
							}
						}
						
						// Make sure all threads from
						// ConcurrentMergeScheduler are done
						_TestUtil.SyncConcurrentMerges(writer2);
						
						if (autoCommit)
						{
							
							// Whether we succeeded or failed, check that
							// all un-referenced files were in fact
							// deleted (ie, we did not create garbage).
							// Only check this when autoCommit is true:
							// when it's false, it's expected that there
							// are unreferenced files (ie they won't be
							// referenced until the "commit on close").
							// Just create a new IndexFileDeleter, have it
							// delete unreferenced files, then verify that
							// in fact no files were deleted:
							
							System.String successStr;
							if (success)
							{
								successStr = "success";
							}
							else
							{
								successStr = "IOException";
							}
							System.String message = methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes)";
							AssertNoUnreferencedFiles(dir, message);
						}
						
						if (debug)
						{
							System.Console.Out.WriteLine("  now test readers");
						}
						
						// Finally, verify index is not corrupt, and, if
						// we succeeded, we see all docs added, and if we
						// failed, we see either all docs or no docs added
						// (transactional semantics):
						try
						{
							reader = IndexReader.Open(dir);
						}
						catch (System.IO.IOException e)
						{
							System.Console.Out.WriteLine(e.StackTrace);
							Assert.Fail(testName + ": exception when creating IndexReader: " + e);
						}
						int result = reader.DocFreq(searchTerm);
						if (success)
						{
							if (autoCommit && result != END_COUNT)
							{
								Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
							}
							else if (!autoCommit && result != START_COUNT)
							{
								Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " [autoCommit = false]");
							}
						}
						else
						{
							// On hitting exception we still may have added
							// all docs:
							if (result != START_COUNT && result != END_COUNT)
							{
								System.Console.Out.WriteLine(err.StackTrace);
								Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
							}
						}
						
						searcher = new IndexSearcher(reader);
						try
						{
							hits = searcher.Search(new TermQuery(searchTerm));
						}
						catch (System.IO.IOException e)
						{
							System.Console.Out.WriteLine(e.StackTrace);
							Assert.Fail(testName + ": exception when searching: " + e);
						}
						int result2 = hits.Length();
						if (success)
						{
							if (result2 != result)
							{
								Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
							}
						}
						else
						{
							// On hitting exception we still may have added
							// all docs:
							if (result2 != result)
							{
								System.Console.Out.WriteLine(err.StackTrace);
								Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
							}
						}
						
						searcher.Close();
						reader.Close();
						if (debug)
						{
							System.Console.Out.WriteLine("  count is " + result);
						}
						
						if (done || result == END_COUNT)
						{
							break;
						}
					}
					
					if (debug)
					{
						System.Console.Out.WriteLine("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.GetMaxUsedSizeInBytes());
					}
					
					if (done)
					{
						// Javadocs state that temp free Directory space
						// required is at most 2X total input size of
						// indices so let's make sure:
						Assert.IsTrue(
							(dir.GetMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage),
							"max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.GetMaxUsedSizeInBytes() - startDiskUsage) + " bytes; " + "starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes"
						);
					}
					
					writer2.Close();
					
					// Wait for all BG threads to finish else
					// dir.close() will throw IOException because
					// there are still open files
					_TestUtil.SyncConcurrentMerges(ms);
					
					dir.Close();
					
					// Try again with 2000 more bytes of free space:
					diskFree += 2000;
				}
			}
			
			startDir.Close();
		}
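		// A minimal sketch (not part of the original suite; method name is an
		// illustrative assumption) of the fault injection that drives the loop
		// above: MockRAMDirectory can cap its "disk" size and throw random
		// IOExceptions, and the test then verifies transactional behavior.
		public virtual void  SketchDiskFullInjection()
		{
			MockRAMDirectory dir = new MockRAMDirectory();
			dir.SetMaxSizeInBytes(1000); // pretend only 1000 bytes are free
			dir.SetRandomIOExceptionRate(0.1, 1000); // fail ~10% of writes
			try
			{
				IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
				Document doc = new Document();
				doc.Add(new Field("field", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
				w.AddDocument(doc);
				w.Close();
			}
			catch (System.IO.IOException)
			{
				// Expected once the cap is hit; the index must stay readable
				// with either all or none of the buffered docs applied.
			}
			dir.Close();
		}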
		public virtual void  TestEnablingNorms()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			// Enable norms for only 1 doc, pre flush
			for (int j = 0; j < 10; j++)
			{
				Document doc = new Document();
				Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
				if (j != 8)
				{
					f.SetOmitNorms(true);
				}
				doc.Add(f);
				writer.AddDocument(doc);
			}
			writer.Close();
			
			Term searchTerm = new Term("field", "aaa");
			
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(10, hits.Length());
			searcher.Close();
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			// Enable norms for only 1 doc, post flush
			for (int j = 0; j < 27; j++)
			{
				Document doc = new Document();
				Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
				if (j != 26)
				{
					f.SetOmitNorms(true);
				}
				doc.Add(f);
				writer.AddDocument(doc);
			}
			writer.Close();
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(27, hits.Length());
			searcher.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			reader.Close();
			
			dir.Close();
		}
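		// A short sketch (assumption, not from the original file) of the rule
		// TestEnablingNorms exercises: norms are tracked per field per segment,
		// so a single document that does not omit norms re-enables them for
		// every document in that field.
		public virtual void  SketchOmitNorms()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			Field f1 = new Field("field", "aaa", Field.Store.NO, Field.Index.TOKENIZED);
			f1.SetOmitNorms(true); // no length/boost normalization for this doc
			Document d1 = new Document();
			d1.Add(f1);
			writer.AddDocument(d1);
			// This second doc keeps norms on, which turns them on for "field":
			Document d2 = new Document();
			d2.Add(new Field("field", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
			writer.AddDocument(d2);
			writer.Close();
			dir.Close();
		}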
		public virtual void  TestDiverseDocs()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetRAMBufferSizeMB(0.5);
			System.Random rand = new System.Random((System.Int32) 31415);
			for (int i = 0; i < 3; i++)
			{
				// First, docs where every term is unique (heavy on
				// Posting instances)
				for (int j = 0; j < 100; j++)
				{
					Document doc = new Document();
					for (int k = 0; k < 100; k++)
					{
						doc.Add(new Field("field", System.Convert.ToString(rand.Next()), Field.Store.YES, Field.Index.TOKENIZED));
					}
					writer.AddDocument(doc);
				}
				
				// Next, many single term docs where only one term
				// occurs (heavy on byte blocks)
				for (int j = 0; j < 100; j++)
				{
					Document doc = new Document();
					doc.Add(new Field("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(doc);
				}
				
				// Next, many single term docs where only one term
				// occurs but the terms are very long (heavy on
				// char[] arrays)
				for (int j = 0; j < 100; j++)
				{
					System.Text.StringBuilder b = new System.Text.StringBuilder();
					System.String x = System.Convert.ToString(j) + ".";
					for (int k = 0; k < 1000; k++)
						b.Append(x);
					System.String longTerm = b.ToString();
					
					Document doc = new Document();
					doc.Add(new Field("field", longTerm, Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(doc);
				}
			}
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(new Term("field", "aaa")));
			Assert.AreEqual(300, hits.Length());
			searcher.Close();
			
			dir.Close();
		}
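		// Sketch (illustrative, not from the original tests) contrasting the
		// two flush triggers these tests rely on: TestDiverseDocs above
		// deliberately stresses the RAM-based trigger, while most other tests
		// flush by document count.
		public virtual void  SketchFlushTriggers()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetRAMBufferSizeMB(0.5); // flush when buffered state exceeds ~0.5 MB
			// writer.SetMaxBufferedDocs(10); // alternative: flush every 10 docs
			for (int i = 0; i < 100; i++)
			{
				Document doc = new Document();
				doc.Add(new Field("field", "aaa " + i, Field.Store.NO, Field.Index.TOKENIZED));
				writer.AddDocument(doc);
			}
			writer.Close();
			dir.Close();
		}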
		public virtual void  TestCommitOnCloseAbort()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			for (int i = 0; i < 14; i++)
			{
				AddDoc(writer);
			}
			writer.Close();
			
			Term searchTerm = new Term("content", "aaa");
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(14, hits.Length(), "first number of hits");
			searcher.Close();
			
			writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
			writer.SetMaxBufferedDocs(10);
			for (int j = 0; j < 17; j++)
			{
				AddDoc(writer);
			}
			// Delete all docs:
			writer.DeleteDocuments(searchTerm);
			
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled");
			searcher.Close();
			
			// Now, abort the writer:
			writer.Abort();
			
			AssertNoUnreferencedFiles(dir, "unreferenced files remain after abort()");
			
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(14, hits.Length(), "saw changes after writer.abort");
			searcher.Close();
			
			// Now make sure we can re-open the index, add docs,
			// and all is good:
			writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
			writer.SetMaxBufferedDocs(10);
			for (int i = 0; i < 12; i++)
			{
				for (int j = 0; j < 17; j++)
				{
					AddDoc(writer);
				}
				searcher = new IndexSearcher(dir);
				hits = searcher.Search(new TermQuery(searchTerm));
				Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled");
				searcher.Close();
			}
			
			writer.Close();
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(searchTerm));
			Assert.AreEqual(218, hits.Length(), "didn't see changes after close");
			searcher.Close();
			
			dir.Close();
		}
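		// Minimal sketch (method name is an assumption) of the "commit on
		// close" contract TestCommitOnCloseAbort verifies: with autoCommit
		// disabled, a writer's changes only become visible at Close(), and
		// Abort() discards everything buffered since the last commit.
		public virtual void  SketchAbortSemantics()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
			Document doc = new Document();
			doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			writer.Abort(); // roll back: the added doc is never committed
			dir.Close();
		}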
Example #34
0
		public static void  Main(System.String[] args)
		{
			try
			{
				Searcher searcher = new IndexSearcher(@"index");
				Analyzer analyzer = new StandardAnalyzer();
				
				System.IO.StreamReader in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default);
				while (true)
				{
					System.Console.Out.Write("Query: ");
					System.String line = in_Renamed.ReadLine();
					
					if (line == null)
						break;
					
					Query query = QueryParser.Parse(line, "contents", analyzer);
					System.Console.Out.WriteLine("Searching for: " + query.ToString("contents"));
					
					Hits hits = searcher.Search(query);
					System.Console.Out.WriteLine(hits.Length() + " total matching documents");
					
					int HITS_PER_PAGE = 10;
					for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
					{
						int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
						for (int i = start; i < end; i++)
						{
							Document doc = hits.Doc(i);
							System.String path = doc.Get("path");
							if (path != null)
							{
								System.Console.Out.WriteLine(i + ". " + path);
							}
							else
							{
								System.String url = doc.Get("url");
								if (url != null)
								{
									System.Console.Out.WriteLine(i + ". " + url);
									System.Console.Out.WriteLine("   - " + doc.Get("title"));
								}
								else
								{
									System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document");
								}
							}
						}
						
						if (hits.Length() > end)
						{
							System.Console.Out.Write("more (y/n) ? ");
							line = in_Renamed.ReadLine();
							if (line == null || line.Length == 0 || line[0] == 'n')
								break;
						}
					}
				}
				searcher.Close();
			}
			catch (System.Exception e)
			{
				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
			}
		}
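		// A compact usage sketch of the paging idiom from Main above (method
		// name and field choice are illustrative assumptions): Hits fetches
		// stored documents lazily, so reading one page at a time avoids
		// loading every document up front.
		public static void  PrintPage(Hits hits, int start, int hitsPerPage)
		{
			int end = System.Math.Min(hits.Length(), start + hitsPerPage);
			for (int i = start; i < end; i++)
			{
				Document doc = hits.Doc(i);
				System.Console.Out.WriteLine(i + ". " + doc.Get("path"));
			}
		}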
		/// <summary> Make sure if modifier tries to commit but hits disk full that modifier
		/// remains consistent and usable. Similar to TestIndexReader.testDiskFull().
		/// </summary>
		private void  TestOperationsOnDiskFull(bool updates)
		{
			
			bool debug = false;
			Term searchTerm = new Term("content", "aaa");
			int START_COUNT = 157;
			int END_COUNT = 144;
			
			for (int pass = 0; pass < 2; pass++)
			{
				bool autoCommit = (0 == pass);
				
				// First build up a starting index:
				MockRAMDirectory startDir = new MockRAMDirectory();
				IndexWriter writer = new IndexWriter(startDir, autoCommit, new WhitespaceAnalyzer(), true);
				for (int i = 0; i < 157; i++)
				{
					Document d = new Document();
					d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
					d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED));
					writer.AddDocument(d);
				}
				writer.Close();
				
				long diskUsage = startDir.SizeInBytes();
				long diskFree = diskUsage + 10;
				
				System.IO.IOException err = null;
				
				bool done = false;
				
				// Iterate w/ ever increasing free disk space:
				while (!done)
				{
					MockRAMDirectory dir = new MockRAMDirectory(startDir);
					dir.SetPreventDoubleWrite(false);
					IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
					
					modifier.SetMaxBufferedDocs(1000); // use flush or close
					modifier.SetMaxBufferedDeleteTerms(1000); // use flush or close
					
					// For each disk size, first try to commit against
					// dir that will hit random IOExceptions & disk
					// full; after, give it infinite disk space & turn
					// off random IOExceptions & retry w/ same reader:
					bool success = false;
					
					for (int x = 0; x < 2; x++)
					{
						
						double rate = 0.1;
						double diskRatio = ((double) diskFree) / diskUsage;
						long thisDiskFree;
						System.String testName;
						
						if (0 == x)
						{
							thisDiskFree = diskFree;
							if (diskRatio >= 2.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 4.0)
							{
								rate /= 2;
							}
							if (diskRatio >= 6.0)
							{
								rate = 0.0;
							}
							if (debug)
							{
								System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes");
							}
							testName = "disk full during reader.close() @ " + thisDiskFree + " bytes";
						}
						else
						{
							thisDiskFree = 0;
							rate = 0.0;
							if (debug)
							{
								System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space");
							}
							testName = "reader re-use after disk full";
						}
						
						dir.SetMaxSizeInBytes(thisDiskFree);
						dir.SetRandomIOExceptionRate(rate, diskFree);
						
						try
						{
							if (0 == x)
							{
								int docId = 12;
								for (int i = 0; i < 13; i++)
								{
									if (updates)
									{
										Document d = new Document();
										d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
										d.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
										modifier.UpdateDocument(new Term("id", System.Convert.ToString(docId)), d);
									}
									else
									{
										// deletes
										modifier.DeleteDocuments(new Term("id", System.Convert.ToString(docId)));
										// modifier.setNorm(docId, "contents", (float)2.0);
									}
									docId += 12;
								}
							}
							modifier.Close();
							success = true;
							if (0 == x)
							{
								done = true;
							}
						}
						catch (System.IO.IOException e)
						{
							if (debug)
							{
								System.Console.Out.WriteLine("  hit IOException: " + e);
								System.Console.Out.WriteLine(e.StackTrace);
							}
							err = e;
							if (1 == x)
							{
								System.Console.Error.WriteLine(e.StackTrace);
								Assert.Fail(testName + " hit IOException after disk space was freed up");
							}
						}
						
						// If the close() succeeded, make sure there are
						// no unreferenced files.
						if (success)
						{
							Lucene.Net.Util._TestUtil.CheckIndex(dir);
							TestIndexWriter.AssertNoUnreferencedFiles(dir, "after writer.close");
						}
						
						// Finally, verify index is not corrupt, and, if
						// we succeeded, we see all docs changed, and if
						// we failed, we see either all docs or no docs
						// changed (transactional semantics):
						IndexReader newReader = null;
						try
						{
							newReader = IndexReader.Open(dir);
						}
						catch (System.IO.IOException e)
						{
							System.Console.Error.WriteLine(e.StackTrace);
							Assert.Fail(testName + ":exception when creating IndexReader after disk full during close: " + e);
						}
						
						IndexSearcher searcher = new IndexSearcher(newReader);
						ScoreDoc[] hits = null;
						try
						{
							hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs;
						}
						catch (System.IO.IOException e)
						{
							System.Console.Error.WriteLine(e.StackTrace);
							Assert.Fail(testName + ": exception when searching: " + e);
						}
						int result2 = hits.Length;
						if (success)
						{
							if (x == 0 && result2 != END_COUNT)
							{
								Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT);
							}
							else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT)
							{
								// It's possible that the first exception was
								// "recoverable" wrt pending deletes, in which
								// case the pending deletes are retained and
								// then re-flushing (with plenty of disk
								// space) will succeed in flushing the
								// deletes:
								Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
							}
						}
						else
						{
							// On hitting exception we still may have added
							// all docs:
							if (result2 != START_COUNT && result2 != END_COUNT)
							{
								System.Console.Error.WriteLine(err.StackTrace);
								Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
							}
						}
						
						searcher.Close();
						newReader.Close();
						
						if (result2 == END_COUNT)
						{
							break;
						}
					}
					
					dir.Close();
					
					// Try again with 10 more bytes of free space:
					diskFree += 10;
				}
			}
		}
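		// Sketch (helper name is an assumption) of the update path the test
		// above exercises: UpdateDocument is an atomic delete-by-term plus
		// add, which is why hit counts land on START_COUNT or END_COUNT and
		// never in between.
		private void  SketchUpdateDocument(IndexWriter modifier, int docId)
		{
			Document d = new Document();
			d.Add(new Field("id", System.Convert.ToString(docId), Field.Store.YES, Field.Index.NOT_ANALYZED));
			d.Add(new Field("content", "bbb " + docId, Field.Store.NO, Field.Index.ANALYZED));
			// Deletes every doc whose "id" term matches, then adds the new doc:
			modifier.UpdateDocument(new Term("id", System.Convert.ToString(docId)), d);
		}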
Example #36
0
        /// <summary> Make sure if modifier tries to commit but hits disk full that modifier
        /// remains consistent and usable. Similar to TestIndexReader.testDiskFull().
        /// </summary>
        private void TestOperationsOnDiskFull(bool updates)
        {
            bool debug       = false;
            Term searchTerm  = new Term("content", "aaa");
            int  START_COUNT = 157;
            int  END_COUNT   = 144;

            // First build up a starting index:
            MockRAMDirectory startDir = new MockRAMDirectory();
            IndexWriter      writer   = new IndexWriter(startDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null);

            for (int i = 0; i < 157; i++)
            {
                Document d = new Document();
                d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
                d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(d, null);
            }
            writer.Close();

            long diskUsage = startDir.SizeInBytes();
            long diskFree  = diskUsage + 10;

            System.IO.IOException err = null;

            bool done = false;

            // Iterate w/ ever increasing free disk space:
            while (!done)
            {
                MockRAMDirectory dir = new MockRAMDirectory(startDir);
                dir.SetPreventDoubleWrite(false);
                IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);

                modifier.SetMaxBufferedDocs(1000);         // use flush or close
                modifier.SetMaxBufferedDeleteTerms(1000);  // use flush or close

                // For each disk size, first try to commit against
                // dir that will hit random IOExceptions & disk
                // full; after, give it infinite disk space & turn
                // off random IOExceptions & retry w/ same reader:
                bool success = false;

                for (int x = 0; x < 2; x++)
                {
                    double        rate      = 0.1;
                    double        diskRatio = ((double)diskFree) / diskUsage;
                    long          thisDiskFree;
                    System.String testName;

                    if (0 == x)
                    {
                        thisDiskFree = diskFree;
                        if (diskRatio >= 2.0)
                        {
                            rate /= 2;
                        }
                        if (diskRatio >= 4.0)
                        {
                            rate /= 2;
                        }
                        if (diskRatio >= 6.0)
                        {
                            rate = 0.0;
                        }
                        if (debug)
                        {
                            System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes");
                        }
                        testName = "disk full during reader.close() @ " + thisDiskFree + " bytes";
                    }
                    else
                    {
                        thisDiskFree = 0;
                        rate         = 0.0;
                        if (debug)
                        {
                            System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space");
                        }
                        testName = "reader re-use after disk full";
                    }

                    dir.SetMaxSizeInBytes(thisDiskFree);
                    dir.SetRandomIOExceptionRate(rate, diskFree);

                    try
                    {
                        if (0 == x)
                        {
                            int docId = 12;
                            for (int i = 0; i < 13; i++)
                            {
                                if (updates)
                                {
                                    Document d = new Document();
                                    d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES,
                                                    Field.Index.NOT_ANALYZED));
                                    d.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
                                    modifier.UpdateDocument(new Term("id", System.Convert.ToString(docId)), d, null);
                                }
                                else
                                {
                                    // deletes
                                    modifier.DeleteDocuments(null, new Term("id", System.Convert.ToString(docId)));
                                    // modifier.setNorm(docId, "contents", (float)2.0);
                                }
                                docId += 12;
                            }
                        }
                        modifier.Close();
                        success = true;
                        if (0 == x)
                        {
                            done = true;
                        }
                    }
                    catch (System.IO.IOException e)
                    {
                        if (debug)
                        {
                            System.Console.Out.WriteLine("  hit IOException: " + e);
                            System.Console.Out.WriteLine(e.StackTrace);
                        }
                        err = e;
                        if (1 == x)
                        {
                            System.Console.Error.WriteLine(e.StackTrace);
                            Assert.Fail(testName + " hit IOException after disk space was freed up");
                        }
                    }

                    // If the close() succeeded, make sure there are
                    // no unreferenced files.
                    if (success)
                    {
                        Lucene.Net.Util._TestUtil.CheckIndex(dir);
                        TestIndexWriter.AssertNoUnreferencedFiles(dir, "after writer.close");
                    }

                    // Finally, verify index is not corrupt, and, if
                    // we succeeded, we see all docs changed, and if
                    // we failed, we see either all docs or no docs
                    // changed (transactional semantics):
                    IndexReader newReader = null;
                    try
                    {
                        newReader = IndexReader.Open((Directory)dir, true, null);
                    }
                    catch (System.IO.IOException e)
                    {
                        System.Console.Error.WriteLine(e.StackTrace);
                        Assert.Fail(testName + ":exception when creating IndexReader after disk full during close: " + e);
                    }

                    IndexSearcher searcher = new IndexSearcher(newReader);
                    ScoreDoc[]    hits     = null;
                    try
                    {
                        hits = searcher.Search(new TermQuery(searchTerm), null, 1000, null).ScoreDocs;
                    }
                    catch (System.IO.IOException e)
                    {
                        System.Console.Error.WriteLine(e.StackTrace);
                        Assert.Fail(testName + ": exception when searching: " + e);
                    }
                    int result2 = hits.Length;
                    if (success)
                    {
                        if (x == 0 && result2 != END_COUNT)
                        {
                            Assert.Fail(testName +
                                        ": method did not throw exception but hits.length for search on term 'aaa' is " +
                                        result2 + " instead of expected " + END_COUNT);
                        }
                        else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT)
                        {
                            // It's possible that the first exception was
                            // "recoverable" wrt pending deletes, in which
                            // case the pending deletes are retained and
                            // then re-flushing (with plenty of disk
                            // space) will succeed in flushing the
                            // deletes:
                            Assert.Fail(testName +
                                        ": method did not throw exception but hits.length for search on term 'aaa' is " +
                                        result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
                        }
                    }
                    else
                    {
                        // On hitting exception we still may have added
                        // all docs:
                        if (result2 != START_COUNT && result2 != END_COUNT)
                        {
                            System.Console.Error.WriteLine(err.StackTrace);
                            Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " +
                                        result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
                        }
                    }

                    searcher.Close();
                    newReader.Close();

                    if (result2 == END_COUNT)
                    {
                        break;
                    }
                }

                dir.Close();

                // Try again with 10 more bytes of free space:
                diskFree += 10;
            }
        }
Example #37
0
		public virtual void  TestDiskFull()
		{
			
			bool debug = false;
			Term searchTerm = new Term("content", "aaa");
			int START_COUNT = 157;
			int END_COUNT = 144;
			
			// First build up a starting index:
			RAMDirectory startDir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			for (int i = 0; i < 157; i++)
			{
				Document d = new Document();
				d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
				d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(d);
			}
			writer.Close();
			
			long diskUsage = startDir.SizeInBytes();
			long diskFree = diskUsage + 100;
			
			System.IO.IOException err = null;
			
			bool done = false;
			
			// Iterate w/ ever increasing free disk space:
			while (!done)
			{
				MockRAMDirectory dir = new MockRAMDirectory(startDir);
				
				// If IndexReader hits disk full, it can write to
				// the same files again.
				dir.SetPreventDoubleWrite(false);
				
				IndexReader reader = IndexReader.Open(dir, false);
				
				// For each disk size, first try to commit against
				// dir that will hit random IOExceptions & disk
				// full; after, give it infinite disk space & turn
				// off random IOExceptions & retry w/ same reader:
				bool success = false;
				
				for (int x = 0; x < 2; x++)
				{
					
					double rate = 0.05;
					double diskRatio = ((double) diskFree) / diskUsage;
					long thisDiskFree;
					System.String testName;
					
					if (0 == x)
					{
						thisDiskFree = diskFree;
						if (diskRatio >= 2.0)
						{
							rate /= 2;
						}
						if (diskRatio >= 4.0)
						{
							rate /= 2;
						}
						if (diskRatio >= 6.0)
						{
							rate = 0.0;
						}
						if (debug)
						{
							System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes");
						}
						testName = "disk full during reader.close() @ " + thisDiskFree + " bytes";
					}
					else
					{
						thisDiskFree = 0;
						rate = 0.0;
						if (debug)
						{
							System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space");
						}
						testName = "reader re-use after disk full";
					}
					
					dir.SetMaxSizeInBytes(thisDiskFree);
					dir.SetRandomIOExceptionRate(rate, diskFree);
					
					try
					{
						if (0 == x)
						{
							int docId = 12;
							for (int i = 0; i < 13; i++)
							{
								reader.DeleteDocument(docId);
								reader.SetNorm(docId, "contents", (float) 2.0);
								docId += 12;
							}
						}
						reader.Close();
						success = true;
						if (0 == x)
						{
							done = true;
						}
					}
					catch (System.IO.IOException e)
					{
						if (debug)
						{
							System.Console.Out.WriteLine("  hit IOException: " + e);
							System.Console.Out.WriteLine(e.StackTrace);
						}
						err = e;
						if (1 == x)
						{
							System.Console.Error.WriteLine(e.StackTrace);
							Assert.Fail(testName + " hit IOException after disk space was freed up");
						}
					}
					
					// Whether we succeeded or failed, check that all
					// un-referenced files were in fact deleted (ie,
					// we did not create garbage).  Just create a
					// new IndexFileDeleter, have it delete
					// unreferenced files, then verify that in fact
					// no files were deleted:
					System.String[] startFiles = dir.ListAll();
					SegmentInfos infos = new SegmentInfos();
					infos.Read(dir);
					new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null, null);
					System.String[] endFiles = dir.ListAll();
					
					System.Array.Sort(startFiles);
					System.Array.Sort(endFiles);
					
					//for(int i=0;i<startFiles.length;i++) {
					//  System.out.println("  startFiles: " + i + ": " + startFiles[i]);
					//}
					
					if (!CollectionsHelper.Equals(startFiles, endFiles))
					{
						System.String successStr;
						if (success)
						{
							successStr = "success";
						}
						else
						{
							successStr = "IOException";
							System.Console.Error.WriteLine(err.StackTrace);
						}
						Assert.Fail("reader.close() failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n    " + ArrayToString(startFiles) + "\n  after delete:\n    " + ArrayToString(endFiles));
					}
					
					// Finally, verify index is not corrupt, and, if
					// we succeeded, we see all docs changed, and if
					// we failed, we see either all docs or no docs
					// changed (transactional semantics):
					IndexReader newReader = null;
					try
					{
						newReader = IndexReader.Open(dir, false);
					}
					catch (System.IO.IOException e)
					{
						System.Console.Error.WriteLine(e.StackTrace);
						Assert.Fail(testName + ":exception when creating IndexReader after disk full during close: " + e);
					}
					/*
					int result = newReader.docFreq(searchTerm);
					if (success) {
					if (result != END_COUNT) {
					fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
					}
					} else {
					// On hitting exception we still may have added
					// all docs:
					if (result != START_COUNT && result != END_COUNT) {
					err.printStackTrace();
					fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
					}
					}
					*/
					
					IndexSearcher searcher = new IndexSearcher(newReader);
					ScoreDoc[] hits = null;
					try
					{
						hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs;
					}
					catch (System.IO.IOException e)
					{
						System.Console.Error.WriteLine(e.StackTrace);
						Assert.Fail(testName + ": exception when searching: " + e);
					}
					int result2 = hits.Length;
					if (success)
					{
						if (result2 != END_COUNT)
						{
							Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT);
						}
					}
					else
					{
						// On hitting exception we still may have added
						// all docs:
						if (result2 != START_COUNT && result2 != END_COUNT)
						{
							System.Console.Error.WriteLine(err.StackTrace);
							Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT);
						}
					}
					
					searcher.Close();
					newReader.Close();
					
					if (result2 == END_COUNT)
					{
						break;
					}
				}
				
				dir.Close();
				
				// Try again with 10 more bytes of free space:
				diskFree += 10;
			}
			
			startDir.Close();
		}
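		// Sketch (helper name is an assumption) of the no-garbage check used
		// above: snapshot the file listing, let a fresh IndexFileDeleter with
		// the keep-only-last-commit policy delete whatever is unreferenced,
		// and assert that nothing actually disappeared.
		private void  SketchAssertNoGarbage(Directory dir)
		{
			System.String[] startFiles = dir.ListAll();
			SegmentInfos infos = new SegmentInfos();
			infos.Read(dir);
			new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null, null);
			System.String[] endFiles = dir.ListAll();
			System.Array.Sort(startFiles);
			System.Array.Sort(endFiles);
			Assert.AreEqual(startFiles.Length, endFiles.Length, "IndexFileDeleter deleted referenced files");
		}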
Example #38
0
        /* Open pre-lockless index, add docs, do a delete &
         * setNorm, and search */
        public virtual void  ChangeIndexWithAdds(System.String dirName)
        {
            System.String origDirName = dirName;
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // open writer
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);

            // add 10 docs
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, 35 + i);
            }

            // make sure writer sees right total -- writer seems not to know about deletes in .del?
            int expected;

            if (Compare(origDirName, "24") < 0)
            {
                expected = 45;
            }
            else
            {
                expected = 46;
            }
            Assert.AreEqual(expected, writer.MaxDoc(), "wrong doc count");
            writer.Close();

            // make sure searching sees right # hits
            IndexSearcher searcher = new IndexSearcher(dir, true);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Document   d    = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d.Get("id"), "wrong first document");
            TestHits(hits, 44, searcher.IndexReader);
            searcher.Close();

            // make sure we can do delete & setNorm against this
            // pre-lockless segment:
            IndexReader reader     = IndexReader.Open(dir, false);
            Term        searchTerm = new Term("id", "6");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // make sure they "took":
            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 43, searcher.IndexReader);
            searcher.Close();

            // optimize
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            TestHits(hits, 43, searcher.IndexReader);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            searcher.Close();

            dir.Close();
        }
		public virtual void  TestSetBufferSize()
		{
			System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
			MockFSDirectory dir = new MockFSDirectory(indexDir);
			try
			{
				IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
				writer.SetUseCompoundFile(false);
				for (int i = 0; i < 37; i++)
				{
					Document doc = new Document();
					doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.TOKENIZED));
					doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(doc);
				}
				writer.Close();
				
				dir.allIndexInputs.Clear();
				
				IndexReader reader = IndexReader.Open(dir);
				Term aaa = new Term("content", "aaa");
				Term bbb = new Term("content", "bbb");
				Term ccc = new Term("content", "ccc");
				Assert.AreEqual(reader.DocFreq(ccc), 37);
				reader.DeleteDocument(0);
				Assert.AreEqual(reader.DocFreq(aaa), 37);
				dir.TweakBufferSizes();
				reader.DeleteDocument(4);
				Assert.AreEqual(reader.DocFreq(bbb), 37);
				dir.TweakBufferSizes();
				
				IndexSearcher searcher = new IndexSearcher(reader);
				Hits hits = searcher.Search(new TermQuery(bbb));
				dir.TweakBufferSizes();
				Assert.AreEqual(35, hits.Length());
				dir.TweakBufferSizes();
				hits = searcher.Search(new TermQuery(new Term("id", "33")));
				dir.TweakBufferSizes();
				Assert.AreEqual(1, hits.Length());
				hits = searcher.Search(new TermQuery(aaa));
				dir.TweakBufferSizes();
				Assert.AreEqual(35, hits.Length());
				searcher.Close();
				reader.Close();
			}
			finally
			{
				_TestUtil.RmDir(indexDir);
			}
		}
		private int GetHitCount(Directory dir, Term term)
		{
			IndexSearcher searcher = new IndexSearcher(dir);
			int hitCount = searcher.Search(new TermQuery(term), null, 1000).TotalHits;
			searcher.Close();
			return hitCount;
		}
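		// Usage sketch for the GetHitCount helper above (directory and term
		// values are illustrative assumptions):
		private void  SketchGetHitCount(Directory dir)
		{
			Term searchTerm = new Term("content", "aaa");
			System.Console.Out.WriteLine("hits for 'aaa': " + GetHitCount(dir, searchTerm));
		}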
        public virtual void  TestKeepLastNDeletionPolicyWithReader()
        {
            int N = 10;

            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                Directory   dir    = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED);
                writer.UseCompoundFile = useCompoundFile;
                writer.Close();
                Term  searchTerm = new Term("content", "aaa");
                Query query      = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED);
                    writer.UseCompoundFile = useCompoundFile;
                    for (int j = 0; j < 17; j++)
                    {
                        AddDoc(writer);
                    }
                    // this is a commit
                    writer.Close();
                    IndexReader reader = IndexReader.Open(dir, policy, false);
                    reader.DeleteDocument(3 * i + 1);
                    reader.SetNorm(4 * i + 1, "content", 2.0F);
                    IndexSearcher searcher = new IndexSearcher(reader);
                    ScoreDoc[]    hits     = searcher.Search(query, null, 1000).ScoreDocs;
                    Assert.AreEqual(16 * (1 + i), hits.Length);
                    // this is a commit
                    reader.Close();
                    searcher.Close();
                }
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED);
                writer.UseCompoundFile = useCompoundFile;
                writer.Optimize();
                // this is a commit
                writer.Close();

                Assert.AreEqual(2 * (N + 2), policy.numOnInit);
                Assert.AreEqual(2 * (N + 2) - 1, policy.numOnCommit);

                IndexSearcher searcher2 = new IndexSearcher(dir, false);
                ScoreDoc[]    hits2     = searcher2.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(176, hits2.Length);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and that I can open a reader on each:
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                int expectedCount = 176;

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir, true);

                        // Work backwards in commits on what the expected
                        // count should be.
                        searcher2 = new IndexSearcher(reader);
                        hits2     = searcher2.Search(query, null, 1000).ScoreDocs;
                        if (i > 1)
                        {
                            if (i % 2 == 0)
                            {
                                expectedCount += 1;
                            }
                            else
                            {
                                expectedCount -= 17;
                            }
                        }
                        Assert.AreEqual(expectedCount, hits2.Length);
                        searcher2.Close();
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last 5");
                        }
                    }
                    catch (System.IO.IOException e)
                    {
                        if (i != N)
                        {
                            throw e;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Close();
            }
        }
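        // A minimal skeleton of a keep-last-N deletion policy like the
        // KeepLastNDeletionPolicy helper these tests construct elsewhere.
        // The IndexDeletionPolicy interface shape varies across Lucene.Net
        // versions; this sketch assumes the non-generic IList form:
        class KeepLastNSketch : IndexDeletionPolicy
        {
            private int numToKeep;

            public KeepLastNSketch(int numToKeep)
            {
                this.numToKeep = numToKeep;
            }

            public void  OnInit(System.Collections.IList commits)
            {
                // Apply the same retention rule at startup as on commit:
                OnCommit(commits);
            }

            public void  OnCommit(System.Collections.IList commits)
            {
                // Delete all but the newest numToKeep commit points:
                for (int i = 0; i < commits.Count - numToKeep; i++)
                {
                    ((IndexCommit) commits[i]).Delete();
                }
            }
        }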
		// Apply buffered delete terms, queries and docIDs to the
		// provided reader
		private bool ApplyDeletes(IndexReader reader, int docIDStart)
		{
			lock (this)
			{
				
				int docEnd = docIDStart + reader.MaxDoc();
				bool any = false;
				
				System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));
				
				// Delete by term
				//System.Collections.IEnumerator iter = new System.Collections.Hashtable(deletesFlushed.terms).GetEnumerator();
				System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator();
				TermDocs docs = reader.TermDocs();
				try
				{
					while (iter.MoveNext())
					{
						System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
						Term term = (Term) entry.Key;
						// LUCENE-2086: we should be iterating a TreeMap here,
						// so terms better be in order:
						System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
						docs.Seek(term);
						int limit = ((BufferedDeletes.Num) entry.Value).GetNum();
						while (docs.Next())
						{
							int docID = docs.Doc();
							if (docIDStart + docID >= limit)
								break;
							reader.DeleteDocument(docID);
							any = true;
						}
					}
				}
				finally
				{
					docs.Close();
				}
				
				// Delete by docID
				iter = deletesFlushed.docIDs.GetEnumerator();
				while (iter.MoveNext())
				{
					int docID = ((System.Int32) iter.Current);
					if (docID >= docIDStart && docID < docEnd)
					{
						reader.DeleteDocument(docID - docIDStart);
						any = true;
					}
				}
				
				// Delete by query
				IndexSearcher searcher = new IndexSearcher(reader);
				iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator();
				while (iter.MoveNext())
				{
					System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
					Query query = (Query) entry.Key;
					int limit = ((System.Int32) entry.Value);
					Weight weight = query.Weight(searcher);
					Scorer scorer = weight.Scorer(reader, true, false);
					if (scorer != null)
					{
						while (true)
						{
							int doc = scorer.NextDoc();
							if (((long) docIDStart) + doc >= limit)
								break;
							reader.DeleteDocument(doc);
							any = true;
						}
					}
				}
				searcher.Close();
				return any;
			}
		}
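		// The method above applies deletes already flushed from the writer's
		// buffers. From user code the equivalent operations go through
		// IndexWriter's public API; a hedged sketch (term and query values
		// are illustrative):
		private void  SketchBufferedDeletes(IndexWriter writer)
		{
			// Buffered delete-by-term: removes every doc containing the term.
			writer.DeleteDocuments(new Term("id", "42"));
			// Buffered delete-by-query: removes every doc matching the query.
			writer.DeleteDocuments(new TermQuery(new Term("content", "aaa")));
			// Neither is applied until the writer flushes, commits, or closes.
		}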
        // Apply buffered delete terms, queries and docIDs to the
        // provided reader
        private bool ApplyDeletes(IndexReader reader, int docIDStart)
        {
            lock (this)
            {
                int docEnd = docIDStart + reader.MaxDoc();
                bool any = false;

                // Delete by term
                IEnumerator<KeyValuePair<object, object>> iter = deletesFlushed.terms.GetEnumerator();
                while (iter.MoveNext())
                {
                    KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current;
                    Term term = (Term)entry.Key;

                    TermDocs docs = reader.TermDocs(term);
                    if (docs != null)
                    {
                        int limit = ((BufferedDeletes.Num)entry.Value).GetNum();
                        try
                        {
                            while (docs.Next())
                            {
                                int docID = docs.Doc();
                                if (docIDStart + docID >= limit)
                                    break;
                                reader.DeleteDocument(docID);
                                any = true;
                            }
                        }
                        finally
                        {
                            docs.Close();
                        }
                    }
                }

                // Delete by docID
                IEnumerator<object> iter2 = deletesFlushed.docIDs.GetEnumerator();
                while (iter2.MoveNext())
                {
                    int docID = (int)iter2.Current;
                    if (docID >= docIDStart && docID < docEnd)
                    {
                        reader.DeleteDocument(docID - docIDStart);
                        any = true;
                    }
                }

                // Delete by query
                IndexSearcher searcher = new IndexSearcher(reader);
                iter = deletesFlushed.queries.GetEnumerator();
                while (iter.MoveNext())
                {
                    KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current;
                    Query query = (Query)entry.Key;
                    int limit = (int)entry.Value;
                    Weight weight = query.Weight(searcher);
                    Scorer scorer = weight.Scorer(reader);
                    // Scorer may be null if the query matches nothing in this reader:
                    if (scorer != null)
                    {
                        while (scorer.Next())
                        {
                            int docID = scorer.Doc();
                            if (docIDStart + docID >= limit)
                                break;
                            reader.DeleteDocument(docID);
                            any = true;
                        }
                    }
                }
                searcher.Close();
                return any;
            }
        }
		public virtual void  TestKeepLastNDeletionPolicyWithCreates()
		{
			
			int N = 10;
			
			for (int pass = 0; pass < 4; pass++)
			{
				
				bool autoCommit = pass < 2;
				bool useCompoundFile = (pass % 2) > 0;
				
				KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
				
				Directory dir = new RAMDirectory();
				IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
				writer.SetMaxBufferedDocs(10);
				writer.SetUseCompoundFile(useCompoundFile);
				writer.Close();
				Term searchTerm = new Term("content", "aaa");
				Query query = new TermQuery(searchTerm);
				
				for (int i = 0; i < N + 1; i++)
				{
					
					writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
					writer.SetMaxBufferedDocs(10);
					writer.SetUseCompoundFile(useCompoundFile);
					for (int j = 0; j < 17; j++)
					{
						AddDoc(writer);
					}
					// this is a commit when autoCommit=false:
					writer.Close();
					IndexReader reader = IndexReader.Open(dir, policy);
					reader.DeleteDocument(3);
					reader.SetNorm(5, "content", 2.0F);
					IndexSearcher searcher = new IndexSearcher(reader);
					Hits hits = searcher.Search(query);
					Assert.AreEqual(16, hits.Length());
					// this is a commit when autoCommit=false:
					reader.Close();
					searcher.Close();
					
					writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
					// This will not commit: there are no changes
					// pending because we opened for "create":
					writer.Close();
				}
				
				Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
				if (autoCommit)
				{
					Assert.IsTrue(policy.numOnCommit > 3 * (N + 1) - 1);
				}
				else
				{
					Assert.AreEqual(2 * (N + 1), policy.numOnCommit);
				}
				
				IndexSearcher searcher2 = new IndexSearcher(dir);
				Hits hits2 = searcher2.Search(query);
				Assert.AreEqual(0, hits2.Length());
				
				// Simplistic check: just verify only the past N segments_N's still
				// exist, and that I can open a reader on each:
				long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
				
				dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
				int expectedCount = 0;
				
				for (int i = 0; i < N + 1; i++)
				{
					try
					{
						IndexReader reader = IndexReader.Open(dir);
						
						// Work backwards in commits on what the expected
						// count should be.  Only check this in the
						// autoCommit false case:
						if (!autoCommit)
						{
							searcher2 = new IndexSearcher(reader);
							hits2 = searcher2.Search(query);
							Assert.AreEqual(expectedCount, hits2.Length());
							searcher2.Close();
							if (expectedCount == 0)
							{
								expectedCount = 16;
							}
							else if (expectedCount == 16)
							{
								expectedCount = 17;
							}
							else if (expectedCount == 17)
							{
								expectedCount = 0;
							}
						}
						reader.Close();
						if (i == N)
						{
							Assert.Fail("should have failed on commits before last " + N);
						}
					}
					catch (System.IO.IOException e)
					{
						if (i != N)
						{
							throw e;
						}
					}
					if (i < N)
					{
						dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
					}
					gen--;
				}
				
				dir.Close();
			}
		}
		/* Open pre-lockless index, add docs, do a delete &
		* setNorm, and search */
		public virtual void  ChangeIndexNoAdds(System.String dirName, bool autoCommit)
		{
			
			dirName = FullDir(dirName);
			
			Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName));
			
			// make sure searching sees right # hits
			IndexSearcher searcher = new IndexSearcher(dir);
			ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Assert.AreEqual(34, hits.Length, "wrong number of hits");
			Document d = searcher.Doc(hits[0].doc);
			Assert.AreEqual("21", d.Get("id"), "wrong first document");
			searcher.Close();
			
			// make sure we can do a delete & setNorm against this
			// pre-lockless segment:
			IndexReader reader = IndexReader.Open(dir);
			Term searchTerm = new Term("id", "6");
			int delCount = reader.DeleteDocuments(searchTerm);
			Assert.AreEqual(1, delCount, "wrong delete count");
			reader.SetNorm(22, "content", (float) 2.0);
			reader.Close();
			
			// make sure they "took":
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Assert.AreEqual(33, hits.Length, "wrong number of hits");
			d = searcher.Doc(hits[0].doc);
			Assert.AreEqual("22", d.Get("id"), "wrong first document");
			TestHits(hits, 33, searcher.GetIndexReader());
			searcher.Close();
			
			// optimize
			IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
			writer.Optimize();
			writer.Close();
			
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Assert.AreEqual(33, hits.Length, "wrong number of hits");
			d = searcher.Doc(hits[0].doc);
			Assert.AreEqual("22", d.Get("id"), "wrong first document");
			TestHits(hits, 33, searcher.GetIndexReader());
			searcher.Close();
			
			dir.Close();
		}
Example #46
0
        public virtual void  TestKeepLastNDeletionPolicyWithCreates()
        {
            int N = 10;

            for (int pass = 0; pass < 4; pass++)
            {
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                Directory   dir    = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                writer.SetMaxBufferedDocs(10);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.Close();
                Term  searchTerm = new Term("content", "aaa");
                Query query      = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                    writer.SetMaxBufferedDocs(10);
                    writer.SetUseCompoundFile(useCompoundFile);
                    for (int j = 0; j < 17; j++)
                    {
                        AddDoc(writer);
                    }
                    // this is a commit when autoCommit=false:
                    writer.Close();
                    IndexReader reader = IndexReader.Open(dir, policy);
                    reader.DeleteDocument(3);
                    reader.SetNorm(5, "content", 2.0F);
                    IndexSearcher searcher = new IndexSearcher(reader);
                    ScoreDoc[]    hits     = searcher.Search(query, null, 1000).scoreDocs;
                    Assert.AreEqual(16, hits.Length);
                    // this is a commit when autoCommit=false:
                    reader.Close();
                    searcher.Close();

                    writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                    // This will not commit: there are no changes
                    // pending because we opened for "create":
                    writer.Close();
                }

                Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
                if (!autoCommit)
                {
                    Assert.AreEqual(3 * (N + 1), policy.numOnCommit);
                }

                IndexSearcher searcher2 = new IndexSearcher(dir);
                ScoreDoc[]    hits2     = searcher2.Search(query, null, 1000).scoreDocs;
                Assert.AreEqual(0, hits2.Length);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and that I can open a reader on each:
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                int expectedCount = 0;

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir);

                        // Work backwards in commits on what the expected
                        // count should be.  Only check this in the
                        // autoCommit false case:
                        if (!autoCommit)
                        {
                            searcher2 = new IndexSearcher(reader);
                            hits2     = searcher2.Search(query, null, 1000).scoreDocs;
                            Assert.AreEqual(expectedCount, hits2.Length);
                            searcher2.Close();
                            if (expectedCount == 0)
                            {
                                expectedCount = 16;
                            }
                            else if (expectedCount == 16)
                            {
                                expectedCount = 17;
                            }
                            else if (expectedCount == 17)
                            {
                                expectedCount = 0;
                            }
                        }
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last " + N);
                        }
                    }
                    catch (System.IO.IOException e)
                    {
                        if (i != N)
                        {
                            throw e;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Close();
            }
        }
		/* Open pre-lockless index, add docs, do a delete &
		* setNorm, and search */
		public virtual void  ChangeIndexWithAdds(System.String dirName, bool autoCommit)
		{
			System.String origDirName = dirName;
			dirName = FullDir(dirName);
			
			Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName));
			
			// open writer
			IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
			
			// add 10 docs
			for (int i = 0; i < 10; i++)
			{
				AddDoc(writer, 35 + i);
			}
			
			// make sure writer sees right total -- writer seems not to know about deletes in .del?
			int expected;
			if (Compare(origDirName, "24") < 0)
			{
				expected = 45;
			}
			else
			{
				expected = 46;
			}
			Assert.AreEqual(expected, writer.DocCount(), "wrong doc count");
			writer.Close();
			
			// make sure searching sees right # hits
			IndexSearcher searcher = new IndexSearcher(dir);
			ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Document d = searcher.Doc(hits[0].doc);
			Assert.AreEqual("21", d.Get("id"), "wrong first document");
			TestHits(hits, 44, searcher.GetIndexReader());
			searcher.Close();
			
			// make sure we can do delete & setNorm against this
			// pre-lockless segment:
			IndexReader reader = IndexReader.Open(dir);
			Term searchTerm = new Term("id", "6");
			int delCount = reader.DeleteDocuments(searchTerm);
			Assert.AreEqual(1, delCount, "wrong delete count");
			reader.SetNorm(22, "content", (float) 2.0);
			reader.Close();
			
			// make sure they "took":
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Assert.AreEqual(43, hits.Length, "wrong number of hits");
			d = searcher.Doc(hits[0].doc);
			Assert.AreEqual("22", d.Get("id"), "wrong first document");
			TestHits(hits, 43, searcher.GetIndexReader());
			searcher.Close();
			
			// optimize
			writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
			writer.Optimize();
			writer.Close();
			
			searcher = new IndexSearcher(dir);
			hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
			Assert.AreEqual(43, hits.Length, "wrong number of hits");
			d = searcher.Doc(hits[0].doc);
			TestHits(hits, 43, searcher.GetIndexReader());
			Assert.AreEqual("22", d.Get("id"), "wrong first document");
			searcher.Close();
			
			dir.Close();
		}
Example #48
        /// <summary> Suggest similar words (optionally restricted to a field of a user index)</summary>
        /// <param name="word">String the word to spell-check
        /// </param>
        /// <param name="num_sug">int the number of suggestions to return
        /// </param>
        /// <param name="ir">the IndexReader of the user index (may be null; see the field parameter)
        /// </param>
        /// <param name="field">String the field of the user index: if field is not null, the suggested
        /// words are restricted to the words present in this field.
        /// </param>
        /// <param name="morePopular">boolean return only suggestions that are more frequent than the searched word
        /// (only in restricted mode, i.e. indexReader != null and field != null)
        /// </param>
        /// <throws>  IOException </throws>
        /// <returns> String[] the list of suggestions, sorted by two criteria:
        /// first the edit distance, then (in restricted mode only) the popularity
        /// of the suggested word in the field of the user index
        /// </returns>
        public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular)
        {
            float            min = this.minScore;
            TRStringDistance sd  = new TRStringDistance(word);
            int lengthWord       = word.Length;

            // document frequency of the input word in the user index (0 when no index/field was given)
            int freq     = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
            int goalFreq = morePopular ? freq : 0;

            if (!morePopular && freq > 0)
            {
                return(new System.String[] { word }); // the word already exists in the index and a more popular word was not requested
            }

            BooleanQuery query = new BooleanQuery();

            System.String[] grams;
            System.String   key;

            for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
            {
                key = "gram" + ng;           // form key

                grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

                if (grams.Length == 0)
                {
                    continue; // no grams of this size for this word; skip
                }

                if (bStart > 0)
                {
                    // boost grams matching the start of the word, if configured
                    Add(query, "start" + ng, grams[0], bStart);
                }
                if (bEnd > 0)
                {
                    // boost grams matching the end of the word, if configured
                    Add(query, "end" + ng, grams[grams.Length - 1], bEnd);
                }
                for (int i = 0; i < grams.Length; i++)
                {
                    Add(query, key, grams[i]);
                }
            }

            IndexSearcher    searcher = new IndexSearcher(this.spellindex);
            Hits             hits     = searcher.Search(query);
            SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);

            int         stop    = Math.Min(hits.Length(), 10 * num_sug); // look at up to 10x the requested suggestions, since the distance filter below may reject many
            SuggestWord sugword = new SuggestWord();

            for (int i = 0; i < stop; i++)
            {
                sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get the original word

                if (sugword.string_Renamed.Equals(word))
                {
                    continue; // don't suggest a word for itself, that would be silly
                }

                // edit distance, normalized by the length of the shorter of the two words
                sugword.score = 1.0f - ((float)sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord));
                if (sugword.score < min)
                {
                    continue;
                }

                if (ir != null)
                {
                    // use the user index
                    sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index
                    if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1)
                    {
                        // don't suggest a word that is not present in the field
                        continue;
                    }
                }
                sugqueue.Insert(sugword);
                if (sugqueue.Size() == num_sug)
                {
                    // the queue is full: raise the score cutoff to the weakest queued suggestion
                    min = ((SuggestWord)sugqueue.Top()).score;
                }
                sugword = new SuggestWord();
            }

            // pop the queue (lowest score first) into an array, best suggestion first
            System.String[] list = new System.String[sugqueue.Size()];
            for (int i = sugqueue.Size() - 1; i >= 0; i--)
            {
                list[i] = ((SuggestWord)sugqueue.Pop()).string_Renamed;
            }

            searcher.Close();
            return(list);
        }
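
A call-site sketch may help tie the parameters together. The following is not from the original source: the checker instance, the user-index path, and the query word are assumptions, shown only to illustrate the unrestricted and restricted modes of SuggestSimilar.

        // Hedged usage sketch (illustrative names, not part of the original example).
        // 'checker' is assumed to be an instance of the spell-checker class above.
        IndexReader userIndex = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo("/tmp/userindex"))); // assumed path

        // Unrestricted mode: ir and field are null, so suggestions come from the spell index alone.
        System.String[] anyWords = checker.SuggestSimilar("recieve", 5, null, null, false);

        // Restricted mode: suggestions are limited to words present in the "content"
        // field of the user index and, with morePopular=true, to words more frequent
        // there than the input word itself.
        System.String[] popularWords = checker.SuggestSimilar("recieve", 5, userIndex, "content", true);

        userIndex.Close();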
Example #49
		public virtual void  TestSetBufferSize()
		{
			System.IO.DirectoryInfo indexDir = new System.IO.DirectoryInfo(System.IO.Path.Combine(AppSettings.Get("tempDir", ""), "testSetBufferSize"));
			MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom());
			try
			{
				IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
				writer.UseCompoundFile = false;
				for (int i = 0; i < 37; i++)
				{
					Document doc = new Document();
					doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
					doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
					writer.AddDocument(doc);
				}
				writer.Close();
				
				dir.allIndexInputs.Clear();
				
				IndexReader reader = IndexReader.Open(dir, false);
				Term aaa = new Term("content", "aaa");
				Term bbb = new Term("content", "bbb");
				Term ccc = new Term("content", "ccc");
				Assert.AreEqual(37, reader.DocFreq(ccc));
				reader.DeleteDocument(0);
				Assert.AreEqual(37, reader.DocFreq(aaa));
				dir.tweakBufferSizes();
				reader.DeleteDocument(4);
				Assert.AreEqual(reader.DocFreq(bbb), 37);
				dir.tweakBufferSizes();
				
				IndexSearcher searcher = new IndexSearcher(reader);
				ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000).ScoreDocs;
				dir.tweakBufferSizes();
				Assert.AreEqual(35, hits.Length);
				dir.tweakBufferSizes();
				hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).ScoreDocs;
				dir.tweakBufferSizes();
				Assert.AreEqual(1, hits.Length);
				hits = searcher.Search(new TermQuery(aaa), null, 1000).ScoreDocs;
				dir.tweakBufferSizes();
				Assert.AreEqual(35, hits.Length);
				searcher.Close();
				reader.Close();
			}
			finally
			{
				_TestUtil.RmDir(indexDir);
			}
		}
Example #50
        public string GetStudentsByYearIdAndTimesIdAndSchoolIdAndStudentName(string schoolYear, string times, string schoolId, string StudentName, string pIndex)
        {
            string    result          = string.Empty;
            int       pageIndex       = Int32.Parse(pIndex);
            ArrayList students        = new ArrayList();
            string    pathOfIndexFile = Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["StudentIndexing"].ToString());

            if (Int32.Parse(schoolYear) >= 2000)
            {
                pathOfIndexFile += "\\" + schoolYear + "\\Index";
            }

            string studentName = StudentName.Replace("\"", "");

            studentName = "\"" + studentName + "\"";

            Lucene.Net.Search.IndexSearcher iSearcher = new Lucene.Net.Search.IndexSearcher(pathOfIndexFile);

            Lucene.Net.QueryParsers.QueryParser qYearParser = new Lucene.Net.QueryParsers.QueryParser("YearId", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
            Lucene.Net.Search.Query             iYearQuery  = qYearParser.Parse(schoolYear);

            Lucene.Net.QueryParsers.QueryParser qTestDayParser = new Lucene.Net.QueryParsers.QueryParser("TestDayId", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
            Lucene.Net.Search.Query             iTestDayQuery  = qTestDayParser.Parse(times);

            // combine the year and test-day constraints (plus the student name, when one is given)
            Lucene.Net.Search.BooleanQuery bQuery = new Lucene.Net.Search.BooleanQuery();
            bQuery.Add(iYearQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);
            bQuery.Add(iTestDayQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);


            if (StudentName != " " && StudentName != "")
            {
                Lucene.Net.QueryParsers.QueryParser qStudentParser = new Lucene.Net.QueryParsers.QueryParser("StudentName", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
                Lucene.Net.Search.Query             iStudentQuery  = qStudentParser.Parse(studentName);
                bQuery.Add(iStudentQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);
            }

            Lucene.Net.Search.Hits iHits = iSearcher.Search(bQuery);

            using (System.Data.SqlClient.SqlConnection con = new System.Data.SqlClient.SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["PSCPortalConnectionString"].ConnectionString))
            {
                con.Open();

                // paging: 20 results per page
                for (int i = pageIndex * 20 - 20; i < pageIndex * 20 && i < iHits.Length(); i++)
                {
                    string yId       = iHits.Doc(i).Get("YearId");
                    string stuId     = iHits.Doc(i).Get("StudentID");
                    string testDayId = iHits.Doc(i).Get("TestDayId");

                    System.Data.SqlClient.SqlCommand com = new System.Data.SqlClient.SqlCommand();
                    com.Connection  = con;
                    com.CommandType = CommandType.Text;
                    com.CommandText = @"   select StudentTHPT.TotalMark,[RoundTotalMark],StudentTHPT.YearId,StudentTHPT.TestDayId,StudentId,FirstName+' '+MiddleName+' '+LastName as FullName,Sex,Birthday,MarkEncourage,Section.Name from StudentTHPT inner join Section on StudentTHPT.SectionId = Section.SectionId 
			                                    where StudentTHPT.YearId=@yearId and StudentTHPT.TestDayId=@timeId and StudentId = @studentId
                                           Order by LastName
	                                   "    ;
                    com.Parameters.Add("@yearId", SqlDbType.NChar);
                    com.Parameters["@yearId"].Value = yId;

                    com.Parameters.Add("@timeId", SqlDbType.NVarChar);
                    com.Parameters["@timeId"].Value = testDayId;

                    com.Parameters.Add("@studentId", SqlDbType.NVarChar);
                    com.Parameters["@studentId"].Value = stuId;

                    using (System.Data.SqlClient.SqlDataReader reader = com.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            string fullName  = reader["FullName"].ToString();
                            string birthday  = reader["Birthday"].ToString().Trim();
                            string studentId = reader["StudentId"].ToString();
                            string total     = iHits.Length().ToString();
                            //    string markEncourage = reader["MarkEncourage"].ToString();
                            string  totalMark      = reader["TotalMark"].ToString();
                            string  section        = reader["Name"].ToString();
                            string  roundTotalMark = reader["RoundTotalMark"].ToString();
                            Student s = new Student {
                                StudentId = studentId, FullName = fullName, Birthday = birthday, Total = total, Section = section, TotalMark = totalMark, RoundTotalMark = roundTotalMark
                            };
                            students.Add(s);
                        }
                    }
                }
            }
            iSearcher.Close();

            System.Web.Script.Serialization.JavaScriptSerializer serialize = new System.Web.Script.Serialization.JavaScriptSerializer();

            result = serialize.Serialize(students);
            return(result);
        }
Example #51
        public virtual void  TestBasic()
        {
            Directory   dir      = new MockRAMDirectory();
            Analyzer    analyzer = new StandardAnalyzer();
            IndexWriter writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMergeFactor(2);
            writer.SetMaxBufferedDocs(2);
            writer.SetSimilarity(new SimpleSimilarity());


            System.Text.StringBuilder sb   = new System.Text.StringBuilder(265);
            System.String             term = "term";
            for (int i = 0; i < 30; i++)
            {
                Document d = new Document();
                sb.Append(term).Append(" ");
                System.String content = sb.ToString();
                Field         noTf    = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED);
                noTf.SetOmitTermFreqAndPositions(true);
                d.Add(noTf);

                Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED);
                d.Add(tf);

                writer.AddDocument(d);
                //System.out.println(d);
            }

            writer.Optimize();
            // flush
            writer.Close();
            _TestUtil.CheckIndex(dir);

            /*
             * Verify the index
             */
            Searcher searcher = new IndexSearcher(dir);

            searcher.SetSimilarity(new SimpleSimilarity());

            Term      a  = new Term("noTf", term);
            Term      b  = new Term("tf", term);
            Term      c  = new Term("noTf", "notf");
            Term      d2 = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d2);


            searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
            //System.out.println(CountingHitCollector.getCount());


            searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
            //System.out.println(CountingHitCollector.getCount());



            searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
            //System.out.println(CountingHitCollector.getCount());


            searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
            //System.out.println(CountingHitCollector.getCount());



            BooleanQuery bq = new BooleanQuery();

            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
            Assert.AreEqual(15, CountingHitCollector.GetCount());

            searcher.Close();
            dir.Close();
        }
Example #52
		public virtual void  TestFieldSetValue()
		{
			
			Field field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
			Document doc = new Document();
			doc.Add(field);
			doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));
			
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(doc);
			field.SetValue("id2");
			writer.AddDocument(doc);
			field.SetValue("id3");
			writer.AddDocument(doc);
			writer.Close();
			
			Searcher searcher = new IndexSearcher(dir);
			
			Query query = new TermQuery(new Term("keyword", "test"));
			
			// ensure that queries return expected results without DateFilter first
			ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(3, hits.Length);
			int result = 0;
			for (int i = 0; i < 3; i++)
			{
				Document doc2 = searcher.Doc(hits[i].doc);
				Field f = doc2.GetField("id");
				if (f.StringValue().Equals("id1"))
					result |= 1;
				else if (f.StringValue().Equals("id2"))
					result |= 2;
				else if (f.StringValue().Equals("id3"))
					result |= 4;
				else
					Assert.Fail("unexpected id field");
			}
			searcher.Close();
			dir.Close();
			Assert.AreEqual(7, result, "did not see all IDs");
		}
Example #53
		public virtual void  TestRAMDirectoryString()
		{
			
			MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.FullName);
			
			// Check size
			Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());
			
			// open reader to test document count
			IndexReader reader = IndexReader.Open(ramDir);
			Assert.AreEqual(docsToAdd, reader.NumDocs());
			
			// open a searcher to check that all docs are there
			IndexSearcher searcher = new IndexSearcher(reader);
			
			// search for all documents
			for (int i = 0; i < docsToAdd; i++)
			{
				Document doc = searcher.Doc(i);
				Assert.IsTrue(doc.GetField("content") != null);
			}
			
			// cleanup
			reader.Close();
			searcher.Close();
		}
Example #54
        private void Search()
        {
            try
            {
                SearchProgressBar.Maximum = 11;
                ProgressLabel.Text = "Progress: Initialize Search ...";
                Searcher searcher = new IndexSearcher(@"Canon\index");
                Analyzer analyzer = new StandardAnalyzer();
                ArrayList resultList = new ArrayList();

                String line = QueryInputBox.Text;
                if (line.Length == 0)
                    return;
                ProgressLabel.Text = "Progress: Parsing Query ...";
                Query query = QueryParser.Parse(line, "contents", analyzer);
                //int[] ix = qtm.GetTermFrequencies();

                Hits hits = searcher.Search(query);
                SearchProgressBar.Increment(1);
                ProgressLabel.Text = "Progress: Searched. Analyzing results ...";

                //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(query, new WhitespaceAnalyzer(), "<B>", "</B>");
                Highlighter highlighter = new Highlighter(new QueryScorer(query));
                highlighter.SetTextFragmenter(new SimpleFragmenter(80));
                int maxNumFragmentsRequired = 1;

                    // look at the first 10 hits at most, guarding against fewer results
                    for (int i = 0; i < System.Math.Min(hits.Length(), 10); i++)
                    {
                            SearchProgressBar.Increment(1);
                            ProgressLabel.Text = "Progress: Analyzing hit " + (i+1).ToString();
                            // get the document from index
                            Document doc = hits.Doc(i);
                            //SegmentReader ir = new SegmentReader();
                            //Lucene.Net.Index.TermFreqVector tfv =
                            //tfv.GetTermFrequencies
                            ResultSet a = new ResultSet();
                            a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\","");
                            a.Score = hits.Score(i);
                            a.numberOfHits = hits.Length();

                            // get the document filename
                            // we can't get the text from the index
                            //because we didn't store it there
                            //so get it from archive
                            string path = doc.Get("path");
                            string name = GetInternalName(path);
                            PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
                            path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";
                            string plainText = "";
                            //load text from zip archive temporarily
                            using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
                            {
                                plainText = parseHtml(sr.ReadToEnd());
                            }
            //-------------------------------Highlighter Code 1.4
                            TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
                            a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");
                            if(File.Exists(path))
                                File.Delete(path);
            //-------------------------------
                            resultList.Add(a);
                        }
                SearchProgressBar.Value = 0;
                searcher.Close();
                ssr = new ShowSearchResults(/*Box*/resultList);
                //this.Hide();
                ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
                ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
                this.Hide();
                ssr.ShowDialog();

            }
            catch (System.Exception e)
            {
                MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
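
Stripped of the UI and file plumbing, the highlighting logic above reduces to a few lines. A minimal sketch, assuming the same 1.4-era API used in this method; the query text, field name, and plainText variable are illustrative:

        // Hedged highlighter sketch (names are assumptions, not the original code).
        Analyzer analyzer = new StandardAnalyzer();
        Query query = QueryParser.Parse("metta", "contents", analyzer); // hypothetical query text
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.SetTextFragmenter(new SimpleFragmenter(80)); // ~80-character fragments
        TokenStream tokens = analyzer.TokenStream("contents", new System.IO.StringReader(plainText));
        string best = highlighter.GetBestFragments(tokens, plainText, 1, "..."); // best fragment, "..." separator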
Example #55
 public override void  TearDown()
 {
     base.TearDown();
     searcher.Close();
 }
Example #56
		// Apply buffered delete terms, queries and docIDs to the
		// provided reader
		private bool ApplyDeletes(IndexReader reader, int docIDStart)
		{
			lock (this)
			{
				
				int docEnd = docIDStart + reader.MaxDoc();
				bool any = false;
				
                System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));

				// Delete by term
				TermDocs docs = reader.TermDocs();
				try
				{
                    foreach(KeyValuePair<Term,BufferedDeletes.Num> entry in deletesFlushed.terms)
					{
						Term term = entry.Key;
						// LUCENE-2086: we should be iterating a TreeMap,
                        // here, so terms better be in order:
                        System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
						docs.Seek(term);
						int limit = entry.Value.GetNum();
						while (docs.Next())
						{
							int docID = docs.Doc();
							if (docIDStart + docID >= limit)
								break;
							reader.DeleteDocument(docID);
							any = true;
						}
					}
				}
				finally
				{
					docs.Close();
				}
				
				// Delete by docID
                foreach(int docID in deletesFlushed.docIDs)
				{
					if (docID >= docIDStart && docID < docEnd)
					{
						reader.DeleteDocument(docID - docIDStart);
						any = true;
					}
				}
				
				// Delete by query
				IndexSearcher searcher = new IndexSearcher(reader);
                foreach(KeyValuePair<Query,int> entry in new Support.Dictionary<Query,int>(deletesFlushed.queries))
				{
					Query query = entry.Key;
					int limit = entry.Value;
					Weight weight = query.Weight(searcher);
					Scorer scorer = weight.Scorer(reader, true, false);
					if (scorer != null)
					{
						while (true)
						{
							int doc = scorer.NextDoc();
							if (((long) docIDStart) + doc >= limit)
								break;
							reader.DeleteDocument(doc);
							any = true;
						}
					}
				}
				searcher.Close();
				return any;
			}
		}
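
ApplyDeletes is internal plumbing; application code normally reaches the term and query delete paths through the public IndexWriter API (the docID path is only used internally, e.g. after a failed AddDocument). A minimal sketch under that assumption, with illustrative field names:

        // Hedged sketch: public calls whose buffered deletes are eventually applied by ApplyDeletes.
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        // ... AddDocument calls omitted ...
        writer.DeleteDocuments(new Term("id", "7"));                          // buffered delete-by-term
        writer.DeleteDocuments(new TermQuery(new Term("content", "stale"))); // buffered delete-by-query
        writer.Commit(); // buffered deletes are applied against segment readers at flush/commit time
        writer.Close();
        dir.Close();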
Example #57
        public virtual void  TestGetValuesForIndexedDocument()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
			writer.AddDocument(MakeDocumentWithFields());
			writer.Close();
			
			Searcher searcher = new IndexSearcher(dir);
			
			// search for something that does exist
			Query query = new TermQuery(new Term("keyword", "test1"));
			
			// ensure that queries return expected results without DateFilter first
			Hits hits = searcher.Search(query);
			Assert.AreEqual(1, hits.Length());
			
			try
			{
				DoAssert(hits.Doc(0), true);
			}
			catch (System.Exception e)
			{
                System.Console.Error.WriteLine(e.StackTrace);
				System.Console.Error.Write("\n");
			}
			finally
			{
				searcher.Close();
			}
		}