Code Example #1
        public virtual void  TestRareVectors()
        {
            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            for (int i = 0; i < 100; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                writer.AddDocument(doc);
            }
            for (int i = 0; i < 10; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("field", English.IntToEnglish(100 + i), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                writer.AddDocument(doc);
            }

            writer.Close();
            searcher = new IndexSearcher(directory);

            Query query = new TermQuery(new Term("field", "hundred"));

            ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(10, hits.Length);
            for (int i = 0; i < hits.Length; i++)
            {
                TermFreqVector[] vector = searcher.reader_ForNUnit.GetTermFreqVectors(hits[i].doc);
                Assert.IsTrue(vector != null);
                Assert.IsTrue(vector.Length == 1);
            }
        }
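For reference, English.IntToEnglish is the Lucene.Net test utility these examples exercise: it spells an integer out in English and returns it with a trailing space, which is why several snippets below call .Trim() on the result. A minimal sketch of the expected values (assumed from the utility's behavior, not checked against a specific version):

            // English.IntToEnglish(7)          -> "seven "
            // English.IntToEnglish(101)        -> "one hundred one "
            // English.IntToEnglish(101).Trim() -> "one hundred one"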
Code Example #2
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory    directory = new RAMDirectory();
            PayloadAnalyzer analyzer  = new PayloadAnalyzer(this);
            IndexWriter     writer    = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetSimilarity(similarity);
            //writer.infoStream = System.out;
            for (int i = 0; i < 1000; i++)
            {
                Document doc            = new Document();
                Field    noPayloadField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
                //noPayloadField.setBoost(0);
                doc.Add(noPayloadField);
                doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("multiField", English.IntToEnglish(i) + "  " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(directory);
            searcher.SetSimilarity(similarity);
        }
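Here PayloadAnalyzer presumably attaches payloads to the tokens of field and multiField while PayloadHelper.NO_PAYLOAD_FIELD stays bare, letting the custom similarity be compared with and without payload contributions (an inference from the field names and the SetSimilarity calls).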
Code Example #3
        public override void  SetUp()
        {
            base.SetUp();
            System.String tempDir = System.IO.Path.GetTempPath();
            if (tempDir == null)
            {
                throw new System.IO.IOException("temp directory undefined, cannot run test");
            }
            indexDir = new System.IO.DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex"));

            Directory   dir    = FSDirectory.Open(indexDir);
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            // add some documents
            Document doc = null;

            for (int i = 0; i < docsToAdd; i++)
            {
                doc = new Document();
                doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }
            Assert.AreEqual(docsToAdd, writer.MaxDoc());
            writer.Close();
            dir.Close();
        }
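Note the extra trailing null argument on the IndexWriter constructor and AddDocument in this snippet, and on Optimize, DeleteDocuments, and IndexReader.Open in several examples below: those snippets appear to come from a fork whose API threads an additional state parameter through most calls (an inference from the signatures; the stock Lucene.Net overloads omit it).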
Code Example #4
 override public void  Run()
 {
     try
     {
         for (int j = 0; j < Lucene.Net.Index.TestThreadedOptimize.NUM_ITER2; j++)
         {
             writerFinal.Optimize(false, null);
             for (int k = 0; k < 17 * (1 + iFinal); k++)
             {
                 Document d = new Document();
                 d.Add(new Field("id", iterFinal + "_" + iFinal + "_" + j + "_" + k, Field.Store.YES, Field.Index.NOT_ANALYZED));
                 d.Add(new Field("contents", English.IntToEnglish(iFinal + k), Field.Store.NO, Field.Index.ANALYZED));
                 writerFinal.AddDocument(d, null);
             }
             for (int k = 0; k < 9 * (1 + iFinal); k++)
             {
                 writerFinal.DeleteDocuments(null, new Term("id", iterFinal + "_" + iFinal + "_" + j + "_" + k));
             }
             writerFinal.Optimize(null);
         }
     }
     catch (System.Exception t)
     {
         Enclosing_Instance.setFailed();
         System.Console.Out.WriteLine(ThreadClass.Current().Name + ": hit exception");
         System.Console.Out.WriteLine(t.StackTrace);
     }
 }
Code Example #5
        public virtual void  Test()
        {
            PayloadNearQuery query;
            TopDocs          hits;

            query = NewPhraseQuery("field", "twenty two", true);
            QueryUtils.Check(query);

            // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
            // and all the similarity factors are set to 1
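            // (expected score 3 = the payload average (2 + 4) / 2, assuming NewPhraseQuery
            // builds a PayloadNearQuery with the default AveragePayloadFunction)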
            hits = searcher.Search(query, null, 100);
            Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
            Assert.IsTrue(hits.TotalHits == 10, "should be 10 hits");
            for (int j = 0; j < hits.ScoreDocs.Length; j++)
            {
                ScoreDoc doc = hits.ScoreDocs[j];
                Assert.IsTrue(doc.Score == 3, doc.Score + " does not equal: " + 3);
            }
            for (int i = 1; i < 10; i++)
            {
                query = NewPhraseQuery("field", English.IntToEnglish(i) + " hundred", true);
                // all should have score = 3 because adjacent terms have payloads of 2,4
                // and all the similarity factors are set to 1
                hits = searcher.Search(query, null, 100);
                Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
                Assert.IsTrue(hits.TotalHits == 100, "should be 100 hits");
                for (int j = 0; j < hits.ScoreDocs.Length; j++)
                {
                    ScoreDoc doc = hits.ScoreDocs[j];
                    //                System.out.println("Doc: " + doc.toString());
                    //                System.out.println("Explain: " + searcher.explain(query, doc.doc));
                    Assert.IsTrue(doc.Score == 3, doc.Score + " does not equal: " + 3);
                }
            }
        }
Code Example #6
        public override void  SetUp()
        {
            base.SetUp();
            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            //writer.setUseCompoundFile(true);
            //writer.infoStream = System.out;
            for (int i = 0; i < 1000; i++)
            {
                Document         doc = new Document();
                Field.TermVector termVector;
                int mod3 = i % 3;
                int mod2 = i % 2;
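                // choose a term-vector flavor by doc number: multiples of 6 get
                // positions + offsets, other even docs positions only, other multiples
                // of 3 offsets only, and the rest a plain term vector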
                if (mod2 == 0 && mod3 == 0)
                {
                    termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                }
                else if (mod2 == 0)
                {
                    termVector = Field.TermVector.WITH_POSITIONS;
                }
                else if (mod3 == 0)
                {
                    termVector = Field.TermVector.WITH_OFFSETS;
                }
                else
                {
                    termVector = Field.TermVector.YES;
                }
                doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector));
                writer.AddDocument(doc);
            }
            writer.Close();
            searcher = new IndexSearcher(directory);
        }
Code Example #7
        public override void  SetUp()
        {
            base.SetUp();
            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            //writer.setUseCompoundFile(false);
            //writer.infoStream = System.out;
            for (int i = 0; i < numDocs; i++)
            {
                Document  doc = new Document();
                Fieldable fld = new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES);
                doc.Add(fld);
                writer.AddDocument(doc);
            }
            writer.Close();
        }
Code Example #8
File: TestTermVectors.cs Project: yonder/mono
        public virtual void  SetUp()
        {
            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);

            //writer.setUseCompoundFile(true);
            //writer.infoStream = System.out;
            System.Text.StringBuilder buffer = new System.Text.StringBuilder();
            for (int i = 0; i < 1000; i++)
            {
                Document doc = new Document();
                doc.Add(Field.Text("Field", English.IntToEnglish(i), true));
                writer.AddDocument(doc);
            }
            writer.Close();
            searcher = new IndexSearcher(directory);
        }
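Code Examples #8 and #10 (below) use the legacy 1.x-era field API: Field.Text(name, value) created a stored, analyzed field, and the optional third boolean seen here enabled term vectors; the other examples use its replacement, the constructor taking Field.Store, Field.Index, and Field.TermVector (a reading of the legacy API, not verified against a specific release).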
Code Example #9
File: TestRAMDirectory.cs Project: stgwilli/ravendb
 override public void  Run()
 {
     for (int j = 1; j < Enclosing_Instance.docsPerThread; j++)
     {
         Document doc = new Document();
         doc.Add(new Field("sizeContent", English.IntToEnglish(num * Enclosing_Instance.docsPerThread + j).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED));
         try
         {
             writer.AddDocument(doc);
         }
         catch (System.IO.IOException e)
         {
             throw new System.SystemException("", e);
         }
     }
 }
Code Example #10
File: TestBasics.cs Project: raj581/Marvin
        public virtual void  SetUp()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new SimpleAnalyzer(), true);

            //writer.infoStream = System.out;
            for (int i = 0; i < 1000; i++)
            {
                Document doc = new Document();
                doc.Add(Field.Text("Field", English.IntToEnglish(i)));
                writer.AddDocument(doc);
            }

            writer.Close();

            searcher = new IndexSearcher(directory);
        }
Code Example #11
        private void  DoTestStopPositons(StopFilter stpf, bool enableIcrements)
        {
            Log("---> test with enable-increments-" + (enableIcrements?"enabled":"disabled"));
            stpf.EnablePositionIncrements = enableIcrements;
            ITermAttribute termAtt = stpf.GetAttribute <ITermAttribute>();
            IPositionIncrementAttribute posIncrAtt = stpf.GetAttribute <IPositionIncrementAttribute>();

            for (int i = 0; i < 20; i += 3)
            {
                Assert.IsTrue(stpf.IncrementToken());
                Log("Token " + i + ": " + stpf);
                System.String w = English.IntToEnglish(i).Trim();
                Assert.AreEqual(w, termAtt.Term, "expecting token " + i + " to be " + w);
                Assert.AreEqual(enableIcrements?(i == 0?1:3):1, posIncrAtt.PositionIncrement, "all but first token must have position increment of 3");
            }
            Assert.IsFalse(stpf.IncrementToken());
        }
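In the setup for this helper (Code Example #19 below), every word whose index is not a multiple of 3 becomes a stop word, so only every third token survives; with increments enabled, each surviving token after the first therefore reports a position increment of 3 (the two skipped stop words plus itself), and with increments disabled every token reports 1.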
Code Example #12
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            //writer.infoStream = System.out;
            for (int i = 0; i < 1000; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc, null);
            }

            writer.Close();

            searcher = new IndexSearcher(directory, true, null);
        }
Code Example #13
        private void  VerifyVectors(TermFreqVector[] vectors, int num)
        {
            System.Text.StringBuilder temp  = new System.Text.StringBuilder();
            System.String[]           terms = null;
            for (int i = 0; i < vectors.Length; i++)
            {
                terms = vectors[i].GetTerms();
                for (int z = 0; z < terms.Length; z++)
                {
                    temp.Append(terms[z]);
                }
            }

            if (!English.IntToEnglish(num).Trim().Equals(temp.ToString().Trim()))
            {
                System.Console.Out.WriteLine("wrong term result");
            }
        }
Code Example #14
        public virtual void  TestFilterWorks()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < 500; i++)
            {
                Document document = new Document();
                document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(document, null);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, true, null);

            SpanTermQuery    query    = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
            SpanQueryFilter  filter   = new SpanQueryFilter(query);
            SpanFilterResult result   = filter.BitSpans(reader, null);
            DocIdSet         docIdSet = result.DocIdSet;

            Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
            AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
            var spans = result.Positions;

            Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
            int size = GetDocIdSetSize(docIdSet);

            Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
            for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();)
            {
                SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo)iterator.Current;
                Assert.IsTrue(info != null, "info is null and it shouldn't be");
                //The doc should indicate the bit is on
                AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
                //There should be two positions in each
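                // (each indexed document reads "<n> equals <n>", so the matching term
                // occurs at exactly two positions per document)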
                Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
            }
            reader.Close();
        }
Code Example #15
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory    directory = new RAMDirectory();
            PayloadAnalyzer analyzer  = new PayloadAnalyzer(this);
            IndexWriter     writer    = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetSimilarity(similarity);
            //writer.infoStream = System.out;
            for (int i = 0; i < 1000; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                System.String txt = English.IntToEnglish(i) + ' ' + English.IntToEnglish(i + 1);
                doc.Add(new Field("field2", txt, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            writer.Optimize();
            writer.Close();

            searcher            = new IndexSearcher(directory, true);
            searcher.Similarity = similarity;
        }
Code Example #16
        /// <summary> Sets up a RAMDirectory and adds documents (using English.IntToEnglish()) with three fields: FIELD, MULTI_FIELD, and NO_PAYLOAD_FIELD,
        /// and analyzes them using the PayloadAnalyzer
        /// </summary>
        /// <param name="similarity">The Similarity class to use in the Searcher
        /// </param>
        /// <param name="numDocs">The num docs to add
        /// </param>
        /// <returns> An IndexSearcher
        /// </returns>
        /// <throws>  IOException </throws>
        public virtual IndexSearcher SetUp(Similarity similarity, int numDocs)
        {
            RAMDirectory    directory = new RAMDirectory();
            PayloadAnalyzer analyzer  = new PayloadAnalyzer(this);
            IndexWriter     writer    = new IndexWriter(directory, analyzer, true);

            writer.SetSimilarity(similarity);
            //writer.infoStream = System.out;
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field(FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(MULTI_FIELD, English.IntToEnglish(i) + "  " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            //writer.optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(directory);

            searcher.SetSimilarity(similarity);
            return(searcher);
        }
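A hedged sketch of a call site for this helper (illustrative only: DefaultSimilarity and the query term are assumptions, FIELD is the constant referenced above):

            IndexSearcher searcher = SetUp(new DefaultSimilarity(), 100);
            TopDocs hits = searcher.Search(new TermQuery(new Term(FIELD, "one")), 10);
            // ... assert on hits.TotalHits / hits.ScoreDocs, then release the searcher
            searcher.Close();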
Code Example #17
        /// <summary> Not an explicit test, just useful to print out some info on performance
        ///
        /// </summary>
        /// <throws>  Exception </throws>
        public virtual void Performance()
        {
            int[] tokCount  = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
            int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
            for (int k = 0; k < tokCount.Length; k++)
            {
                System.Text.StringBuilder buffer = new System.Text.StringBuilder();
                System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
                for (int i = 0; i < tokCount[k]; i++)
                {
                    buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
                }
                //make sure we produce the same tokens
                TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
                TokenStream        sink      = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, 100));
                teeStream.ConsumeAllTokens();
                TokenStream    stream  = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), 100);
                ITermAttribute tfTok   = stream.AddAttribute <ITermAttribute>();
                ITermAttribute sinkTok = sink.AddAttribute <ITermAttribute>();
                for (int i = 0; stream.IncrementToken(); i++)
                {
                    Assert.IsTrue(sink.IncrementToken());
                    Assert.IsTrue(tfTok.Equals(sinkTok) == true, tfTok + " is not equal to " + sinkTok + " at token: " + i);
                }

                //simulate two fields, each being analyzed once, for 20 documents
                for (int j = 0; j < modCounts.Length; j++)
                {
                    int  tfPos = 0;
                    long start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    for (int i = 0; i < 20; i++)
                    {
                        stream = new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString())));
                        IPositionIncrementAttribute posIncrAtt = stream.GetAttribute <IPositionIncrementAttribute>();
                        while (stream.IncrementToken())
                        {
                            tfPos += posIncrAtt.PositionIncrement;
                        }
                        stream     = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                        posIncrAtt = stream.GetAttribute <IPositionIncrementAttribute>();
                        while (stream.IncrementToken())
                        {
                            tfPos += posIncrAtt.PositionIncrement;
                        }
                    }
                    long finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
                    int sinkPos = 0;
                    //simulate one field with one sink
                    start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    for (int i = 0; i < 20; i++)
                    {
                        teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
                        sink      = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                        IPositionIncrementAttribute posIncrAtt = teeStream.GetAttribute <IPositionIncrementAttribute>();
                        while (teeStream.IncrementToken())
                        {
                            sinkPos += posIncrAtt.PositionIncrement;
                        }
                        //System.out.println("Modulo--------");
                        posIncrAtt = sink.GetAttribute <IPositionIncrementAttribute>();
                        while (sink.IncrementToken())
                        {
                            sinkPos += posIncrAtt.PositionIncrement;
                        }
                    }
                    finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
                    Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
                }
                System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
            }
        }
Code Example #18
        /// <summary> Not an explicit test, just useful to print out some info on performance
        ///
        /// </summary>
        /// <throws>  Exception </throws>
        public virtual void  Performance()
        {
            int[] tokCount  = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
            int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
            for (int k = 0; k < tokCount.Length; k++)
            {
                System.Text.StringBuilder buffer = new System.Text.StringBuilder();
                System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
                for (int i = 0; i < tokCount[k]; i++)
                {
                    buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
                }
                //make sure we produce the same tokens
                ModuloSinkTokenizer sink  = new ModuloSinkTokenizer(this, tokCount[k], 100);
                Token       reusableToken = new Token();
                TokenStream stream        = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), sink);
                while (stream.Next(reusableToken) != null)
                {
                }
                stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), 100);
                System.Collections.IList tmp = new System.Collections.ArrayList();
                for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                {
                    tmp.Add(nextToken.Clone());
                }
                System.Collections.IList sinkList = sink.GetTokens();
                Assert.IsTrue(tmp.Count == sinkList.Count, "tmp Size: " + tmp.Count + " is not: " + sinkList.Count);
                for (int i = 0; i < tmp.Count; i++)
                {
                    Token tfTok   = (Token)tmp[i];
                    Token sinkTok = (Token)sinkList[i];
                    Assert.IsTrue(tfTok.Term().Equals(sinkTok.Term()) == true, tfTok.Term() + " is not equal to " + sinkTok.Term() + " at token: " + i);
                }
                //simulate two fields, each being analyzed once, for 20 documents

                for (int j = 0; j < modCounts.Length; j++)
                {
                    int  tfPos = 0;
                    long start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    for (int i = 0; i < 20; i++)
                    {
                        stream = new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString())));
                        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                        {
                            tfPos += nextToken.GetPositionIncrement();
                        }
                        stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                        {
                            tfPos += nextToken.GetPositionIncrement();
                        }
                    }
                    long finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
                    int sinkPos = 0;
                    //simulate one field with one sink
                    start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    for (int i = 0; i < 20; i++)
                    {
                        sink   = new ModuloSinkTokenizer(this, tokCount[k], modCounts[j]);
                        stream = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), sink);
                        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                        {
                            sinkPos += nextToken.GetPositionIncrement();
                        }
                        //System.out.println("Modulo--------");
                        stream = sink;
                        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                        {
                            sinkPos += nextToken.GetPositionIncrement();
                        }
                    }
                    finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
                    Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
                }
                System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
            }
        }
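Code Examples #17 and #18 are the same benchmark written against two generations of the analysis API: #17 uses the attribute-based TokenStream API (IncrementToken with ITermAttribute/IPositionIncrementAttribute) and TeeSinkTokenFilter, while #18 uses the older reusable-Token API (stream.Next(reusableToken)) with TeeTokenFilter and the ModuloSinkTokenizer sink; both verify that a tee/sink pair yields the same position-increment totals as tokenizing the field twice.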
Code Example #19
        public virtual void  TestStopPositons()
        {
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            System.Collections.Generic.List <string> a = new System.Collections.Generic.List <string>();
            for (int i = 0; i < 20; i++)
            {
                System.String w = English.IntToEnglish(i).Trim();
                sb.Append(w).Append(" ");
                if (i % 3 != 0)
                {
                    a.Add(w);
                }
            }
            Log(sb.ToString());
            System.String[] stopWords = (System.String[])a.ToArray();
            for (int i = 0; i < a.Count; i++)
            {
                Log("Stop: " + stopWords[i]);
            }
            var stopSet = StopFilter.MakeStopSet(stopWords);

            // with increments
            System.IO.StringReader reader = new System.IO.StringReader(sb.ToString());
            StopFilter             stpf   = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);

            DoTestStopPositons(stpf, true);
            // without increments
            reader = new System.IO.StringReader(sb.ToString());
            stpf   = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
            DoTestStopPositons(stpf, false);
            // with increments, concatenating two stop filters
            System.Collections.Generic.List <System.String> a0 = new System.Collections.Generic.List <System.String>();
            System.Collections.Generic.List <System.String> a1 = new System.Collections.Generic.List <System.String>();
            for (int i = 0; i < a.Count; i++)
            {
                if (i % 2 == 0)
                {
                    a0.Add(a[i]);
                }
                else
                {
                    a1.Add(a[i]);
                }
            }
            System.String[] stopWords0 = (System.String[])a0.ToArray();
            for (int i = 0; i < a0.Count; i++)
            {
                Log("Stop0: " + stopWords0[i]);
            }
            System.String[] stopWords1 = (System.String[])a1.ToArray();
            for (int i = 0; i < a1.Count; i++)
            {
                Log("Stop1: " + stopWords1[i]);
            }
            var stopSet0 = StopFilter.MakeStopSet(stopWords0);
            var stopSet1 = StopFilter.MakeStopSet(stopWords1);

            reader = new System.IO.StringReader(sb.ToString());
            StopFilter stpf0 = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet0);             // first part of the set

            stpf0.EnablePositionIncrements = true;
            StopFilter stpf01 = new StopFilter(false, stpf0, stopSet1);             // two stop filters concatenated!

            DoTestStopPositons(stpf01, true);
        }
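The final block chains two StopFilters (stpf01 wrapping stpf0), splitting the stop set in half between them; DoTestStopPositons expects the same increments as the single-filter case, demonstrating that position increments accumulate correctly across concatenated filters.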
Code Example #20
        public virtual void  runTest(Directory directory, MergeScheduler merger)
        {
            IndexWriter writer = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED, null);

            writer.SetMaxBufferedDocs(2);
            if (merger != null)
            {
                writer.SetMergeScheduler(merger, null);
            }

            for (int iter = 0; iter < NUM_ITER; iter++)
            {
                int iterFinal = iter;

                writer.MergeFactor = 1000;

                for (int i = 0; i < 200; i++)
                {
                    Document d = new Document();
                    d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
                    writer.AddDocument(d, null);
                }

                writer.MergeFactor = 4;
                //writer.setInfoStream(System.out);

                ThreadClass[] threads = new ThreadClass[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    int         iFinal      = i;
                    IndexWriter writerFinal = writer;
                    threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this);
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Join();
                }

                Assert.IsTrue(!failed);

                int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)));
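                // Breakdown: each iteration adds 200 docs up front; each of the NUM_THREADS
                // threads (see the Run() body in Code Example #4) performs NUM_ITER2 passes,
                // adding 17*(1+i) docs and deleting 9*(1+i), a net gain of 8*(1+i); summing
                // (1+i) over i = 0..NUM_THREADS-1 gives (NUM_THREADS/2)*(1+NUM_THREADS).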

                // System.out.println("TEST: now index=" + writer.segString());

                Assert.AreEqual(expectedDocCount, writer.MaxDoc());

                writer.Close();
                writer = new IndexWriter(directory, ANALYZER, false, IndexWriter.MaxFieldLength.UNLIMITED, null);
                writer.SetMaxBufferedDocs(2);

                IndexReader reader = IndexReader.Open(directory, true, null);
                Assert.IsTrue(reader.IsOptimized());
                Assert.AreEqual(expectedDocCount, reader.NumDocs());
                reader.Close();
            }
            writer.Close();
        }