public virtual void  TestFilterIndexReader_Renamed()
        {
            // Builds a three-document in-memory index, wraps its reader in TestReader and asserts on
            // the terms, the positions and the all-document postings that the wrapper exposes.
            RAMDirectory directory = new MockRAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document d1 = new Document();

            d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d1);

            Document d2 = new Document();

            d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d2);

            Document d3 = new Document();

            d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d3);

            writer.Close();

            IndexReader reader = new TestReader(IndexReader.Open(directory));

            Assert.IsTrue(reader.IsOptimized());

            // Every term the wrapped reader enumerates is expected to contain the letter 'e'.
            TermEnum terms = reader.Terms();

            while (terms.Next())
            {
                Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
            }
            terms.Close();

            // Every posting the wrapped reader returns for "one" is expected to have an odd doc id.
            TermPositions positions = reader.TermPositions(new Term("default", "one"));

            while (positions.Next())
            {
                Assert.IsTrue((positions.Doc() % 2) == 1);
            }
            // Release the enumerator like the other two; it was previously left open.
            positions.Close();

            int NUM_DOCS = 3;

            // A null term is expected to yield one posting per document, in doc-id order, with freq 1.
            TermDocs td = reader.TermDocs(null);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(td.Next());
                Assert.AreEqual(i, td.Doc());
                Assert.AreEqual(1, td.Freq());
            }
            td.Close();
            reader.Close();
            directory.Close();
        }
        public virtual void  TestAllTermDocs()
        {
            // Requests postings for a null term and expects exactly two of them:
            // doc ids 0 and 1 in that order, each reporting a frequency of 1.
            IndexReader indexReader = OpenReader();
            const int expectedDocs = 2;
            TermDocs allDocs = indexReader.TermDocs(null);

            int docId = 0;
            while (docId < expectedDocs)
            {
                Assert.IsTrue(allDocs.Next());
                Assert.AreEqual(docId, allDocs.Doc());
                Assert.AreEqual(1, allDocs.Freq());
                docId++;
            }

            allDocs.Close();
            indexReader.Close();
        }
示例#3
0
        /// <summary>
        /// Sums the per-document frequencies of <paramref name="term"/> in the
        /// <c>SandoField.Name</c> field over every posting returned by <c>Reader</c>.
        /// </summary>
        /// <param name="term">Term text to look up.</param>
        /// <returns>The accumulated frequency; 0 when no posting is returned.</returns>
        private int TfOfCorpus(string term)
        {
            int      tf       = 0;
            TermDocs termDocs = Reader.TermDocs(new Term(SandoField.Name.ToString(), term));

            if (termDocs != null)
            {
                try
                {
                    while (termDocs.Next())
                    {
                        tf += termDocs.Freq();
                    }
                }
                finally
                {
                    // Release the enumerator even if iteration throws; it was previously never closed.
                    termDocs.Close();
                }
            }
            return(tf);
        }
示例#4
0
        /// <summary>
        /// Gets the documents that contain the specified term.
        /// </summary>
        /// <param name="term">Term whose postings are enumerated.</param>
        /// <returns>
        /// One <c>TermDoc</c> per posting, carrying the doc id, the frequency, the term itself and
        /// the value returned by <c>GetNorm</c> for the term's field and that doc id.
        /// </returns>
        public IList <TermDoc> DocumentCount(Term term)
        {
            TermDocs       docs = open.Reader.TermDocs(term);
            List <TermDoc> list = new List <TermDoc>();

            try
            {
                while (docs.Next())
                {
                    TermDoc item = new TermDoc();
                    item.Freq = docs.Freq();
                    item.Doc  = docs.Doc();
                    item.Term = term;
                    item.Norm = GetNorm(open.Reader, term.Field(), item.Doc);
                    list.Add(item);
                }
            }
            finally
            {
                // Close the enumerator even when GetNorm or the enumeration throws.
                docs.Close();
            }
            return(list);
        }
示例#5
0
        private void _ShowTermDoc(TermDocs td)
        {
            // Fills the doc-number and term-frequency labels and the field view from the
            // posting that td currently points at.
            if (_luke.IndexReader != null)
            {
                try
                {
                    Document currentDoc = _luke.IndexReader.Document(td.Doc());

                    labelDocNum.Text   = td.Doc().ToString();
                    labelTermFreq.Text = td.Freq().ToString();

                    _ShowDocFields(td.Doc(), currentDoc);
                }
                catch (Exception ex)
                {
                    // Failures are reported on the status line instead of being propagated.
                    _luke.ShowStatus(ex.Message);
                }
            }
            else
            {
                // No index is open, so there is nothing to show.
                _luke.ShowStatus(_luke.resources.GetString("NoIndex"));
            }
        }
示例#6
0
        /// <summary>
        /// Computes a dispersion score for <paramref name="term"/>. Each posting gets the weight
        /// <c>log(1 + freq) * Idf(term) / n</c>, where n is the number of postings; the result is
        /// the mean absolute deviation of those weights from their mean.
        /// </summary>
        /// <param name="term">Term text looked up in the <c>SandoField.Name</c> field.</param>
        /// <returns>The mean absolute deviation of the weights; 0.0 when the term has no postings.</returns>
        private double Var(string term)
        {
            List <double> freqs    = new List <double>();
            TermDocs      termDocs = Reader.TermDocs(new Term(SandoField.Name.ToString(), term));

            if (termDocs != null)
            {
                try
                {
                    while (termDocs.Next())
                    {
                        freqs.Add(termDocs.Freq());
                    }
                }
                finally
                {
                    // Release the enumerator; it was previously never closed.
                    termDocs.Close();
                }
            }

            double deviation = 0.0;
            int    num_docs  = freqs.Count;

            if (num_docs > 0)
            {
                // Evaluated once instead of once per posting.
                // NOTE(review): assumes Idf(term) is a pure function of the term; confirm.
                var idf = Idf(term);

                List <double> weights = new List <double>(num_docs);
                foreach (double freq in freqs)
                {
                    weights.Add((Math.Log(1 + freq) * idf) / num_docs);
                }

                double avg_w = 0.0;
                foreach (double w in weights)
                {
                    avg_w += w;
                }
                avg_w = avg_w / num_docs;

                foreach (double w in weights)
                {
                    deviation += Math.Abs(w - avg_w);
                }
                deviation = deviation / num_docs;
            }
            return(deviation);
        }
示例#7
0
        public virtual void  TestAllTermDocs()
        {
            // Combines the readers of both directories in a ParallelReader, requests postings
            // for a null term and expects doc ids 0 and 1 in order, each with a frequency of 1.
            Directory firstDir  = GetDir1();
            Directory secondDir = GetDir2();

            ParallelReader parallel = new ParallelReader();
            parallel.Add(IndexReader.Open(firstDir));
            parallel.Add(IndexReader.Open(secondDir));

            const int expectedDocs = 2;
            TermDocs allDocs = parallel.TermDocs(null);

            int docId = 0;
            while (docId < expectedDocs)
            {
                Assert.IsTrue(allDocs.Next());
                Assert.AreEqual(docId, allDocs.Doc());
                Assert.AreEqual(1, allDocs.Freq());
                docId++;
            }

            allDocs.Close();
            parallel.Close();
            firstDir.Close();
            secondDir.Close();
        }
        ////////////////////////////////////////////////////////////////

        /// <summary>
        /// Adds a tf * idf contribution to the score of every hit in <paramref name="hits_by_id"/>
        /// whose doc id appears in the postings of a term from <paramref name="term_list"/>.
        /// </summary>
        /// <param name="hits_by_id">Hits keyed by doc id; matching entries have their Score increased.</param>
        /// <param name="reader">Reader that supplies postings, document frequencies and MaxDoc.</param>
        /// <param name="term_list">Collection of <c>Term</c> instances to score against.</param>
        private static void ScoreHits(Dictionary <int, Hit> hits_by_id,
                                      IndexReader reader,
                                      ICollection term_list)
        {
            LNS.Similarity scorer   = LNS.Similarity.GetDefault();
            TermDocs       postings = reader.TermDocs();

            foreach (Term term in term_list)
            {
                double idf = scorer.Idf(reader.DocFreq(term), reader.MaxDoc());

                // Scanning a term's postings stops once as many matches as there are hits were seen.
                int remaining = hits_by_id.Count;

                postings.Seek(term);
                while (postings.Next() && remaining > 0)
                {
                    Hit matched;
                    if (!hits_by_id.TryGetValue(postings.Doc(), out matched))
                    {
                        continue;
                    }

                    double tf = scorer.Tf(postings.Freq());
                    matched.Score += tf * idf;
                    remaining--;
                }
            }

            postings.Close();
        }
示例#9
0
 /// <summary>
 /// Returns the frequency reported by the wrapped <c>termDocs</c> instance.
 /// </summary>
 /// <returns>The value of <c>termDocs.Freq()</c>.</returns>
 public virtual int Freq()
 {
     return(termDocs.Freq());
 }
示例#10
0
 /// <summary>
 /// Returns the frequency reported by the <c>current</c> enumerator this instance delegates to.
 /// </summary>
 /// <returns>The value of <c>current.Freq()</c>.</returns>
 public virtual int Freq()
 {
     return(current.Freq());
 }
示例#11
0
        /// <summary>
        /// Indexes four known documents and cross-checks three views of them: the postings
        /// (TermEnum/TermDocs) against the vectors returned by GetTermFreqVector, the hit order of
        /// a query for "chocolate", and the term vector of the second hit against hand-counted
        /// frequencies.
        /// </summary>
        public virtual void  TestKnownSetOfDocuments()
        {
            System.String test1 = "eating chocolate in a computer lab";                                             //6 terms
            System.String test2 = "computer in a computer lab";                                                     //5 terms
            System.String test3 = "a chocolate lab grows old";                                                      //5 terms
            System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms

            // Expected frequency of each of the 10 distinct terms in test4.
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["old"]       = 1;

            Document testDoc1 = new Document();

            SetupDoc(testDoc1, test1);
            Document testDoc2 = new Document();

            SetupDoc(testDoc2, test2);
            Document testDoc3 = new Document();

            SetupDoc(testDoc3, test3);
            Document testDoc4 = new Document();

            SetupDoc(testDoc4, test4);

            Directory dir = new RAMDirectory();

            try
            {
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1);
                writer.AddDocument(testDoc2);
                writer.AddDocument(testDoc3);
                writer.AddDocument(testDoc4);
                writer.Close();
                IndexSearcher knownSearcher = new IndexSearcher(dir);
                TermEnum      termEnum      = knownSearcher.reader.Terms();
                TermDocs      termDocs      = knownSearcher.reader.TermDocs();

                Similarity sim = knownSearcher.GetSimilarity();
                while (termEnum.Next())
                {
                    Term term = termEnum.Term();
                    termDocs.Seek(term);
                    while (termDocs.Next())
                    {
                        int docId = termDocs.Doc();
                        int freq  = termDocs.Freq();
                        TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                        // Assert before the first dereference, so a missing vector fails the
                        // assertion instead of raising a NullReferenceException.
                        Assert.IsTrue(vector != null);

                        // The similarity values are computed but never asserted on.
                        float tf  = sim.Tf(freq);
                        float idf = sim.Idf(term, knownSearcher);
                        //This is fine since we don't have stop words
                        float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);

                        // The posting frequency must agree with the frequency stored in the vector.
                        System.String[] vTerms = vector.GetTerms();
                        int[]           freqs  = vector.GetTermFrequencies();
                        for (int i = 0; i < vTerms.Length; i++)
                        {
                            if (term.Text().Equals(vTerms[i]))
                            {
                                Assert.IsTrue(freqs[i] == freq);
                            }
                        }
                    }
                }
                // Both enumerators are finished with; they were previously left open.
                termDocs.Close();
                termEnum.Close();

                Query query = new TermQuery(new Term("Field", "chocolate"));
                Hits  hits  = knownSearcher.Search(query);
                //doc 3 should be the first hit b/c it is the shortest match
                Assert.IsTrue(hits.Length() == 3);
                float score = hits.Score(0);

                Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString()));
                Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString()));
                Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString()));
                TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
                Assert.IsTrue(vector2 != null);
                System.String[] terms  = vector2.GetTerms();
                int[]           freqs2 = vector2.GetTermFrequencies();
                Assert.IsTrue(terms != null && terms.Length == 10);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    int freq = freqs2[i];
                    Assert.IsTrue(test4.IndexOf(term) != -1);

                    // Check that the entry exists before unboxing it; unboxing a missing (null)
                    // entry would throw before the assertion could report the failure.
                    System.Object expectedFreq = test4Map[term];
                    Assert.IsTrue(expectedFreq != null);
                    Assert.IsTrue((System.Int32)expectedFreq == freq);
                }
                knownSearcher.Close();
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.IsTrue(false);
            }
        }
示例#12
0
 /// <summary>
 /// Returns the frequency reported by the wrapped <c>in_Renamed</c> instance.
 /// </summary>
 /// <returns>The value of <c>in_Renamed.Freq()</c>.</returns>
 public virtual int Freq()
 {
     return(in_Renamed.Freq());
 }
示例#13
0
        /// <summary>
        /// Indexes four known documents and cross-checks several views of them: the postings
        /// (TermEnum/TermDocs) against the vectors returned by GetTermFreqVector, the hit order of
        /// a query for "chocolate", the term vector of the second hit against hand-counted
        /// frequencies, and the output of the two sorted term-vector mappers for that hit.
        /// </summary>
        public virtual void  TestKnownSetOfDocuments()
        {
            System.String test1 = "eating chocolate in a computer lab";                                             //6 terms
            System.String test2 = "computer in a computer lab";                                                     //5 terms
            System.String test3 = "a chocolate lab grows old";                                                      //5 terms
            System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms

            // Expected frequency of each of the 10 distinct terms in test4.
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["old"]       = 1;

            Document testDoc1 = new Document();

            SetupDoc(testDoc1, test1);
            Document testDoc2 = new Document();

            SetupDoc(testDoc2, test2);
            Document testDoc3 = new Document();

            SetupDoc(testDoc3, test3);
            Document testDoc4 = new Document();

            SetupDoc(testDoc4, test4);

            Directory dir = new MockRAMDirectory();

            try
            {
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1);
                writer.AddDocument(testDoc2);
                writer.AddDocument(testDoc3);
                writer.AddDocument(testDoc4);
                writer.Close();
                IndexSearcher knownSearcher = new IndexSearcher(dir);
                TermEnum      termEnum      = knownSearcher.reader_ForNUnit.Terms();
                TermDocs      termDocs      = knownSearcher.reader_ForNUnit.TermDocs();

                Similarity sim = knownSearcher.GetSimilarity();
                while (termEnum.Next())
                {
                    Term term = termEnum.Term();
                    termDocs.Seek(term);
                    while (termDocs.Next())
                    {
                        int docId = termDocs.Doc();
                        int freq  = termDocs.Freq();
                        TermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field");
                        // Assert before the first dereference, so a missing vector fails the
                        // assertion instead of raising a NullReferenceException.
                        Assert.IsTrue(vector != null);

                        // The similarity values are computed but never asserted on.
                        float tf  = sim.Tf(freq);
                        float idf = sim.Idf(term, knownSearcher);
                        //This is fine since we don't have stop words
                        float lNorm = sim.LengthNorm("field", vector.GetTerms().Length);

                        // The posting frequency must agree with the frequency stored in the vector.
                        System.String[] vTerms = vector.GetTerms();
                        int[]           freqs  = vector.GetTermFrequencies();
                        for (int i = 0; i < vTerms.Length; i++)
                        {
                            if (term.Text().Equals(vTerms[i]))
                            {
                                Assert.IsTrue(freqs[i] == freq);
                            }
                        }
                    }
                }
                // Both enumerators are finished with; they were previously left open.
                termDocs.Close();
                termEnum.Close();

                Query      query = new TermQuery(new Term("field", "chocolate"));
                ScoreDoc[] hits  = knownSearcher.Search(query, null, 1000).scoreDocs;
                //doc 3 should be the first hit b/c it is the shortest match
                Assert.IsTrue(hits.Length == 3);
                float score = hits[0].score;

                Assert.IsTrue(hits[0].doc == 2);
                Assert.IsTrue(hits[1].doc == 3);
                Assert.IsTrue(hits[2].doc == 0);
                TermFreqVector vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, "field");
                Assert.IsTrue(vector2 != null);
                System.String[] terms  = vector2.GetTerms();
                int[]           freqs2 = vector2.GetTermFrequencies();
                Assert.IsTrue(terms != null && terms.Length == 10);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    int freq = freqs2[i];
                    Assert.IsTrue(test4.IndexOf(term) != -1);

                    // Check for the entry explicitly instead of catching the exception that
                    // unboxing a missing (null) entry would raise.
                    System.Object expected = test4Map[term];
                    Assert.IsTrue(expected != null);
                    Assert.IsTrue((System.Int32)expected == freq);
                }

                // Mapper that collapses all fields: entries must come out sorted by descending frequency.
                SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
                knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, mapper);
                System.Collections.Generic.SortedDictionary <object, object> vectorEntrySet = mapper.GetTermVectorEntrySet();
                Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10);
                TermVectorEntry last = null;
                foreach (TermVectorEntry tve in vectorEntrySet.Keys)
                {
                    // The checks need a predecessor, so the first entry is only recorded.
                    if (tve != null && last != null)
                    {
                        Assert.IsTrue(last.GetFrequency() >= tve.GetFrequency(), "terms are not properly sorted");
                        System.Int32 expectedFreq = (System.Int32)test4Map[tve.GetTerm()];
                        //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
                        Assert.IsTrue(tve.GetFrequency() == 2 * expectedFreq, "Frequency is not correct:");
                    }
                    last = tve;
                }

                // Mapper that keeps fields apart: two fields are expected, with 10 entries for "field".
                FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
                knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, fieldMapper);
                System.Collections.IDictionary map = fieldMapper.GetFieldToTerms();
                Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2);
                vectorEntrySet = (System.Collections.Generic.SortedDictionary <Object, Object>)map["field"];
                Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be");
                Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10);
                knownSearcher.Close();
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.IsTrue(false);
            }
        }
示例#14
0
        /// <summary>
        /// Asserts that two readers hold equivalent content even when their doc ids differ.
        /// Documents are paired through the terms of <paramref name="idField"/> (each id term is
        /// asserted to match at most one document per reader); stored fields, term vectors and
        /// finally the postings of every term are then compared.
        /// </summary>
        /// <param name="r1">First reader; its doc ids are the reference id space.</param>
        /// <param name="r2">Second reader; its doc ids are mapped onto those of <paramref name="r1"/>.</param>
        /// <param name="idField">Name of the field whose terms identify documents.</param>
        public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
        {
            Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
            bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());

            int[] r2r1 = new int[r2.MaxDoc()];             // r2 id to r1 id mapping

            TermDocs termDocs1 = r1.TermDocs();
            TermDocs termDocs2 = r2.TermDocs();

            // create mapping from id2 space to id1 space based on idField
            idField = StringHelper.Intern(idField);
            TermEnum termEnum = r1.Terms(new Term(idField, ""));

            do
            {
                Term term = termEnum.Term();
                // Reference comparison: idField was interned above, so leaving the field shows
                // up as a different string instance.
                if (term == null || (System.Object)term.Field() != (System.Object)idField)
                {
                    break;
                }

                termDocs1.Seek(termEnum);
                if (!termDocs1.Next())
                {
                    // This doc is deleted and wasn't replaced
                    termDocs2.Seek(termEnum);
                    Assert.IsFalse(termDocs2.Next());
                    continue;
                }

                int id1 = termDocs1.Doc();
                Assert.IsFalse(termDocs1.Next());

                termDocs2.Seek(termEnum);
                Assert.IsTrue(termDocs2.Next());
                int id2 = termDocs2.Doc();
                Assert.IsFalse(termDocs2.Next());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (System.Exception)
                {
                    System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
                    System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
                    // Bare rethrow keeps the original stack trace of the failure.
                    throw;
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
                }
                catch (System.Exception)
                {
                    System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
                    System.Console.Out.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        for (int i = 0; i < tv1.Length; i++)
                        {
                            System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
                        }
                    }

                    TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
                    System.Console.Out.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        for (int i = 0; i < tv2.Length; i++)
                        {
                            System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
                        }
                    }

                    // Bare rethrow keeps the original stack trace of the failure.
                    throw;
                }
            }while (termEnum.Next());

            termEnum.Close();

            // Verify postings
            TermEnum termEnum1 = r1.Terms(new Term("", ""));
            TermEnum termEnum2 = r2.Terms(new Term("", ""));

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs()];
            long[] info2 = new long[r2.NumDocs()];

            for (; ;)
            {
                Term term1, term2;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1  = 0;
                    term1 = termEnum1.Term();
                    if (term1 == null)
                    {
                        break;
                    }
                    termDocs1.Seek(termEnum1);
                    while (termDocs1.Next())
                    {
                        int d1 = termDocs1.Doc();
                        int f1 = termDocs1.Freq();
                        info1[len1] = (((long)d1) << 32) | f1;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                    if (!termEnum1.Next())
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2  = 0;
                    term2 = termEnum2.Term();
                    if (term2 == null)
                    {
                        break;
                    }
                    termDocs2.Seek(termEnum2);
                    while (termDocs2.Next())
                    {
                        int d2 = termDocs2.Doc();
                        int f2 = termDocs2.Freq();
                        // Translate the r2 doc id into r1's id space before packing.
                        info2[len2] = (((long)r2r1[d2]) << 32) | f2;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                    if (!termEnum2.Next())
                    {
                        break;
                    }
                }

                if (!hasDeletes)
                {
                    Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0)
                {
                    break;                     // no more terms
                }
                Assert.AreEqual(term1, term2);

                // sort info2 to get it into ascending docid
                System.Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i]);
                }

                termEnum1.Next();
                termEnum2.Next();
            }

            // Release the enumerators once the comparison is complete; they were previously left open.
            termEnum1.Close();
            termEnum2.Close();
            termDocs1.Close();
            termDocs2.Close();
        }
        /// <summary>
        /// Exercises TermDocs.Next and TermDocs.SkipTo on a single optimized index whose postings
        /// for three terms are shorter than, equal to and longer than the assumed skip interval of 16.
        /// </summary>
        /// <param name="indexDivisor">Passed through unchanged as the last argument of IndexReader.Open.</param>
        public virtual void  testSkipTo(int indexDivisor)
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            // 10 documents for "aaa"; the assertions below expect them at doc ids 0..9.
            // NOTE(review): AddDoc presumably stores the text in the "content" field; confirm against the helper.
            Term ta = new Term("content", "aaa");

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, "aaa aaa aaa aaa");
            }

            // 16 documents for "bbb"; the assertions below expect them at doc ids 10..25.
            Term tb = new Term("content", "bbb");

            for (int i = 0; i < 16; i++)
            {
                AddDoc(writer, "bbb bbb bbb bbb");
            }

            // 50 documents for "ccc"; the assertions below expect them at doc ids 26..75.
            Term tc = new Term("content", "ccc");

            for (int i = 0; i < 50; i++)
            {
                AddDoc(writer, "ccc ccc ccc ccc");
            }

            // assure that we deal with a single segment
            writer.Optimize();
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, null, true, indexDivisor);

            // One cursor is reused for all sections; each section starts by seeking to its term.
            TermDocs tdocs = reader.TermDocs();

            // without optimization (assumption skipInterval == 16)

            // with next
            tdocs.Seek(ta);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(0, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(1, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            // The target is below the current doc (1): the cursor is still expected to move on to doc 2.
            Assert.IsTrue(tdocs.SkipTo(0));
            Assert.AreEqual(2, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(4));
            Assert.AreEqual(4, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(9));
            Assert.AreEqual(9, tdocs.Doc());
            // Doc 10 belongs to "bbb", so the "aaa" postings are exhausted.
            Assert.IsFalse(tdocs.SkipTo(10));

            // without next
            tdocs.Seek(ta);
            Assert.IsTrue(tdocs.SkipTo(0));
            Assert.AreEqual(0, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(4));
            Assert.AreEqual(4, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(9));
            Assert.AreEqual(9, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(10));

            // exactly skipInterval documents and therefore with optimization

            // with next
            tdocs.Seek(tb);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(10, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(11, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            // The target is below the current doc (11): the cursor is expected to move on to doc 12.
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(12, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(15));
            Assert.AreEqual(15, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(24));
            Assert.AreEqual(24, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(25));
            Assert.AreEqual(25, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(26));

            // without next
            tdocs.Seek(tb);
            // The target is below the first "bbb" doc: the cursor is expected to land on doc 10.
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(10, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(15));
            Assert.AreEqual(15, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(24));
            Assert.AreEqual(24, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(25));
            Assert.AreEqual(25, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(26));

            // much more than skipInterval documents and therefore with optimization

            // with next
            tdocs.Seek(tc);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(26, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(27, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            // The target is below the current doc (27): the cursor is expected to move on to doc 28.
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(28, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(40));
            Assert.AreEqual(40, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(57));
            Assert.AreEqual(57, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(74));
            Assert.AreEqual(74, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(75));
            Assert.AreEqual(75, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(76));

            //without next
            tdocs.Seek(tc);
            // The target is below the first "ccc" doc: the cursor is expected to land on doc 26.
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(26, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(40));
            Assert.AreEqual(40, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(57));
            Assert.AreEqual(57, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(74));
            Assert.AreEqual(74, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(75));
            Assert.AreEqual(75, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(76));

            tdocs.Close();
            reader.Close();
            dir.Close();
        }
示例#16
0
        private void _ShowTermDoc(TermDocs td)
        {
            // Guard: with no index open, report that on the status line and do nothing else.
            if (null == _luke.IndexReader)
            {
                _luke.ShowStatus(_luke.resources.GetString("NoIndex"));
                return;
            }

            try
            {
                // Load the document the cursor points at, then mirror the cursor state in the UI.
                Document shownDoc = _luke.IndexReader.Document(td.Doc());

                labelDocNum.Text = td.Doc().ToString();
                labelTermFreq.Text = td.Freq().ToString();

                _ShowDocFields(td.Doc(), shownDoc);
            }
            catch (Exception failure)
            {
                // Errors are shown to the user via the status line rather than rethrown.
                _luke.ShowStatus(failure.Message);
            }
        }