Ejemplo n.º 1
0
        /// <summary>
        /// Builds a Hit from a primary-index document and, when a secondary
        /// reader is supplied, merges in the properties of the secondary
        /// document indexed under the same "Uri" term.
        /// </summary>
        /// <param name="primary_doc">Document taken from the primary index.</param>
        /// <param name="secondary_reader">Reader over the secondary index, or null to skip the merge.</param>
        /// <param name="term_docs">Term-doc cursor used to look up the Uri term; presumably opened on secondary_reader (verify against caller).</param>
        /// <param name="fields">Optional field selector for loading the secondary document; null loads it without a selector.</param>
        /// <returns>The hit for primary_doc, with secondary properties merged in when a matching secondary document exists.</returns>
        private static Hit CreateHit(Document primary_doc,
                                     IndexReader secondary_reader,
                                     TermDocs term_docs,
                                     FieldSelector fields)
        {
            Hit hit = DocumentToHit(primary_doc);

            if (secondary_reader == null)
            {
                return hit;
            }

            // Get the stringified version of the URI
            // exactly as it comes out of the index.
            Term term = new Term("Uri", primary_doc.Get("Uri"));

            term_docs.Seek(term);

            // Move to the first (and expected only) matching term doc.
            // If nothing matches, Doc() would not refer to a valid document,
            // so return the hit built from the primary document alone.
            if (!term_docs.Next())
            {
                return hit;
            }

            int secondary_id = term_docs.Doc();
            Document secondary_doc =
                (fields == null) ?
                secondary_reader.Document(secondary_id) :
                secondary_reader.Document(secondary_id, fields);

            // If we are using the secondary index, now we need to
            // merge the properties from the secondary index
            AddPropertiesToHit(hit, secondary_doc, false);

            return hit;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Computes the bit set of documents in <paramref name="reader"/> that pass this revision filter.
        /// </summary>
        /// <remarks>
        /// When either _revFirst or _revLast equals All, every bit is set.
        /// Otherwise a bit is set for each document listed under the FieldName.RevisionLast
        /// terms enumerated from _revFirst onward. Unless _revLast equals Head, bits are then
        /// cleared for each document listed under the "rev_first" terms enumerated from
        /// _revLast + 1 onward.
        /// </remarks>
        /// <param name="reader">Index reader to evaluate the filter against.</param>
        /// <returns>A BitArray of length reader.MaxDoc() with one bit per document number.</returns>
        public override BitArray Bits(IndexReader reader)
        {
            // reader.GetVersion could be used to cache the result per reader.

            if (_revFirst == All || _revLast == All) // optimization
            {
                return new BitArray(reader.MaxDoc(), true);
            }

            BitArray last_bits = new BitArray(reader.MaxDoc(), false);
            TermDocs d = reader.TermDocs();

            try
            {
                // Note: TermEnum.SkipTo was tried here originally and found to be extremely slow.
                MarkDocsFromTerm(reader, d, new Term(FieldName.RevisionLast, _revFirst.ToString(RevFormat)), last_bits, true);

                // optimization, skip if we are just using the head revision
                if (_revLast == Head)
                {
                    return last_bits;
                }

                BitArray first_bits = new BitArray(reader.MaxDoc(), true);

                MarkDocsFromTerm(reader, d, new Term("rev_first", (_revLast + 1).ToString(RevFormat)), first_bits, false);

                return last_bits.And(first_bits);
            }
            finally
            {
                // The term-doc cursor was previously never released.
                d.Close();
            }
        }

        /// <summary>
        /// Walks the term enumeration starting at <paramref name="start"/> for as long as the
        /// terms stay in the same field, writing <paramref name="value"/> into
        /// <paramref name="bits"/> for every document listed under those terms.
        /// </summary>
        private static void MarkDocsFromTerm(IndexReader reader, TermDocs docs, Term start, BitArray bits, bool value)
        {
            TermEnum terms = reader.Terms(start);

            try
            {
                if (terms.Term() == null)
                {
                    return;
                }

                string field = start.Field();

                while (terms.Term().Field() == field)
                {
                    docs.Seek(terms);
                    while (docs.Next())
                    {
                        bits[docs.Doc()] = value;
                    }
                    if (!terms.Next())
                    {
                        break;
                    }
                }
            }
            finally
            {
                terms.Close();
            }
        }
Ejemplo n.º 3
0
        /// <summary>Deletes every document that has the given <code>term</code> indexed.
        /// This is handy when a document field holds a unique ID string for the
        /// document: to delete such a document, build a term from that field and
        /// the unique ID string as its text, and pass it to this method.
        /// See <c>DeleteDocument(int)</c> for information about when this deletion
        /// becomes effective.
        /// </summary>
        /// <param name="term">Term identifying the documents to delete.</param>
        /// <returns> the number of documents deleted
        /// </returns>
        /// <throws>  StaleReaderException if the index has changed since this reader was opened </throws>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  LockObtainFailedException if another writer has this index open
        /// (<code>write.lock</code> could not be obtained) </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public int DeleteDocuments(Term term)
        {
            EnsureOpen();

            TermDocs matches = TermDocs(term);
            if (matches == null)
            {
                return 0;
            }

            int deleted = 0;
            try
            {
                // The counter advances only after a document was deleted successfully.
                for (; matches.Next(); deleted++)
                {
                    DeleteDocument(matches.Doc());
                }
            }
            finally
            {
                matches.Close();
            }

            return deleted;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Indexes <paramref name="ndocs"/> documents into a RAMDirectory via AddDocs, then
        /// iterates the TermDocs of the term ("foo", "val") <paramref name="iter"/> times,
        /// printing the elapsed milliseconds of both phases to the console.
        /// </summary>
        /// <param name="iter">Number of times the term's documents are iterated.</param>
        /// <param name="ndocs">Number of documents passed to AddDocs.</param>
        /// <param name="maxTF">Passed through to AddDocs.</param>
        /// <param name="percentDocs">Passed through to AddDocs.</param>
        /// <returns>The sum of all document numbers visited across all iterations.</returns>
        public virtual int doTest(int iter, int ndocs, int maxTF, float percentDocs)
        {
            Directory dir = new RAMDirectory();

            long start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);

            AddDocs(dir, ndocs, "foo", "val", maxTF, percentDocs);
            long end = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);

            System.Console.Out.WriteLine("milliseconds for creation of " + ndocs + " docs = " + (end - start));

            IndexReader reader = IndexReader.Open(dir);

            try
            {
                TermEnum tenum = reader.Terms(new Term("foo", "val"));
                TermDocs tdocs = reader.TermDocs();

                try
                {
                    start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);

                    int ret = 0;

                    for (int i = 0; i < iter; i++)
                    {
                        // Re-seek to the same term on every pass so each pass restarts the iteration.
                        tdocs.Seek(tenum);
                        while (tdocs.Next())
                        {
                            ret += tdocs.Doc();
                        }
                    }

                    end = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                    System.Console.Out.WriteLine("milliseconds for " + iter + " TermDocs iteration: " + (end - start));

                    return ret;
                }
                finally
                {
                    // These cursors were previously left open.
                    tdocs.Close();
                    tenum.Close();
                }
            }
            finally
            {
                reader.Close();
                dir.Close();
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Looks up the first document indexed under the term returned by
        /// GetPropertyId(property) and returns its ValueField value.
        /// </summary>
        /// <param name="reader">Index reader to search.</param>
        /// <param name="property">Property name handed to GetPropertyId.</param>
        /// <returns>The stored ValueField of the first matching document, or null when no document matches.</returns>
        static string GetProperty(IndexReader reader, string property)
        {
            TermDocs td = reader.TermDocs(GetPropertyId(property));

            try
            {
                if (!td.Next())
                {
                    return null;
                }
                return reader.Document(td.Doc()).Get(ValueField);
            }
            finally
            {
                // The cursor was previously never closed on either return path.
                td.Close();
            }
        }
        /// <summary>
        /// Indexes three small documents, wraps the reader in a TestReader and checks that
        /// the wrapped reader only exposes terms containing 'e', only odd document numbers
        /// for the term "one", and all three documents when TermDocs is opened with a null term.
        /// </summary>
        public virtual void  TestFilterIndexReader_Renamed()
        {
            RAMDirectory directory = new MockRAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document d1 = new Document();

            d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d1);

            Document d2 = new Document();

            d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d2);

            Document d3 = new Document();

            d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d3);

            writer.Close();

            IndexReader reader = new TestReader(IndexReader.Open(directory));

            Assert.IsTrue(reader.IsOptimized());

            TermEnum terms = reader.Terms();

            while (terms.Next())
            {
                Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
            }
            terms.Close();

            TermPositions positions = reader.TermPositions(new Term("default", "one"));

            while (positions.Next())
            {
                Assert.IsTrue((positions.Doc() % 2) == 1);
            }
            // Release the positions cursor like the other cursors in this test.
            positions.Close();

            int NUM_DOCS = 3;

            // A null term asks the reader for all documents.
            TermDocs td = reader.TermDocs(null);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(td.Next());
                Assert.AreEqual(i, td.Doc());
                Assert.AreEqual(1, td.Freq());
            }
            td.Close();
            reader.Close();
            directory.Close();
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Loads every document the given cursor still has to offer and returns them
        /// ordered by a DocumentVersionComparer.
        /// </summary>
        /// <param name="termDocs">Cursor to drain; it is advanced to its end but not closed here.</param>
        /// <param name="readerFrame">Frame whose IndexReader is used to load each document.</param>
        /// <returns>The loaded documents, sorted.</returns>
        private static List <Document> GetDocumentsFromTermDocs(TermDocs termDocs, IndexReaderFrame readerFrame)
        {
            List <Document> loaded = new List <Document>();

            for (bool more = termDocs.Next(); more; more = termDocs.Next())
            {
                loaded.Add(readerFrame.IndexReader.Document(termDocs.Doc()));
            }

            DocumentVersionComparer byVersion = new DocumentVersionComparer();
            loaded.Sort(byVersion);

            return loaded;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Displays the document the cursor currently points at: its number and
        /// term frequency go into the labels, its fields into the field view.
        /// Shows the "NoIndex" status when no index is open, and the exception
        /// message when anything fails along the way.
        /// </summary>
        /// <param name="td">Cursor positioned on the document to show.</param>
        private void _ShowTermDoc(TermDocs td)
        {
            if (_luke.IndexReader != null)
            {
                try
                {
                    Document current = _luke.IndexReader.Document(td.Doc());

                    labelDocNum.Text   = td.Doc().ToString();
                    labelTermFreq.Text = td.Freq().ToString();

                    _ShowDocFields(td.Doc(), current);
                }
                catch (Exception ex)
                {
                    // Any failure is reported on the status line rather than propagated.
                    _luke.ShowStatus(ex.Message);
                }
            }
            else
            {
                _luke.ShowStatus(_luke.resources.GetString("NoIndex"));
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds a bit set with one bit set for every document indexed under the term ("score", "5").
        /// </summary>
        /// <param name="reader">Index reader to evaluate the filter against.</param>
        /// <returns>A BitArray of length reader.MaxDoc() marking the matching documents.</returns>
        public override BitArray Bits(IndexReader reader)
        {
            BitArray bitArray = new BitArray(reader.MaxDoc());
            TermDocs termDocs = reader.TermDocs(new Term("score", "5"));

            try
            {
                while (termDocs.Next())
                {
                    bitArray.Set(termDocs.Doc(), true);
                }
            }
            finally
            {
                // The cursor was previously never closed.
                termDocs.Close();
            }

            return bitArray;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Counts the documents the reader lists under the given term.
        /// </summary>
        /// <param name="t">Term to look up.</param>
        /// <param name="r">Reader to look the term up in.</param>
        /// <returns>The number of documents iterated for the term.</returns>
        public static int Count(Term t, IndexReader r)
        {
            int      count = 0;
            TermDocs td    = r.TermDocs(t);

            try
            {
                while (td.Next())
                {
                    // The result is unused; the call is kept from the original,
                    // presumably to exercise Doc() on every entry.
                    td.Doc();
                    count++;
                }
            }
            finally
            {
                // Close even when iteration throws.
                td.Close();
            }
            return count;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Builds a bit set in which, for every term of the field <c>fieldName</c>, exactly one
        /// of the documents listed under that term is set: the first one when keepMode is
        /// KM_USE_FIRST_OCCURRENCE, otherwise the last one.
        /// </summary>
        /// <param name="reader">Index reader to scan.</param>
        /// <returns>A bit set sized to reader.MaxDoc() with the kept documents set.</returns>
        private OpenBitSet CorrectBits(IndexReader reader)
        {
            OpenBitSet bits      = new OpenBitSet(reader.MaxDoc()); //assume all are INvalid
            Term       startTerm = new Term(fieldName);
            TermEnum   te        = reader.Terms(startTerm);

            if (te == null)
            {
                return bits;
            }

            try
            {
                Term currTerm = te.Term();
                while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
                {
                    //set non duplicates
                    TermDocs td = reader.TermDocs(currTerm);
                    try
                    {
                        if (td.Next())
                        {
                            if (keepMode == KM_USE_FIRST_OCCURRENCE)
                            {
                                bits.Set(td.Doc());
                            }
                            else
                            {
                                int lastDoc;
                                do
                                {
                                    lastDoc = td.Doc();
                                } while (td.Next());
                                bits.Set(lastDoc);
                            }
                        }
                    }
                    finally
                    {
                        // One cursor is opened per term; previously none were closed.
                        td.Close();
                    }

                    if (!te.Next())
                    {
                        break;
                    }
                    currTerm = te.Term();
                }
            }
            finally
            {
                te.Close();
            }
            return bits;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Deletes all documents indexed under any of the specified terms.
        /// The write lock is acquired once for the whole batch (when this reader
        /// owns the directory) and released again if the reader has no changes.
        /// </summary>
        /// <param name="terms">Terms whose documents are to be deleted.</param>
        /// <returns>The number of documents deleted.</returns>
        public int OptimizedDeleteDocuments(Term[] terms)
        {
            int n = 0;

            lock (this)
            {
                if (directoryOwner)
                {
                    AquireWriteLock();
                }

                try
                {
                    foreach (Term term in terms)
                    {
                        TermDocs docs = TermDocs(term);

                        if (docs == null)
                        {
                            continue;
                        }

                        try
                        {
                            while (docs.Next())
                            {
                                DoDelete(docs.Doc());

                                hasChanges = true;

                                n++;
                            }
                        }
                        finally
                        {
                            docs.Close();
                        }
                    }
                }
                finally
                {
                    // Release the lock ASAP if there are no changes. Doing this in a
                    // finally block also covers the case where an exception is thrown
                    // before anything was deleted, which previously left the lock held.
                    if (!hasChanges &&
                        writeLock != null)
                    {
                        writeLock.Release();
                        writeLock = null;
                    }
                }
            }

            return n;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Removes from the Lucene index every document that matches the id term of
        /// the given entity and whose class field equals the entity's Lucene type name.
        /// </summary>
        /// <param name="entity">Entity type whose index entry is removed.</param>
        /// <param name="id">Identifier of the entity instance.</param>
        /// <param name="provider">Directory provider used to obtain the index reader.</param>
        /// <throws>SearchException wrapping any exception raised while removing.</throws>
        private void Remove(System.Type entity, object id, IDirectoryProvider provider)
        {
            /*
             * Even with Lucene 2.1, using the IndexWriter to delete is not an option:
             * we can only delete by term, and the index does not have a term that
             * uniquely identifies the entry. See the logic below.
             */
            log.DebugFormat("remove from Lucene index: {0}#{1}", entity, id);

            DocumentBuilder documentBuilder = workspace.GetDocumentBuilder(entity);
            Term            idTerm          = documentBuilder.GetTerm(id);
            IndexReader     indexReader     = workspace.GetIndexReader(provider, entity);
            TermDocs        matches         = null;

            try
            {
                // TODO is there a faster way?
                // TODO include TermDocs into the workspace?
                matches = indexReader.TermDocs(idTerm);
                string luceneTypeName = TypeHelper.LuceneTypeName(entity);

                for (bool more = matches.Next(); more; more = matches.Next())
                {
                    int docId = matches.Doc();
                    string storedClass = indexReader.Document(docId).Get(DocumentBuilder.CLASS_FIELDNAME);

                    if (!luceneTypeName.Equals(storedClass))
                    {
                        // Same id term but a different class: leave it alone.
                        continue;
                    }

                    // Remove only entries of the right class; keep looping so that
                    // all matches are removed (defensive code).
                    indexReader.DeleteDocument(docId);
                }
            }
            catch (Exception ex)
            {
                throw new SearchException("Unable to remove from Lucene index: " + entity + "#" + id, ex);
            }
            finally
            {
                if (matches != null)
                {
                    try
                    {
                        matches.Close();
                    }
                    catch (IOException closeError)
                    {
                        log.Warn("Unable to close termDocs properly", closeError);
                    }
                }
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Opens TermDocs with a null term and checks that it yields document
        /// numbers 0 and 1 in order, each with a frequency of 1.
        /// </summary>
        public virtual void  TestAllTermDocs()
        {
            const int expectedDocCount = 2;

            IndexReader reader  = OpenReader();
            TermDocs    allDocs = reader.TermDocs(null);

            int expectedDoc = 0;
            while (expectedDoc < expectedDocCount)
            {
                Assert.IsTrue(allDocs.Next());
                Assert.AreEqual(expectedDoc, allDocs.Doc());
                Assert.AreEqual(1, allDocs.Freq());
                expectedDoc++;
            }

            allDocs.Close();
            reader.Close();
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Searches with a DuplicateFilter in KM_USE_FIRST_OCCURRENCE mode and checks that
 /// every hit is the first document listed under its KEY_FIELD term.
 /// </summary>
 public void TestKeepsFirstFilter()
 {
     DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
     df.SetKeepMode(DuplicateFilter.KM_USE_FIRST_OCCURRENCE);
     Hits h = searcher.Search(tq, df);
     Assert.IsTrue(h.Length() > 0, "Filtered searching should have found some matches");
     for (int i = 0; i < h.Length(); i++)
     {
         Document d = h.Doc(i);
         String url = d.Get(KEY_FIELD);
         TermDocs td = reader.TermDocs(new Term(KEY_FIELD, url));
         try
         {
             // Fail clearly if the term has no documents, instead of reading
             // Doc() from a cursor that was never positioned.
             Assert.IsTrue(td.Next(), "Term for hit url should have at least one doc");
             int firstDoc = td.Doc();
             Assert.AreEqual(firstDoc, h.Id((i)), "Duplicate urls should return first doc");
         }
         finally
         {
             td.Close();
         }
     }
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Gets the documents for the specified term.
        /// </summary>
        /// <param name="term">Term to look up in the open reader.</param>
        /// <returns>One TermDoc entry (document number, frequency, term and norm) per document listed under the term.</returns>
        public IList <TermDoc> DocumentCount(Term term)
        {
            TermDocs       docs = open.Reader.TermDocs(term);
            List <TermDoc> list = new List <TermDoc>();

            try
            {
                while (docs.Next())
                {
                    TermDoc entry = new TermDoc();
                    entry.Freq = docs.Freq();
                    entry.Doc  = docs.Doc();
                    entry.Term = term;
                    entry.Norm = GetNorm(open.Reader, term.Field(), entry.Doc);
                    list.Add(entry);
                }
            }
            finally
            {
                // Close even when reading an entry throws.
                docs.Close();
            }
            return list;
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Builds a bit set that starts with every document set and then, for each term of
        /// the field <c>fieldName</c> whose document frequency is greater than one, clears all
        /// but one of the documents listed under that term: the first one is kept when keepMode
        /// is KM_USE_FIRST_OCCURRENCE, the last one when it is KM_USE_LAST_OCCURRENCE.
        /// </summary>
        /// <param name="reader">Index reader to scan.</param>
        /// <returns>A bit set sized to reader.MaxDoc().</returns>
        private OpenBitSet FastBits(IndexReader reader)
        {
            OpenBitSet bits = new OpenBitSet(reader.MaxDoc());

            bits.Set(0, reader.MaxDoc()); //assume all are valid
            Term     startTerm = new Term(fieldName);
            TermEnum te        = reader.Terms(startTerm);

            if (te == null)
            {
                return bits;
            }

            try
            {
                Term currTerm = te.Term();

                while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
                {
                    if (te.DocFreq() > 1)
                    {
                        //unset potential duplicates
                        TermDocs td = reader.TermDocs(currTerm);
                        try
                        {
                            // NOTE(review): DocFreq() presumably may still count deleted
                            // documents, so the cursor can yield fewer entries than it
                            // reports; only read Doc() after a successful Next().
                            bool positioned = td.Next();
                            if (positioned && keepMode == KM_USE_FIRST_OCCURRENCE)
                            {
                                // Step past the first occurrence so it stays set.
                                positioned = td.Next();
                            }
                            if (positioned)
                            {
                                int lastDoc;
                                do
                                {
                                    lastDoc = td.Doc();
                                    bits.Clear(lastDoc);
                                } while (td.Next());
                                if (keepMode == KM_USE_LAST_OCCURRENCE)
                                {
                                    //restore the last bit
                                    bits.Set(lastDoc);
                                }
                            }
                        }
                        finally
                        {
                            td.Close();
                        }
                    }
                    if (!te.Next())
                    {
                        break;
                    }
                    currTerm = te.Term();
                }
            }
            finally
            {
                te.Close();
            }
            return bits;
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Combines two index readers in a ParallelReader, opens TermDocs with a null
        /// term and checks that it yields document numbers 0 and 1 in order, each
        /// with a frequency of 1.
        /// </summary>
        public virtual void  TestAllTermDocs()
        {
            const int expectedDocCount = 2;

            Directory firstDir  = GetDir1();
            Directory secondDir = GetDir2();

            ParallelReader parallel = new ParallelReader();
            parallel.Add(IndexReader.Open(firstDir));
            parallel.Add(IndexReader.Open(secondDir));

            TermDocs allDocs = parallel.TermDocs(null);

            int expectedDoc = 0;
            while (expectedDoc < expectedDocCount)
            {
                Assert.IsTrue(allDocs.Next());
                Assert.AreEqual(expectedDoc, allDocs.Doc());
                Assert.AreEqual(1, allDocs.Freq());
                expectedDoc++;
            }

            allDocs.Close();
            parallel.Close();
            firstDir.Close();
            secondDir.Close();
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Returns the revision recorded for <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// A value cached in _highest wins. Otherwise the FieldName.Id terms of the form
        /// "path@N" are scanned for the largest N, and that N is returned only when the
        /// corresponding document's FieldName.RevisionLast equals Revision.HeadString.
        /// </remarks>
        /// <param name="path">Path to look up.</param>
        /// <returns>The revision found, or 0 when there is no reader, no matching term, or the
        /// highest revision's document is not marked with Revision.HeadString.</returns>
        public int Get(string path)
        {
            int revision;

            lock (_highest)
            {
                if (_highest.TryGetValue(path, out revision))
                {
                    return revision;
                }
            }

            if (Reader == null)
            {
                return 0;
            }

            string   prefix = path + "@";
            int      doc    = -1;
            TermEnum t      = Reader.Terms(new Term(FieldName.Id, prefix));

            try
            {
                // Stop at the first term that leaves the id field or no longer starts with
                // the prefix. The comparison is ordinal so it does not depend on the culture.
                while (t.Term() != null &&
                       t.Term().Field() == FieldName.Id &&
                       t.Term().Text().StartsWith(prefix, System.StringComparison.Ordinal))
                {
                    int r = int.Parse(t.Term().Text().Substring(prefix.Length));
                    if (r > revision)
                    {
                        TermDocs d = Reader.TermDocs(t.Term());
                        try
                        {
                            // Only accept the revision when the term really has a document;
                            // otherwise Doc() would not refer to a valid document.
                            if (d.Next())
                            {
                                revision = r;
                                doc      = d.Doc();
                            }
                        }
                        finally
                        {
                            d.Close();
                        }
                    }
                    if (!t.Next())
                    {
                        break;
                    }
                }
            }
            finally
            {
                t.Close();
            }

            if (revision != 0 && Reader.Document(doc).Get(FieldName.RevisionLast) != Revision.HeadString)
            {
                return 0;
            }
            return revision;
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Builds the set of documents that contain at least one of this filter's terms.
        /// </summary>
        /// <param name="reader">Applicable reader.</param>
        /// <returns>A bit set sized to reader.MaxDoc() with the matching documents set.</returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            OpenBitSet matching = new OpenBitSet(reader.MaxDoc());
            TermDocs   cursor   = reader.TermDocs();

            try
            {
                // A single cursor is re-seeked for each term in turn.
                foreach (Term term in this.terms)
                {
                    for (cursor.Seek(term); cursor.Next();)
                    {
                        matching.Set(cursor.Doc());
                    }
                }
            }
            finally
            {
                cursor.Close();
            }

            return matching;
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Seeks a TermDocs obtained from one MultiReader with a TermEnum obtained from
        /// a different MultiReader and checks that iterating still yields documents.
        /// </summary>
        public virtual void  TestMultiTermDocs()
        {
            RAMDirectory fooDir = new RAMDirectory();
            AddDoc(fooDir, "test foo", true);

            RAMDirectory blahDir = new RAMDirectory();
            AddDoc(blahDir, "test blah", true);

            RAMDirectory wowDir = new RAMDirectory();
            AddDoc(wowDir, "test wow", true);

            IndexReader[] twoReaders   = new IndexReader[] { IndexReader.Open(fooDir), IndexReader.Open(wowDir) };
            IndexReader[] threeReaders = new IndexReader[] { IndexReader.Open(fooDir), IndexReader.Open(blahDir), IndexReader.Open(wowDir) };

            MultiReader twoWay   = new MultiReader(twoReaders);
            MultiReader threeWay = new MultiReader(threeReaders);

            // Deliberately mix a TermDocs and a TermEnum that come from different readers.
            TermDocs docsFromTwoWay   = twoWay.TermDocs();
            TermEnum termsFromThreeWay = threeWay.Terms(new Term("body", "wow"));

            docsFromTwoWay.Seek(termsFromThreeWay);

            // This should blow up if the implementation forgets to check that the
            // TermEnum belongs to the same reader as the TermDocs.
            int docSum = 0;
            for (bool more = docsFromTwoWay.Next(); more; more = docsFromTwoWay.Next())
            {
                docSum += docsFromTwoWay.Doc();
            }

            docsFromTwoWay.Close();
            termsFromThreeWay.Close();

            // Really a dummy assert: it ensures some docs were found and that
            // nothing gets optimized out.
            Assert.IsTrue(docSum > 0);
        }
Ejemplo n.º 22
0
        ////////////////////////////////////////////////////////////////

        /// <summary>
        /// Adds a tf * idf contribution to the Score of each hit for every term in
        /// <paramref name="term_list"/> that occurs in the hit's document.
        /// </summary>
        /// <param name="hits_by_id">Hits keyed by document number; their Score is updated in place.</param>
        /// <param name="reader">Index reader supplying term statistics and term docs.</param>
        /// <param name="term_list">Terms to score against.</param>
        private static void ScoreHits(Dictionary <int, Hit> hits_by_id,
                                      IndexReader reader,
                                      ICollection term_list)
        {
            LNS.Similarity similarity;
            similarity = LNS.Similarity.GetDefault();

            TermDocs term_docs = reader.TermDocs();
            Hit      hit;

            try
            {
                foreach (Term term in term_list)
                {
                    double idf;
                    idf = similarity.Idf(reader.DocFreq(term), reader.MaxDoc());

                    // Countdown of hits not yet seen for this term; lets the scan
                    // stop early once every hit has been scored.
                    int hit_count;
                    hit_count = hits_by_id.Count;

                    term_docs.Seek(term);
                    while (term_docs.Next() && hit_count > 0)
                    {
                        int id;
                        id = term_docs.Doc();

                        if (hits_by_id.TryGetValue(id, out hit))
                        {
                            double tf;
                            tf         = similarity.Tf(term_docs.Freq());
                            hit.Score += tf * idf;
                            --hit_count;
                        }
                    }
                }
            }
            finally
            {
                // Close even when scoring throws part-way through.
                term_docs.Close();
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Resolves every pending URI to the first document indexed under its "Uri"
        /// term, marks that document via Set(doc, true), and then clears the pending list.
        /// Does nothing when there are no pending URIs.
        /// </summary>
        public void FlushUris()
        {
            if (pending_uris == null)
            {
                return;
            }

            TermDocs term_docs = this.searcher.Reader.TermDocs();

            try
            {
                for (int i = 0; i < pending_uris.Count; i++)
                {
                    Term term = new Term("Uri", (string)pending_uris [i]);
                    term_docs.Seek(term);

                    // URIs without a matching document are silently skipped.
                    if (term_docs.Next())
                    {
                        this.Set(term_docs.Doc(), true);
                    }
                }
            }
            finally
            {
                // Close even when a lookup throws.
                term_docs.Close();
            }

            pending_uris = null;
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Deletes, through the given reader, every document indexed under the given term.
        /// </summary>
        /// <param name="reader">The reader used to look up and delete the documents.</param>
        /// <param name="term">The term identifying the documents to delete.</param>
        /// <returns>The number of documents deleted; 0 when the reader returns no TermDocs for the term.</returns>
        public int Delete(IndexReader reader, Term term)
        {
            TermDocs matches = reader.TermDocs(term);
            if (matches == null)
            {
                return 0;
            }

            int deleted = 0;
            try
            {
                // The counter advances only after a document was deleted successfully.
                for (; matches.Next(); deleted++)
                {
                    reader.DeleteDocument(matches.Doc());
                }
            }
            finally
            {
                matches.Close();
            }

            return deleted;
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Builds the set of documents indexed under any of the box ids in _shape.Area,
        /// looked up in the field _fieldName using their prefix-coded double form.
        /// </summary>
        /// <param name="reader">Index reader to evaluate the filter against.</param>
        /// <returns>A bit set sized to reader.MaxDoc() with the matching documents set.</returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            var bits = new OpenBitSet(reader.MaxDoc());

            TermDocs      termDocs = reader.TermDocs();
            List <double> area     = _shape.Area;
            int           sz       = area.Count;

            try
            {
                // iterate through each boxid
                for (int i = 0; i < sz; i++)
                {
                    double boxId = area[i];
                    termDocs.Seek(new Term(_fieldName, NumericUtils.DoubleToPrefixCoded(boxId)));

                    // iterate through all documents
                    // which have this boxId
                    while (termDocs.Next())
                    {
                        bits.FastSet(termDocs.Doc());
                    }
                }
            }
            finally
            {
                // The cursor was previously never closed.
                termDocs.Close();
            }

            return bits;
        }
Ejemplo n.º 26
0
 /// <summary>
 /// Returns the current cursor's document number shifted by base_Renamed.
 /// </summary>
 /// <returns>base_Renamed plus current.Doc().</returns>
 public virtual int Doc()
 {
     int offset = base_Renamed;
     return offset + current.Doc();
 }
Ejemplo n.º 27
0
 /// <summary>
 /// Returns the document number reported by the wrapped termDocs.
 /// </summary>
 public virtual int Doc()
 {
     int wrappedDoc = termDocs.Doc();
     return wrappedDoc;
 }
Ejemplo n.º 28
0
		/// <summary>
		/// Builds a Hit from a primary-index document and, when a secondary
		/// reader is supplied, merges in the properties of the secondary
		/// document indexed under the same "Uri" term.
		/// </summary>
		/// <param name="primary_doc">Document taken from the primary index.</param>
		/// <param name="secondary_reader">Reader over the secondary index, or null to skip the merge.</param>
		/// <param name="term_docs">Term-doc cursor used to look up the Uri term; presumably opened on secondary_reader (verify against caller).</param>
		/// <param name="fields">Optional field selector for loading the secondary document; null loads it without a selector.</param>
		/// <returns>The hit for primary_doc, with secondary properties merged in when a matching secondary document exists.</returns>
		private static Hit CreateHit ( Document primary_doc,
					IndexReader secondary_reader,
					TermDocs term_docs,
					FieldSelector fields)
		{
			Hit hit = DocumentToHit (primary_doc);

			if (secondary_reader == null)
				return hit;

			// Get the stringified version of the URI
			// exactly as it comes out of the index.
			Term term = new Term ("Uri", primary_doc.Get ("Uri"));
			term_docs.Seek (term);

			// Move to the first (and expected only) matching term doc.
			// If nothing matches, Doc () would not refer to a valid document,
			// so return the hit built from the primary document alone.
			if (! term_docs.Next ())
				return hit;

			int secondary_id = term_docs.Doc ();
			Document secondary_doc =
				(fields == null) ?
				secondary_reader.Document (secondary_id) :
				secondary_reader.Document (secondary_id, fields);

			// If we are using the secondary index, now we need to
			// merge the properties from the secondary index
			AddPropertiesToHit (hit, secondary_doc, false);

			return hit;
		}
        /// <summary>
        /// Walks the full term enumeration of a ParallelReader built from ir1 and ir2 and
        /// checks that the terms appear in the expected order, and that each term lists
        /// exactly one document, number 0.
        /// </summary>
        public virtual void  Test1()
        {
            ParallelReader parallel = new ParallelReader();

            parallel.Add(ir1);
            parallel.Add(ir2);

            TermDocs docs  = parallel.TermDocs();
            TermEnum terms = parallel.Terms();

            // Expected terms, in enumeration order.
            string[] expectedTerms = new string[]
            {
                "field1:brown", "field1:fox", "field1:jumps", "field1:quick", "field1:the",
                "field2:brown", "field2:fox", "field2:jumps", "field2:quick", "field2:the",
                "field3:dog", "field3:fox", "field3:jumps", "field3:lazy", "field3:over", "field3:the"
            };

            foreach (string expected in expectedTerms)
            {
                Assert.IsTrue(terms.Next());
                Assert.AreEqual(expected, terms.Term().ToString());

                // Each term must list exactly one document: number 0.
                docs.Seek(terms.Term());
                Assert.IsTrue(docs.Next());
                Assert.AreEqual(0, docs.Doc());
                Assert.IsFalse(docs.Next());
            }

            // No further terms after the last expected one.
            Assert.IsFalse(terms.Next());
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Indexes four small documents with known text and checks three things:
        /// (1) for every posting, the frequency reported by the term vector of "Field"
        /// equals the frequency reported by <c>TermDocs</c>; (2) a query for
        /// "chocolate" returns three hits in the order testDoc3, testDoc4, testDoc1;
        /// (3) the term vector of the second hit matches the hand-counted frequencies
        /// of <c>test4</c>.
        /// </summary>
        /// <remarks>
        /// An <c>IOException</c> raised while indexing or searching is reported on
        /// standard error and turned into a test failure.
        /// </remarks>
        public virtual void TestKnownSetOfDocuments()
        {
            System.String test1 = "eating chocolate in a computer lab";                                             //6 terms
            System.String test2 = "computer in a computer lab";                                                     //5 terms
            System.String test3 = "a chocolate lab grows old";                                                      //5 terms
            System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms

            // Expected frequency of each of the 10 distinct terms in test4.
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["old"]       = 1;

            Document testDoc1 = new Document();
            SetupDoc(testDoc1, test1);

            Document testDoc2 = new Document();
            SetupDoc(testDoc2, test2);

            Document testDoc3 = new Document();
            SetupDoc(testDoc3, test3);

            Document testDoc4 = new Document();
            SetupDoc(testDoc4, test4);

            Directory dir = new RAMDirectory();

            try
            {
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1);
                writer.AddDocument(testDoc2);
                writer.AddDocument(testDoc3);
                writer.AddDocument(testDoc4);
                writer.Close();

                IndexSearcher knownSearcher = new IndexSearcher(dir);
                try
                {
                    TermEnum termEnum = knownSearcher.reader.Terms();
                    TermDocs termDocs = knownSearcher.reader.TermDocs();

                    Similarity sim = knownSearcher.GetSimilarity();
                    while (termEnum.Next() == true)
                    {
                        Term term = termEnum.Term();
                        termDocs.Seek(term);
                        while (termDocs.Next())
                        {
                            int docId = termDocs.Doc();
                            int freq  = termDocs.Freq();

                            // Check the vector exists BEFORE dereferencing it, so a missing
                            // vector fails the assertion instead of raising a
                            // NullReferenceException.
                            TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                            Assert.IsTrue(vector != null);

                            // The scoring values below are computed but not asserted on.
                            float tf  = sim.Tf(freq);
                            float idf = sim.Idf(term, knownSearcher);
                            //This is fine since we don't have stop words
                            float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);

                            System.String[] vTerms = vector.GetTerms();
                            int[]           freqs  = vector.GetTermFrequencies();
                            for (int i = 0; i < vTerms.Length; i++)
                            {
                                if (term.Text().Equals(vTerms[i]) == true)
                                {
                                    Assert.AreEqual(freq, freqs[i]);
                                }
                            }
                        }
                    }

                    Query query = new TermQuery(new Term("Field", "chocolate"));
                    Hits  hits  = knownSearcher.Search(query);
                    //doc 3 should be the first hit b/c it is the shortest match
                    Assert.AreEqual(3, hits.Length());
                    float score = hits.Score(0);

                    Assert.AreEqual(testDoc3.ToString(), hits.Doc(0).ToString());
                    Assert.AreEqual(testDoc4.ToString(), hits.Doc(1).ToString());
                    Assert.AreEqual(testDoc1.ToString(), hits.Doc(2).ToString());

                    // The second hit is testDoc4; its vector must match test4Map exactly.
                    TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
                    Assert.IsTrue(vector2 != null);
                    System.String[] terms  = vector2.GetTerms();
                    int[]           freqs2 = vector2.GetTermFrequencies();
                    Assert.IsTrue(terms != null && terms.Length == 10);
                    for (int i = 0; i < terms.Length; i++)
                    {
                        System.String term = terms[i];
                        int freq = freqs2[i];
                        Assert.IsTrue(test4.IndexOf(term) != -1);

                        // Look the entry up as an object and assert it is present before
                        // unboxing; unboxing null would throw instead of failing cleanly.
                        System.Object expectedFreq = test4Map[term];
                        Assert.IsTrue(expectedFreq != null);
                        Assert.AreEqual((System.Int32)expectedFreq, freq);
                    }
                }
                finally
                {
                    // Release the searcher even when an assertion above fails.
                    knownSearcher.Close();
                }
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.IsTrue(false);
            }
        }
Ejemplo n.º 31
0
        /// <summary>
        /// Displays the document identified by <c>td.Doc()</c>: its number, the
        /// value of <c>td.Freq()</c>, and its fields. When no index reader is
        /// available the "NoIndex" resource string is shown as the status instead;
        /// any exception raised while loading is reported through the status line.
        /// </summary>
        /// <param name="td">Term-docs cursor whose current entry is displayed.</param>
        private void _ShowTermDoc(TermDocs td)
        {
            if (_luke.IndexReader != null)
            {
                try
                {
                    Document currentDoc = _luke.IndexReader.Document(td.Doc());

                    labelDocNum.Text = td.Doc().ToString();
                    labelTermFreq.Text = td.Freq().ToString();

                    _ShowDocFields(td.Doc(), currentDoc);
                }
                catch (Exception ex)
                {
                    // Best effort: surface the failure to the user rather than crash the UI.
                    _luke.ShowStatus(ex.Message);
                }

                return;
            }

            // No index is open, so there is nothing to look the document up in.
            _luke.ShowStatus(_luke.resources.GetString("NoIndex"));
        }
Ejemplo n.º 32
0
 /// <summary>
 /// Delegates to the wrapped <c>in_Renamed</c> instance and returns the value
 /// of its <c>Doc()</c> call unchanged.
 /// </summary>
 public virtual int Doc()
 {
     int wrappedDoc = in_Renamed.Doc();
     return wrappedDoc;
 }