private static Hit CreateHit(Document primary_doc, IndexReader secondary_reader, TermDocs term_docs, FieldSelector fields)
{
    Hit hit = DocumentToHit(primary_doc);

    if (secondary_reader == null)
        return hit;

    // Get the stringified version of the URI
    // exactly as it comes out of the index.
    Term term = new Term("Uri", primary_doc.Get("Uri"));
    term_docs.Seek(term);

    // Move to the first (and only) matching term doc
    term_docs.Next();
    Document secondary_doc = (fields == null)
        ? secondary_reader.Document(term_docs.Doc())
        : secondary_reader.Document(term_docs.Doc(), fields);

    // If we are using the secondary index, now we need to
    // merge the properties from the secondary index
    AddPropertiesToHit(hit, secondary_doc, false);

    return hit;
}
public override BitArray Bits(IndexReader reader)
{
    // reader.GetVersion() could be used to cache the computed bits per reader.

    if (_revFirst == All || _revLast == All) // optimization
        return new BitArray(reader.MaxDoc(), true);

    BitArray last_bits = new BitArray(reader.MaxDoc(), false);

    TermEnum t = reader.Terms(new Term(FieldName.RevisionLast, _revFirst.ToString(RevFormat)));
    TermDocs d = reader.TermDocs();

    // Note: t.SkipTo(new Term(FieldName.RevisionLast, ...)) is extremely slow here.
    if (t.Term() != null)
    {
        while (t.Term().Field() == FieldName.RevisionLast)
        {
            d.Seek(t);
            while (d.Next())
                last_bits[d.Doc()] = true;

            if (!t.Next())
                break;
        }
    }

    // Optimization: skip the second pass if we are just using the head revision.
    if (_revLast == Head)
        return last_bits;

    BitArray first_bits = new BitArray(reader.MaxDoc(), true);

    t = reader.Terms(new Term("rev_first", (_revLast + 1).ToString(RevFormat)));

    // Note: t.SkipTo(new Term("rev_first", ...)) is extremely slow here.
    if (t.Term() != null)
    {
        while (t.Term().Field() == "rev_first")
        {
            d.Seek(t);
            while (d.Next())
                first_bits[d.Doc()] = false;

            if (!t.Next())
                break;
        }
    }

    return last_bits.And(first_bits);
}
/// <summary>Deletes all documents that have a given <code>term</code> indexed.
/// This is useful if one uses a document field to hold a unique ID string for
/// the document. Then to delete such a document, one merely constructs a
/// term with the appropriate field and the unique ID string as its text and
/// passes it to this method.
/// See {@link #DeleteDocument(int)} for information about when this deletion will
/// become effective.
/// </summary>
/// <returns> the number of documents deleted </returns>
/// <throws> StaleReaderException if the index has changed since this reader was opened </throws>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> LockObtainFailedException if another writer has this index open
/// (<code>write.lock</code> could not be obtained) </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public int DeleteDocuments(Term term)
{
    EnsureOpen();
    TermDocs docs = TermDocs(term);
    if (docs == null)
        return 0;

    int n = 0;
    try
    {
        while (docs.Next())
        {
            DeleteDocument(docs.Doc());
            n++;
        }
    }
    finally
    {
        docs.Close();
    }
    return n;
}
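// Usage sketch for DeleteDocuments(Term): deleting by a unique-ID field, as the
// summary above describes. The "id" field name is hypothetical, and the reader
// must be writable (on Lucene.Net 2.9-style APIs, IndexReader.Open(dir, false)).
using Lucene.Net.Index;
using Lucene.Net.Store;

public static class DeleteByIdExample
{
    public static int DeleteById(Directory dir, string id)
    {
        IndexReader reader = IndexReader.Open(dir, false); // readOnly: false
        try
        {
            // Zero or one document should match a truly unique ID.
            return reader.DeleteDocuments(new Term("id", id));
        }
        finally
        {
            reader.Close(); // flushes the pending deletions
        }
    }
}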
public virtual int doTest(int iter, int ndocs, int maxTF, float percentDocs)
{
    Directory dir = new RAMDirectory();

    long start = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;
    AddDocs(dir, ndocs, "foo", "val", maxTF, percentDocs);
    long end = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;
    System.Console.Out.WriteLine("milliseconds for creation of " + ndocs + " docs = " + (end - start));

    IndexReader reader = IndexReader.Open(dir);
    TermEnum tenum = reader.Terms(new Term("foo", "val"));
    TermDocs tdocs = reader.TermDocs();

    start = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;

    int ret = 0;
    for (int i = 0; i < iter; i++)
    {
        tdocs.Seek(tenum);
        while (tdocs.Next())
        {
            ret += tdocs.Doc();
        }
    }

    end = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;
    System.Console.Out.WriteLine("milliseconds for " + iter + " TermDocs iteration: " + (end - start));

    return ret;
}
static string GetProperty(IndexReader reader, string property)
{
    TermDocs td = reader.TermDocs(GetPropertyId(property));
    if (!td.Next())
        return null;

    return reader.Document(td.Doc()).Get(ValueField);
}
public virtual void TestFilterIndexReader_Renamed()
{
    RAMDirectory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Document d1 = new Document();
    d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d1);

    Document d2 = new Document();
    d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d2);

    Document d3 = new Document();
    d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d3);

    writer.Close();

    IndexReader reader = new TestReader(IndexReader.Open(directory));
    Assert.IsTrue(reader.IsOptimized());

    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
    }
    terms.Close();

    TermPositions positions = reader.TermPositions(new Term("default", "one"));
    while (positions.Next())
    {
        Assert.IsTrue((positions.Doc() % 2) == 1);
    }

    int NUM_DOCS = 3;
    TermDocs td = reader.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();

    reader.Close();
    directory.Close();
}
private static List<Document> GetDocumentsFromTermDocs(TermDocs termDocs, IndexReaderFrame readerFrame)
{
    var docs = new List<Document>();
    while (termDocs.Next())
    {
        docs.Add(readerFrame.IndexReader.Document(termDocs.Doc()));
    }
    docs.Sort(new DocumentVersionComparer());
    return docs;
}
private void _ShowTermDoc(TermDocs td)
{
    if (_luke.IndexReader == null)
    {
        _luke.ShowStatus(_luke.resources.GetString("NoIndex"));
        return;
    }

    try
    {
        Document doc = _luke.IndexReader.Document(td.Doc());
        labelDocNum.Text = td.Doc().ToString();
        labelTermFreq.Text = td.Freq().ToString();
        _ShowDocFields(td.Doc(), doc);
    }
    catch (Exception e)
    {
        _luke.ShowStatus(e.Message);
    }
}
public override BitArray Bits(IndexReader reader)
{
    BitArray bitArray = new BitArray(reader.MaxDoc());
    TermDocs termDocs = reader.TermDocs(new Term("score", "5"));
    while (termDocs.Next())
    {
        bitArray.Set(termDocs.Doc(), true);
    }
    return bitArray;
}
public static int Count(Term t, IndexReader r)
{
    int count = 0;
    TermDocs td = r.TermDocs(t);
    while (td.Next())
    {
        td.Doc();
        count++;
    }
    td.Close();
    return count;
}
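// Why iterate TermDocs instead of calling r.DocFreq(t)? DocFreq() is read
// straight from the term dictionary and still counts deleted documents until
// segments are merged, while TermDocs skips deletions. A comparison sketch
// (assumes the Count helper above is in scope):
using Lucene.Net.Index;

public static class TermCountExample
{
    public static void Compare(IndexReader reader, Term t)
    {
        int storedFreq = reader.DocFreq(t); // may still include deleted docs
        int liveFreq = Count(t, reader);    // counts only live documents
        System.Console.WriteLine("stored=" + storedFreq + " live=" + liveFreq);
    }
}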
private OpenBitSet CorrectBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); // assume all are INvalid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te != null)
    {
        Term currTerm = te.Term();
        while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) // term fieldnames are interned
        {
            int lastDoc = -1;
            // set non duplicates
            TermDocs td = reader.TermDocs(currTerm);
            if (td.Next())
            {
                if (keepMode == KM_USE_FIRST_OCCURRENCE)
                {
                    bits.Set(td.Doc());
                }
                else
                {
                    do
                    {
                        lastDoc = td.Doc();
                    } while (td.Next());
                    bits.Set(lastDoc);
                }
            }
            if (!te.Next())
                break;
            currTerm = te.Term();
        }
    }
    return bits;
}
/// <summary>
/// Deletes all documents that match any of the specified Term-s
/// </summary>
/// <param name="terms">Term-s to be deleted</param>
/// <returns>The number of documents deleted</returns>
public int OptimizedDeleteDocuments(Term[] terms)
{
    int n = 0;
    lock (this)
    {
        if (directoryOwner)
            AquireWriteLock();

        foreach (Term term in terms)
        {
            TermDocs docs = TermDocs(term);
            if (docs == null)
                continue;

            try
            {
                while (docs.Next())
                {
                    DoDelete(docs.Doc());
                    hasChanges = true;
                    n++;
                }
            }
            finally
            {
                docs.Close();
            }
        }

        // Release the lock ASAP if there are no changes
        if (!hasChanges && writeLock != null)
        {
            writeLock.Release();
            writeLock = null;
        }
    }
    return n;
}
private void Remove(System.Type entity, object id, IDirectoryProvider provider)
{
    /*
     * Even with Lucene 2.1, using IndexWriter to delete is not an option:
     * we can only delete by term, and the index doesn't have a term that
     * uniquely identifies the entry. See the logic below.
     */
    log.DebugFormat("remove from Lucene index: {0}#{1}", entity, id);
    DocumentBuilder builder = workspace.GetDocumentBuilder(entity);
    Term term = builder.GetTerm(id);
    IndexReader reader = workspace.GetIndexReader(provider, entity);
    TermDocs termDocs = null;
    try
    {
        // TODO is there a faster way?
        // TODO include TermDocs into the workspace?
        termDocs = reader.TermDocs(term);
        string entityName = TypeHelper.LuceneTypeName(entity);
        while (termDocs.Next())
        {
            int docIndex = termDocs.Doc();
            if (entityName.Equals(reader.Document(docIndex).Get(DocumentBuilder.CLASS_FIELDNAME)))
            {
                // Remove only documents of the right class;
                // loop over all matches to remove every one (defensive code).
                reader.DeleteDocument(docIndex);
            }
        }
    }
    catch (Exception e)
    {
        throw new SearchException("Unable to remove from Lucene index: " + entity + "#" + id, e);
    }
    finally
    {
        if (termDocs != null)
        {
            try
            {
                termDocs.Close();
            }
            catch (IOException e)
            {
                log.Warn("Unable to close termDocs properly", e);
            }
        }
    }
}
public virtual void TestAllTermDocs()
{
    IndexReader reader = OpenReader();
    int NUM_DOCS = 2;
    TermDocs td = reader.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();
    reader.Close();
}
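// Note on the test above: passing null to IndexReader.TermDocs() returns an
// "all docs" iterator over every live (non-deleted) document id, in order.
// A minimal sketch using that behavior to collect the live doc ids:
using System.Collections.Generic;
using Lucene.Net.Index;

public static class AllDocsExample
{
    public static List<int> LiveDocIds(IndexReader reader)
    {
        var ids = new List<int>();
        TermDocs td = reader.TermDocs(null); // null term => all non-deleted docs
        try
        {
            while (td.Next())
            {
                ids.Add(td.Doc());
            }
        }
        finally
        {
            td.Close();
        }
        return ids;
    }
}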
public void TestKeepsFirstFilter()
{
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    df.SetKeepMode(DuplicateFilter.KM_USE_FIRST_OCCURRENCE);
    Hits h = searcher.Search(tq, df);
    Assert.IsTrue(h.Length() > 0, "Filtered searching should have found some matches");
    for (int i = 0; i < h.Length(); i++)
    {
        Document d = h.Doc(i);
        String url = d.Get(KEY_FIELD);
        TermDocs td = reader.TermDocs(new Term(KEY_FIELD, url));
        int lastDoc = 0;
        td.Next();
        lastDoc = td.Doc();
        Assert.AreEqual(lastDoc, h.Id(i), "Duplicate urls should return first doc");
    }
}
/// <summary>
/// Gets the term/document postings for the specified Term.
/// </summary>
/// <param name="term">The term to look up.</param>
/// <returns>One TermDoc entry per matching document.</returns>
public IList<TermDoc> DocumentCount(Term term)
{
    TermDocs docs = open.Reader.TermDocs(term);
    List<TermDoc> list = new List<TermDoc>();
    while (docs.Next())
    {
        TermDoc doc = new TermDoc();
        doc.Freq = docs.Freq();
        doc.Doc = docs.Doc();
        doc.Term = term;
        doc.Norm = GetNorm(open.Reader, term.Field(), doc.Doc);
        list.Add(doc);
    }
    docs.Close();
    return list;
}
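// For context: GetNorm above is a project-specific helper. With the standard
// Lucene.Net API, a field norm can be read and decoded as in this sketch
// (the "contents" field name is hypothetical; assumes norms were not omitted
// at indexing time):
using Lucene.Net.Index;
using Lucene.Net.Search;

public static class NormExample
{
    public static float ReadNorm(IndexReader reader, int docId)
    {
        byte[] norms = reader.Norms("contents");    // one encoded byte per document
        return Similarity.DecodeNorm(norms[docId]); // decode to a float boost factor
    }
}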
private OpenBitSet FastBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc());
    bits.Set(0, reader.MaxDoc()); // assume all are valid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te != null)
    {
        Term currTerm = te.Term();
        while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) // term fieldnames are interned
        {
            if (te.DocFreq() > 1)
            {
                int lastDoc = -1;
                // unset potential duplicates
                TermDocs td = reader.TermDocs(currTerm);
                td.Next();
                if (keepMode == KM_USE_FIRST_OCCURRENCE)
                {
                    td.Next();
                }
                do
                {
                    lastDoc = td.Doc();
                    bits.Clear(lastDoc);
                } while (td.Next());
                if (keepMode == KM_USE_LAST_OCCURRENCE)
                {
                    // restore the last bit
                    bits.Set(lastDoc);
                }
            }
            if (!te.Next())
                break;
            currTerm = te.Term();
        }
    }
    return bits;
}
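// A minimal usage sketch for the filter these bit-set builders belong to (the
// contrib DuplicateFilter): keep only the newest document per "url" key. The
// field names and query are illustrative, not from this project.
using Lucene.Net.Index;
using Lucene.Net.Search;

public static class DuplicateFilterExample
{
    public static Hits SearchDeduplicated(IndexSearcher searcher)
    {
        DuplicateFilter df = new DuplicateFilter("url");
        df.SetKeepMode(DuplicateFilter.KM_USE_LAST_OCCURRENCE);
        return searcher.Search(new TermQuery(new Term("text", "lucene")), df);
    }
}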
public virtual void TestAllTermDocs()
{
    Directory dir1 = GetDir1();
    Directory dir2 = GetDir2();
    ParallelReader pr = new ParallelReader();
    pr.Add(IndexReader.Open(dir1));
    pr.Add(IndexReader.Open(dir2));

    int NUM_DOCS = 2;
    TermDocs td = pr.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();

    pr.Close();
    dir1.Close();
    dir2.Close();
}
public int Get(string path)
{
    int revision;
    lock (_highest)
    {
        if (_highest.TryGetValue(path, out revision))
            return revision;
    }

    if (Reader == null)
        return 0;

    path += "@";
    TermEnum t = Reader.Terms(new Term(FieldName.Id, path));
    int doc = -1;
    while (t.Term() != null && t.Term().Text().StartsWith(path))
    {
        int r = int.Parse(t.Term().Text().Substring(path.Length));
        if (r > revision)
        {
            revision = r;
            TermDocs d = Reader.TermDocs(t.Term());
            d.Next();
            doc = d.Doc();
        }
        t.Next();
    }
    t.Close();

    if (revision != 0 && Reader.Document(doc).Get(FieldName.RevisionLast) != Revision.HeadString)
        return 0;

    return revision;
}
/// <summary>
/// Get the DocIdSet.
/// </summary>
/// <param name="reader">Applicable reader.</param>
/// <returns>The set.</returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    OpenBitSet result = new OpenBitSet(reader.MaxDoc());
    TermDocs td = reader.TermDocs();
    try
    {
        foreach (Term t in this.terms)
        {
            td.Seek(t);
            while (td.Next())
            {
                result.Set(td.Doc());
            }
        }
    }
    finally
    {
        td.Close();
    }
    return result;
}
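// Usage sketch for the filter above (assuming the enclosing class is a terms
// filter built from a set of Term-s): the filter restricts matches to documents
// containing at least one of those terms. The field name and query are
// illustrative.
using Lucene.Net.Index;
using Lucene.Net.Search;

public static class FilteredSearchExample
{
    public static int Run(IndexSearcher searcher, Filter termsFilter)
    {
        // Only documents allowed by the filter's DocIdSet can match the query.
        Query q = new TermQuery(new Term("contents", "lucene"));
        Hits hits = searcher.Search(q, termsFilter);
        return hits.Length();
    }
}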
public virtual void TestMultiTermDocs()
{
    RAMDirectory ramDir1 = new RAMDirectory();
    AddDoc(ramDir1, "test foo", true);
    RAMDirectory ramDir2 = new RAMDirectory();
    AddDoc(ramDir2, "test blah", true);
    RAMDirectory ramDir3 = new RAMDirectory();
    AddDoc(ramDir3, "test wow", true);

    IndexReader[] readers1 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir3) };
    IndexReader[] readers2 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir2), IndexReader.Open(ramDir3) };
    MultiReader mr2 = new MultiReader(readers1);
    MultiReader mr3 = new MultiReader(readers2);

    // test mixing up TermDocs and TermEnums from different readers.
    TermDocs td2 = mr2.TermDocs();
    TermEnum te3 = mr3.Terms(new Term("body", "wow"));
    td2.Seek(te3);
    int ret = 0;

    // This should blow up if we forget to check that the TermEnum is from the same
    // reader as the TermDocs.
    while (td2.Next())
    {
        ret += td2.Doc();
    }
    td2.Close();
    te3.Close();

    // really a dummy assert to ensure that we got some docs and to ensure that
    // nothing is optimized out.
    Assert.IsTrue(ret > 0);
}
////////////////////////////////////////////////////////////////

static private void ScoreHits(Dictionary<int, Hit> hits_by_id, IndexReader reader, ICollection term_list)
{
    LNS.Similarity similarity;
    similarity = LNS.Similarity.GetDefault();

    TermDocs term_docs = reader.TermDocs();
    Hit hit;

    foreach (Term term in term_list)
    {
        double idf;
        idf = similarity.Idf(reader.DocFreq(term), reader.MaxDoc());

        int hit_count;
        hit_count = hits_by_id.Count;

        term_docs.Seek(term);
        while (term_docs.Next() && hit_count > 0)
        {
            int id;
            id = term_docs.Doc();

            if (hits_by_id.TryGetValue(id, out hit))
            {
                double tf;
                tf = similarity.Tf(term_docs.Freq());
                hit.Score += tf * idf;
                --hit_count;
            }
        }
    }

    term_docs.Close();
}
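// For reference, the factors used above correspond to Lucene's
// DefaultSimilarity, where tf(f) = sqrt(f) and idf(df, n) = log(n / (df + 1)) + 1.
// A standalone sketch of a single term's contribution to one document's score:
public static class TfIdfSketch
{
    public static double TermScore(int freqInDoc, int docFreq, int numDocs)
    {
        double tf = System.Math.Sqrt(freqInDoc);                         // term frequency factor
        double idf = System.Math.Log((double)numDocs / (docFreq + 1)) + 1.0; // inverse document frequency
        return tf * idf;
    }
}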
public void FlushUris()
{
    if (pending_uris == null)
        return;

    TermDocs term_docs = this.searcher.Reader.TermDocs();

    for (int i = 0; i < pending_uris.Count; i++)
    {
        Term term = new Term("Uri", (string)pending_uris[i]);
        term_docs.Seek(term);

        if (term_docs.Next())
            this.Set(term_docs.Doc(), true);
    }

    term_docs.Close();

    pending_uris = null;
}
/// <summary>
/// Deletes all documents matching the given term from the index.
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="term">The term.</param>
/// <returns>The number of documents deleted.</returns>
public int Delete(IndexReader reader, Term term)
{
    TermDocs docs = reader.TermDocs(term);
    if (docs == null)
        return 0;

    int num = 0;
    try
    {
        while (docs.Next())
        {
            reader.DeleteDocument(docs.Doc());
            num++;
        }
    }
    finally
    {
        docs.Close();
    }
    return num;
}
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    var bits = new OpenBitSet(reader.MaxDoc());
    TermDocs termDocs = reader.TermDocs();

    List<double> area = _shape.Area;
    int sz = area.Count;

    // iterate through each boxId
    for (int i = 0; i < sz; i++)
    {
        double boxId = area[i];
        termDocs.Seek(new Term(_fieldName, NumericUtils.DoubleToPrefixCoded(boxId)));

        // iterate through all documents which have this boxId
        while (termDocs.Next())
        {
            bits.FastSet(termDocs.Doc());
        }
    }

    return bits;
}
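// Indexing-side sketch to match the Seek above: the boxId value must be written
// with the same NumericUtils prefix coding the filter seeks on. Field name and
// value are illustrative.
using Lucene.Net.Documents;
using Lucene.Net.Util;

public static class BoxIdIndexingExample
{
    public static Document MakeDoc(double boxId)
    {
        Document doc = new Document();
        // Prefix-coded terms sort and match correctly for numeric values.
        doc.Add(new Field("boxId", NumericUtils.DoubleToPrefixCoded(boxId),
                          Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
        return doc;
    }
}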
public virtual int Doc()
{
    return base_Renamed + current.Doc();
}
public virtual int Doc()
{
    return termDocs.Doc();
}
public virtual void Test1()
{
    ParallelReader pr = new ParallelReader();
    pr.Add(ir1);
    pr.Add(ir2);
    TermDocs td = pr.TermDocs();
    TermEnum te = pr.Terms();

    // Every term in the parallel reader should be enumerated in order,
    // and each one should match exactly document 0.
    string[] expectedTerms = new string[]
    {
        "field1:brown", "field1:fox", "field1:jumps", "field1:quick", "field1:the",
        "field2:brown", "field2:fox", "field2:jumps", "field2:quick", "field2:the",
        "field3:dog", "field3:fox", "field3:jumps", "field3:lazy", "field3:over", "field3:the"
    };

    foreach (string expected in expectedTerms)
    {
        Assert.IsTrue(te.Next());
        Assert.AreEqual(expected, te.Term().ToString());
        td.Seek(te.Term());
        Assert.IsTrue(td.Next());
        Assert.AreEqual(0, td.Doc());
        Assert.IsFalse(td.Next());
    }

    Assert.IsFalse(te.Next());
}
public virtual void TestKnownSetOfDocuments()
{
    System.String[] termArray = new System.String[]
    {
        "eating", "chocolate", "in", "a", "computer", "lab",
        "grows", "old", "colored", "with", "an"
    };
    System.String test1 = "eating chocolate in a computer lab"; // 6 terms
    System.String test2 = "computer in a computer lab"; // 5 terms
    System.String test3 = "a chocolate lab grows old"; // 5 terms
    System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; // 13 terms

    System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
    test4Map["chocolate"] = 3;
    test4Map["lab"] = 2;
    test4Map["eating"] = 1;
    test4Map["computer"] = 1;
    test4Map["with"] = 1;
    test4Map["a"] = 1;
    test4Map["colored"] = 1;
    test4Map["in"] = 1;
    test4Map["an"] = 1;
    test4Map["old"] = 1;

    Document testDoc1 = new Document();
    SetupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    SetupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    SetupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    SetupDoc(testDoc4, test4);

    Directory dir = new RAMDirectory();

    try
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
        Assert.IsTrue(writer != null);
        writer.AddDocument(testDoc1);
        writer.AddDocument(testDoc2);
        writer.AddDocument(testDoc3);
        writer.AddDocument(testDoc4);
        writer.Close();

        IndexSearcher knownSearcher = new IndexSearcher(dir);
        TermEnum termEnum = knownSearcher.reader.Terms();
        TermDocs termDocs = knownSearcher.reader.TermDocs();
        Similarity sim = knownSearcher.GetSimilarity();

        // For every term, every posting's frequency must agree with the
        // stored term frequency vector of the same document.
        while (termEnum.Next() == true)
        {
            Term term = termEnum.Term();
            termDocs.Seek(term);
            while (termDocs.Next())
            {
                int docId = termDocs.Doc();
                int freq = termDocs.Freq();
                TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                float tf = sim.Tf(freq);
                float idf = sim.Idf(term, knownSearcher);
                // This is fine since we don't have stop words
                float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);
                Assert.IsTrue(vector != null);
                System.String[] vTerms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();
                for (int i = 0; i < vTerms.Length; i++)
                {
                    if (term.Text().Equals(vTerms[i]) == true)
                    {
                        Assert.IsTrue(freqs[i] == freq);
                    }
                }
            }
        }

        Query query = new TermQuery(new Term("Field", "chocolate"));
        Hits hits = knownSearcher.Search(query);
        // doc 3 should be the first hit b/c it is the shortest match
        Assert.IsTrue(hits.Length() == 3);
        float score = hits.Score(0);
        Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString()));
        Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString()));
        Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString()));

        TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
        Assert.IsTrue(vector2 != null);
        System.String[] terms = vector2.GetTerms();
        int[] freqs2 = vector2.GetTermFrequencies();
        Assert.IsTrue(terms != null && terms.Length == 10);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            int freq = freqs2[i];
            Assert.IsTrue(test4.IndexOf(term) != -1);
            System.Int32 freqInt = (System.Int32)test4Map[term];
            System.Object tmpFreqInt = test4Map[term];
            Assert.IsTrue(tmpFreqInt != null);
            Assert.IsTrue(freqInt == freq);
        }

        knownSearcher.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
public virtual int Doc()
{
    return in_Renamed.Doc();
}