/// <summary>
/// Searches for "seventy" in "Field" (expected to match all 100 docs) and
/// verifies every hit has exactly one stored term-frequency vector.
/// </summary>
public virtual void TestTermVectors_()
{
    Query query = new TermQuery(new Term("Field", "seventy"));
    try
    {
        Hits hits = searcher.Search(query);
        Assert.AreEqual(100, hits.Length());

        for (int i = 0; i < hits.Length(); i++)
        {
            TermFreqVector[] vector = searcher.reader.GetTermFreqVectors(hits.Id(i));
            Assert.IsNotNull(vector);
            Assert.AreEqual(1, vector.Length);
        }
        // NOTE(review): the original also fetched the vectors of hit 50 into an
        // unused local; that dead read has been removed.
    }
    catch (System.IO.IOException e)
    {
        // Fail with the cause instead of the opaque Assert.IsTrue(false).
        Assert.Fail("Unexpected IOException: " + e.Message);
    }
}
/// <summary>
/// Runs FilteredQuery over several inner queries (with and without sorting)
/// and checks the expected hit counts and document ids.
/// </summary>
public virtual void TestFilteredQuery_()
{
    // The fixture query restricted by the fixture filter: one hit, doc 1.
    Query fq = new FilteredQuery(query, filter);
    Hits hits = searcher.Search(fq);
    Assert.AreEqual(1, hits.Length());
    Assert.AreEqual(1, hits.Id(0));

    // Same filtered query, but sorted: still the single doc 1.
    hits = searcher.Search(fq, new Sort("sorter"));
    Assert.AreEqual(1, hits.Length());
    Assert.AreEqual(1, hits.Id(0));

    // Term "one" under the same filter: two matches.
    fq = new FilteredQuery(new TermQuery(new Term("Field", "one")), filter);
    hits = searcher.Search(fq);
    Assert.AreEqual(2, hits.Length());

    // Term "x" under the filter: exactly doc 3.
    fq = new FilteredQuery(new TermQuery(new Term("Field", "x")), filter);
    hits = searcher.Search(fq);
    Assert.AreEqual(1, hits.Length());
    Assert.AreEqual(3, hits.Id(0));

    // Term "y" under the filter: filtered away entirely.
    fq = new FilteredQuery(new TermQuery(new Term("Field", "y")), filter);
    hits = searcher.Search(fq);
    Assert.AreEqual(0, hits.Length());
}
/// <summary>
/// Searches for all N docs, then deletes documents while iterating the Hits:
/// every k-th hit when k >= 0, or at fixed positions (50/250/950) when k &lt; 0
/// ("intermittent" mode, which also checks Hits' internal deletion-detection
/// flag around its getMoreDocs boundaries). When deleteInFront is true the
/// last three (not-yet-retrieved) docs are deleted up front, and an exception
/// while iterating is then the expected outcome.
/// </summary>
/// <param name="k">delete every k-th hit; negative means intermittent mode</param>
/// <param name="deleteInFront">delete docs ahead of the retrieval cursor first</param>
private void DoTestSearchHitsDeleteEvery(int k, bool deleteInFront)
{
    bool intermittent = k < 0;
    Log("Test search hits with " + (intermittent ? "intermittent deletions." : "deletions of every " + k + " hit."));
    IndexSearcher searcher = new IndexSearcher(directory);
    IndexReader reader = searcher.GetIndexReader();
    Query q = new TermQuery(new Term(TEXT_FIELD, "text")); // matching all docs
    Hits hits = searcher.Search(q);
    Log("Got " + hits.Length() + " results");
    Assert.AreEqual(N, hits.Length(), "must match all " + N + " docs, not only " + hits.Length() + " docs!");
    if (deleteInFront)
    {
        Log("deleting hits that was not yet retrieved!");
        reader.DeleteDocument(reader.MaxDoc() - 1);
        reader.DeleteDocument(reader.MaxDoc() - 2);
        reader.DeleteDocument(reader.MaxDoc() - 3);
    }
    try
    {
        for (int i = 0; i < hits.Length(); i++)
        {
            int id = hits.Id(i);
            // Reuse id instead of re-calling hits.Id(i) three more times.
            Assert.AreEqual(i, id, "Hit " + i + " has doc id " + id + " instead of " + i);
            if ((intermittent && (i == 50 || i == 250 || i == 950)) ||
                (!intermittent && (k < 2 || (i > 0 && i % k == 0))))
            {
                Document doc = hits.Doc(id);
                Log("Deleting hit " + i + " - doc " + doc + " with id " + id);
                reader.DeleteDocument(id);
            }
            if (intermittent)
            {
                // check internal behavior of Hits (go 50 ahead of getMoreDocs
                // points because the deletions cause to use more of the
                // available hits)
                if (i == 150 || i == 450 || i == 1650)
                {
                    Assert.IsTrue(hits.debugCheckedForDeletions, "Hit " + i + ": hits should have checked for deletions in last call to getMoreDocs()");
                }
                else if (i == 50 || i == 250 || i == 850)
                {
                    Assert.IsFalse(hits.debugCheckedForDeletions, "Hit " + i + ": hits should have NOT checked for deletions in last call to getMoreDocs()");
                }
            }
        }
    }
    catch (System.Exception e)
    {
        // this is the only valid exception, and only when deleting in front.
        Assert.IsTrue(deleteInFront, e.Message + " not expected unless deleting hits that were not yet seen!");
    }
    finally
    {
        // FIX: always release the searcher, even when an assertion above (or
        // the re-assert in the catch) throws; the original leaked it on failure.
        searcher.Close();
    }
}
/// <summary>
/// Runs the query and asserts that every doc id listed in results appears in
/// the hits (a containment check — extra hits are not flagged; the stricter
/// equality assert was deliberately disabled upstream).
/// </summary>
public static void CheckHits_(Query query, System.String defaultFieldName, Searcher searcher, int[] results, TestCase testCase)
{
    Hits hits = searcher.Search(query);

    // Expected ids, keyed for O(1) membership tests. Hashtable.Add throws on
    // duplicates, matching the original behavior.
    System.Collections.Hashtable expected = new System.Collections.Hashtable();
    for (int n = 0; n < results.Length; n++)
    {
        expected.Add((System.Int32) results[n], null);
    }

    // Ids actually returned by the search.
    System.Collections.Hashtable returned = new System.Collections.Hashtable();
    for (int n = 0; n < hits.Length(); n++)
    {
        returned.Add((System.Int32) hits.Id(n), null);
    }

    //Assert.AreEqual(correct, actual, query.ToString(defaultFieldName));
    if (expected.Count != 0)
    {
        bool allFound = false;
        foreach (object key in expected.Keys)
        {
            allFound = returned.ContainsKey(key);
            if (!allFound)
            {
                break;
            }
        }
        Assert.IsTrue(allFound, query.ToString(defaultFieldName));
    }
}
/// <summary>
/// Writes the hit doc ids to stdout as a C# array literal,
/// e.g. "new int[] {1, 2, 3}".
/// </summary>
public static void PrintDocNrs(Hits hits)
{
    System.Text.StringBuilder buf = new System.Text.StringBuilder("new int[] {");
    for (int n = 0; n < hits.Length(); n++)
    {
        if (n > 0)
        {
            buf.Append(", ");
        }
        buf.Append(hits.Id(n));
    }
    buf.Append("}");
    System.Console.Out.WriteLine(buf.ToString());
}
/// <summary>
/// Writes the hit doc ids to stdout as a C# array literal,
/// e.g. "new int[] {1, 2, 3}".
/// </summary>
public static void PrintDocNrs(Hits hits)
{
    int count = hits.Length();
    string[] ids = new string[count];
    for (int n = 0; n < count; n++)
    {
        ids[n] = hits.Id(n).ToString();
    }
    System.Console.Out.WriteLine("new int[] {" + string.Join(", ", ids) + "}");
}
/// <summary>
/// Builds an EntityInfo for the hit at the given index and, when a projection
/// is configured, fills each projected slot (score, id, document, doc id,
/// boost, or a deferred THIS marker) from the hit/document.
/// </summary>
public EntityInfo Extract(Hits hits, int index)
{
    Document document = hits.Doc(index);
    //TODO if we are only looking for score (unlikely), avoid accessing doc (lazy load)
    EntityInfo entityInfo = Extract(document);

    object[] slots = entityInfo.Projection;
    if (slots == null || slots.Length == 0)
    {
        // No projection requested; nothing further to populate.
        return entityInfo;
    }

    for (int i = 0; i < projection.Length; i++)
    {
        switch (projection[i])
        {
            case ProjectionConstants.SCORE:
                slots[i] = hits.Score(index);
                break;
            case ProjectionConstants.ID:
                slots[i] = entityInfo.Id;
                break;
            case ProjectionConstants.DOCUMENT:
                slots[i] = document;
                break;
            case ProjectionConstants.DOCUMENT_ID:
                slots[i] = hits.Id(index);
                break;
            case ProjectionConstants.BOOST:
                slots[i] = document.GetBoost();
                break;
            case ProjectionConstants.THIS:
                //THIS could be projected more than once
                //THIS loading delayed to the Loader phase
                if (entityInfo.IndexesOfThis == null)
                {
                    entityInfo.IndexesOfThis = new List<int>(1);
                }
                entityInfo.IndexesOfThis.Add(i);
                break;
        }
    }
    return entityInfo;
}
/// <summary>
/// With KM_USE_FIRST_OCCURRENCE, every filtered hit must be the FIRST document
/// carrying its key value — verified by walking TermDocs for each hit's key.
/// </summary>
public void TestKeepsFirstFilter()
{
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    df.SetKeepMode(DuplicateFilter.KM_USE_FIRST_OCCURRENCE);
    Hits h = searcher.Search(tq, df);
    Assert.IsTrue(h.Length() > 0, "Filtered searching should have found some matches");

    for (int hit = 0; hit < h.Length(); hit++)
    {
        Document matched = h.Doc(hit);
        String key = matched.Get(KEY_FIELD);

        // TermDocs iterates docs in id order, so the first Next() yields the
        // earliest document with this key.
        TermDocs td = reader.TermDocs(new Term(KEY_FIELD, key));
        td.Next();
        int firstDoc = td.Doc();
        Assert.AreEqual(firstDoc, h.Id(hit), "Duplicate urls should return first doc");
    }
}
/// <summary>
/// Searches for "fifty" in "Field" (expected to match all 100 docs) and
/// verifies every hit has exactly one stored term-frequency vector.
/// </summary>
public virtual void TestTermPositionVectors()
{
    Query query = new TermQuery(new Term("Field", "fifty"));
    try
    {
        Hits hits = searcher.Search(query);
        Assert.AreEqual(100, hits.Length());

        for (int i = 0; i < hits.Length(); i++)
        {
            TermFreqVector[] vector = searcher.reader.GetTermFreqVectors(hits.Id(i));
            Assert.IsNotNull(vector);
            Assert.AreEqual(1, vector.Length);
        }
    }
    catch (System.IO.IOException e)
    {
        // Fail with the cause instead of the opaque Assert.IsTrue(false).
        Assert.Fail("Unexpected IOException: " + e.Message);
    }
}
/// <summary>
/// Indexes four small hand-built documents into a RAMDirectory, then:
/// 1) walks every term/doc pair and checks the stored term-frequency vector
///    agrees with the postings frequency, and
/// 2) searches for "chocolate", checks hit order (shortest match first) and
///    cross-checks the second hit's vector against a known term->freq map.
/// </summary>
public virtual void TestKnownSetOfDocuments()
{
    System.String[] termArray = new System.String[]{"eating", "chocolate", "in", "a", "computer", "lab", "grows", "old", "colored", "with", "an"};
    System.String test1 = "eating chocolate in a computer lab"; //6 terms
    System.String test2 = "computer in a computer lab"; //5 terms
    System.String test3 = "a chocolate lab grows old"; //5 terms
    System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms

    // Expected term frequencies for test4 (10 distinct terms).
    System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
    test4Map["chocolate"] = 3;
    test4Map["lab"] = 2;
    test4Map["eating"] = 1;
    test4Map["computer"] = 1;
    test4Map["with"] = 1;
    test4Map["a"] = 1;
    test4Map["colored"] = 1;
    test4Map["in"] = 1;
    test4Map["an"] = 1;
    // NOTE(review): "computer" is assigned twice; the indexer just overwrites,
    // so this is harmless but looks like a leftover.
    test4Map["computer"] = 1;
    test4Map["old"] = 1;

    Document testDoc1 = new Document();
    SetupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    SetupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    SetupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    SetupDoc(testDoc4, test4);

    Directory dir = new RAMDirectory();
    try
    {
        // Build a fresh index over the four docs.
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
        Assert.IsTrue(writer != null);
        writer.AddDocument(testDoc1);
        writer.AddDocument(testDoc2);
        writer.AddDocument(testDoc3);
        writer.AddDocument(testDoc4);
        writer.Close();

        IndexSearcher knownSearcher = new IndexSearcher(dir);
        TermEnum termEnum = knownSearcher.reader.Terms();
        TermDocs termDocs = knownSearcher.reader.TermDocs();
        Similarity sim = knownSearcher.GetSimilarity();

        // For every (term, doc) posting, the term's frequency recorded in the
        // doc's term-vector must match the postings frequency.
        while (termEnum.Next() == true)
        {
            Term term = termEnum.Term();
            termDocs.Seek(term);
            while (termDocs.Next())
            {
                int docId = termDocs.Doc();
                int freq = termDocs.Freq();
                TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                // tf/idf/lNorm are computed only to exercise the Similarity
                // API; their values are not asserted.
                float tf = sim.Tf(freq);
                float idf = sim.Idf(term, knownSearcher);
                // This is fine since we don't have stop words
                float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);
                Assert.IsTrue(vector != null);
                System.String[] vTerms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();
                for (int i = 0; i < vTerms.Length; i++)
                {
                    if (term.Text().Equals(vTerms[i]) == true)
                    {
                        Assert.IsTrue(freqs[i] == freq);
                    }
                }
            }
        }

        Query query = new TermQuery(new Term("Field", "chocolate"));
        Hits hits = knownSearcher.Search(query);
        //doc 3 should be the first hit b/c it is the shortest match
        Assert.IsTrue(hits.Length() == 3);
        float score = hits.Score(0);
        Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString()));
        Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString()));
        Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString()));

        // Hit 1 is testDoc4; its vector must list all 10 distinct terms with
        // the frequencies recorded in test4Map.
        TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
        Assert.IsTrue(vector2 != null);
        System.String[] terms = vector2.GetTerms();
        int[] freqs2 = vector2.GetTermFrequencies();
        Assert.IsTrue(terms != null && terms.Length == 10);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            int freq = freqs2[i];
            Assert.IsTrue(test4.IndexOf(term) != -1);
            // NOTE(review): the cast happens before the null check below; a
            // term missing from test4Map would throw here rather than fail the
            // tmpFreqInt assertion.
            System.Int32 freqInt = (System.Int32) test4Map[term];
            System.Object tmpFreqInt = test4Map[term];
            Assert.IsTrue(tmpFreqInt != null);
            Assert.IsTrue(freqInt == freq);
        }
        knownSearcher.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
/// <summary> Returns the Lucene document id for this hit.
///
/// </summary>
/// <seealso cref="Hits.Id(int)">
/// </seealso>
public virtual int GetId()
{
    return hits.Id(hitNumber);
}
/// <summary> Check the hits for duplicates: logs a line for every hit whose
/// Lucene id was already seen at an earlier hit index, and logs any
/// IOException raised while reading a hit's id.</summary>
/// <param name="hits">
/// </param>
private void CheckHits(Hits hits, System.String prefix)
{
    if (hits == null)
    {
        return;
    }
    // Maps Lucene id -> first hit index it was seen at.
    System.Collections.IDictionary seen = new System.Collections.SortedList();
    for (int docnum = 0; docnum < hits.Length(); ++docnum)
    {
        System.Int32 luceneId;
        try
        {
            // Only hits.Id can raise IOException here.
            luceneId = (System.Int32) hits.Id(docnum);
        }
        catch (System.IO.IOException ioe)
        {
            Log(prefix + "Error occurred for hit index = " + docnum + " (" + ioe.Message + ")");
            continue;
        }
        if (seen.Contains(luceneId))
        {
            Log(prefix + "Duplicate key for hit index = " + docnum + ", previous index = " + ((System.Int32) seen[luceneId]).ToString() + ", Lucene ID = " + luceneId);
        }
        else
        {
            seen[luceneId] = (System.Int32) docnum;
        }
    }
}
/// <summary>
/// Renders two hit lists side by side (doc id and score per row) for
/// diagnostic output; end &lt;= 0 means "through the longer list".
/// </summary>
public static System.String Hits2str(Hits hits1, Hits hits2, int start, int end)
{
    int len1 = (hits1 == null) ? 0 : hits1.Length();
    int len2 = (hits2 == null) ? 0 : hits2.Length();
    if (end <= 0)
    {
        end = System.Math.Max(len1, len2);
    }

    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    sb.Append("Hits length1=").Append(len1).Append("\tlength2=").Append(len2);
    sb.Append('\n');
    for (int row = start; row < end; row++)
    {
        sb.Append("hit=").Append(row).Append(':');
        if (row < len1)
        {
            sb.Append(" doc").Append(hits1.Id(row)).Append('=').Append(hits1.Score(row));
        }
        else
        {
            sb.Append(" ");
        }
        sb.Append(",\t");
        if (row < len2)
        {
            sb.Append(" doc").Append(hits2.Id(row)).Append('=').Append(hits2.Score(row));
        }
        sb.Append('\n');
    }
    return sb.ToString();
}
/// <summary>
/// Asserts two hit lists are equivalent: same length, same doc ids in the
/// same order, and scores equal within a small absolute tolerance.
/// </summary>
public static void CheckEqual(Query query, Hits hits1, Hits hits2)
{
    float scoreTolerance = 1.0e-6f;
    if (hits1.Length() != hits2.Length())
    {
        Assert.Fail("Unequal lengths: hits1=" + hits1.Length() + ",hits2=" + hits2.Length());
    }
    for (int i = 0; i < hits1.Length(); i++)
    {
        if (hits1.Id(i) != hits2.Id(i))
        {
            Assert.Fail("Hit " + i + " docnumbers don't match\n" + Hits2str(hits1, hits2, 0, 0) + "for query:" + query.ToString());
        }
        // Ids are known equal past this point (Assert.Fail throws), so the
        // original's repeated id comparison here was dead code and is removed;
        // only the score tolerance remains to check.
        if (System.Math.Abs(hits1.Score(i) - hits2.Score(i)) > scoreTolerance)
        {
            Assert.Fail("Hit " + i + ", doc nrs " + hits1.Id(i) + " and " + hits2.Id(i) + "\nunequal : " + hits1.Score(i) + "\n and: " + hits2.Score(i) + "\nfor query:" + query.ToString());
        }
    }
}
/// <summary>Tests that a Hits has an expected order of documents </summary>
public static void CheckDocIds(System.String mes, int[] results, Hits hits)
{
    Assert.AreEqual(results.Length, hits.Length(), mes + " nr of hits");
    int i = 0;
    foreach (int expectedId in results)
    {
        Assert.AreEqual(expectedId, hits.Id(i), mes + " doc nrs for hit " + i);
        i++;
    }
}