Пример #1
0
        /// <summary>
        /// Searches "Field" for the term "seventy" and checks that each of the
        /// expected 100 hits exposes exactly one term frequency vector.
        /// </summary>
        public virtual void  TestTermVectors_()
        {
            Query query = new TermQuery(new Term("Field", "seventy"));

            try
            {
                Hits hits = searcher.Search(query);
                Assert.AreEqual(100, hits.Length());

                for (int i = 0; i < hits.Length(); i++)
                {
                    TermFreqVector[] vector = searcher.reader.GetTermFreqVectors(hits.Id(i));
                    Assert.IsTrue(vector != null);
                    Assert.IsTrue(vector.Length == 1);
                }

                // Direct lookup of one hit outside the loop; the result used to be
                // fetched and silently ignored, so verify it like the others.
                TermFreqVector[] vector2 = searcher.reader.GetTermFreqVectors(hits.Id(50));
                Assert.IsTrue(vector2 != null);
                Assert.IsTrue(vector2.Length == 1);
            }
            catch (System.IO.IOException e)
            {
                // Report the I/O error itself rather than an uninformative "expected true".
                Assert.Fail(e.Message);
            }
        }
Пример #2
0
        /// <summary>
        /// Wraps the fixture query and several term queries in a FilteredQuery
        /// built on the fixture filter, then checks hit counts and document ids.
        /// </summary>
        public virtual void  TestFilteredQuery_()
        {
            // Fixture query restricted by the fixture filter, default ordering.
            Query restricted = new FilteredQuery(query, filter);
            Hits matches = searcher.Search(restricted);
            Assert.AreEqual(1, matches.Length());
            Assert.AreEqual(1, matches.Id(0));

            // Same filtered query, this time sorted on the "sorter" field.
            Hits sortedMatches = searcher.Search(restricted, new Sort("sorter"));
            Assert.AreEqual(1, sortedMatches.Length());
            Assert.AreEqual(1, sortedMatches.Id(0));

            // Term "one": two documents are expected to pass the filter.
            Query oneQuery = new FilteredQuery(new TermQuery(new Term("Field", "one")), filter);
            Hits oneMatches = searcher.Search(oneQuery);
            Assert.AreEqual(2, oneMatches.Length());

            // Term "x": only document 3 is expected to pass the filter.
            Query xQuery = new FilteredQuery(new TermQuery(new Term("Field", "x")), filter);
            Hits xMatches = searcher.Search(xQuery);
            Assert.AreEqual(1, xMatches.Length());
            Assert.AreEqual(3, xMatches.Id(0));

            // Term "y": nothing is expected to pass the filter.
            Query yQuery = new FilteredQuery(new TermQuery(new Term("Field", "y")), filter);
            Hits yMatches = searcher.Search(yQuery);
            Assert.AreEqual(0, yMatches.Length());
        }
        /// <summary>
        /// Walks every hit of a query expected to match all N documents while deleting
        /// documents through the searcher's reader, and checks that hit i still reports doc id i.
        /// </summary>
        /// <param name="k">
        /// Negative: delete only hits 50, 250 and 950 and additionally check the
        /// <c>debugCheckedForDeletions</c> flag at fixed hit positions. Otherwise: delete every
        /// hit when k is below 2, or every hit whose index is a positive multiple of k.
        /// </param>
        /// <param name="deleteInFront">
        /// When true, the three highest-numbered documents are deleted before iteration starts;
        /// only in that case is an exception during iteration tolerated.
        /// </param>
        private void  DoTestSearchHitsDeleteEvery(int k, bool deleteInFront)
        {
            bool intermittent = k < 0;

            Log("Test search hits with " + (intermittent?"intermittent deletions.":"deletions of every " + k + " hit."));
            IndexSearcher searcher = new IndexSearcher(directory);
            try
            {
                IndexReader reader = searcher.GetIndexReader();
                Query       q      = new TermQuery(new Term(TEXT_FIELD, "text")); // matching all docs
                Hits        hits   = searcher.Search(q);

                Log("Got " + hits.Length() + " results");
                Assert.AreEqual(N, hits.Length(), "must match all " + N + " docs, not only " + hits.Length() + " docs!");
                if (deleteInFront)
                {
                    Log("deleting hits that was not yet retrieved!");
                    reader.DeleteDocument(reader.MaxDoc() - 1);
                    reader.DeleteDocument(reader.MaxDoc() - 2);
                    reader.DeleteDocument(reader.MaxDoc() - 3);
                }
                try
                {
                    for (int i = 0; i < hits.Length(); i++)
                    {
                        int id = hits.Id(i);
                        Assert.AreEqual(i, id, "Hit " + i + " has doc id " + id + " instead of " + i);
                        if ((intermittent && (i == 50 || i == 250 || i == 950)) || (!intermittent && (k < 2 || (i > 0 && i % k == 0))))
                        {
                            Document doc = hits.Doc(id);
                            Log("Deleting hit " + i + " - doc " + doc + " with id " + id);
                            reader.DeleteDocument(id);
                        }
                        if (intermittent)
                        {
                            // check internal behavior of Hits (go 50 ahead of getMoreDocs points because the deletions cause to use more of the available hits)
                            if (i == 150 || i == 450 || i == 1650)
                            {
                                Assert.IsTrue(hits.debugCheckedForDeletions, "Hit " + i + ": hits should have checked for deletions in last call to getMoreDocs()");
                            }
                            else if (i == 50 || i == 250 || i == 850)
                            {
                                Assert.IsFalse(hits.debugCheckedForDeletions, "Hit " + i + ": hits should have NOT checked for deletions in last call to getMoreDocs()");
                            }
                        }
                    }
                }
                catch (System.Exception e)
                {
                    // An exception is only acceptable when deleting in front; otherwise fail with its message.
                    Assert.IsTrue(deleteInFront, e.Message + " not expected unless deleting hits that were not yet seen!");
                }
            }
            finally
            {
                // Close the searcher even when an assertion above fails, so it is never leaked.
                searcher.Close();
            }
        }
Пример #4
0
        /// <summary>
        /// Runs <paramref name="query"/> on <paramref name="searcher"/> and asserts that the set
        /// of returned document ids equals the set of ids in <paramref name="results"/>
        /// (order and duplicates are ignored).
        /// </summary>
        /// <param name="query">Query to execute.</param>
        /// <param name="defaultFieldName">Field name passed to <c>query.ToString</c> when building the failure message.</param>
        /// <param name="searcher">Searcher the query is run against.</param>
        /// <param name="results">Expected document ids.</param>
        /// <param name="testCase">Not used by this method; kept for signature compatibility.</param>
        public static void  CheckHits_(Query query, System.String defaultFieldName, Searcher searcher, int[] results, TestCase testCase)
        {
            Hits hits = searcher.Search(query);

            // The indexer is used instead of Add so that a repeated id does not throw ArgumentException.
            System.Collections.Hashtable correct = new System.Collections.Hashtable();
            for (int i = 0; i < results.Length; i++)
            {
                correct[results[i]] = null;
            }

            System.Collections.Hashtable actual = new System.Collections.Hashtable();
            for (int i = 0; i < hits.Length(); i++)
            {
                actual[hits.Id(i)] = null;
            }

            // Set equality: same number of distinct ids, and every expected id is present.
            // This also catches unexpected extra hits and the "expected nothing" case.
            System.String description = query.ToString(defaultFieldName);
            Assert.AreEqual(correct.Count, actual.Count, description);
            foreach (System.Object expectedId in correct.Keys)
            {
                Assert.IsTrue(actual.ContainsKey(expectedId), description);
            }
        }
Пример #5
0
		/// <summary>
		/// Writes the document ids of <paramref name="hits"/> to standard output
		/// in the form <c>new int[] {id0, id1, ...}</c> followed by a newline.
		/// </summary>
		public static void  PrintDocNrs(Hits hits)
		{
			System.IO.TextWriter output = System.Console.Out;
			output.Write("new int[] {");

			int count = hits.Length();
			for (int n = 0; n < count; n++)
			{
				// Separator goes in front of every id except the first one.
				if (n > 0)
				{
					output.Write(", ");
				}
				output.Write(hits.Id(n));
			}

			output.WriteLine("}");
		}
Пример #6
0
 /// <summary>
 /// Prints the document ids of <paramref name="hits"/> to standard output
 /// as <c>new int[] {id0, id1, ...}</c>, terminated by a newline.
 /// </summary>
 public static void  PrintDocNrs(Hits hits)
 {
     System.Console.Out.Write("new int[] {");

     int last = hits.Length() - 1;
     int pos = 0;
     while (pos <= last)
     {
         System.Console.Out.Write(hits.Id(pos));
         // Every id but the final one is followed by a separator.
         if (pos < last)
         {
             System.Console.Out.Write(", ");
         }
         pos++;
     }

     System.Console.Out.WriteLine("}");
 }
        /// <summary>
        /// Builds the <c>EntityInfo</c> for the hit at <paramref name="index"/> and, when that
        /// entity carries a non-empty projection array, fills each slot according to the
        /// corresponding entry of <c>projection</c>.
        /// </summary>
        /// <param name="hits">Search result the document, score and document id are read from.</param>
        /// <param name="index">Position of the hit inside <paramref name="hits"/>.</param>
        /// <returns>The entity info extracted from the hit's document.</returns>
        public EntityInfo Extract(Hits hits, int index)
        {
            Document doc = hits.Doc(index);
            //TODO if we are only looking for score (unlikely), avoid accessing doc (lazy load)
            EntityInfo info = Extract(doc);
            object[] slots = info.Projection;

            // Nothing to fill in when the entity has no projection array.
            if (slots == null || slots.Length == 0)
            {
                return info;
            }

            for (int slot = 0; slot < projection.Length; slot++)
            {
                switch (projection[slot])
                {
                    case ProjectionConstants.THIS:
                        // THIS may be projected more than once; only its positions are
                        // recorded here, the actual loading is delayed to the Loader phase.
                        if (info.IndexesOfThis == null)
                        {
                            info.IndexesOfThis = new List<int>(1);
                        }
                        info.IndexesOfThis.Add(slot);
                        break;

                    case ProjectionConstants.SCORE:
                        slots[slot] = hits.Score(index);
                        break;

                    case ProjectionConstants.ID:
                        slots[slot] = info.Id;
                        break;

                    case ProjectionConstants.DOCUMENT:
                        slots[slot] = doc;
                        break;

                    case ProjectionConstants.DOCUMENT_ID:
                        slots[slot] = hits.Id(index);
                        break;

                    case ProjectionConstants.BOOST:
                        slots[slot] = doc.GetBoost();
                        break;
                }
            }

            return info;
        }
Пример #8
0
 /// <summary>
 /// With the duplicate filter in keep-first-occurrence mode, every hit must be the
 /// first document that TermDocs enumerates for the hit's KEY_FIELD value.
 /// </summary>
 public void TestKeepsFirstFilter()
 {
     DuplicateFilter dupFilter = new DuplicateFilter(KEY_FIELD);
     dupFilter.SetKeepMode(DuplicateFilter.KM_USE_FIRST_OCCURRENCE);

     Hits found = searcher.Search(tq, dupFilter);
     Assert.IsTrue(found.Length() > 0, "Filtered searching should have found some matches");

     for (int hit = 0; hit < found.Length(); hit++)
     {
         String url = found.Doc(hit).Get(KEY_FIELD);

         // Advance once to reach the first document listed for this url.
         TermDocs postings = reader.TermDocs(new Term(KEY_FIELD, url));
         postings.Next();
         int firstDoc = postings.Doc();

         Assert.AreEqual(firstDoc, found.Id(hit), "Duplicate urls should return first doc");
     }
 }
Пример #9
0
        /// <summary>
        /// Searches "Field" for the term "fifty" and checks that each of the
        /// expected 100 hits exposes exactly one term frequency vector.
        /// </summary>
        public virtual void  TestTermPositionVectors()
        {
            Query query = new TermQuery(new Term("Field", "fifty"));

            try
            {
                Hits hits = searcher.Search(query);
                Assert.AreEqual(100, hits.Length());

                for (int i = 0; i < hits.Length(); i++)
                {
                    TermFreqVector[] vector = searcher.reader.GetTermFreqVectors(hits.Id(i));
                    Assert.IsTrue(vector != null);
                    Assert.IsTrue(vector.Length == 1);
                }
            }
            catch (System.IO.IOException e)
            {
                // Report the I/O error itself rather than an uninformative "expected true".
                Assert.Fail(e.Message);
            }
        }
Пример #10
0
        /// <summary>
        /// Indexes four small documents with known term counts into a RAMDirectory and checks that
        /// (a) for every term/document pair, the frequency in the document's term vector equals the
        /// frequency reported by TermDocs, and
        /// (b) a search for "chocolate" returns documents 3, 4 and 1 in that order, and the term
        /// vector of the second hit matches the expected per-term frequencies of document 4.
        /// </summary>
        public virtual void  TestKnownSetOfDocuments()
        {
            System.String test1 = "eating chocolate in a computer lab";                                             //6 terms
            System.String test2 = "computer in a computer lab";                                                     //5 terms
            System.String test3 = "a chocolate lab grows old";                                                      //5 terms
            System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms

            // Expected frequency of each distinct term of test4.
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["old"]       = 1;

            Document testDoc1 = new Document();
            SetupDoc(testDoc1, test1);
            Document testDoc2 = new Document();
            SetupDoc(testDoc2, test2);
            Document testDoc3 = new Document();
            SetupDoc(testDoc3, test3);
            Document testDoc4 = new Document();
            SetupDoc(testDoc4, test4);

            Directory dir = new RAMDirectory();

            try
            {
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1);
                writer.AddDocument(testDoc2);
                writer.AddDocument(testDoc3);
                writer.AddDocument(testDoc4);
                writer.Close();
                IndexSearcher knownSearcher = new IndexSearcher(dir);
                TermEnum      termEnum      = knownSearcher.reader.Terms();
                TermDocs      termDocs      = knownSearcher.reader.TermDocs();

                Similarity sim = knownSearcher.GetSimilarity();
                while (termEnum.Next() == true)
                {
                    Term term = termEnum.Term();
                    termDocs.Seek(term);
                    while (termDocs.Next())
                    {
                        int docId = termDocs.Doc();
                        int freq  = termDocs.Freq();
                        TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                        // Check for null before the vector is dereferenced below.
                        Assert.IsTrue(vector != null);
                        // The similarity values are not asserted on; the calls are kept as in the original test.
                        float tf  = sim.Tf(freq);
                        float idf = sim.Idf(term, knownSearcher);
                        //This is fine since we don't have stop words
                        float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);
                        System.String[] vTerms = vector.GetTerms();
                        int[]           freqs  = vector.GetTermFrequencies();
                        for (int i = 0; i < vTerms.Length; i++)
                        {
                            if (term.Text().Equals(vTerms[i]) == true)
                            {
                                Assert.IsTrue(freqs[i] == freq);
                            }
                        }
                    }
                }
                Query query = new TermQuery(new Term("Field", "chocolate"));
                Hits  hits  = knownSearcher.Search(query);
                //doc 3 should be the first hit b/c it is the shortest match
                Assert.IsTrue(hits.Length() == 3);
                float score = hits.Score(0);

                Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString()));
                Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString()));
                Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString()));
                TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
                Assert.IsTrue(vector2 != null);
                System.String[] terms  = vector2.GetTerms();
                int[]           freqs2 = vector2.GetTermFrequencies();
                Assert.IsTrue(terms != null && terms.Length == 10);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    int freq = freqs2[i];
                    Assert.IsTrue(test4.IndexOf(term) != -1);
                    // Assert the entry exists before unboxing it, so that a missing term
                    // fails the assertion instead of throwing a NullReferenceException.
                    System.Object tmpFreqInt = test4Map[term];
                    Assert.IsTrue(tmpFreqInt != null);
                    System.Int32 freqInt = (System.Int32)tmpFreqInt;
                    Assert.IsTrue(freqInt == freq);
                }
                knownSearcher.Close();
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                // Include the I/O error message in the test failure.
                Assert.Fail(e.Message);
            }
        }
Пример #11
0
 /// <summary>
 /// Gets the id of this hit by asking the underlying <c>hits</c> for the id
 /// stored at this hit's <c>hitNumber</c>.
 /// </summary>
 /// <returns>The value of <c>hits.Id(hitNumber)</c>.</returns>
 /// <seealso cref="Hits.Id(int)"/>
 public virtual int GetId()
 {
     int id = hits.Id(hitNumber);
     return id;
 }
		/// <summary>
		/// Scans <paramref name="hits"/> for document ids that occur at more than one hit index
		/// and logs a message for each repeat. An IOException raised while processing a hit is
		/// logged as well, and the scan continues with the next hit.
		/// </summary>
		/// <param name="hits">Hits to inspect; nothing is done when this is null.</param>
		/// <param name="prefix">Text placed at the start of every logged message.</param>
		private void  CheckHits(Hits hits, System.String prefix)
		{
			if (hits == null)
			{
				return;
			}

			// Maps each document id to the hit index at which it was first seen.
			System.Collections.IDictionary firstSeenAt = new System.Collections.SortedList();
			for (int hitIndex = 0; hitIndex < hits.Length(); ++hitIndex)
			{
				try
				{
					System.Int32 luceneId = (System.Int32) hits.Id(hitIndex);
					if (!firstSeenAt.Contains(luceneId))
					{
						firstSeenAt[luceneId] = (System.Int32) hitIndex;
						continue;
					}

					Log(prefix + "Duplicate key for hit index = " + hitIndex + ", previous index = " + ((System.Int32) firstSeenAt[luceneId]).ToString() + ", Lucene ID = " + luceneId);
				}
				catch (System.IO.IOException ioe)
				{
					Log(prefix + "Error occurred for hit index = " + hitIndex + " (" + ioe.Message + ")");
				}
			}
		}
Пример #13
0
		/// <summary>
		/// Formats the ids and scores of two hit lists side by side, one line per hit index.
		/// </summary>
		/// <param name="hits1">First hit list; null is treated as an empty list.</param>
		/// <param name="hits2">Second hit list; null is treated as an empty list.</param>
		/// <param name="start">First hit index to print.</param>
		/// <param name="end">Index to stop before; a value of zero or less means the longer of the two lengths.</param>
		/// <returns>A header line with both lengths, followed by one line per hit index.</returns>
		public static System.String Hits2str(Hits hits1, Hits hits2, int start, int end)
		{
			int count1 = (hits1 != null) ? hits1.Length() : 0;
			int count2 = (hits2 != null) ? hits2.Length() : 0;
			int limit = (end > 0) ? end : System.Math.Max(count1, count2);

			System.Text.StringBuilder text = new System.Text.StringBuilder();
			text.Append("Hits length1=");
			text.Append(count1);
			text.Append("\tlength2=");
			text.Append(count2);
			text.Append('\n');

			for (int row = start; row < limit; row++)
			{
				text.Append("hit=");
				text.Append(row);
				text.Append(':');

				// Left column: first list, or fixed-width padding once it is exhausted.
				if (row >= count1)
				{
					text.Append("               ");
				}
				else
				{
					text.Append(" doc");
					text.Append(hits1.Id(row));
					text.Append('=');
					text.Append(hits1.Score(row));
				}

				text.Append(",\t");

				// Right column: second list, left empty once it is exhausted.
				if (row < count2)
				{
					text.Append(" doc");
					text.Append(hits2.Id(row));
					text.Append('=');
					text.Append(hits2.Score(row));
				}

				text.Append('\n');
			}

			return text.ToString();
		}
Пример #14
0
		/// <summary>
		/// Asserts that two hit lists have the same length, the same document id at every
		/// position, and scores that differ by no more than 1.0e-6 at every position.
		/// </summary>
		/// <param name="query">Query that produced the hits; only used in failure messages.</param>
		/// <param name="hits1">First hit list.</param>
		/// <param name="hits2">Second hit list.</param>
		public static void  CheckEqual(Query query, Hits hits1, Hits hits2)
		{
			const float tolerance = 1.0e-6f;

			int count1 = hits1.Length();
			int count2 = hits2.Length();
			if (count1 != count2)
			{
				Assert.Fail("Unequal lengths: hits1=" + count1 + ",hits2=" + count2);
			}

			for (int pos = 0; pos < count1; pos++)
			{
				int id1 = hits1.Id(pos);
				int id2 = hits2.Id(pos);
				if (id1 != id2)
				{
					Assert.Fail("Hit " + pos + " docnumbers don't match\n" + Hits2str(hits1, hits2, 0, 0) + "for query:" + query.ToString());
				}

				// The ids are known to be equal here, so only the scores remain to be compared.
				float score1 = hits1.Score(pos);
				float score2 = hits2.Score(pos);
				if (System.Math.Abs(score1 - score2) > tolerance)
				{
					Assert.Fail("Hit " + pos + ", doc nrs " + id1 + " and " + id2 + "\nunequal       : " + score1 + "\n           and: " + score2 + "\nfor query:" + query.ToString());
				}
			}
		}
Пример #15
0
		/// <summary>
		/// Asserts that <paramref name="hits"/> has as many entries as <paramref name="results"/>
		/// and that the document id at each position equals the expected id at that position.
		/// </summary>
		/// <param name="mes">Text placed at the start of every assertion message.</param>
		/// <param name="results">Expected document ids, in expected order.</param>
		/// <param name="hits">Hits to verify.</param>
		public static void  CheckDocIds(System.String mes, int[] results, Hits hits)
		{
			int expectedCount = results.Length;
			Assert.AreEqual(expectedCount, hits.Length(), mes + " nr of hits");

			int position = 0;
			while (position < expectedCount)
			{
				Assert.AreEqual(results[position], hits.Id(position), mes + " doc nrs for hit " + position);
				position++;
			}
		}