예제 #1
0
파일: TestSort.cs 프로젝트: raj581/Marvin
        /// <summary>
        /// Builds a map from each hit's "tracer" field value to that hit's score.
        /// </summary>
        /// <param name="hits">Search results to read; each document is asserted to carry exactly one "tracer" value.</param>
        /// <returns>A table keyed by tracer value whose entries are the boxed float scores.</returns>
        private System.Collections.Hashtable GetScores(Hits hits)
        {
            System.Collections.Hashtable scoreMap = new System.Collections.Hashtable();
            int n = hits.Length();

            for (int i = 0; i < n; ++i)
            {
                Document        doc = hits.Doc(i);
                System.String[] v   = doc.GetValues("tracer");
                // Expected value goes first so a failure reports "expected 1 but was N".
                Assert.AreEqual(1, v.Length);
                // A later hit with the same tracer value overwrites the earlier entry.
                scoreMap[v[0]] = (float)hits.Score(i);
            }
            return scoreMap;
        }
        /// <summary>
        /// Builds the <see cref="EntityInfo"/> for one hit and fills in its projection slots.
        /// </summary>
        /// <param name="hits">Search results the hit belongs to.</param>
        /// <param name="index">Position of the hit inside <paramref name="hits"/>.</param>
        /// <returns>
        /// The entity info produced by <c>Extract(Document)</c>; when it carries a non-empty
        /// projection array, the slots named in <c>projection</c> are populated.
        /// </returns>
        public EntityInfo Extract(Hits hits, int index)
        {
            // TODO: if only the score is requested (unlikely), avoid loading the document (lazy load)
            Document document = hits.Doc(index);
            EntityInfo info = Extract(document);
            object[] slots = info.Projection;

            // Nothing to populate when the entity has no projection array.
            if (slots == null || slots.Length == 0)
            {
                return info;
            }

            for (int slot = 0; slot < projection.Length; slot++)
            {
                switch (projection[slot])
                {
                    case ProjectionConstants.SCORE:
                        slots[slot] = hits.Score(index);
                        break;

                    case ProjectionConstants.ID:
                        slots[slot] = info.Id;
                        break;

                    case ProjectionConstants.DOCUMENT:
                        slots[slot] = document;
                        break;

                    case ProjectionConstants.DOCUMENT_ID:
                        slots[slot] = hits.Id(index);
                        break;

                    case ProjectionConstants.BOOST:
                        slots[slot] = document.GetBoost();
                        break;

                    case ProjectionConstants.THIS:
                        // THIS may be projected several times; only the slot positions are
                        // recorded here, the actual loading is left to the Loader phase.
                        if (info.IndexesOfThis == null)
                        {
                            info.IndexesOfThis = new List<int>(1);
                        }
                        info.IndexesOfThis.Add(slot);
                        break;
                }
            }

            return info;
        }
예제 #3
0
파일: Hit.cs 프로젝트: mrkurt/mubble-old
        /// <summary>
        /// Converts the hit at <paramref name="index"/> of a raw result set into a <see cref="Hit"/>.
        /// </summary>
        /// <param name="raw">Raw search results.</param>
        /// <param name="index">Position of the hit to convert.</param>
        /// <returns>
        /// A hit carrying the raw score, the joined schema name and schema version values,
        /// and one single-valued entry per field returned by the document's <c>GetFields()</c>.
        /// </returns>
        internal static Hit FromRaw(Hits raw, int index)
        {
            var document = raw.Doc(index);

            // Copy every document field as a name plus its string value.
            var copied = new FieldCollection();
            foreach (Lucene.Net.Documents.Field source in document.GetFields())
            {
                var entry = new Field();
                entry.Name = source.Name();
                entry.Values.Add(source.StringValue());
                copied.Add(entry);
            }

            var hit = new Hit();
            hit.Score = raw.Score(index);
            hit.SchemaName = document.GetValues(IndexHelper.SchemaNameField).Join("");
            hit.SchemaVersion = document.GetValues(IndexHelper.SchemaVersionField).Join("");
            hit.Fields = copied;
            return hit;
        }
예제 #4
0
        /// <summary>
        /// Indexes four known documents into a RAM directory, then checks that the term
        /// frequency vectors returned by the reader agree with the postings frequencies and
        /// that a "chocolate" term query returns the documents in the expected order.
        /// </summary>
        public virtual void  TestKnownSetOfDocuments()
        {
            System.String   test1     = "eating chocolate in a computer lab";                                             //6 terms
            System.String   test2     = "computer in a computer lab";                                                     //5 terms
            System.String   test3     = "a chocolate lab grows old";                                                      //5 terms
            System.String   test4     = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms

            // Expected per-term frequencies for test4 (10 distinct terms).
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["old"]       = 1;

            Document testDoc1 = new Document();

            SetupDoc(testDoc1, test1);
            Document testDoc2 = new Document();

            SetupDoc(testDoc2, test2);
            Document testDoc3 = new Document();

            SetupDoc(testDoc3, test3);
            Document testDoc4 = new Document();

            SetupDoc(testDoc4, test4);

            Directory dir = new RAMDirectory();

            try
            {
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1);
                writer.AddDocument(testDoc2);
                writer.AddDocument(testDoc3);
                writer.AddDocument(testDoc4);
                writer.Close();
                IndexSearcher knownSearcher = new IndexSearcher(dir);
                TermEnum      termEnum      = knownSearcher.reader.Terms();
                TermDocs      termDocs      = knownSearcher.reader.TermDocs();

                // Walk every term and every posting, cross-checking the posting frequency
                // against the frequency recorded in the document's term vector.
                Similarity sim = knownSearcher.GetSimilarity();
                while (termEnum.Next())
                {
                    Term term = termEnum.Term();
                    termDocs.Seek(term);
                    while (termDocs.Next())
                    {
                        int docId = termDocs.Doc();
                        int freq  = termDocs.Freq();
                        TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                        // Assert before the vector is dereferenced, so a missing vector shows up
                        // as an assertion failure instead of a NullReferenceException.
                        Assert.IsTrue(vector != null);
                        // The similarity values are not asserted on; the calls are kept so the
                        // test still exercises them.
                        float          tf     = sim.Tf(freq);
                        float          idf    = sim.Idf(term, knownSearcher);
                        //This is fine since we don't have stop words
                        float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);
                        System.String[] vTerms = vector.GetTerms();
                        int[]           freqs  = vector.GetTermFrequencies();
                        for (int i = 0; i < vTerms.Length; i++)
                        {
                            if (term.Text().Equals(vTerms[i]))
                            {
                                Assert.IsTrue(freqs[i] == freq);
                            }
                        }
                    }
                }
                Query query = new TermQuery(new Term("Field", "chocolate"));
                Hits  hits  = knownSearcher.Search(query);
                //doc 3 should be the first hit b/c it is the shortest match
                Assert.IsTrue(hits.Length() == 3);
                float score = hits.Score(0);

                Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString()));
                Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString()));
                Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString()));
                TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
                Assert.IsTrue(vector2 != null);
                System.String[] terms  = vector2.GetTerms();
                int[]           freqs2 = vector2.GetTermFrequencies();
                Assert.IsTrue(terms != null && terms.Length == 10);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    int freq = freqs2[i];
                    Assert.IsTrue(test4.IndexOf(term) != -1);
                    // Look the term up and assert it is known before unboxing; unboxing a
                    // missing (null) entry would throw before the assertion could report it.
                    System.Object tmpFreqInt = test4Map[term];
                    Assert.IsTrue(tmpFreqInt != null);
                    System.Int32  freqInt    = (System.Int32)tmpFreqInt;
                    Assert.IsTrue(freqInt == freq);
                }
                knownSearcher.Close();
            }
            catch (System.IO.IOException e)
            {
                // An I/O failure is reported on stderr and then turned into a test failure.
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.IsTrue(false);
            }
        }
예제 #5
0
파일: TestSort.cs 프로젝트: emtees/old-code
		/// <summary>
		/// Builds a map from each hit's "tracer" field value to that hit's score.
		/// </summary>
		/// <param name="hits">Search results to read; each document is asserted to carry exactly one "tracer" value.</param>
		/// <returns>A table keyed by tracer value whose entries are the boxed float scores.</returns>
		private System.Collections.Hashtable GetScores(Hits hits)
		{
			System.Collections.Hashtable scoreMap = new System.Collections.Hashtable();
			int n = hits.Length();
			for (int i = 0; i < n; ++i)
			{
				Document doc = hits.Doc(i);
				System.String[] v = doc.GetValues("tracer");
				// Expected value goes first so a failure reports "expected 1 but was N".
				Assert.AreEqual(1, v.Length);
				// A later hit with the same tracer value overwrites the earlier entry.
				scoreMap[v[0]] = (float) hits.Score(i);
			}
			return scoreMap;
		}
예제 #6
0
 /// <summary>
 /// Gets the score of this hit.
 /// </summary>
 /// <returns>The value of <c>hits.Score(hitNumber)</c>.</returns>
 /// <seealso cref="Hits.Score(int)"/>
 public virtual float GetScore()
 {
     float score = hits.Score(hitNumber);
     return score;
 }
예제 #7
0
 /// <summary>
 /// Stores up to <paramref name="maxMatches"/> hits as the results for <paramref name="info"/>.
 /// </summary>
 /// <param name="info">Key the results are stored under; nothing is stored when it is null.</param>
 /// <param name="hits">Search results to copy from; nothing is stored when it is null.</param>
 /// <param name="maxMatches">Upper bound on the number of hits copied.</param>
 public void AddResult(SearchInfo info, Hits hits, int maxMatches)
 {
     // The result map is created on first use, before the arguments are checked.
     if (docs == null)
     {
         docs = new Dictionary<SearchInfo, List<ExDocument>>();
     }

     if (hits == null || info == null)
     {
         return;
     }

     List<ExDocument> matches = new List<ExDocument>();
     int position = 0;
     while (position < maxMatches && position < hits.Length())
     {
         ExDocument match = new ExDocument(hits.Doc(position), hits.Score(position));
         matches.Add(match);
         position++;
     }

     // An entry is registered only when at least one hit was copied.
     if (matches.Count != 0)
     {
         docs.Add(info, matches);
     }
 }
예제 #8
0
 /// <summary>
 /// Appends up to <paramref name="maxMatches"/> hits to the document list.
 /// </summary>
 /// <param name="hits">Search results to copy from; nothing is appended when it is null.</param>
 /// <param name="maxMatches">Upper bound on the number of hits copied.</param>
 public void AddResult(Hits hits, int maxMatches)
 {
     // The list is created on first use, before the argument is checked.
     if (docList == null)
     {
         docList = new List<ExDocument>();
     }

     if (hits == null)
     {
         return;
     }

     int position = 0;
     while (position < maxMatches && position < hits.Length())
     {
         ExDocument match = new ExDocument(hits.Doc(position), hits.Score(position));
         docList.Add(match);
         position++;
     }
 }
		/// <summary>
		/// Writes a header line containing <paramref name="test"/> to standard error, followed by
		/// one line per hit showing its position, its score and its "id" field.
		/// </summary>
		/// <param name="test">Label printed in the header line.</param>
		/// <param name="h">Search results to print.</param>
		protected internal virtual void  PrintHits(System.String test, Hits h)
		{
			System.Console.Error.WriteLine("------- " + test + " -------");

			for (int i = 0; i < h.Length(); i++)
			{
				Lucene.Net.Documents.Document d = h.Doc(i);
				float score = h.Score(i);
				// Apply the nine-decimal pattern to the score. The previous code concatenated the
				// literal text "{0.000000000}", which was printed verbatim and formatted nothing.
				System.String formattedScore = score.ToString("0.000000000", System.Globalization.CultureInfo.InvariantCulture);
				System.Console.Error.WriteLine("#" + i + ": " + formattedScore + " - " + d.Get("id"));
			}
		}
예제 #10
0
		/// <summary>
		/// Renders two result sets side by side, one text line per hit position.
		/// </summary>
		/// <param name="hits1">First result set; null is treated as having no hits.</param>
		/// <param name="hits2">Second result set; null is treated as having no hits.</param>
		/// <param name="start">First hit position to print.</param>
		/// <param name="end">Position to stop before; when zero or negative, the larger of the two lengths is used.</param>
		/// <returns>A header line with both lengths followed by one line per printed position.</returns>
		public static System.String Hits2str(Hits hits1, Hits hits2, int start, int end)
		{
			System.Text.StringBuilder text = new System.Text.StringBuilder();
			int firstCount = (hits1 != null) ? hits1.Length() : 0;
			int secondCount = (hits2 != null) ? hits2.Length() : 0;
			int stop = (end > 0) ? end : System.Math.Max(firstCount, secondCount);

			text.Append("Hits length1=");
			text.Append(firstCount);
			text.Append("\tlength2=");
			text.Append(secondCount);
			text.Append('\n');

			for (int pos = start; pos < stop; pos++)
			{
				text.Append("hit=");
				text.Append(pos);
				text.Append(':');

				if (pos >= firstCount)
				{
					// The first set has no hit at this position: emit blank padding instead.
					text.Append("               ");
				}
				else
				{
					text.Append(" doc");
					text.Append(hits1.Id(pos));
					text.Append('=');
					text.Append(hits1.Score(pos));
				}

				text.Append(",\t");

				if (pos < secondCount)
				{
					text.Append(" doc");
					text.Append(hits2.Id(pos));
					text.Append('=');
					text.Append(hits2.Score(pos));
				}

				text.Append('\n');
			}

			return text.ToString();
		}
예제 #11
0
		/// <summary>
		/// Fails the test unless both result sets have the same length, the same document
		/// numbers at every position, and scores that differ by at most 1.0e-6.
		/// </summary>
		/// <param name="query">Query that produced the results; only used in failure messages.</param>
		/// <param name="hits1">First result set.</param>
		/// <param name="hits2">Second result set.</param>
		public static void  CheckEqual(Query query, Hits hits1, Hits hits2)
		{
			const float scoreTolerance = 1.0e-6f;

			bool sameLength = hits1.Length() == hits2.Length();
			if (!sameLength)
			{
				Assert.Fail("Unequal lengths: hits1=" + hits1.Length() + ",hits2=" + hits2.Length());
			}

			for (int i = 0; i < hits1.Length(); i++)
			{
				// Document numbers are compared first; the failure includes a dump of both sets.
				bool idsDiffer = hits1.Id(i) != hits2.Id(i);
				if (idsDiffer)
				{
					Assert.Fail("Hit " + i + " docnumbers don't match\n" + Hits2str(hits1, hits2, 0, 0) + "for query:" + query.ToString());
				}

				// Scores are then compared within the tolerance.
				bool mismatch = (hits1.Id(i) != hits2.Id(i)) || System.Math.Abs(hits1.Score(i) - hits2.Score(i)) > scoreTolerance;
				if (mismatch)
				{
					Assert.Fail("Hit " + i + ", doc nrs " + hits1.Id(i) + " and " + hits2.Id(i) + "\nunequal       : " + hits1.Score(i) + "\n           and: " + hits2.Score(i) + "\nfor query:" + query.ToString());
				}
			}
		}
        /// <summary>
        /// Command-line entry point: builds a "more like this" query from a URL or a file,
        /// runs it against the index and prints up to 25 matching documents to standard output.
        /// </summary>
        /// <param name="a">
        /// Optional arguments: <c>-i indexName</c>, <c>-f fileName</c>, <c>-url address</c>.
        /// Unrecognized arguments are ignored. When a URL is given it takes precedence over the file.
        /// </param>
        public static void  Main(String[] a)
        {
            String indexName = "localhost_index";
            String fn        = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
            Uri    url       = null;

            for (int i = 0; i < a.Length; i++)
            {
                String option = a[i];
                bool   known  = option.Equals("-i") || option.Equals("-f") || option.Equals("-url");
                if (!known)
                {
                    continue;
                }

                // Each option needs a value; report a trailing option instead of
                // failing with an IndexOutOfRangeException.
                if (i + 1 >= a.Length)
                {
                    Console.Error.WriteLine("Missing value for option " + option);
                    return;
                }

                String optionValue = a[++i];
                if (option.Equals("-i"))
                {
                    indexName = optionValue;
                }
                else if (option.Equals("-f"))
                {
                    fn = optionValue;
                }
                else
                {
                    url = new Uri(optionValue);
                }
            }

            // The writer wraps the process's standard output, so it is flushed automatically
            // and deliberately not disposed (disposing would close stdout).
            StreamWriter o = new StreamWriter(Console.OpenStandardOutput(), Console.Out.Encoding);
            o.AutoFlush = true;

            IndexReader r = IndexReader.Open(indexName);
            Lucene.Net.Search.IndexSearcher searcher = null;

            try
            {
                o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

                LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(r);

                o.WriteLine("Query generation parameters:");
                o.WriteLine(mlt.DescribeParams());
                o.WriteLine();

                Query query = null;

                if (url != null)
                {
                    o.WriteLine("Parsing URL: " + url);
                    query = mlt.Like(url);
                }
                else if (fn != null)
                {
                    o.WriteLine("Parsing file: " + fn);
                    query = mlt.Like(new FileInfo(fn));
                }

                o.WriteLine("q: " + query);
                o.WriteLine();
                searcher = new Lucene.Net.Search.IndexSearcher(indexName);

                Lucene.Net.Search.Hits hits = searcher.Search(query);
                int len = hits.Length();

                o.WriteLine("found: " + len + " documents matching");
                o.WriteLine();

                int shown = Math.Min(25, len);
                for (int i = 0; i < shown; i++)
                {
                    Lucene.Net.Documents.Document d = hits.Doc(i);
                    String summary = d.Get("summary");
                    o.WriteLine("score  : " + hits.Score(i));
                    o.WriteLine("url    : " + d.Get("url"));
                    o.WriteLine("\ttitle  : " + d.Get("title"));
                    if (summary != null)
                    {
                        o.WriteLine("\tsummary: " + summary);
                    }
                    o.WriteLine();
                }
            }
            finally
            {
                // Release the index resources even when query generation or the search throws.
                if (searcher != null)
                {
                    searcher.Close();
                }
                r.Close();
            }
        }