private System.Collections.Hashtable GetScores(Hits hits)
{
    System.Collections.Hashtable scoreMap = new System.Collections.Hashtable();
    int n = hits.Length();
    for (int i = 0; i < n; ++i)
    {
        Document doc = hits.Doc(i);
        System.String[] v = doc.GetValues("tracer");
        Assert.AreEqual(1, v.Length); // expected value comes first in NUnit
        scoreMap[v[0]] = hits.Score(i);
    }
    return scoreMap;
}
public EntityInfo Extract(Hits hits, int index)
{
    Document doc = hits.Doc(index);
    // TODO: if we are only looking for the score (unlikely), avoid accessing the doc (lazy load)
    EntityInfo entityInfo = Extract(doc);
    object[] eip = entityInfo.Projection;

    if (eip != null && eip.Length > 0)
    {
        for (int x = 0; x < projection.Length; x++)
        {
            switch (projection[x])
            {
                case ProjectionConstants.SCORE:
                    eip[x] = hits.Score(index);
                    break;
                case ProjectionConstants.ID:
                    eip[x] = entityInfo.Id;
                    break;
                case ProjectionConstants.DOCUMENT:
                    eip[x] = doc;
                    break;
                case ProjectionConstants.DOCUMENT_ID:
                    eip[x] = hits.Id(index);
                    break;
                case ProjectionConstants.BOOST:
                    eip[x] = doc.GetBoost();
                    break;
                case ProjectionConstants.THIS:
                    // THIS could be projected more than once;
                    // loading of THIS is delayed to the Loader phase.
                    if (entityInfo.IndexesOfThis == null)
                    {
                        entityInfo.IndexesOfThis = new List<int>(1);
                    }
                    entityInfo.IndexesOfThis.Add(x);
                    break;
            }
        }
    }
    return entityInfo;
}
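// Hedged usage sketch (not from the original source): how Extract might be called while
// walking a Hits result. It assumes the 'projection' field was populated with
// ProjectionConstants values (e.g. SCORE, ID) before the search ran.
private IList<EntityInfo> ExtractAll(Hits hits)
{
    IList<EntityInfo> infos = new List<EntityInfo>(hits.Length());
    for (int i = 0; i < hits.Length(); i++)
    {
        // Each EntityInfo.Projection row mirrors the order of the 'projection' field.
        infos.Add(Extract(hits, i));
    }
    return infos;
}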
internal static Hit FromRaw(Hits raw, int index)
{
    var doc = raw.Doc(index);
    var fields = new FieldCollection();
    foreach (var f in doc.GetFields())
    {
        var rf = (Lucene.Net.Documents.Field) f;
        fields.Add(new Field { Name = rf.Name(), Values = { rf.StringValue() } });
    }
    return new Hit
    {
        Score = raw.Score(index),
        SchemaName = doc.GetValues(IndexHelper.SchemaNameField).Join(""),
        SchemaVersion = doc.GetValues(IndexHelper.SchemaVersionField).Join(""),
        Fields = fields
    };
}
public virtual void TestKnownSetOfDocuments()
{
    System.String[] termArray = new System.String[] { "eating", "chocolate", "in", "a", "computer", "lab", "grows", "old", "colored", "with", "an" };
    System.String test1 = "eating chocolate in a computer lab"; // 6 terms
    System.String test2 = "computer in a computer lab"; // 5 terms
    System.String test3 = "a chocolate lab grows old"; // 5 terms
    System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; // 13 terms

    // Expected term frequencies for test4.
    System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
    test4Map["chocolate"] = 3;
    test4Map["lab"] = 2;
    test4Map["eating"] = 1;
    test4Map["computer"] = 1;
    test4Map["with"] = 1;
    test4Map["a"] = 1;
    test4Map["colored"] = 1;
    test4Map["in"] = 1;
    test4Map["an"] = 1;
    test4Map["old"] = 1;

    Document testDoc1 = new Document();
    SetupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    SetupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    SetupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    SetupDoc(testDoc4, test4);

    Directory dir = new RAMDirectory();

    try
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
        Assert.IsNotNull(writer);
        writer.AddDocument(testDoc1);
        writer.AddDocument(testDoc2);
        writer.AddDocument(testDoc3);
        writer.AddDocument(testDoc4);
        writer.Close();

        IndexSearcher knownSearcher = new IndexSearcher(dir);
        TermEnum termEnum = knownSearcher.reader.Terms();
        TermDocs termDocs = knownSearcher.reader.TermDocs();
        Similarity sim = knownSearcher.GetSimilarity();

        // For every term in the index, verify that the stored term-frequency vector
        // agrees with the frequency reported by TermDocs.
        while (termEnum.Next())
        {
            Term term = termEnum.Term();
            termDocs.Seek(term);
            while (termDocs.Next())
            {
                int docId = termDocs.Doc();
                int freq = termDocs.Freq();
                TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                Assert.IsNotNull(vector);
                float tf = sim.Tf(freq);
                float idf = sim.Idf(term, knownSearcher);
                // This is fine since we don't have stop words.
                float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);
                System.String[] vTerms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();
                for (int i = 0; i < vTerms.Length; i++)
                {
                    if (term.Text().Equals(vTerms[i]))
                    {
                        Assert.IsTrue(freqs[i] == freq);
                    }
                }
            }
        }

        Query query = new TermQuery(new Term("Field", "chocolate"));
        Hits hits = knownSearcher.Search(query);
        // Doc 3 should be the first hit because it is the shortest match.
        Assert.IsTrue(hits.Length() == 3);
        float score = hits.Score(0);
        Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString()));
        Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString()));
        Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString()));

        TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
        Assert.IsNotNull(vector2);
        System.String[] terms = vector2.GetTerms();
        int[] freqs2 = vector2.GetTermFrequencies();
        Assert.IsTrue(terms != null && terms.Length == 10);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            int freq = freqs2[i];
            Assert.IsTrue(test4.IndexOf(term) != -1);
            // Check for presence before casting, so a missing key fails the assertion
            // instead of throwing.
            System.Object tmpFreqInt = test4Map[term];
            Assert.IsNotNull(tmpFreqInt);
            System.Int32 freqInt = (System.Int32) tmpFreqInt;
            Assert.IsTrue(freqInt == freq);
        }
        knownSearcher.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.Fail("Unexpected IOException: " + e.Message);
    }
}
/// <summary>Returns the score for this hit.</summary>
/// <seealso cref="Hits.Score(int)"/>
public virtual float GetScore()
{
    return hits.Score(hitNumber);
}
public void AddResult(SearchInfo info, Hits hits, int maxMatches)
{
    if (docs == null)
        docs = new Dictionary<SearchInfo, List<ExDocument>>();
    if (hits == null || info == null)
        return;

    List<ExDocument> exdl = new List<ExDocument>();
    for (int i = 0; i < maxMatches && i < hits.Length(); i++)
    {
        exdl.Add(new ExDocument(hits.Doc(i), hits.Score(i)));
    }
    if (exdl.Count > 0)
        docs.Add(info, exdl);
}
public void AddResult(Hits hits, int maxMatches)
{
    if (docList == null)
        docList = new List<ExDocument>();
    if (hits == null)
        return;

    for (int i = 0; i < maxMatches && i < hits.Length(); i++)
    {
        docList.Add(new ExDocument(hits.Doc(i), hits.Score(i)));
    }
}
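// Hedged usage sketch (the searcher and query here are illustrative, not part of the
// original source): run a search and keep at most the first ten scored documents.
public void CollectTopTen(Searcher searcher, Query query)
{
    Hits hits = searcher.Search(query);
    AddResult(hits, 10); // appends up to 10 ExDocument entries to docList
}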
protected internal virtual void PrintHits(System.String test, Hits h)
{
    System.Console.Error.WriteLine("------- " + test + " -------");
    for (int i = 0; i < h.Length(); i++)
    {
        Lucene.Net.Documents.Document d = h.Doc(i);
        float score = h.Score(i);
        // Print the score to nine decimal places alongside the document id.
        System.Console.Error.WriteLine("#" + i + ": " + score.ToString("0.000000000") + " - " + d.Get("id"));
    }
}
public static System.String Hits2str(Hits hits1, Hits hits2, int start, int end)
{
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    int len1 = hits1 == null ? 0 : hits1.Length();
    int len2 = hits2 == null ? 0 : hits2.Length();
    if (end <= 0)
    {
        end = System.Math.Max(len1, len2);
    }

    sb.Append("Hits length1=").Append(len1).Append("\tlength2=").Append(len2);
    sb.Append('\n');
    for (int i = start; i < end; i++)
    {
        sb.Append("hit=").Append(i).Append(':');
        if (i < len1)
        {
            sb.Append(" doc").Append(hits1.Id(i)).Append('=').Append(hits1.Score(i));
        }
        else
        {
            sb.Append(" ");
        }
        sb.Append(",\t");
        if (i < len2)
        {
            sb.Append(" doc").Append(hits2.Id(i)).Append('=').Append(hits2.Score(i));
        }
        sb.Append('\n');
    }
    return sb.ToString();
}
public static void CheckEqual(Query query, Hits hits1, Hits hits2)
{
    const float scoreTolerance = 1.0e-6f;
    if (hits1.Length() != hits2.Length())
    {
        Assert.Fail("Unequal lengths: hits1=" + hits1.Length() + ",hits2=" + hits2.Length());
    }
    for (int i = 0; i < hits1.Length(); i++)
    {
        // Doc ids must match exactly; the score check below can then assume they do.
        if (hits1.Id(i) != hits2.Id(i))
        {
            Assert.Fail("Hit " + i + " docnumbers don't match\n" + Hits2str(hits1, hits2, 0, 0) + "for query:" + query.ToString());
        }
        if (System.Math.Abs(hits1.Score(i) - hits2.Score(i)) > scoreTolerance)
        {
            Assert.Fail("Hit " + i + ", doc nrs " + hits1.Id(i) + " and " + hits2.Id(i) + "\nunequal : " + hits1.Score(i) + "\n and: " + hits2.Score(i) + "\nfor query:" + query.ToString());
        }
    }
}
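// Hedged usage sketch (the two searchers, field, and term are illustrative): verify
// that two searchers over the same logical index return identical hits for a query.
public static void CheckEqualExample(Searcher searcher1, Searcher searcher2)
{
    Query q = new TermQuery(new Term("Field", "chocolate")); // hypothetical field/term
    CheckEqual(q, searcher1.Search(q), searcher2.Search(q));
}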
public static void Main(String[] a)
{
    String indexName = "localhost_index";
    String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
    Uri url = null;

    // Parse command-line flags: -i <index>, -f <file>, -url <url>.
    for (int i = 0; i < a.Length; i++)
    {
        if (a[i].Equals("-i"))
        {
            indexName = a[++i];
        }
        else if (a[i].Equals("-f"))
        {
            fn = a[++i];
        }
        else if (a[i].Equals("-url"))
        {
            url = new Uri(a[++i]);
        }
    }

    StreamWriter o = new StreamWriter(Console.OpenStandardOutput(), Console.Out.Encoding);
    o.AutoFlush = true;

    IndexReader r = IndexReader.Open(indexName);
    o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

    LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(r);
    o.WriteLine("Query generation parameters:");
    o.WriteLine(mlt.DescribeParams());
    o.WriteLine();

    // Build a "more like this" query from either a URL or a local file.
    Query query = null;
    if (url != null)
    {
        o.WriteLine("Parsing URL: " + url);
        query = mlt.Like(url);
    }
    else if (fn != null)
    {
        o.WriteLine("Parsing file: " + fn);
        query = mlt.Like(new FileInfo(fn));
    }
    o.WriteLine("q: " + query);
    o.WriteLine();

    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(indexName);
    Lucene.Net.Search.Hits hits = searcher.Search(query);
    int len = hits.Length();
    o.WriteLine("found: " + len + " documents matching");
    o.WriteLine();

    // Print at most the top 25 matches with score, url, title, and optional summary.
    for (int i = 0; i < Math.Min(25, len); i++)
    {
        Lucene.Net.Documents.Document d = hits.Doc(i);
        String summary = d.Get("summary");
        o.WriteLine("score : " + hits.Score(i));
        o.WriteLine("url : " + d.Get("url"));
        o.WriteLine("\ttitle : " + d.Get("title"));
        if (summary != null)
        {
            o.WriteLine("\tsummary: " + d.Get("summary"));
        }
        o.WriteLine();
    }
}
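// Example invocations (the executable name and paths are hypothetical, shown only to
// illustrate the flags parsed above):
//   MoreLikeThisDemo -i localhost_index -f c:/docs/page.html.en
//   MoreLikeThisDemo -i localhost_index -url http://localhost/manual/index.html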