/// <summary>
/// Builds a weighted phrase from the matched terms, merging the offsets of
/// terms at consecutive positions into a single offset range.
/// </summary>
/// <param name="terms">the matched terms, in phrase order; must contain at least one element</param>
/// <param name="boost">boost to record for this phrase</param>
/// <param name="seqnum">sequence number to record for this phrase</param>
public WeightedPhraseInfo(IList<TermInfo> terms, float boost, int seqnum)
{
    this.boost = boost;
    this.seqnum = seqnum;

    // Keep our own copy of the TermInfos for further operations.
    termsInfos = new List<TermInfo>(terms);
    termsOffsets = new List<Toffs>(terms.Count);

    // The first term always opens the first offset range.
    TermInfo head = terms[0];
    termsOffsets.Add(new Toffs(head.StartOffset, head.EndOffset));

    int previousPosition = head.Position;
    for (int index = 1; index < terms.Count; index++)
    {
        TermInfo current = terms[index];
        bool isAdjacent = (current.Position - previousPosition) == 1;
        if (!isAdjacent)
        {
            // Gap in positions: start a new offset range.
            termsOffsets.Add(new Toffs(current.StartOffset, current.EndOffset));
        }
        else
        {
            // Directly follows the previous term: stretch the most recent range.
            Toffs latest = termsOffsets[termsOffsets.Count - 1];
            latest.EndOffset = current.EndOffset;
        }
        previousPosition = current.Position;
    }
}
/// <summary>
/// Removes and returns the top <see cref="TermInfo"/> object of the stack.
/// </summary>
/// <returns>
/// the top <see cref="TermInfo"/> object of the stack, or <c>null</c> if the stack is empty
/// </returns>
public virtual TermInfo Pop()
{
    if (termList.Count == 0)
    {
        return null;
    }

    TermInfo first = termList[0];
    // Remove by index rather than by value: Remove(item) would run an
    // Equals-based search for an element we already know sits at index 0.
    termList.RemoveAt(0);
    return first;
}
/// <summary>
/// Searches the 1w index for two optional terms ("Mac", "MacBook") and checks
/// that the stack holds a single entry whose alternatives are linked in a cycle.
/// </summary>
public void TestFieldTermStackIndex1wSearch2terms()
{
    makeIndex1w();

    // Query: Mac OR MacBook.
    BooleanQuery query = new BooleanQuery();
    query.Add(tq("Mac"), Occur.SHOULD);
    query.Add(tq("MacBook"), Occur.SHOULD);

    FieldQuery fieldQuery = new FieldQuery(query, true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    // Only one entry is expected on the stack.
    assertEquals(1, termStack.termList.size());

    TermInfo head = termStack.Pop();
    assertEquals("Mac(11,20,3)", head.toString());
    assertEquals("MacBook(11,20,3)", head.Next.toString());

    // Following Next twice must lead back to the popped entry.
    assertSame(head, head.Next.Next);
}
/// <summary>
/// Checks that comparisons between <see cref="TermInfo"/> instances are consistent:
/// instances built with the same position (fourth constructor argument) compare as
/// equal even with different random text, and a lower position compares as less
/// than a higher one.
/// </summary>
public void TestTermInfoComparisonConsistency()
{
    // Construction order is kept so the random text is drawn in the same sequence.
    TermInfo atZero = new TermInfo(TestUtil.RandomUnicodeString(Random()), 0, 0, 0, 1);
    TermInfo atOne = new TermInfo(TestUtil.RandomUnicodeString(Random()), 0, 0, 1, 1);
    TermInfo atTwo = new TermInfo(TestUtil.RandomUnicodeString(Random()), 0, 0, 2, 1);
    TermInfo atZeroAgain = new TermInfo(TestUtil.RandomUnicodeString(Random()), 0, 0, 0, 1);

    // Every instance is equal to itself.
    foreach (TermInfo info in new TermInfo[] { atZero, atOne, atTwo, atZeroAgain })
    {
        assertConsistentEquals(info, info);
    }

    // Same position, different text.
    assertConsistentEquals(atZero, atZeroAgain);

    // Ordering follows position.
    assertConsistentLessThan(atZero, atOne);
    assertConsistentLessThan(atOne, atTwo);
    assertConsistentLessThan(atZero, atTwo);
    assertConsistentLessThan(atZeroAgain, atOne);
    assertConsistentLessThan(atZeroAgain, atTwo);
}
/// <summary>
/// Searches the 1w2w index for the term "pc" or the phrase "personal computer"
/// and checks the two resulting stack entries, including the cyclic link between
/// the alternatives of the first entry.
/// </summary>
public void TestFieldTermStackIndex1w2wSearch1term1phrase()
{
    makeIndex1w2w();

    // Query: pc OR "personal computer".
    BooleanQuery query = new BooleanQuery();
    query.Add(tq("pc"), Occur.SHOULD);
    query.Add(pqF("personal", "computer"), Occur.SHOULD);

    FieldQuery fieldQuery = new FieldQuery(query, true, true);
    FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    // Two entries are expected on the stack.
    assertEquals(2, termStack.termList.size());

    TermInfo head = termStack.Pop();
    assertEquals("pc(3,5,1)", head.toString());
    assertEquals("personal(3,5,1)", head.Next.toString());

    // Following Next twice must lead back to the popped entry.
    assertSame(head, head.Next.Next);

    TermInfo second = termStack.Pop();
    assertEquals("computer(3,5,2)", second.toString());
}
/// <summary>
/// Builds the phrase list by consuming <paramref name="fieldTermStack"/> and matching
/// runs of terms against the term/phrase maps of <paramref name="fieldQuery"/>.
/// Consumption stops when the stack is empty or the phrase list has reached
/// <paramref name="phraseLimit"/> entries.
/// </summary>
/// <param name="fieldTermStack"><see cref="FieldTermStack"/> object</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <param name="phraseLimit">maximum size of phraseList</param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
{
    string field = fieldTermStack.FieldName;

    List<TermInfo> phraseCandidate = new List<TermInfo>();
    QueryPhraseMap currMap = null;
    QueryPhraseMap nextMap = null;
    while (!fieldTermStack.IsEmpty && (phraseList.Count < phraseLimit))
    {
        phraseCandidate.Clear();

        TermInfo ti = null;
        TermInfo first = null;

        // Pop the next entry and walk its Next chain (until it loops back to the
        // popped entry) looking for a term that starts a query term or phrase.
        first = ti = fieldTermStack.Pop();
        currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
        while (currMap == null && ti.Next != first)
        {
            ti = ti.Next;
            currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
        }

        // if not found, discard top TermInfo from stack, then try next element
        if (currMap == null)
        {
            continue;
        }

        // if found, search the longest phrase
        phraseCandidate.Add(ti);
        while (true)
        {
            first = ti = fieldTermStack.Pop();
            nextMap = null;
            if (ti != null)
            {
                // Same chain walk as above, but against the continuation map.
                nextMap = currMap.GetTermMap(ti.Text);
                while (nextMap == null && ti.Next != first)
                {
                    ti = ti.Next;
                    nextMap = currMap.GetTermMap(ti.Text);
                }
            }

            if (ti == null || nextMap == null)
            {
                // The candidate cannot be extended; give the unmatched term back.
                if (ti != null)
                {
                    fieldTermStack.Push(ti);
                }

                if (currMap.IsValidTermOrPhrase(phraseCandidate))
                {
                    AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                }
                else
                {
                    // Shrink the candidate from the end, returning each dropped term
                    // to the stack, until a shorter prefix matches a query phrase.
                    while (phraseCandidate.Count > 1)
                    {
                        // Remove by index: this always drops the last element and
                        // avoids the Equals-based front-to-back search of Remove(item).
                        int lastIndex = phraseCandidate.Count - 1;
                        TermInfo last = phraseCandidate[lastIndex];
                        phraseCandidate.RemoveAt(lastIndex);
                        fieldTermStack.Push(last);
                        currMap = fieldQuery.SearchPhrase(field, phraseCandidate);
                        if (currMap != null)
                        {
                            AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                            break;
                        }
                    }
                }
                break;
            }
            else
            {
                // The term continues the phrase; keep extending.
                phraseCandidate.Add(ti);
                currMap = nextMap;
            }
        }
    }
}
//public static void main( string[] args ) throws Exception {
//  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
//  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
//  Query query = parser.parse( "a x:b" );
//  FieldQuery fieldQuery = new FieldQuery( query, true, false );
//  Directory dir = new RAMDirectory();
//  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
//  Document doc = new Document();
//  FieldType ft = new FieldType(TextField.TYPE_STORED);
//  ft.setStoreTermVectors(true);
//  ft.setStoreTermVectorOffsets(true);
//  ft.setStoreTermVectorPositions(true);
//  doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
//  doc.add( new Field( "f", ft, "b a b a f" ) );
//  writer.addDocument( doc );
//  writer.close();
//  IndexReader reader = IndexReader.open(dir1);
//  new FieldTermStack( reader, 0, "f", fieldQuery );
//  reader.close();
//}

/// <summary>
/// Builds the term stack for one field of one document: collects every occurrence
/// of the query's terms from the document's term vector, sorts the occurrences,
/// and links occurrences that share a position into a cycle, keeping only the
/// first of each such group in the list.
/// </summary>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">document id to be highlighted</param>
/// <param name="fieldName">field of the document to be highlighted</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <exception cref="System.IO.IOException">If there is a low-level I/O error</exception>
public FieldTermStack(IndexReader reader, int docId, string fieldName, FieldQuery fieldQuery)
{
    this.fieldName = fieldName;

    // No term set for this field (un-matched fieldName when fieldMatch == true):
    // return and leave the stack empty so a null snippet is produced.
    ISet<string> queryTerms = fieldQuery.GetTermSet(fieldName);
    if (queryTerms == null)
    {
        return;
    }

    // No term vectors for the document: null snippet.
    Fields docVectors = reader.GetTermVectors(docId);
    if (docVectors == null)
    {
        return;
    }

    // No term vector for this field: null snippet.
    Terms fieldVector = docVectors.GetTerms(fieldName);
    if (fieldVector == null)
    {
        return;
    }

    CharsRef utf16Scratch = new CharsRef();
    TermsEnum termsEnum = fieldVector.GetIterator(null);
    DocsAndPositionsEnum postings = null;
    BytesRef termBytes;

    int maxDoc = reader.MaxDoc;
    while ((termBytes = termsEnum.Next()) != null)
    {
        UnicodeUtil.UTF8toUTF16(termBytes, utf16Scratch);
        string termText = utf16Scratch.ToString();
        if (!queryTerms.Contains(termText))
        {
            // Not a query term; nothing to highlight.
            continue;
        }

        postings = termsEnum.DocsAndPositions(null, postings);
        if (postings == null)
        {
            // null snippet
            return;
        }

        postings.NextDoc();

        // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
        int docFreq = reader.DocFreq(new Term(fieldName, termBytes));
        double idf = Math.Log(maxDoc / (double)(docFreq + 1)) + 1.0;
        float weight = (float)idf;

        int occurrences = postings.Freq;
        for (int n = 0; n < occurrences; n++)
        {
            int position = postings.NextPosition();
            if (postings.StartOffset < 0)
            {
                return; // no offsets, null snippet
            }
            termList.Add(new TermInfo(termText, postings.StartOffset, postings.EndOffset, position, weight));
        }
    }

    // sort by position
    CollectionUtil.TimSort(termList);

    // Now look for dups at the same position, linking them together. Within a
    // group, "groupHead" is the entry that stays in the list and "groupTail" is
    // the most recently linked entry; closing a group points the tail at the head.
    int groupPosition = -1;
    TermInfo groupTail = null;
    TermInfo groupHead = null;
    int index = 0;
    while (index < termList.Count)
    {
        TermInfo candidate = termList[index];
        if (candidate.Position != groupPosition)
        {
            // New position: close the previous group, then open a new one.
            if (groupTail != null)
            {
                groupTail.SetNext(groupHead);
            }
            groupTail = groupHead = candidate;
            groupPosition = candidate.Position;

            // LUCENENET NOTE: Only advance when nothing was removed.
            index++;
        }
        else
        {
            // Same position as the current group: chain it and drop it from the list.
            Debug.Assert(groupTail != null);
            groupTail.SetNext(candidate);
            groupTail = candidate;

            // LUCENENET NOTE: Removing shifts the next item into this index, so don't advance.
            termList.RemoveAt(index);
        }
    }

    // Close the final group.
    if (groupTail != null)
    {
        groupTail.SetNext(groupHead);
    }
}
/// <summary>
/// Places a <see cref="TermInfo"/> on top of the stack, i.e. at index 0 of the term list.
/// </summary>
/// <param name="termInfo">the <see cref="TermInfo"/> object that becomes the new top of the stack</param>
public virtual void Push(TermInfo termInfo) => termList.Insert(0, termInfo);