/// <summary>
/// Build a <see cref="FieldFragList"/> for more than one field.
/// <para/>
/// One <see cref="FieldTermStack"/> and one <see cref="FieldPhraseList"/> is created per
/// entry of <paramref name="matchedFields"/>; the per-field phrase lists are then merged
/// into a single <see cref="FieldPhraseList"/> which is handed to
/// <paramref name="fragListBuilder"/>.
/// </summary>
/// <param name="fragListBuilder">builder used to create the resulting <see cref="FieldFragList"/></param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">document id to be highlighted</param>
/// <param name="matchedFields">names of the fields to build phrase lists for; must not be empty</param>
/// <param name="fragCharSize">value passed through to <c>CreateFieldFragList</c></param>
/// <returns>the <see cref="FieldFragList"/> created by <paramref name="fragListBuilder"/></returns>
/// <exception cref="ArgumentException">if <paramref name="matchedFields"/> contains no field name</exception>
private FieldFragList GetFieldFragList(IFragListBuilder fragListBuilder, FieldQuery fieldQuery, IndexReader reader, int docId, ISet<string> matchedFields, int fragCharSize)
{
    if (matchedFields.Count == 0)
    {
        throw new ArgumentException("matchedFields must contain at least one field name.");
    }

    FieldPhraseList[] toMerge = new FieldPhraseList[matchedFields.Count];
    int i = 0;
    foreach (var matchedField in matchedFields)
    {
        FieldTermStack stack = new FieldTermStack(reader, docId, matchedField, fieldQuery);
        // phraseLimit is a member of the enclosing type, not a parameter of this method.
        toMerge[i++] = new FieldPhraseList(stack, fieldQuery, phraseLimit);
    }

    return fragListBuilder.CreateFieldFragList(new FieldPhraseList(toMerge), fragCharSize);
}
/// <summary>
/// a constructor.
/// <para/>
/// Pops <see cref="TermInfo"/>s from <paramref name="fieldTermStack"/> and, for each one
/// that starts a term or phrase known to <paramref name="fieldQuery"/>, extends the
/// candidate as far as the query's phrase map allows. Valid candidates are added via
/// <c>AddIfNoOverlap</c>. The loop stops when the stack is empty or the phrase list has
/// reached <paramref name="phraseLimit"/> entries.
/// </summary>
/// <param name="fieldTermStack"><see cref="FieldTermStack"/> object</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <param name="phraseLimit">maximum size of phraseList</param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
{
    string field = fieldTermStack.FieldName;

    List<TermInfo> phraseCandidate = new List<TermInfo>();
    QueryPhraseMap currMap; // LUCENENET: IDE0059: Remove unnecessary value assignment
    QueryPhraseMap nextMap; // LUCENENET: IDE0059: Remove unnecessary value assignment
    while (!fieldTermStack.IsEmpty && (phraseList.Count < phraseLimit))
    {
        phraseCandidate.Clear();

        TermInfo ti; // LUCENENET: IDE0059: Remove unnecessary value assignment
        TermInfo first; // LUCENENET: IDE0059: Remove unnecessary value assignment

        first = ti = fieldTermStack.Pop();
        currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
        // Walk the ring of TermInfos linked through Next until one matches
        // or we are back at the element that was popped.
        while (currMap == null && ti.Next != first)
        {
            ti = ti.Next;
            currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
        }

        // if not found, discard top TermInfo from stack, then try next element
        if (currMap == null)
        {
            continue;
        }

        // if found, search the longest phrase
        phraseCandidate.Add(ti);
        while (true)
        {
            first = ti = fieldTermStack.Pop();
            nextMap = null;
            if (ti != null)
            {
                nextMap = currMap.GetTermMap(ti.Text);
                while (nextMap == null && ti.Next != first)
                {
                    ti = ti.Next;
                    nextMap = currMap.GetTermMap(ti.Text);
                }
            }

            if (ti == null || nextMap == null)
            {
                // The candidate cannot be extended any further: give the unused term back.
                if (ti != null)
                {
                    fieldTermStack.Push(ti);
                }

                if (currMap.IsValidTermOrPhrase(phraseCandidate))
                {
                    AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                }
                else
                {
                    // Back off one term at a time until a shorter phrase is found.
                    while (phraseCandidate.Count > 1)
                    {
                        //fieldTermStack.Push(phraseCandidate.Last.Value);
                        //phraseCandidate.RemoveLast();

                        // Remove the tail by index. List<T>.Remove(item) would search from
                        // the front and drop the first element that compares equal, which
                        // is O(n) and is not guaranteed to be the last element.
                        int lastIndex = phraseCandidate.Count - 1;
                        TermInfo last = phraseCandidate[lastIndex];
                        phraseCandidate.RemoveAt(lastIndex);
                        fieldTermStack.Push(last);

                        currMap = fieldQuery.SearchPhrase(field, phraseCandidate);
                        if (currMap != null)
                        {
                            AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                            break;
                        }
                    }
                }
                break;
            }
            else
            {
                phraseCandidate.Add(ti);
                currMap = nextMap;
            }
        }
    }
}
/// <summary>
/// Creates a <see cref="FieldPhraseList"/> without a practical cap on the number of
/// phrases: delegates to the three-argument constructor with <c>int.MaxValue</c> as the
/// phrase limit.
/// </summary>
/// <param name="fieldTermStack"><see cref="FieldTermStack"/> object</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery)
    : this(fieldTermStack, fieldQuery, int.MaxValue)
{
}
/// <summary>
/// Creates a <see cref="QueryPhraseMap"/> that keeps a reference to the given
/// <see cref="FieldQuery"/>.
/// </summary>
/// <param name="fieldQuery">the <see cref="FieldQuery"/> stored in this instance's <c>fieldQuery</c> field</param>
public QueryPhraseMap(FieldQuery fieldQuery)
{
    this.fieldQuery = fieldQuery;
}
//public static void main( string[] args ) throws Exception {
//  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
//  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,  "f", analyzer );
//  Query query = parser.parse( "a x:b" );
//  FieldQuery fieldQuery = new FieldQuery( query, true, false );

//  Directory dir = new RAMDirectory();
//  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
//  Document doc = new Document();
//  IndexableFieldType ft = new IndexableFieldType(TextField.TYPE_STORED);
//  ft.setStoreTermVectors(true);
//  ft.setStoreTermVectorOffsets(true);
//  ft.setStoreTermVectorPositions(true);
//  doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
//  doc.add( new Field( "f", ft, "b a b a f" ) );
//  writer.addDocument( doc );
//  writer.close();

//  IndexReader reader = IndexReader.open(dir1);
//  new FieldTermStack( reader, 0, "f", fieldQuery );
//  reader.close();
//}

/// <summary>
/// a constructor.
/// <para/>
/// Reads the term vector of <paramref name="fieldName"/> for document
/// <paramref name="docId"/>, collects a <see cref="TermInfo"/> for every occurrence of a
/// term contained in the query's term set, sorts them, and links entries that share a
/// position into a ring via <c>SetNext</c>, keeping only the first entry of each
/// position in the term list. The term list is left empty when the field has no query
/// terms, no term vector, or no offsets.
/// </summary>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">document id to be highlighted</param>
/// <param name="fieldName">field of the document to be highlighted</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <exception cref="System.IO.IOException">If there is a low-level I/O error</exception>
public FieldTermStack(IndexReader reader, int docId, string fieldName, FieldQuery fieldQuery)
{
    this.fieldName = fieldName;

    ISet<string> termSet = fieldQuery.GetTermSet(fieldName);
    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if (termSet == null)
    {
        return;
    }

    Fields vectors = reader.GetTermVectors(docId);
    if (vectors == null)
    {
        // null snippet
        return;
    }

    Terms vector = vectors.GetTerms(fieldName);
    if (vector == null)
    {
        // null snippet
        return;
    }

    CharsRef spare = new CharsRef();
    TermsEnum termsEnum = vector.GetIterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;

    int numDocs = reader.MaxDoc;

    while ((text = termsEnum.Next()) != null)
    {
        UnicodeUtil.UTF8toUTF16(text, spare);
        string term = spare.ToString();
        if (!termSet.Contains(term))
        {
            continue;
        }

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        if (dpEnum == null)
        {
            // null snippet
            return;
        }

        dpEnum.NextDoc();

        // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
        float weight = (float)(Math.Log(numDocs / (double)(reader.DocFreq(new Term(fieldName, text)) + 1)) + 1.0);

        int freq = dpEnum.Freq;

        for (int i = 0; i < freq; i++)
        {
            int pos = dpEnum.NextPosition();
            if (dpEnum.StartOffset < 0)
            {
                return; // no offsets, null snippet
            }
            termList.Add(new TermInfo(term, dpEnum.StartOffset, dpEnum.EndOffset, pos, weight));
        }
    }

    // sort by position
    CollectionUtil.TimSort(termList);

    // now look for dups at the same position, linking them together
    //
    // Duplicates are dropped by compacting the list in place (survivors are copied
    // forward to index "kept") and trimming the tail once at the end. Removing each
    // duplicate from the middle of the list as it is found would shift every following
    // element and make this pass quadratic.
    int currentPos = -1;
    TermInfo previous = null;
    TermInfo first = null;
    int total = termList.Count;
    int kept = 0;
    for (int i = 0; i < total; i++)
    {
        TermInfo current = termList[i];
        if (current.Position == currentPos)
        {
            // Same position as the current group: append to the group's chain and
            // do not copy it forward, which drops it from the list.
            Debug.Assert(previous != null);
            previous.SetNext(current);
            previous = current;
        }
        else
        {
            // A new position starts: close the previous group's ring first.
            if (previous != null)
            {
                previous.SetNext(first);
            }
            previous = first = current;
            currentPos = current.Position;
            termList[kept++] = current; // kept <= i, so unread entries are never overwritten
        }
    }

    // close the ring of the last group
    if (previous != null)
    {
        previous.SetNext(first);
    }

    // Drop the now-stale tail, removing from the end so nothing has to be shifted.
    for (int i = total - 1; i >= kept; i--)
    {
        termList.RemoveAt(i);
    }
}