/// <summary>
/// Creates a <see cref="FieldPhraseList"/> that has no limit on the number of phrases to analyze.
/// <para/>
/// Delegates to the three-argument constructor, passing <see cref="int.MaxValue"/> as the phrase limit.
/// </summary>
/// <param name="fieldTermStack"><see cref="FieldTermStack"/> object</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery)
    : this(fieldTermStack, fieldQuery, int.MaxValue)
{
}
/// <summary>
/// A constructor. Consumes the given <paramref name="fieldTermStack"/>, greedily matching the longest
/// possible term or phrase from <paramref name="fieldQuery"/> at each stack position, and collects the
/// resulting <see cref="WeightedPhraseInfo"/> entries (via <c>AddIfNoOverlap</c>) until either the stack
/// is exhausted or <paramref name="phraseLimit"/> phrases have been collected.
/// </summary>
/// <param name="fieldTermStack"><see cref="FieldTermStack"/> object</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <param name="phraseLimit">maximum size of phraseList</param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
{
    string field = fieldTermStack.FieldName;

    // Terms accumulated so far for the phrase currently being matched.
    List<TermInfo> phraseCandidate = new List<TermInfo>();
    QueryPhraseMap currMap; // LUCENENET: IDE0059: Remove unnecessary value assignment
    QueryPhraseMap nextMap; // LUCENENET: IDE0059: Remove unnecessary value assignment

    // NOTE(review): `phraseList` is a field declared elsewhere in this class; the loop stops once it
    // reaches phraseLimit entries, so later stack content is simply ignored.
    while (!fieldTermStack.IsEmpty && (phraseList.Count < phraseLimit))
    {
        phraseCandidate.Clear();

        TermInfo ti; // LUCENENET: IDE0059: Remove unnecessary value assignment
        TermInfo first; // LUCENENET: IDE0059: Remove unnecessary value assignment

        first = ti = fieldTermStack.Pop();
        currMap = fieldQuery.GetFieldTermMap(field, ti.Text);

        // Walk the TermInfo.Next chain looking for any term the query knows about. The traversal stops
        // when Next would return to `first`, i.e. the chain appears to be a circular list of terms
        // sharing one position — TODO confirm that invariant against FieldTermStack.
        while (currMap == null && ti.Next != first)
        {
            ti = ti.Next;
            currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
        }

        // if not found, discard top TermInfo from stack, then try next element
        if (currMap == null)
        {
            continue;
        }

        // if found, search the longest phrase
        phraseCandidate.Add(ti);
        while (true)
        {
            // Try to extend the current phrase with the next position popped off the stack.
            first = ti = fieldTermStack.Pop();
            nextMap = null;
            if (ti != null)
            {
                nextMap = currMap.GetTermMap(ti.Text);
                // Same ring traversal as above, but within the current phrase map.
                while (nextMap == null && ti.Next != first)
                {
                    ti = ti.Next;
                    nextMap = currMap.GetTermMap(ti.Text);
                }
            }

            // Extension failed: either the stack ran dry or no term at this position continues the phrase.
            if (ti == null || nextMap == null)
            {
                // Give the non-matching term back so the outer loop can start a new phrase from it.
                if (ti != null)
                {
                    fieldTermStack.Push(ti);
                }

                if (currMap.IsValidTermOrPhrase(phraseCandidate))
                {
                    AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                }
                else
                {
                    // The candidate is a partial phrase: shrink it from the tail one term at a time
                    // (returning each removed term to the stack) until a shorter prefix matches a
                    // complete query phrase, or only one term remains.
                    while (phraseCandidate.Count > 1)
                    {
                        TermInfo last = phraseCandidate[phraseCandidate.Count - 1];
                        phraseCandidate.Remove(last);
                        fieldTermStack.Push(last);
                        currMap = fieldQuery.SearchPhrase(field, phraseCandidate);
                        if (currMap != null)
                        {
                            AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                            break;
                        }
                    }
                }
                break; // done with this candidate; resume the outer scan
            }
            else
            {
                // Extension succeeded: keep the term and descend into the longer phrase's map.
                phraseCandidate.Add(ti);
                currMap = nextMap;
            }
        }
    }
}
/// <summary>
/// Randomized end-to-end check of discrete multi-value highlighting: indexes documents whose
/// field <c>F</c> has several values built from a shared pool of random terms, highlights a
/// randomly chosen term, and verifies the fragments produced by
/// <see cref="SimpleFragmentsBuilder"/> against expected snippets constructed directly from
/// the raw field values.
/// </summary>
public void TestRandomDiscreteMultiValueHighlighting()
{
    // Build a pool of distinct, non-empty random terms to index.
    string[] randomValues = new string[3 + Random.nextInt(10 * RANDOM_MULTIPLIER)];
    for (int i = 0; i < randomValues.Length; i++)
    {
        string randomValue;
        do
        {
            randomValue = TestUtil.RandomSimpleString(Random);
        } while ("".Equals(randomValue, StringComparison.Ordinal));
        randomValues[i] = randomValue;
    }

    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        Random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

    // Term vectors with positions and offsets are required by FieldTermStack.
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorOffsets = true;
    customType.StoreTermVectorPositions = true;

    int numDocs = randomValues.Length * 5;
    int numFields = 2 + Random.nextInt(5);
    int numTerms = 2 + Random.nextInt(3);
    List<Doc> docs = new List<Doc>(numDocs);
    List<Document> documents = new List<Document>(numDocs);
    // Maps each term to the set of document ids containing it (populated by getRandomValue).
    IDictionary<string, ISet<int>> valueToDocId = new JCG.Dictionary<string, ISet<int>>();
    for (int i = 0; i < numDocs; i++)
    {
        Document document = new Document();
        string[][] fields = RectangularArrays.ReturnRectangularArray<string>(numFields, numTerms); //new String[numFields][numTerms];
        for (int j = 0; j < numFields; j++)
        {
            string[] fieldValues = new string[numTerms];
            fieldValues[0] = getRandomValue(randomValues, valueToDocId, i);
            StringBuilder builder = new StringBuilder(fieldValues[0]);
            for (int k = 1; k < numTerms; k++)
            {
                fieldValues[k] = getRandomValue(randomValues, valueToDocId, i);
                builder.Append(' ').Append(fieldValues[k]);
            }
            // Every value is added under the same field name F, producing a multi-valued field.
            document.Add(new Field(F, builder.ToString(), customType));
            fields[j] = fieldValues;
        }
        docs.Add(new Doc(fields));
        documents.Add(document);
    }
    writer.AddDocuments(documents);
    writer.Dispose();

    IndexReader reader = DirectoryReader.Open(dir);
    try
    {
        int highlightIters = 1 + Random.nextInt(120 * RANDOM_MULTIPLIER);
        for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++)
        {
            // Review fix: only emit per-iteration progress when verbose test output is enabled.
            // Unconditionally writing inside this loop floods the log with up to
            // 120 * RANDOM_MULTIPLIER lines per run.
            if (Verbose)
            {
                Console.WriteLine($"Highlighter iter: {highlightIter}");
            }

            string queryTerm = randomValues[Random.nextInt(randomValues.Length)];
            int randomHit = valueToDocId[queryTerm].First();

            // Build the expected snippet for each field value that contains the query term.
            List<StringBuilder> builders = new List<StringBuilder>();
            foreach (string[] fieldValues in docs[randomHit].fieldValues)
            {
                StringBuilder builder = new StringBuilder();
                bool hit = false;
                for (int i = 0; i < fieldValues.Length; i++)
                {
                    if (queryTerm.Equals(fieldValues[i], StringComparison.Ordinal))
                    {
                        builder.Append("<b>").Append(queryTerm).Append("</b>");
                        hit = true;
                    }
                    else
                    {
                        builder.Append(fieldValues[i]);
                    }
                    if (i != fieldValues.Length - 1)
                    {
                        builder.Append(' ');
                    }
                }
                if (hit)
                {
                    builders.Add(builder);
                }
            }

            FieldQuery fq = new FieldQuery(tq(queryTerm), true, true);
            FieldTermStack stack = new FieldTermStack(reader, randomHit, F, fq);
            FieldPhraseList fpl = new FieldPhraseList(stack, fq);
            SimpleFragListBuilder sflb = new SimpleFragListBuilder(100);
            FieldFragList ffl = sflb.CreateFieldFragList(fpl, 300);
            SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
            sfb.IsDiscreteMultiValueHighlighting = true;

            string[] actualFragments = sfb.CreateFragments(reader, randomHit, F, ffl, numFields);
            assertEquals(builders.Count, actualFragments.Length);
            for (int i = 0; i < actualFragments.Length; i++)
            {
                assertEquals(builders[i].ToString(), actualFragments[i]);
            }
        }
    }
    finally
    {
        reader.Dispose();
        dir.Dispose();
    }
}