/// <summary>
/// Exercises <c>Preceding</c> on the shared iterator <c>bi</c>: asking for the
/// boundary before offset 0 must yield <c>BreakIterator.Done</c>, and asking for
/// the boundary before offset 5 must yield 4.
/// </summary>
public void TestPreceding()
{
    const string message = "BreakIterator preceding position not correct";

    // Nothing can precede the very first offset.
    int beforeStart = bi.Preceding(0);
    TestFmwk.assertEquals(message, BreakIterator.Done, beforeStart);

    // The test expects a boundary at 4 immediately before offset 5.
    int beforeFive = bi.Preceding(5);
    TestFmwk.assertEquals(message, 4, beforeFive);
}
/// <summary>
/// Verifies that <paramref name="bi"/> treats its whole text as exactly one
/// segment: the only boundaries are the begin index and the end index.
/// </summary>
/// <param name="bi">Iterator under test; its text must already be set.</param>
/// <param name="text">The string the iterator is expected to return as a single segment.</param>
private void Test1Sentence(BreakIterator bi, String text)
{
    int start = bi.Text.BeginIndex;
    assertEquals(start, bi.First());

    // Forward: one step must land on the end index and span the whole text.
    int current = bi.Current;
    assertEquals(bi.Text.EndIndex, bi.Next());
    int end = bi.Current;
    // .NET Substring takes (startIndex, length). The length is derived from the two
    // absolute iterator positions so that 'start' is subtracted exactly once
    // (previously it was subtracted twice, which is only correct when start == 0).
    assertEquals(text, text.Substring(current - start, end - current));

    // Backward: from the last boundary one step back reaches the start, a second step is Done.
    assertEquals(text.Length, bi.Last() - start);
    end = bi.Current;
    bi.Previous();
    assertEquals(BreakIterator.Done, bi.Previous());
    int previous = bi.Current;
    assertEquals(text, text.Substring(previous - start, end - previous));
    assertEquals(start, bi.Current);

    // The test expects no boundary after or before the midpoint offset.
    assertEquals(BreakIterator.Done, bi.Following(bi.Last() / 2 + start));
    assertEquals(BreakIterator.Done, bi.Preceding(bi.Last() / 2 + start));

    // Multi-step moves past either end must report Done.
    assertEquals(start, bi.First());
    assertEquals(BreakIterator.Done, bi.Next(13));
    assertEquals(BreakIterator.Done, bi.Next(-8));
}
/// <summary>
/// Walks <paramref name="bi"/> over <c>TEXT</c> forwards and backwards and checks
/// that each segment equals the matching entry of <c>SENTENCES</c>, then checks
/// fixed <c>Following</c>/<c>Preceding</c>/<c>Next(n)</c> results.
/// </summary>
/// <remarks>
/// LUCENENET NOTE: .NET <c>Substring</c> takes (startIndex, length), so every
/// segment length is computed as "end - begin" to match Java's substring(begin, end).
/// </remarks>
/// <param name="bi">Iterator under test; assumed to already be positioned on <c>TEXT</c>.</param>
private void Do3SentenceTest(BreakIterator bi)
{
    assertEquals(0, bi.Current);
    assertEquals(0, bi.First());

    // Forward: the first two sentences are read with plain Next() calls.
    for (int sentence = 0; sentence < 2; sentence++)
    {
        int begin = bi.Current;
        int finish = bi.Next();
        assertEquals(SENTENCES[sentence], TEXT.Substring(begin, finish - begin));
    }

    // Forward: the third Next() must land exactly on the end of the text.
    int lastBegin = bi.Current;
    assertEquals(bi.Text.EndIndex, bi.Next());
    int lastFinish = bi.Current;
    assertEquals(SENTENCES[2], TEXT.Substring(lastBegin, lastFinish - lastBegin));

    assertEquals(BreakIterator.Done, bi.Next());

    // Backward: from the last boundary, each Previous() uncovers one sentence.
    assertEquals(TEXT.Length, bi.Last());
    for (int sentence = 2; sentence >= 0; sentence--)
    {
        int finish = bi.Current;
        int begin = bi.Previous();
        assertEquals(SENTENCES[sentence], TEXT.Substring(begin, finish - begin));
    }

    assertEquals(BreakIterator.Done, bi.Previous());
    assertEquals(0, bi.Current);

    // Random access by offset.
    assertEquals(59, bi.Following(39));
    assertEquals(59, bi.Following(31));
    assertEquals(31, bi.Following(30));

    assertEquals(0, bi.Preceding(57));
    assertEquals(0, bi.Preceding(58));
    assertEquals(31, bi.Preceding(59));

    // Multi-step moves.
    assertEquals(0, bi.First());
    assertEquals(59, bi.Next(2));
    assertEquals(0, bi.Next(-2));
}
/// <summary>
/// Checks the iterator's behavior when there is nothing to break: the only
/// position is 0 and every attempt to move reports <c>BreakIterator.Done</c>.
/// </summary>
/// <param name="bi">Iterator under test; its text must already be set.</param>
private void Test0Sentences(BreakIterator bi)
{
    int done = BreakIterator.Done;

    // Positioning calls stay at offset 0.
    assertEquals(0, bi.Current);
    assertEquals(0, bi.First());

    // Single steps in either direction have nowhere to go.
    assertEquals(done, bi.Next());
    assertEquals(0, bi.Last());
    assertEquals(done, bi.Previous());

    // Offset-based lookups find no boundary.
    assertEquals(done, bi.Following(0));
    assertEquals(done, bi.Preceding(0));

    // Multi-step moves also report Done.
    assertEquals(0, bi.First());
    assertEquals(done, bi.Next(13));
    assertEquals(done, bi.Next(-8));
}
/// <summary>
/// Asserts that two break iterators break their texts the same way.
/// </summary>
/// <remarks>
/// Both iterators are driven in lock-step through Next, First, Last, Previous,
/// Following and Preceding; after every call the returned value and the
/// resulting <c>Current</c> position must agree.
/// </remarks>
/// <param name="one">Text handed to <paramref name="expected"/>; its index range also drives the offset loops.</param>
/// <param name="two">Text handed to <paramref name="actual"/>.</param>
/// <param name="expected">Reference iterator.</param>
/// <param name="actual">Iterator under test.</param>
public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual)
{
    expected.SetText(one);
    actual.SetText(two);

    assertEquals(expected.Current, actual.Current);

    // next(): walk forward until the reference iterator is exhausted
    for (int forward = expected.Current; forward != BreakIterator.DONE;)
    {
        forward = expected.Next();
        assertEquals(forward, actual.Next());
        assertEquals(expected.Current, actual.Current);
    }

    // first()
    assertEquals(expected.First(), actual.First());
    assertEquals(expected.Current, actual.Current);

    // last()
    assertEquals(expected.Last(), actual.Last());
    assertEquals(expected.Current, actual.Current);

    // previous(): walk backward until the reference iterator is exhausted
    for (int backward = expected.Current; backward != BreakIterator.DONE;)
    {
        backward = expected.Previous();
        assertEquals(backward, actual.Previous());
        assertEquals(expected.Current, actual.Current);
    }

    // following(): probe every offset, always starting from the first boundary
    int offset = one.BeginIndex;
    while (offset <= one.EndIndex)
    {
        expected.First();
        actual.First();
        assertEquals(expected.Following(offset), actual.Following(offset));
        assertEquals(expected.Current, actual.Current);
        offset++;
    }

    // preceding(): probe every offset, always starting from the last boundary
    offset = one.BeginIndex;
    while (offset <= one.EndIndex)
    {
        expected.Last();
        actual.Last();
        assertEquals(expected.Preceding(offset), actual.Preceding(offset));
        assertEquals(expected.Current, actual.Current);
        offset++;
    }
}
/// <summary>
/// Chooses the character range to build components for, starting near
/// <paramref name="localStart"/>, widens it to line-break boundaries, passes it to
/// <c>EnsureComponents</c> and marks the layout window as present.
/// </summary>
/// <param name="localStart">Index into <c>FChars</c> at which the window should begin.</param>
private void MakeLayoutWindow(int localStart)
{
    int totalChars = FChars.Length;
    int windowStart = localStart;
    int windowLimit = totalChars;

    // If we've already gone past the layout window, format to end of paragraph
    if (LayoutCount > 0 && !HaveLayoutWindow)
    {
        float avgLineLength = System.Math.Max(LayoutCharCount / LayoutCount, 1);
        int estimatedLimit = localStart + (int)(avgLineLength * EST_LINES);
        windowLimit = System.Math.Min(estimatedLimit, FChars.Length);
    }

    bool coversWholeText = localStart <= 0 && windowLimit >= totalChars;
    if (!coversWholeText)
    {
        // Point the (lazily created) character iterator at the current text.
        if (CharIter == null)
        {
            CharIter = new CharArrayIterator(FChars);
        }
        else
        {
            CharIter.Reset(FChars);
        }

        // The line-break iterator is also created on first use.
        if (FLineBreak == null)
        {
            FLineBreak = BreakIterator.LineInstance;
        }
        FLineBreak.SetText(CharIter);

        // Move the start back to a line-break boundary if it is not on one.
        if (localStart > 0 && !FLineBreak.IsBoundary(localStart))
        {
            windowStart = FLineBreak.Preceding(localStart);
        }

        // Move the limit forward to a line-break boundary if it is not on one.
        if (windowLimit < totalChars && !FLineBreak.IsBoundary(windowLimit))
        {
            windowLimit = FLineBreak.Following(windowLimit);
        }
    }

    EnsureComponents(windowStart, windowLimit);
    HaveLayoutWindow = true;
}
/// <summary>
/// Calls <c>Preceding</c> for every offset from 0 through <c>text.Length</c> and
/// reports an error whenever the result differs from the expected boundary.
/// </summary>
/// <param name="bi">Iterator under test.</param>
/// <param name="text">Text being iterated; only its length is used here.</param>
/// <param name="boundaries">Expected boundary positions in ascending order.</param>
private void _testPreceding(BreakIterator bi, String text, int[] boundaries)
{
    Logln("testPreceding():");

    // Index of the boundary that is currently expected as the answer.
    int expectedIndex = 0;
    int offset = 0;
    while (offset <= text.Length)
    {
        int actual = bi.Preceding(offset);
        Logln("bi.preceding(" + offset + ") -> " + actual);

        int expected = boundaries[expectedIndex];
        if (actual != expected)
        {
            Errln("Wrong result from preceding() for " + offset + ": expected " + expected + ", got " + actual);
        }

        // Once the offset reaches the next boundary, later offsets expect that boundary.
        if (boundaries[expectedIndex + 1] == offset)
        {
            expectedIndex++;
        }

        offset++;
    }
}
/// <summary>
/// Checks <c>Following</c>, <c>Preceding</c> and <c>IsBoundary</c> of a word
/// iterator against the boundaries found by stepping over "aaa bbb ccc".
/// </summary>
public void TestPreceding()
{
    String words3 = "aaa bbb ccc";
    BreakIterator wordIter = BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);
    wordIter.SetText(words3);
    wordIter.First();

    // Collect the first four boundaries after the start.
    int first = wordIter.Next();
    int second = wordIter.Next();
    int third = wordIter.Next();
    int fourth = wordIter.Next();

    // Probe one position past the second boundary in both directions.
    int probe = second + 1;
    int after = wordIter.Following(probe);
    int before = wordIter.Preceding(probe);

    if (after != third)
    {
        Errln("IntlTestTextBoundary::TestPreceding: f!=p3");
    }
    if (before != second)
    {
        Errln("IntlTestTextBoundary::TestPreceding: p!=p2");
    }

    // The first and second, and the third and fourth, boundaries must be adjacent.
    if (first + 1 != second)
    {
        Errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
    }
    if (third + 1 != fourth)
    {
        Errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
    }

    bool boundariesAgree = wordIter.IsBoundary(second)
        && !wordIter.IsBoundary(second + 1)
        && wordIter.IsBoundary(third);
    if (!boundariesAgree)
    {
        Errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
    }
}
// algorithm: treat sentence snippets as miniature documents
// we can intersect these with the postings lists via BreakIterator.preceding(offset),
// score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
/// <summary>
/// Builds the highlighted passages for one document: term occurrences are consumed
/// from a priority queue, grouped into passages whose bounds come from
/// <paramref name="bi"/>, scored with the field's <c>PassageScorer</c>, and at most
/// <paramref name="n"/> passages are kept.
/// </summary>
/// <param name="field">Field name; used to look up the scorer and in error messages.</param>
/// <param name="terms">Terms to match. A <c>null</c> entry means the matched term is read from the payload.</param>
/// <param name="contentLength">Offset limit: processing stops at the first occurrence starting at or beyond it.</param>
/// <param name="bi">Break iterator used to find each passage's start and end offsets.</param>
/// <param name="doc">Document id the postings are advanced to.</param>
/// <param name="termsEnum">Used to seek each term and obtain its postings when none are cached yet.</param>
/// <param name="postings">Per-term postings, parallel to <paramref name="terms"/>; entries are filled in or replaced by <c>EMPTY</c> here.</param>
/// <param name="n">Maximum number of passages kept in the result queue.</param>
/// <returns>The retained passages, sorted by ascending start offset.</returns>
/// <exception cref="ArgumentException">Thrown when postings or offsets are unavailable for the field.</exception>
private Passage[] HighlightDoc(string field, BytesRef[] terms, int contentLength, BreakIterator bi, int doc, TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n)
{
    PassageScorer scorer = GetScorer(field);
    if (scorer is null)
    {
        // LUCENENET: Changed from NullPointerException to InvalidOperationException (which isn't caught anywhere outside of tests)
        throw IllegalStateException.Create("PassageScorer cannot be null");
    }
    // Queue of per-term position cursors still to be processed.
    // NOTE(review): presumably ordered by current start offset via OffsetsEnum's own comparison - confirm.
    JCG.PriorityQueue <OffsetsEnum> pq = new JCG.PriorityQueue <OffsetsEnum>();
    float[] weights = new float[terms.Length];
    // initialize postings
    for (int i = 0; i < terms.Length; i++)
    {
        DocsAndPositionsEnum de = postings[i];
        int pDoc;
        if (de == EMPTY)
        {
            // this slot was already marked as having no usable postings
            continue;
        }
        else if (de is null)
        {
            postings[i] = EMPTY; // initially
            if (!termsEnum.SeekExact(terms[i]))
            {
                continue; // term not found
            }
            de = postings[i] = termsEnum.DocsAndPositions(null, null, DocsAndPositionsFlags.OFFSETS);
            if (de is null)
            {
                // no positions available
                throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
            }
            pDoc = de.Advance(doc);
        }
        else
        {
            // reuse the cached enum; only advance when it is still behind the target doc
            pDoc = de.DocID;
            if (pDoc < doc)
            {
                pDoc = de.Advance(doc);
            }
        }
        if (doc == pDoc)
        {
            // the term occurs in this document: record its weight and queue its first position
            weights[i] = scorer.Weight(contentLength, de.Freq);
            de.NextPosition();
            pq.Add(new OffsetsEnum(de, i));
        }
    }
    pq.Add(new OffsetsEnum(EMPTY, int.MaxValue)); // a sentinel for termination
    // Passages are compared by score first, then by start offset on ties.
    JCG.PriorityQueue <Passage> passageQueue = new JCG.PriorityQueue <Passage>(n, Comparer <Passage> .Create((left, right) =>
    {
        if (left.score < right.score)
        {
            return(-1);
        }
        else if (left.score > right.score)
        {
            return(1);
        }
        else
        {
            return(left.startOffset - right.startOffset);
        }
    }));
    Passage current = new Passage();
    while (pq.TryDequeue(out OffsetsEnum off))
    {
        DocsAndPositionsEnum dp = off.dp;
        int start = dp.StartOffset;
        if (start == -1)
        {
            throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
        }
        int end = dp.EndOffset;
        // LUCENE-5166: this hit would span the content limit... however more valid
        // hits may exist (they are sorted by start). so we pretend like we never
        // saw this term, it won't cause a passage to be added to passageQueue or anything.
        if (Debugging.AssertsEnabled)
        {
            // the sentinel's start offset must be int.MaxValue so it reaches the "start >= contentLength" exit below
            Debugging.Assert(EMPTY.StartOffset == int.MaxValue);
        }
        if (start < contentLength && end > contentLength)
        {
            continue;
        }
        if (start >= current.endOffset)
        {
            if (current.startOffset >= 0)
            {
                // finalize current
                current.score *= scorer.Norm(current.startOffset);
                // new sentence: first add 'current' to queue
                if (passageQueue.Count == n && current.score < passageQueue.Peek().score)
                {
                    current.Reset(); // can't compete, just reset it
                }
                else
                {
                    passageQueue.Enqueue(current);
                    if (passageQueue.Count > n)
                    {
                        // over capacity: take one passage back out of the queue and recycle it
                        current = passageQueue.Dequeue();
                        current.Reset();
                    }
                    else
                    {
                        current = new Passage();
                    }
                }
            }
            // if we exceed limit, we are done
            if (start >= contentLength)
            {
                Passage[] passages = passageQueue.ToArray();
                foreach (Passage p in passages)
                {
                    p.Sort();
                }
                // sort in ascending order
                ArrayUtil.TimSort(passages, Comparer <Passage> .Create((left, right) => left.startOffset - right.startOffset));
                return(passages);
            }
            // advance breakiterator
            if (Debugging.AssertsEnabled)
            {
                // Done being negative is what lets Math.Max(..., 0) below clamp a Done result to 0
                Debugging.Assert(BreakIterator.Done < 0);
            }
            current.startOffset = Math.Max(bi.Preceding(start + 1), 0);
            current.endOffset = Math.Min(bi.Next(), contentLength);
        }
        // Count how many occurrences of this term are added to the current passage.
        int tf = 0;
        while (true)
        {
            tf++;
            BytesRef term = terms[off.id];
            if (term is null)
            {
                // multitermquery match, pull from payload
                term = off.dp.GetPayload();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(term != null);
                }
            }
            current.AddMatch(start, end, term);
            if (off.pos == dp.Freq)
            {
                break; // removed from pq
            }
            else
            {
                // move this term's cursor to its next position
                off.pos++;
                dp.NextPosition();
                start = dp.StartOffset;
                end = dp.EndOffset;
            }
            if (start >= current.endOffset || end > contentLength)
            {
                // next occurrence is outside the current passage (or past the limit): requeue it for later
                pq.Enqueue(off);
                break;
            }
        }
        current.score += weights[off.id] * scorer.Tf(tf, current.endOffset - current.startOffset);
    }
    // Dead code but compiler disagrees:
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(false);
    }
    return(null);
}
/// <summary>
/// Exercises the Thai ("th") word and line break iterators: forward and backward
/// iteration over a short mixed Latin/Thai text, break positions over a longer
/// Thai text containing spaces, plus <c>Preceding</c>, <c>Following</c> and
/// <c>GetRuleStatusVec</c> for extra coverage.
/// </summary>
public void TestThaiDictionaryBreakIterator()
{
    // Expected positions: the first six going forward, the last six coming back.
    int[] result = { 1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0 };
    char[] ctext =
    {
        '\u0041', '\u0020', '\u0E01', '\u0E32', '\u0E23', '\u0E17',
        '\u0E14', '\u0E25', '\u0E2D', '\u0E07', '\u0020', '\u0041'
    };
    String text = new String(ctext);

    ULocale locale = ULocale.CreateCanonical("th");
    BreakIterator b = BreakIterator.GetWordInstance(locale);
    b.SetText(text);

    int index = 0;
    int position;

    // Test forward iteration
    for (position = b.Next(); position != BreakIterator.Done; position = b.Next())
    {
        int expected = result[index];
        index++;
        if (position != expected)
        {
            Errln("Error with ThaiDictionaryBreakIterator forward iteration test at " + position + ".\nShould have been " + expected);
        }
    }

    // Test backward iteration (continues through the second half of 'result')
    for (position = b.Previous(); position != BreakIterator.Done; position = b.Previous())
    {
        int expected = result[index];
        index++;
        if (position != expected)
        {
            Errln("Error with ThaiDictionaryBreakIterator backward iteration test at " + position + ".\nShould have been " + expected);
        }
    }

    //Test invalid sequence and spaces
    char[] text2 =
    {
        '\u0E01', '\u0E39', '\u0020', '\u0E01', '\u0E34', '\u0E19', '\u0E01', '\u0E38', '\u0E49',
        '\u0E07', '\u0020', '\u0E1B', '\u0E34', '\u0E49', '\u0E48', '\u0E07', '\u0E2D', '\u0E22',
        '\u0E39', '\u0E48', '\u0E43', '\u0E19', '\u0E16', '\u0E49', '\u0E33'
    };
    int[] expectedWordResult = { 2, 3, 6, 10, 11, 15, 17, 20, 22 };
    int[] expectedLineResult = { 3, 6, 11, 15, 17, 20, 22 };

    // Word breaks strictly inside the text.
    BreakIterator brk = BreakIterator.GetWordInstance(new ULocale("th"));
    brk.SetText(new String(text2));
    index = 0;
    position = brk.Next();
    while (position != BreakIterator.Done && position < text2.Length)
    {
        int expected = expectedWordResult[index];
        index++;
        if (position != expected)
        {
            Errln("Incorrect break given by thai word break iterator. Expected: " + expected + " Got: " + position);
        }
        position = brk.Next();
    }

    // Line breaks strictly inside the text.
    brk = BreakIterator.GetLineInstance(new ULocale("th"));
    brk.SetText(new String(text2));
    index = 0;
    position = brk.Next();
    while (position != BreakIterator.Done && position < text2.Length)
    {
        int expected = expectedLineResult[index];
        index++;
        if (position != expected)
        {
            Errln("Incorrect break given by thai line break iterator. Expected: " + expected + " Got: " + position);
        }
        position = brk.Next();
    }

    // Improve code coverage
    int pivot = expectedLineResult[1];
    if (brk.Preceding(pivot) != expectedLineResult[0])
    {
        Errln("Incorrect preceding position.");
    }
    if (brk.Following(pivot) != expectedLineResult[2])
    {
        Errln("Incorrect following position.");
    }

    int[] fillInArray = new int[2];
    int statusCount = ((RuleBasedBreakIterator)brk).GetRuleStatusVec(fillInArray);
    if (statusCount != 1 || fillInArray[0] != 0)
    {
        Errln("Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0.");
    }
}
/// <summary>
/// Asks the wrapped iterator for the boundary preceding <paramref name="offset"/>
/// and passes that result through <c>InternalPrev</c>.
/// </summary>
/// <param name="offset">Offset forwarded unchanged to the wrapped iterator.</param>
/// <returns>The value produced by <c>InternalPrev</c> for the delegate's boundary.</returns>
public override int Preceding(int offset)
{
    int delegateBoundary = @delegate.Preceding(offset);
    return InternalPrev(delegateBoundary);
}
/// <summary>
/// Returns the position at the end of the next layout. Does NOT
/// update the current position of this <code>LineBreakMeasurer</code>.
/// </summary>
/// <param name="wrappingWidth"> the maximum visible advance permitted for
/// the text in the next layout </param>
/// <param name="offsetLimit"> the first character that can not be included
/// in the next layout, even if the text after the limit would fit
/// within the wrapping width; <code>offsetLimit</code> must be
/// greater than the current position </param>
/// <param name="requireNextWord"> if <code>true</code>, the current position
/// is returned when the entire next word does not fit within
/// <code>wrappingWidth</code>; if <code>false</code>, the offset
/// returned is at least one greater than the current position </param>
/// <returns> an offset in the text representing the limit of the
/// next <code>TextLayout</code> </returns>
public int NextOffset(float wrappingWidth, int offsetLimit, bool requireNextWord)
{
    int candidate = Pos;

    if (Pos < Limit)
    {
        if (offsetLimit <= Pos)
        {
            throw new IllegalArgumentException("offsetLimit must be after current position");
        }

        int fitIndex = Measurer.GetLineBreakIndex(Pos, wrappingWidth);

        if (fitIndex == Limit)
        {
            // Everything up to the limit fits.
            candidate = Limit;
        }
        else if (char.IsWhiteSpace(Measurer.Chars[fitIndex - Start]))
        {
            // The cut falls on whitespace: take the break that follows it.
            candidate = BreakIter.Following(fitIndex);
        }
        else
        {
            // Break is in a word; back up to previous break.

            // NOTE: I think that breakIter.preceding(limit) should be
            // equivalent to breakIter.last(), breakIter.previous() but
            // the authors of BreakIterator thought otherwise...
            // If they were equivalent then the first branch would be
            // unnecessary.
            int probe = fitIndex + 1;
            if (probe == Limit)
            {
                BreakIter.Last();
                candidate = BreakIter.Previous();
            }
            else
            {
                candidate = BreakIter.Preceding(probe);
            }

            if (candidate <= Pos)
            {
                // first word doesn't fit on line
                candidate = requireNextWord
                    ? Pos
                    : System.Math.Max(Pos + 1, fitIndex);
            }
        }
    }

    // Never return an offset past the caller-supplied limit.
    return System.Math.Min(candidate, offsetLimit);
}