Example #1
0
        /// <summary>
        /// Checks <c>Preceding</c> on the shared iterator <c>bi</c>: offset 0 is expected to
        /// report <c>BreakIterator.Done</c>, and offset 5 is expected to report 4.
        /// </summary>
        public void TestPreceding()
        {
            const string failureMessage = "BreakIterator preceding position not correct";

            // Asking for a boundary before offset 0 must yield the Done sentinel.
            int beforeZero = bi.Preceding(0);
            TestFmwk.assertEquals(failureMessage, BreakIterator.Done, beforeZero);

            // The boundary preceding offset 5 is expected at 4.
            int beforeFive = bi.Preceding(5);
            TestFmwk.assertEquals(failureMessage, 4, beforeFive);
        }
        /// <summary>
        /// Asserts that <paramref name="bi"/> sees its whole text as one single segment:
        /// the only boundaries are the begin and end index, and every other navigation
        /// request reports <c>BreakIterator.Done</c>.
        /// </summary>
        /// <param name="bi">Iterator under test; its text must already be set.</param>
        /// <param name="text">The string the single segment is compared against.</param>
        private void Test1Sentence(BreakIterator bi, String text)
        {
            int begin = bi.Text.BeginIndex;

            // Forward: First() lands on the begin index, one Next() reaches the end index.
            assertEquals(begin, bi.First());
            int segmentFrom = bi.Current;

            assertEquals(bi.Text.EndIndex, bi.Next());
            int segmentTo = bi.Current - begin;

            // NOTE(review): the second Substring argument is a length in .NET; confirm that
            // "segmentTo - begin" is the intended value when begin is non-zero.
            assertEquals(text, text.Substring(segmentFrom - begin, segmentTo - begin));

            // Backward: from Last(), one Previous() reaches the start and the next one is Done.
            assertEquals(text.Length, bi.Last() - begin);
            segmentTo = bi.Current;
            bi.Previous();
            assertEquals(BreakIterator.Done, bi.Previous());
            int segmentBack = bi.Current;

            assertEquals(text, text.Substring(segmentBack - begin, segmentTo - begin));
            assertEquals(begin, bi.Current);

            // Random access from the middle of the text finds no boundary in either direction.
            int middle = bi.Last() / 2 + begin;
            assertEquals(BreakIterator.Done, bi.Following(middle));

            middle = bi.Last() / 2 + begin;
            assertEquals(BreakIterator.Done, bi.Preceding(middle));

            // Multi-step moves overshoot in both directions.
            assertEquals(begin, bi.First());
            assertEquals(BreakIterator.Done, bi.Next(13));
            assertEquals(BreakIterator.Done, bi.Next(-8));
        }
        /// <summary>
        /// Asserts that <paramref name="bi"/> splits <c>TEXT</c> into the three entries of
        /// <c>SENTENCES</c>, walking forward, then backward, then probing with
        /// <c>Following</c>, <c>Preceding</c> and multi-step <c>Next</c>.
        /// </summary>
        /// <remarks>
        /// LUCENENET NOTE: Refactored a bit because Substring in .NET requires some light math
        /// to match Java (the second argument is a length, not an end index).
        /// </remarks>
        /// <param name="bi">Iterator under test; its text must already be set.</param>
        private void Do3SentenceTest(BreakIterator bi)
        {
            assertEquals(0, bi.Current);
            assertEquals(0, bi.First());

            // Forward walk: each Next() closes the sentence that began at the previous boundary.
            int from = bi.Current;
            int to = bi.Next();
            assertEquals(SENTENCES[0], TEXT.Substring(from, to - from));

            from = bi.Current;
            to = bi.Next();
            assertEquals(SENTENCES[1], TEXT.Substring(from, to - from));

            from = bi.Current;
            assertEquals(bi.Text.EndIndex, bi.Next());
            to = bi.Current;
            assertEquals(SENTENCES[2], TEXT.Substring(from, to - from));
            assertEquals(BreakIterator.Done, bi.Next());

            // Backward walk from the end: sentences come back in reverse order.
            assertEquals(TEXT.Length, bi.Last());
            for (int sentence = 2; sentence >= 0; sentence--)
            {
                int upper = bi.Current;
                int lower = bi.Previous();
                assertEquals(SENTENCES[sentence], TEXT.Substring(lower, upper - lower));
            }
            assertEquals(BreakIterator.Done, bi.Previous());
            assertEquals(0, bi.Current);

            // Following(offset): first boundary after the offset.
            int[] followingOffsets = { 39, 31, 30 };
            int[] followingExpected = { 59, 59, 31 };
            for (int i = 0; i < followingOffsets.Length; i++)
            {
                assertEquals(followingExpected[i], bi.Following(followingOffsets[i]));
            }

            // Preceding(offset): last boundary before the offset.
            int[] precedingOffsets = { 57, 58, 59 };
            int[] precedingExpected = { 0, 0, 31 };
            for (int i = 0; i < precedingOffsets.Length; i++)
            {
                assertEquals(precedingExpected[i], bi.Preceding(precedingOffsets[i]));
            }

            // Multi-step moves: two boundaries forward, then two back.
            assertEquals(0, bi.First());
            assertEquals(59, bi.Next(2));
            assertEquals(0, bi.Next(-2));
        }
 /// <summary>
 /// Asserts the behaviour of <paramref name="bi"/> when there is nothing to iterate:
 /// position 0 is the only boundary and every move reports <c>BreakIterator.Done</c>.
 /// </summary>
 /// <param name="bi">Iterator under test; its text must already be set.</param>
 private void Test0Sentences(BreakIterator bi)
 {
     int done = BreakIterator.Done;

     // The iterator starts, and stays, at offset 0.
     assertEquals(0, bi.Current);
     assertEquals(0, bi.First());
     assertEquals(done, bi.Next());
     assertEquals(0, bi.Last());
     assertEquals(done, bi.Previous());

     // Random access at offset 0 finds no boundary on either side.
     assertEquals(done, bi.Following(0));
     assertEquals(done, bi.Preceding(0));

     // Multi-step moves overshoot in both directions.
     assertEquals(0, bi.First());
     assertEquals(done, bi.Next(13));
     assertEquals(done, bi.Next(-8));
 }
Example #5
0
        /// <summary>
        /// Asserts that two break iterators break the text the same way.
        /// </summary>
        /// <remarks>
        /// Both iterators are driven in lock-step through Next, First, Last, Previous,
        /// Following and Preceding; after every call the returned value and the resulting
        /// current position must match.
        /// </remarks>
        /// <param name="one">Text handed to <paramref name="expected"/>; its begin/end index bound the offset sweeps.</param>
        /// <param name="two">Text handed to <paramref name="actual"/>.</param>
        /// <param name="expected">Reference iterator.</param>
        /// <param name="actual">Iterator being verified.</param>
        public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual)
        {
            expected.SetText(one);
            actual.SetText(two);

            assertEquals(expected.Current, actual.Current);

            // Next(): walk forward until the reference iterator reports DONE.
            for (int boundary = expected.Current; boundary != BreakIterator.DONE;)
            {
                boundary = expected.Next();
                assertEquals(boundary, actual.Next());
                assertEquals(expected.Current, actual.Current);
            }

            // First()
            assertEquals(expected.First(), actual.First());
            assertEquals(expected.Current, actual.Current);

            // Last()
            assertEquals(expected.Last(), actual.Last());
            assertEquals(expected.Current, actual.Current);

            // Previous(): walk backward until the reference iterator reports DONE.
            for (int boundary = expected.Current; boundary != BreakIterator.DONE;)
            {
                boundary = expected.Previous();
                assertEquals(boundary, actual.Previous());
                assertEquals(expected.Current, actual.Current);
            }

            // Following(): probe every offset, rewinding both iterators first.
            int offset = one.BeginIndex;
            while (offset <= one.EndIndex)
            {
                expected.First();
                actual.First();
                assertEquals(expected.Following(offset), actual.Following(offset));
                assertEquals(expected.Current, actual.Current);
                offset++;
            }

            // Preceding(): probe every offset, moving both iterators to the end first.
            offset = one.BeginIndex;
            while (offset <= one.EndIndex)
            {
                expected.Last();
                actual.Last();
                assertEquals(expected.Preceding(offset), actual.Preceding(offset));
                assertEquals(expected.Current, actual.Current);
                offset++;
            }
        }
Example #6
0
        /// <summary>
        /// Chooses the character range to prepare for layout starting at
        /// <paramref name="localStart"/>, widens it to line-break boundaries where needed,
        /// passes it to <c>EnsureComponents</c> and marks the layout window as present.
        /// </summary>
        /// <param name="localStart">Index into <c>FChars</c> at which layout is about to start.</param>
        private void MakeLayoutWindow(int localStart)
        {
            int windowStart = localStart;
            int windowLimit = FChars.Length;

            // Lines were laid out before but no window is active: cap the limit at an
            // estimate of EST_LINES lines of average length instead of the whole text.
            if (LayoutCount > 0 && !HaveLayoutWindow)
            {
                float avgLineLength = System.Math.Max(LayoutCharCount / LayoutCount, 1);
                int estimatedLimit = localStart + (int)(avgLineLength * EST_LINES);
                windowLimit = System.Math.Min(estimatedLimit, FChars.Length);
            }

            // Only a partial window needs its edges moved onto line-break boundaries.
            bool coversWholeText = !(localStart > 0 || windowLimit < FChars.Length);
            if (!coversWholeText)
            {
                // Create the character iterator on first use, otherwise point it at FChars again.
                if (CharIter == null)
                {
                    CharIter = new CharArrayIterator(FChars);
                }
                else
                {
                    CharIter.Reset(FChars);
                }

                // The line-break iterator is created on first use.
                if (FLineBreak == null)
                {
                    FLineBreak = BreakIterator.LineInstance;
                }
                FLineBreak.SetText(CharIter);

                // Pull the start back to the previous line boundary.
                if (localStart > 0 && !FLineBreak.IsBoundary(localStart))
                {
                    windowStart = FLineBreak.Preceding(localStart);
                }

                // Push the limit forward to the next line boundary.
                if (windowLimit < FChars.Length && !FLineBreak.IsBoundary(windowLimit))
                {
                    windowLimit = FLineBreak.Following(windowLimit);
                }
            }

            EnsureComponents(windowStart, windowLimit);
            HaveLayoutWindow = true;
        }
Example #7
0
        /// <summary>
        /// Calls <c>Preceding</c> for every offset from 0 to <c>text.Length</c> inclusive and
        /// reports an error whenever the result differs from the expected boundary.
        /// </summary>
        /// <param name="bi">Iterator under test.</param>
        /// <param name="text">Text whose length bounds the offsets probed.</param>
        /// <param name="boundaries">
        /// Expected results in ascending order. The entry after the current one is read on
        /// every iteration, so the array must extend at least one slot past the last entry used.
        /// </param>
        private void _testPreceding(BreakIterator bi, String text, int[] boundaries)
        {
            Logln("testPreceding():");

            int cursor = 0; // index of the boundary currently expected from Preceding()
            int offset = 0;
            while (offset <= text.Length)
            {
                int actual = bi.Preceding(offset);
                Logln("bi.preceding(" + offset + ") -> " + actual);

                int expected = boundaries[cursor];
                if (actual != expected)
                {
                    Errln("Wrong result from preceding() for " + offset + ": expected " + expected
                          + ", got " + actual);
                }

                // Once the offset reaches the next boundary, that boundary becomes the expected one.
                if (offset == boundaries[cursor + 1])
                {
                    cursor++;
                }

                offset++;
            }
        }
Example #8
0
        /// <summary>
        /// Cross-checks <c>Following</c>, <c>Preceding</c> and <c>IsBoundary</c> of a word
        /// iterator against the boundaries collected by stepping through "aaa bbb ccc"
        /// with <c>Next</c>.
        /// </summary>
        public void TestPreceding()
        {
            string sample = "aaa bbb ccc";
            BreakIterator wordBreaks = BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);
            wordBreaks.SetText(sample);

            // Collect the first four boundaries after the start of the text.
            wordBreaks.First();
            int first = wordBreaks.Next();
            int second = wordBreaks.Next();
            int third = wordBreaks.Next();
            int fourth = wordBreaks.Next();

            // Probe one character past the second boundary.
            int probe = second + 1;
            int following = wordBreaks.Following(probe);
            int preceding = wordBreaks.Preceding(probe);

            if (following != third)
            {
                Errln("IntlTestTextBoundary::TestPreceding: f!=p3");
            }
            if (preceding != second)
            {
                Errln("IntlTestTextBoundary::TestPreceding: p!=p2");
            }

            // The first/second and third/fourth boundaries are expected to be one apart.
            if (first + 1 != second)
            {
                Errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
            }
            if (third + 1 != fourth)
            {
                Errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
            }

            // Second and third are boundaries; the offset just after second is not.
            bool boundariesAgree = wordBreaks.IsBoundary(second)
                                   && !wordBreaks.IsBoundary(second + 1)
                                   && wordBreaks.IsBoundary(third);
            if (!boundariesAgree)
            {
                Errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
            }
        }
Example #9
0
        /// <summary>
        /// Builds scored passages for one document by merging the offsets of all query terms
        /// and grouping them into the segments produced by <paramref name="bi"/>.
        /// </summary>
        /// <remarks>
        /// Algorithm: treat sentence snippets as miniature documents.
        /// We can intersect these with the postings lists via BreakIterator.Preceding(offset),
        /// and score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq)).
        /// </remarks>
        /// <param name="field">Field name; used to look up the scorer and in error messages.</param>
        /// <param name="terms">Query terms; a <c>null</c> entry means the term text is read from the position's payload.</param>
        /// <param name="contentLength">Offset limit: hits at or beyond it end processing, hits spanning it are skipped.</param>
        /// <param name="bi">Break iterator used to find the start and end offset of each passage.</param>
        /// <param name="doc">Document id the postings are advanced to.</param>
        /// <param name="termsEnum">Used to seek a term whose postings slot is still <c>null</c>.</param>
        /// <param name="postings">Per-term postings cache, indexed like <paramref name="terms"/>; slots are filled in (or set to EMPTY) by this method.</param>
        /// <param name="n">Maximum number of passages kept in the result queue.</param>
        /// <returns>The retained passages sorted by ascending start offset.</returns>
        /// <exception cref="ArgumentException">A matching term has no offsets available for <paramref name="field"/>.</exception>
        private Passage[] HighlightDoc(string field, BytesRef[] terms, int contentLength, BreakIterator bi, int doc,
                                       TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n)
        {
            PassageScorer scorer = GetScorer(field);

            if (scorer is null)
            {
                // LUCENENET: Changed from NullPointerException to InvalidOperationException (which isn't caught anywhere outside of tests)
                throw IllegalStateException.Create("PassageScorer cannot be null");
            }
            // Queue of per-term offset cursors for this document.
            JCG.PriorityQueue <OffsetsEnum> pq = new JCG.PriorityQueue <OffsetsEnum>();
            float[] weights = new float[terms.Length];
            // initialize postings
            for (int i = 0; i < terms.Length; i++)
            {
                DocsAndPositionsEnum de = postings[i];
                int pDoc;
                if (de == EMPTY)
                {
                    // slot was already marked EMPTY; nothing to do for this term
                    continue;
                }
                else if (de is null)
                {
                    postings[i] = EMPTY; // initially
                    if (!termsEnum.SeekExact(terms[i]))
                    {
                        continue; // term not found
                    }
                    de = postings[i] = termsEnum.DocsAndPositions(null, null, DocsAndPositionsFlags.OFFSETS);
                    if (de is null)
                    {
                        // no positions available
                        throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
                    }
                    pDoc = de.Advance(doc);
                }
                else
                {
                    // cached enum: only advance when it is still positioned before the target doc
                    pDoc = de.DocID;
                    if (pDoc < doc)
                    {
                        pDoc = de.Advance(doc);
                    }
                }

                if (doc == pDoc)
                {
                    // term occurs in this doc: record its weight and load its first position
                    weights[i] = scorer.Weight(contentLength, de.Freq);
                    de.NextPosition();
                    pq.Add(new OffsetsEnum(de, i));
                }
            }

            pq.Add(new OffsetsEnum(EMPTY, int.MaxValue)); // a sentinel for termination

            // Passages ordered by ascending score, ties broken by start offset, so Peek()/Dequeue()
            // yield the lowest-scoring passage.
            JCG.PriorityQueue <Passage> passageQueue = new JCG.PriorityQueue <Passage>(n, Comparer <Passage> .Create((left, right) =>
            {
                if (left.score < right.score)
                {
                    return(-1);
                }
                else if (left.score > right.score)
                {
                    return(1);
                }
                else
                {
                    return(left.startOffset - right.startOffset);
                }
            }));
            Passage current = new Passage();

            while (pq.TryDequeue(out OffsetsEnum off))
            {
                DocsAndPositionsEnum dp = off.dp;
                int start = dp.StartOffset;
                if (start == -1)
                {
                    throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
                }
                int end = dp.EndOffset;
                // LUCENE-5166: this hit would span the content limit... however more valid
                // hits may exist (they are sorted by start). so we pretend like we never
                // saw this term, it won't cause a passage to be added to passageQueue or anything.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(EMPTY.StartOffset == int.MaxValue);
                }
                if (start < contentLength && end > contentLength)
                {
                    continue;
                }
                // hit starts at or after the end of the current passage: close it and open a new one
                if (start >= current.endOffset)
                {
                    if (current.startOffset >= 0)
                    {
                        // finalize current
                        current.score *= scorer.Norm(current.startOffset);
                        // new sentence: first add 'current' to queue
                        if (passageQueue.Count == n && current.score < passageQueue.Peek().score)
                        {
                            current.Reset(); // can't compete, just reset it
                        }
                        else
                        {
                            passageQueue.Enqueue(current);
                            if (passageQueue.Count > n)
                            {
                                // over capacity: take the lowest-ranked passage out and reuse the object
                                current = passageQueue.Dequeue();
                                current.Reset();
                            }
                            else
                            {
                                current = new Passage();
                            }
                        }
                    }
                    // if we exceed limit, we are done
                    if (start >= contentLength)
                    {
                        Passage[] passages = passageQueue.ToArray();
                        foreach (Passage p in passages)
                        {
                            p.Sort();
                        }
                        // sort in ascending order
                        ArrayUtil.TimSort(passages, Comparer <Passage> .Create((left, right) => left.startOffset - right.startOffset));
                        return(passages);
                    }
                    // advance breakiterator
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(BreakIterator.Done < 0);
                    }
                    // Done is negative (asserted above), so Max(..., 0) maps it to offset 0;
                    // the passage end is clamped to contentLength.
                    current.startOffset = Math.Max(bi.Preceding(start + 1), 0);
                    current.endOffset   = Math.Min(bi.Next(), contentLength);
                }
                // count the occurrences of this term that fall inside the current passage
                int tf = 0;
                while (true)
                {
                    tf++;
                    BytesRef term = terms[off.id];
                    if (term is null)
                    {
                        // multitermquery match, pull from payload
                        term = off.dp.GetPayload();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(term != null);
                        }
                    }
                    current.AddMatch(start, end, term);
                    if (off.pos == dp.Freq)
                    {
                        break; // removed from pq
                    }
                    else
                    {
                        off.pos++;
                        dp.NextPosition();
                        start = dp.StartOffset;
                        end   = dp.EndOffset;
                    }
                    if (start >= current.endOffset || end > contentLength)
                    {
                        // next occurrence is outside this passage (or past the limit): requeue the cursor
                        pq.Enqueue(off);
                        break;
                    }
                }
                current.score += weights[off.id] * scorer.Tf(tf, current.endOffset - current.startOffset);
            }

            // Dead code but compiler disagrees:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(false);
            }
            return(null);
        }
Example #10
0
        /// <summary>
        /// Exercises the Thai ("th") word and line break iterators: forward and backward
        /// iteration over mixed Latin/Thai text, boundaries in a Thai sample containing
        /// spaces, and a few extra calls (<c>Preceding</c>, <c>Following</c>,
        /// <c>GetRuleStatusVec</c>) made for code coverage.
        /// </summary>
        public void TestThaiDictionaryBreakIterator()
        {
            // Boundaries expected from the forward pass, immediately followed by those of the backward pass.
            int[] expectedBoundaries = { 1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0 };
            char[] mixedChars =
            {
                '\u0041', '\u0020',
                '\u0E01', '\u0E32', '\u0E23', '\u0E17', '\u0E14', '\u0E25', '\u0E2D', '\u0E07',
                '\u0020', '\u0041'
            };
            string mixedText = new string(mixedChars);

            ULocale thai = ULocale.CreateCanonical("th");
            BreakIterator wordIter = BreakIterator.GetWordInstance(thai);
            wordIter.SetText(mixedText);

            // One cursor runs through both passes.
            int cursor = 0;

            // Test forward iteration
            for (int at = wordIter.Next(); at != BreakIterator.Done; at = wordIter.Next())
            {
                int want = expectedBoundaries[cursor++];
                if (at != want)
                {
                    Errln("Error with ThaiDictionaryBreakIterator forward iteration test at " + at + ".\nShould have been " + want);
                }
            }

            // Test backward iteration
            for (int at = wordIter.Previous(); at != BreakIterator.Done; at = wordIter.Previous())
            {
                int want = expectedBoundaries[cursor++];
                if (at != want)
                {
                    Errln("Error with ThaiDictionaryBreakIterator backward iteration test at " + at + ".\nShould have been " + want);
                }
            }

            // Test invalid sequence and spaces
            char[] thaiChars =
            {
                '\u0E01', '\u0E39', '\u0020', '\u0E01', '\u0E34', '\u0E19', '\u0E01', '\u0E38', '\u0E49', '\u0E07', '\u0020', '\u0E1B',
                '\u0E34', '\u0E49', '\u0E48', '\u0E07', '\u0E2D', '\u0E22', '\u0E39', '\u0E48', '\u0E43', '\u0E19',
                '\u0E16', '\u0E49', '\u0E33'
            };
            int[] expectedWordResult = { 2, 3, 6, 10, 11, 15, 17, 20, 22 };
            int[] expectedLineResult = { 3, 6, 11, 15, 17, 20, 22 };

            // Word boundaries; iteration stops at the boundary that coincides with the end of the text.
            BreakIterator brk = BreakIterator.GetWordInstance(new ULocale("th"));
            brk.SetText(new string(thaiChars));
            cursor = 0;
            for (int at = brk.Next(); at != BreakIterator.Done && at < thaiChars.Length; at = brk.Next())
            {
                int want = expectedWordResult[cursor++];
                if (at != want)
                {
                    Errln("Incorrect break given by thai word break iterator. Expected: " + want + " Got: " + at);
                }
            }

            // Line boundaries over the same text.
            brk = BreakIterator.GetLineInstance(new ULocale("th"));
            brk.SetText(new string(thaiChars));
            cursor = 0;
            for (int at = brk.Next(); at != BreakIterator.Done && at < thaiChars.Length; at = brk.Next())
            {
                int want = expectedLineResult[cursor++];
                if (at != want)
                {
                    Errln("Incorrect break given by thai line break iterator. Expected: " + want + " Got: " + at);
                }
            }

            // Improve code coverage
            int precedingBreak = brk.Preceding(expectedLineResult[1]);
            if (precedingBreak != expectedLineResult[0])
            {
                Errln("Incorrect preceding position.");
            }
            int followingBreak = brk.Following(expectedLineResult[1]);
            if (followingBreak != expectedLineResult[2])
            {
                Errln("Incorrect following position.");
            }

            int[] statusBuffer = new int[2];
            int statusCount = ((RuleBasedBreakIterator)brk).GetRuleStatusVec(statusBuffer);
            if (statusCount != 1 || statusBuffer[0] != 0)
            {
                Errln("Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0.");
            }
        }
Example #11
0
 /// <summary>
 /// Asks the wrapped iterator for the boundary preceding <paramref name="offset"/> and
 /// returns that value after passing it through <c>InternalPrev</c>.
 /// </summary>
 /// <param name="offset">Offset handed unchanged to the wrapped iterator.</param>
 /// <returns>The result of <c>InternalPrev</c> for the wrapped iterator's boundary.</returns>
 public override int Preceding(int offset)
 {
     int delegateBoundary = @delegate.Preceding(offset);
     return InternalPrev(delegateBoundary);
 }
        /// <summary>
        /// Computes the offset at which the next layout would end, without moving the
        /// current position of this <code>LineBreakMeasurer</code>.
        /// </summary>
        /// <param name="wrappingWidth"> the maximum visible advance permitted for
        ///    the text in the next layout </param>
        /// <param name="offsetLimit"> the first character that may not be part of the
        ///    next layout, even if the text after it would fit within the wrapping
        ///    width; must be greater than the current position, and the result is
        ///    never larger than this value </param>
        /// <param name="requireNextWord"> if <code>true</code>, the current position
        ///    is returned when the entire next word does not fit within
        ///    <code>wrappingWidth</code>; if <code>false</code>, the returned offset
        ///    is at least one greater than the current position </param>
        /// <returns> an offset in the text representing the limit of the
        ///    next <code>TextLayout</code> </returns>
        public int NextOffset(float wrappingWidth, int offsetLimit, bool requireNextWord)
        {
            int result = Pos;

            if (Pos < Limit)
            {
                if (offsetLimit <= Pos)
                {
                    throw new IllegalArgumentException("offsetLimit must be after current position");
                }

                int breakIndex = Measurer.GetLineBreakIndex(Pos, wrappingWidth);

                if (breakIndex == Limit)
                {
                    // Everything up to the limit fits.
                    result = Limit;
                }
                else if (char.IsWhiteSpace(Measurer.Chars[breakIndex - Start]))
                {
                    // The line ends on whitespace: continue to the next break after it.
                    result = BreakIter.Following(breakIndex);
                }
                else
                {
                    // Break is in a word;  back up to previous break.

                    // NOTE:  I think that breakIter.preceding(limit) should be
                    // equivalent to breakIter.last(), breakIter.previous() but
                    // the authors of BreakIterator thought otherwise...
                    // If they were equivalent then the Last()/Previous() branch
                    // would be unnecessary.
                    int probe = breakIndex + 1;
                    if (probe != Limit)
                    {
                        result = BreakIter.Preceding(probe);
                    }
                    else
                    {
                        BreakIter.Last();
                        result = BreakIter.Previous();
                    }

                    if (result <= Pos)
                    {
                        // first word doesn't fit on line
                        result = requireNextWord
                            ? Pos
                            : System.Math.Max(Pos + 1, breakIndex);
                    }
                }
            }

            // Never go past the caller-supplied limit.
            return System.Math.Min(result, offsetLimit);
        }