예제 #1
0
        /// <summary>
        /// Scans forward from the iterator's current position while the index is below
        /// <paramref name="endPos"/> and the current code point is contained in <c>fSet</c>,
        /// then passes that span to <c>DivideUpDictionaryRange</c>.
        /// </summary>
        /// <param name="text">Iterator over the text; on return it is positioned at the end of the scanned span.</param>
        /// <param name="startPos">Not read by this implementation.</param>
        /// <param name="endPos">Exclusive upper bound for the scan.</param>
        /// <param name="breakType">Not read by this implementation (see the TODO in the body).</param>
        /// <param name="foundBreaks">Passed through to <c>DivideUpDictionaryRange</c>.</param>
        /// <returns>The value returned by <c>DivideUpDictionaryRange</c>.</returns>
        public virtual int FindBreaks(CharacterIterator text, int startPos, int endPos,
                                      int breakType, DequeI foundBreaks)
        {
            // The span begins at the current position in the text.
            int spanStart = text.Index;
            int codePoint = CharacterIteration.Current32(text);
            int spanEnd   = text.Index;

            // Extend the span while the characters are included in the set.
            while (spanEnd < endPos && fSet.Contains(codePoint))
            {
                CharacterIteration.Next32(text);
                codePoint = CharacterIteration.Current32(text);
                spanEnd   = text.Index;
            }

            // if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
            // TODO: Why does icu4c have this?
            int breakCount = DivideUpDictionaryRange(text, spanStart, spanEnd, foundBreaks);

            // Reposition the iterator at the end of the scanned span.
            text.SetIndex(spanEnd);

            return breakCount;
        }
예제 #2
0
        // LUCENENET: This method override didn't exist in Lucene 4.8.1 and it isn't clear why this was
        // here because there were no comments.
        //public override bool IsBoundary(int offset)
        //{
        //    if (offset == 0)
        //    {
        //        return true;
        //    }
        //    int boundary = Following(offset - 1);
        //    if (boundary == Done)
        //    {
        //        throw new ArgumentException();
        //    }
        //    return boundary == offset;
        //}

        /// <summary>
        /// Replaces the text being iterated, records its begin and end indexes,
        /// and moves the current position to the begin index.
        /// </summary>
        /// <param name="newText">The iterator supplying the new text and its bounds.</param>
        public override void SetText(CharacterIterator newText)
        {
            var lowerBound = newText.BeginIndex;
            var upperBound = newText.EndIndex;

            start   = lowerBound;
            end     = upperBound;
            text    = newText;
            current = start;
        }
예제 #3
0
        /// <summary>
        /// Moves the iterator back by one code point and returns that code point.
        /// </summary>
        /// <remarks>
        /// If the code unit stepped over is a trail surrogate and the unit before it is a
        /// lead surrogate, both are consumed and combined into one supplementary code point.
        /// Otherwise the single code unit is returned and the iterator ends up one unit back.
        /// </remarks>
        /// <param name="ci">The character iterator to move.</param>
        /// <returns>
        /// The previous code point, or <c>DONE32</c> when the iterator index is not past its begin index.
        /// </returns>
        public static int Previous32(CharacterIterator ci)
        {
            if (ci.Index <= ci.BeginIndex)
            {
                return DONE32;
            }

            char low = ci.Previous();
            bool mayBePair = UTF16.IsTrailSurrogate(low) && ci.Index > ci.BeginIndex;
            if (!mayBePair)
            {
                return low;
            }

            char high = ci.Previous();
            if (!UTF16.IsLeadSurrogate(high))
            {
                // Not a surrogate pair after all: undo the extra step back.
                ci.Next();
                return low;
            }

            return (((int)high - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                   ((int)low - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                   UTF16.SUPPLEMENTARY_MIN_VALUE;
        }
예제 #4
0
        /// <summary>
        /// Checks that a <c>CSCharacterIterator</c>, and a clone of it, return the same values as a
        /// <c>StringCharacterIterator</c> over the same text for the operations exercised below.
        /// </summary>
        public void TestAPI()
        {
            // More steps than the 12-character text has, so both walks run past the ends.
            const int steps = 50;

            String source = "Hello, World";

            ICharSequence     sequence  = source.ToCharSequence();
            CharacterIterator candidate = new CSCharacterIterator(sequence);
            CharacterIterator reference = new StringCharacterIterator(source);

            // Positioning and single-step operations.
            assertEquals("", reference.SetIndex(6), candidate.SetIndex(6));
            assertEquals("", reference.Index, candidate.Index);
            assertEquals("", reference.Current, candidate.Current);
            assertEquals("", reference.Previous(), candidate.Previous());
            assertEquals("", reference.Next(), candidate.Next());

            // Bounds and jumps to either end.
            assertEquals("", reference.BeginIndex, candidate.BeginIndex);
            assertEquals("", reference.EndIndex, candidate.EndIndex);
            assertEquals("", reference.First(), candidate.First());
            assertEquals("", reference.Last(), candidate.Last());

            // A clone taken at index 4 is compared against the reference from the same index.
            candidate.SetIndex(4);
            reference.SetIndex(4);
            CharacterIterator cloned = (CharacterIterator)candidate.Clone();

            for (int step = 0; step < steps; ++step)
            {
                assertEquals("", reference.Next(), cloned.Next());
            }

            for (int step = 0; step < steps; ++step)
            {
                assertEquals("", reference.Previous(), cloned.Previous());
            }
        }
예제 #5
0
        // protected constructor ----------------------------------------------

        /// <summary>
        /// Protected constructor for use by subclasses.
        /// Initializes the iterator with the target text to search and sets the break iterator.
        /// See the class documentation for more details on the use of the target text
        /// and the break iterator.
        /// </summary>
        /// <param name="target">The target text to be searched.</param>
        /// <param name="breaker">A break iterator that is used to determine the
        /// boundaries of a logical match. This argument can be null.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="target"/> is null
        /// or of length 0.</exception>
        /// <stable>ICU 2.0</stable>
        protected SearchIterator(CharacterIterator target, BreakIterator breaker)
        {
            this.search_ = new Search(this);

            // The target must exist and span at least one character.
            bool hasText = target != null && target.EndIndex != target.BeginIndex;
            if (!hasText)
            {
                throw new ArgumentException(
                          "Illegal argument target. " +
                          " Argument can not be null or of length 0");
            }

            search_.SetTarget(target);
            search_.BreakIterator = breaker;
            if (search_.BreakIterator != null)
            {
                // The break iterator is given a clone of the target rather than the target itself.
                search_.BreakIterator.SetText((CharacterIterator)target.Clone());
            }

            // Initial search state.
            search_.isOverlap_ = false;
            search_.isCanonicalMatch_ = false;
            search_.elementComparisonType_ = SearchIteratorElementComparisonType.StandardElementComparison;
            search_.isForwardSearching_ = true;
            search_.reset_ = true;
            search_.matchedIndex_ = Done;
            search_.MatchedLength = 0;
        }
예제 #6
0
            /**
             * Computes the bounding box of a range of characters read from a
             * <code>CharacterIterator</code>, in the specified <code>Graphics</code> context.
             * The characters from <code>beginIndex</code> up to (not including)
             * <code>limit</code> are copied into a <code>char</code> array, which is then
             * measured by the <code>char[]</code> overload of this method.
             * @param ci the specified <code>CharacterIterator</code>
             * @param beginIndex the initial offset in <code>ci</code>
             * @param limit the end index of <code>ci</code>
             * @param context the specified <code>Graphics</code> context
             * @return a <code>Rectangle2D</code> that is the bounding box of the
             * characters indexed in the specified <code>CharacterIterator</code>
             * in the specified <code>Graphics</code> context.
             * @throws IndexOutOfBoundsException if <code>beginIndex</code> is below the
             * iterator's begin index, <code>limit</code> is above its end index, or
             * <code>beginIndex</code> is greater than <code>limit</code>
             * @see java.awt.Font#getStringBounds(CharacterIterator, int, int, FontRenderContext)
             */
            public Rectangle2D getStringBounds(CharacterIterator ci, int beginIndex, int limit, Graphics context)
            {
                int lowest  = ci.getBeginIndex();
                int highest = ci.getEndIndex();

                // Validate the requested range against the iterator's own bounds.
                if (beginIndex < lowest)
                {
                    throw new IndexOutOfBoundsException("beginIndex: " + beginIndex);
                }
                if (limit > highest)
                {
                    throw new IndexOutOfBoundsException("limit: " + limit);
                }
                if (beginIndex > limit)
                {
                    throw new IndexOutOfBoundsException("range length: " + (limit - beginIndex));
                }

                int    count = limit - beginIndex;
                char[] chars = new char[count];

                // Copy the requested characters out of the iterator, one at a time.
                ci.setIndex(beginIndex);
                int filled = 0;
                while (filled < count)
                {
                    chars[filled++] = ci.current();
                    ci.next();
                }

                return getStringBounds(chars, 0, count, context);
            }
예제 #7
0
 /// <summary>
 /// Creates a collation element iterator over the given source text for the given
 /// collator. The cursor walks through the source text based on the collator's
 /// predefined rules. If the source text is empty, NULLORDER will be returned
 /// on the calls to next().
 /// </summary>
 /// <param name="sourceText"> the source text to iterate over. </param>
 /// <param name="owner"> the collation object supplying the tables and decomposition mode. </param>
 internal CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner)
 {
     this.Owner = owner;
     Ordering   = owner.Tables;

     // The text is wrapped in a normalizer whose mode is derived from the collator's decomposition setting.
     Text_Renamed = new NormalizerBase(sourceText, CollatorUtilities.toNormalizerMode(owner.Decomposition));
 }
예제 #8
0
        /// <summary>
        /// Moves the iterator back by one code point and returns that code point.
        /// </summary>
        /// <remarks>
        /// If the code unit stepped over is a trail surrogate and the unit before it is a
        /// lead surrogate, both are consumed and combined into one supplementary code point.
        /// Otherwise the single code unit is returned and the iterator ends up one unit back.
        /// </remarks>
        /// <param name="ci">The character iterator to move.</param>
        /// <returns>
        /// The previous code point, or <c>Done32</c> when the iterator index is not past its begin index.
        /// </returns>
        public static int Previous32(CharacterIterator ci)
        {
            if (ci.Index <= ci.BeginIndex)
            {
                return Done32;
            }

            char low = ci.Previous();
            bool mayBePair = UTF16.IsTrailSurrogate(low) && ci.Index > ci.BeginIndex;
            if (!mayBePair)
            {
                return low;
            }

            char high = ci.Previous();
            if (!UTF16.IsLeadSurrogate(high))
            {
                // Not a surrogate pair after all: undo the extra step back.
                ci.Next();
                return low;
            }

            return (((int)high - UTF16.LeadSurrogateMinValue) << 10) +
                   ((int)low - UTF16.TrailSurrogateMinValue) +
                   UTF16.SupplementaryMinValue;
        }
예제 #9
0
        /// <summary>
        /// Advances the iterator to the next code point and returns it, leaving the
        /// iterator positioned at the returned character. For supplementary characters
        /// the iterator is left on the lead surrogate.
        /// </summary>
        /// <param name="ci">The character iterator.</param>
        /// <returns>The next code point.</returns>
        public static int Next32(CharacterIterator ci)
        {
            // If we are currently on a surrogate pair, step onto its trail surrogate first
            // so that the underlying iterator's Next() below lands on the following character.
            int here = ci.Current;
            bool onLead = here >= UTF16.LeadSurrogateMinValue && here <= UTF16.LeadSurrogateMaxValue;
            if (onLead)
            {
                int following = ci.Next();
                bool isTrail = following >= UTF16.TrailSurrogateMinValue &&
                               following <= UTF16.TrailSurrogateMaxValue;
                if (!isTrail)
                {
                    // Unpaired lead surrogate: go back to where we were.
                    ci.Previous();
                }
            }

            // For BMP characters this Next() is the real move.
            int codePoint = ci.Next();

            // A possible lead surrogate: peek ahead for the trail surrogate,
            // even though we do not want to end up positioned on it.
            if (codePoint >= UTF16.LeadSurrogateMinValue)
            {
                codePoint = NextTrail32(ci, codePoint);
            }

            if (codePoint >= UTF16.SupplementaryMinValue && codePoint != Done32)
            {
                // Supplementary character: back up to the position of its lead surrogate.
                ci.Previous();
            }

            return codePoint;
        }
예제 #10
0
        /// <summary>
        /// Returns the code point at the iterator's current position.
        /// </summary>
        /// <remarks>
        /// When the current code unit is a lead surrogate, the following unit is inspected
        /// with a <c>Next()</c> / <c>Previous()</c> pair and, if it is a trail surrogate,
        /// the two are combined into one supplementary code point.
        /// </remarks>
        /// <param name="ci">The character iterator to read from.</param>
        /// <returns>
        /// The current code point, or <c>Done32</c> when the current unit equals
        /// <c>CharacterIterator.Done</c> and the index is at or beyond the end index.
        /// </returns>
        public static int Current32(CharacterIterator ci)
        {
            char unit = ci.Current;

            // Everything below the surrogate range is returned directly.
            if (unit < UTF16.LeadSurrogateMinValue)
            {
                return unit;
            }

            if (!UTF16.IsLeadSurrogate(unit))
            {
                // The Done value only means end-of-text when the iterator really is at the end.
                if (unit == CharacterIterator.Done && ci.Index >= ci.EndIndex)
                {
                    return Done32;
                }
                return unit;
            }

            // Look at the unit after the lead surrogate, then step back again.
            char following = ci.Next();
            ci.Previous();
            if (!UTF16.IsTrailSurrogate(following))
            {
                return unit;
            }

            return ((unit - UTF16.LeadSurrogateMinValue) << 10) +
                   (following - UTF16.TrailSurrogateMinValue) +
                   UTF16.SupplementaryMinValue;
        }
예제 #11
0
        /// <summary>
        /// Returns the code point at the iterator's current position.
        /// </summary>
        /// <remarks>
        /// When the current code unit is a lead surrogate, the following unit is inspected
        /// with a <c>Next()</c> / <c>Previous()</c> pair and, if it is a trail surrogate,
        /// the two are combined into one supplementary code point.
        /// </remarks>
        /// <param name="ci">The character iterator to read from.</param>
        /// <returns>
        /// The current code point, or <c>DONE32</c> when the current unit equals
        /// <c>CharacterIterator.DONE</c> and the index is at or beyond the end index.
        /// </returns>
        public static int Current32(CharacterIterator ci)
        {
            char unit = ci.Current;

            // Everything below the surrogate range is returned directly.
            if (unit < UTF16.LEAD_SURROGATE_MIN_VALUE)
            {
                return unit;
            }

            if (!UTF16.IsLeadSurrogate(unit))
            {
                // The DONE value only means end-of-text when the iterator really is at the end.
                if (unit == CharacterIterator.DONE && ci.Index >= ci.EndIndex)
                {
                    return DONE32;
                }
                return unit;
            }

            // Look at the unit after the lead surrogate, then step back again.
            char following = ci.Next();
            ci.Previous();
            if (!UTF16.IsTrailSurrogate(following))
            {
                return unit;
            }

            return ((unit - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                   (following - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                   UTF16.SUPPLEMENTARY_MIN_VALUE;
        }
예제 #12
0
        /// <summary>
        /// Sets a new source text iterator for iteration and resets the
        /// offset to the beginning of the text.
        /// </summary>
        /// <param name="source">The new source text iterator for iteration.</param>
        /// <stable>ICU 2.8</stable>
        public void SetText(CharacterIterator source)
        {
            // Note: In C++, we just setText(source.getText()).
            // In Java, we actually operate on a character iterator.
            // TODO: do we need to remember the iterator in a field?
            // TODO: apparently we don't clone a CharacterIterator in Java,
            // we only clone the text for a UCharacterIterator?? see the old code in the constructors
            UCharacterIterator wrapped = new CharacterIteratorWrapper(source);
            wrapped.SetToStart();

            string_ = wrapped.GetText(); // TODO: do we need to remember the source string in a field?

            // Choose the collation iterator implementation based on the DontCheckFCD setting.
            bool numeric = rbc_.settings.ReadOnly.IsNumeric;
            if (rbc_.settings.ReadOnly.DontCheckFCD)
            {
                iter_ = new IterCollationIterator(rbc_.data, numeric, wrapped);
            }
            else
            {
                iter_ = new FCDIterCollationIterator(rbc_.data, numeric, wrapped, 0);
            }

            otherHalf_ = 0;
            dir_       = 0;
        }
예제 #13
0
 /// <summary>
 /// Assigns <paramref name="ci"/> as the text of <paramref name="bi"/> and then calls
 /// <c>next()</c> repeatedly until it returns <c>BreakIterator.DONE</c>.
 /// </summary>
 /// <param name="bi">The break iterator to run to completion.</param>
 /// <param name="ci">The text to iterate over.</param>
 private void consume(BreakIterator bi, CharacterIterator ci)
 {
     bi.Text = ci;

     // The boundaries themselves are discarded; only the traversal matters here.
     int boundary;
     do
     {
         boundary = bi.next();
     }
     while (boundary != BreakIterator.DONE);
 }
예제 #14
0
 /// <summary>
 /// Creates a wrapper around the given <see cref="CharacterIterator"/>.
 /// </summary>
 /// <param name="iter">The iterator to wrap; must not be null.</param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="iter"/> is null.</exception>
 public CharacterIteratorWrapper(CharacterIterator iter)
 {
     if (iter == null)
     {
         // ArgumentException(string) treats its argument as the message, so the parameter
         // name was being reported as the error text. ArgumentNullException(string) takes
         // the parameter name, and it derives from ArgumentException, so existing
         // catch (ArgumentException) handlers keep working.
         throw new ArgumentNullException(nameof(iter));
     }
     iterator = iter;
 }
예제 #15
0
        /// <summary>
        /// Replaces the text with the string content of <paramref name="newText"/>, resets the
        /// current boundary index to 0, records the iterator's begin and end indexes, and
        /// calls <c>LoadBoundaries</c> for that range.
        /// </summary>
        /// <param name="newText">The iterator supplying the new text and its bounds.</param>
        public override void SetText(CharacterIterator newText)
        {
            var content    = newText.GetTextAsString();
            var lowerBound = newText.BeginIndex;
            var upperBound = newText.EndIndex;

            text = content;
            currentBoundaryIndex = 0;
            m_start = lowerBound;
            m_end   = upperBound;

            LoadBoundaries(m_start, m_end);
        }
예제 #16
0
 /// <summary>
 /// Backs up from the current candidate to the next shorter one and positions the
 /// text iterator just after it.
 /// </summary>
 /// <param name="fIter">The text iterator to reposition.</param>
 /// <returns><c>true</c> if a shorter candidate exists; otherwise <c>false</c>.</returns>
 public virtual bool BackUp(CharacterIterator fIter)
 {
     if (current <= 0)
     {
         // Already at the first candidate; nothing shorter to fall back to.
         return false;
     }

     current--;
     fIter.SetIndex(offset + lengths[current]);
     return true;
 }
예제 #17
0
        /// <summary>
        /// Walks a <c>BytesTrie</c> built over <c>characters</c> with the transformed code points
        /// read from <paramref name="text_"/>, recording every position at which the trie
        /// reports a value.
        /// </summary>
        /// <param name="text_">The text to read code points from, starting at its current position.</param>
        /// <param name="maxLength">The walk stops once this many code points have been consumed.</param>
        /// <param name="lengths">Receives, per recorded match, the number of code points consumed so far.</param>
        /// <param name="count_">Element 0 receives the number of recorded matches.</param>
        /// <param name="limit">Maximum number of matches to record.</param>
        /// <param name="values">If not null, receives the trie value of each recorded match.</param>
        /// <returns>The number of code points consumed, or 0 if the text yields no code point at all.</returns>
        public override int Matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values)
        {
            UCharacterIterator codePoints = UCharacterIterator.GetInstance(text_);
            BytesTrie          trie       = new BytesTrie(characters, 0);

            int codePoint = codePoints.NextCodePoint();
            if (codePoint == UCharacterIterator.DONE)
            {
                return 0;
            }

            Result state = trie.First(Transform(codePoint));
            // TODO: should numChars count Character.charCount() ?
            int consumed = 1;
            int found    = 0;

            while (true)
            {
                if (state.HasValue())
                {
                    // Record the match only while there is still room for it.
                    if (found < limit)
                    {
                        if (values != null)
                        {
                            values[found] = trie.GetValue();
                        }
                        lengths[found] = consumed;
                        found++;
                    }

                    if (state == Result.FinalValue)
                    {
                        break;
                    }
                }
                else if (state == Result.NoMatch)
                {
                    break;
                }

                if (consumed >= maxLength)
                {
                    break;
                }

                codePoint = codePoints.NextCodePoint();
                if (codePoint == UCharacterIterator.DONE)
                {
                    break;
                }

                ++consumed;
                state = trie.Next(Transform(codePoint));
            }

            count_[0] = found;
            return consumed;
        }
예제 #18
0
        /// <summary>
        /// Builds a string from every character of the iterator, from <c>First()</c> until
        /// <c>CharacterIterator.DONE</c> is returned, wrapped in single quotes.
        /// </summary>
        /// <param name="ci">The iterator to read; it is repositioned by this call.</param>
        /// <returns>The quoted text.</returns>
        private static String getString(CharacterIterator ci)
        {
            // Two extra slots for the surrounding quotes.
            StringBuffer quoted = new StringBuffer(ci.EndIndex - ci.BeginIndex + 2);

            quoted.Append("'");

            char ch = ci.First();
            while (ch != CharacterIterator.DONE)
            {
                quoted.Append(ch);
                ch = ci.Next();
            }

            quoted.Append("'");
            return quoted.ToString();
        }
예제 #19
0
 /// <summary>
 /// Replaces the text, rewinds it to its begin index, resets the current sentence to 0,
 /// and recomputes the sentence start positions from the spans returned by
 /// <c>sentenceOp.SplitSentences</c>.
 /// </summary>
 /// <param name="newText">The new text to iterate over.</param>
 public override void SetText(CharacterIterator newText)
 {
     text = newText;
     text.SetIndex(text.BeginIndex);
     currentSentence = 0;

     Span[] sentences = sentenceOp.SplitSentences(CharacterIteratorToString());
     int[]  starts    = new int[sentences.Length];
     sentenceStarts = starts;

     int index = 0;
     while (index < sentences.Length)
     {
         // Adjust start positions to match those of the passed-in CharacterIterator
         starts[index] = sentences[index].getStart() + text.BeginIndex;
         ++index;
     }
 }
예제 #20
0
        /// <summary>
        /// Asserts that two break iterators break the text the same way.
        /// </summary>
        /// <param name="one">Text given to <paramref name="expected"/>; its bounds also drive the
        /// <c>Following</c> and <c>Preceding</c> sweeps.</param>
        /// <param name="two">Text given to <paramref name="actual"/>.</param>
        /// <param name="expected">The reference break iterator.</param>
        /// <param name="actual">The break iterator under test.</param>
        public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual)
        {
            expected.SetText(one);
            actual.SetText(two);

            assertEquals(expected.Current, actual.Current);

            // Next(): walk forward until the reference iterator reports DONE.
            int boundary = expected.Current;
            while (boundary != BreakIterator.DONE)
            {
                boundary = expected.Next();
                assertEquals(boundary, actual.Next());
                assertEquals(expected.Current, actual.Current);
            }

            // First()
            assertEquals(expected.First(), actual.First());
            assertEquals(expected.Current, actual.Current);

            // Last()
            assertEquals(expected.Last(), actual.Last());
            assertEquals(expected.Current, actual.Current);

            // Previous(): walk backward until the reference iterator reports DONE.
            boundary = expected.Current;
            while (boundary != BreakIterator.DONE)
            {
                boundary = expected.Previous();
                assertEquals(boundary, actual.Previous());
                assertEquals(expected.Current, actual.Current);
            }

            // Following(): probe every offset, starting each probe from the first boundary.
            for (int offset = one.BeginIndex; offset <= one.EndIndex; offset++)
            {
                expected.First();
                actual.First();
                assertEquals(expected.Following(offset), actual.Following(offset));
                assertEquals(expected.Current, actual.Current);
            }

            // Preceding(): probe every offset, starting each probe from the last boundary.
            for (int offset = one.BeginIndex; offset <= one.EndIndex; offset++)
            {
                expected.Last();
                actual.Last();
                assertEquals(expected.Preceding(offset), actual.Preceding(offset));
                assertEquals(expected.Current, actual.Current);
            }
        }
예제 #21
0
 /// <summary>
 /// Advances the iterator past the run of characters that belong to the set stored in
 /// <c>fHandled</c> for <paramref name="breakType"/>, without recording any breaks.
 /// </summary>
 /// <param name="text">The text iterator to advance.</param>
 /// <param name="startPos">Not read by this implementation.</param>
 /// <param name="endPos">Exclusive upper bound for the scan.</param>
 /// <param name="breakType">Index into <c>fHandled</c>; out-of-range values leave the iterator untouched.</param>
 /// <param name="foundBreaks">Not read by this implementation.</param>
 /// <returns>Always 0.</returns>
 public int FindBreaks(CharacterIterator text, int startPos, int endPos,
                       int breakType, DictionaryBreakEngine.DequeI foundBreaks)
 {
     bool knownType = breakType >= 0 && breakType < fHandled.Length;
     if (!knownType)
     {
         return 0;
     }

     UnicodeSet handled = fHandled[breakType];
     for (int codePoint = CharacterIteration.Current32(text);
          text.Index < endPos && handled.Contains(codePoint);
          codePoint = CharacterIteration.Current32(text))
     {
         CharacterIteration.Next32(text);
     }

     return 0;
 }
예제 #22
0
 /// <summary>
 /// Returns the text that was matched by the most recent call to
 /// <see cref="First()"/>, <see cref="Next()"/>, <see cref="Previous()"/>, or <see cref="Last()"/>.
 /// If the recorded match length is not greater than 0 (e.g. just after
 /// construction or after <see cref="Done"/> has been returned), returns null.
 /// </summary>
 /// <returns>The substring in the target text of the most recent match,
 /// or null if there is no match currently.</returns>
 /// <seealso cref="First()"/>
 /// <seealso cref="Next()"/>
 /// <seealso cref="Previous()"/>
 /// <seealso cref="Last()"/>
 /// <stable>ICU 2.0</stable>
 public virtual string GetMatchedText()
 {
     int length = search_.MatchedLength;
     if (length <= 0)
     {
         return null;
     }

     int begin = search_.matchedIndex_;
     int limit = begin + length;

     StringBuilder     matched = new StringBuilder(length);
     CharacterIterator it      = search_.Text;

     // Copy the matched characters out of the target text.
     for (it.SetIndex(begin); it.Index < limit; it.Next())
     {
         matched.Append(it.Current);
     }

     // Put the target iterator back at the start of the match.
     it.SetIndex(begin);
     return matched.ToString();
 }
예제 #23
0
            /// <summary>
            /// Fills the list of candidates if needed, selects the longest, and returns the number found.
            /// </summary>
            /// <remarks>
            /// The dictionary is only consulted when the iterator's index differs from the cached
            /// <c>offset</c>; otherwise the previously stored lengths and count are reused.
            /// </remarks>
            /// <param name="fIter">The text iterator; on return it is positioned after the longest candidate, if any.</param>
            /// <param name="dict">The dictionary matcher used to look up candidates.</param>
            /// <param name="rangeEnd">End of the range; the matcher is given <c>rangeEnd - start</c> as its maximum length.</param>
            /// <returns>The number of candidates stored in <c>count[0]</c>.</returns>
            public virtual int Candidates(CharacterIterator fIter, DictionaryMatcher dict, int rangeEnd)
            {
                int position = fIter.Index;

                if (position != offset)
                {
                    offset = position;
                    prefix = dict.Matches(fIter, rangeEnd - position, lengths, count, lengths.Length);

                    // Dictionary leaves text after longest prefix, not longest word. Back up.
                    if (count[0] <= 0)
                    {
                        fIter.SetIndex(position);
                    }
                }

                int found = count[0];
                if (found > 0)
                {
                    // Position the text just after the last (longest) stored candidate.
                    fIter.SetIndex(position + lengths[found - 1]);
                }

                current = found - 1;
                mark    = current;
                return found;
            }
예제 #24
0
        /// <summary>
        /// Out-of-line portion of the in-line <see cref="Next32(CharacterIterator)"/> code.
        /// The call site does an initial ci.Next() and calls this function
        /// if the 16 bit value it gets is >= <see cref="UTF16.LEAD_SURROGATE_MIN_VALUE"/>.
        /// </summary>
        /// <param name="ci">The character iterator, already advanced by the caller.</param>
        /// <param name="lead">The 16 bit value the caller obtained from its ci.Next() call.</param>
        /// <returns>
        /// <c>DONE32</c> at the end of the text, the combined supplementary code point when
        /// <paramref name="lead"/> is followed by a trail surrogate, and <paramref name="lead"/>
        /// itself otherwise.
        /// </returns>
        // NOTE:  when a pair is combined, the underlying char iterator is left positioned
        //        in the middle of the surrogate pair.  ci.Next() will work correctly
        //        from there, but ci.Index will be wrong, and needs adjustment.
        public static int NextTrail32(CharacterIterator ci, int lead)
        {
            bool exhausted = lead == CharacterIterator.DONE && ci.Index >= ci.EndIndex;
            if (exhausted)
            {
                return DONE32;
            }

            // Values above the lead surrogate range cannot start a pair.
            if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE)
            {
                return lead;
            }

            char following = ci.Next();
            if (UTF16.IsTrailSurrogate(following))
            {
                return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                       (following - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                       UTF16.SUPPLEMENTARY_MIN_VALUE;
            }

            // No trail surrogate follows: undo the look-ahead.
            ci.Previous();
            return lead;
        }
예제 #25
0
        /// <summary>
        /// Out-of-line portion of the in-line <see cref="Next32(CharacterIterator)"/> code.
        /// The call site does an initial ci.Next() and calls this function
        /// if the 16 bit value it gets is >= <see cref="UTF16.LeadSurrogateMinValue"/>.
        /// </summary>
        /// <param name="ci">The character iterator, already advanced by the caller.</param>
        /// <param name="lead">The 16 bit value the caller obtained from its ci.Next() call.</param>
        /// <returns>
        /// <c>Done32</c> at the end of the text, the combined supplementary code point when
        /// <paramref name="lead"/> is followed by a trail surrogate, and <paramref name="lead"/>
        /// itself otherwise.
        /// </returns>
        // NOTE:  when a pair is combined, the underlying char iterator is left positioned
        //        in the middle of the surrogate pair.  ci.Next() will work correctly
        //        from there, but ci.Index will be wrong, and needs adjustment.
        public static int NextTrail32(CharacterIterator ci, int lead)
        {
            bool exhausted = lead == CharacterIterator.Done && ci.Index >= ci.EndIndex;
            if (exhausted)
            {
                return Done32;
            }

            // Values above the lead surrogate range cannot start a pair.
            if (lead > UTF16.LeadSurrogateMaxValue)
            {
                return lead;
            }

            char following = ci.Next();
            if (UTF16.IsTrailSurrogate(following))
            {
                return ((lead - UTF16.LeadSurrogateMinValue) << 10) +
                       (following - UTF16.TrailSurrogateMinValue) +
                       UTF16.SupplementaryMinValue;
            }

            // No trail surrogate follows: undo the look-ahead.
            ci.Previous();
            return lead;
        }
예제 #26
0
        /// <summary>
        /// Set the target text to be searched. Text iteration will then begin at
        /// the start of the text string. This method is useful if you want to
        /// reuse an iterator to search within a different body of text.
        /// </summary>
        /// <param name="text">New text iterator to look for match. Its index is reset to its begin index.</param>
        /// <exception cref="ArgumentException">Thrown when text is null or has 0 length.</exception>
        /// <seealso cref="Target"/>
        /// <stable>ICU 2.4</stable>
        public virtual void SetTarget(CharacterIterator text)
        {
            // Emptiness is decided by the iterator's range (begin..end), not by where it
            // currently points: the index is reset to the begin index just below, so a
            // non-empty iterator that happens to be positioned at its end is a valid target.
            if (text == null || text.EndIndex == text.BeginIndex)
            {
                throw new ArgumentException("Illegal null or empty text");
            }

            text.SetIndex(text.BeginIndex);
            search_.SetTarget(text);

            // Discard any previous match and restart as a forward search.
            search_.matchedIndex_       = Done;
            search_.MatchedLength       = 0;
            search_.reset_              = true;
            search_.isForwardSearching_ = true;

            if (search_.BreakIterator != null)
            {
                // Create a clone of the CharacterIterator, so it won't
                // affect the position currently held by the search target text.
                search_.BreakIterator.SetText((CharacterIterator)text.Clone());
            }
            if (search_.internalBreakIter_ != null)
            {
                // The internal break iterator likewise gets its own clone.
                search_.internalBreakIter_.SetText((CharacterIterator)text.Clone());
            }
        }
 /// <summary>
 /// Forwards the new text to the wrapped <c>@delegate</c> instance.
 /// </summary>
 /// <param name="newText">The text iterator, passed through unchanged.</param>
 public override void SetText(CharacterIterator newText)
 {
     @delegate.SetText(newText);
 }
예제 #28
0
 /// <summary>
 /// No-op override: the supplied text is ignored.
 /// </summary>
 /// <param name="newText">Not used.</param>
 public override void SetText(CharacterIterator newText)
 {
 }
예제 #29
0
	  /// <summary>
	  /// Assigns <paramref name="ci"/> as the text of <paramref name="bi"/> and then calls
	  /// <c>next()</c> repeatedly until it returns <c>BreakIterator.DONE</c>.
	  /// </summary>
	  /// <param name="bi">The break iterator to run to completion.</param>
	  /// <param name="ci">The text to iterate over.</param>
	  private void consume(BreakIterator bi, CharacterIterator ci)
	  {
		bi.Text = ci;

		// The boundaries themselves are discarded; only the traversal matters here.
		int boundary;
		do
		{
		  boundary = bi.next();
		}
		while (boundary != BreakIterator.DONE);
	  }
예제 #30
0
        public override int DivideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
                                                    DequeI foundBreaks)
        {
            if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD)
            {
                return(0);  // Not enough characters for word
            }
            int wordsFound = 0;
            int wordLength;
            int current;

            PossibleWord[] words = new PossibleWord[BURMESE_LOOKAHEAD];
            for (int i = 0; i < BURMESE_LOOKAHEAD; i++)
            {
                words[i] = new PossibleWord();
            }
            int uc;

            fIter.SetIndex(rangeStart);
            while ((current = fIter.Index) < rangeEnd)
            {
                wordLength = 0;

                //Look for candidate words at the current position
                int candidates = words[wordsFound % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd);

                // If we found exactly one, use that
                if (candidates == 1)
                {
                    wordLength  = words[wordsFound % BURMESE_LOOKAHEAD].AcceptMarked(fIter);
                    wordsFound += 1;
                }

                // If there was more than one, see which one can take us forward the most words
                else if (candidates > 1)
                {
                    bool foundBest = false;
                    // If we're already at the end of the range, we're done
                    if (fIter.Index < rangeEnd)
                    {
                        do
                        {
                            int wordsMatched = 1;
                            if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd) > 0)
                            {
                                if (wordsMatched < 2)
                                {
                                    // Followed by another dictionary word; mark first word as a good candidate
                                    words[wordsFound % BURMESE_LOOKAHEAD].MarkCurrent();
                                    wordsMatched = 2;
                                }

                                // If we're already at the end of the range, we're done
                                if (fIter.Index >= rangeEnd)
                                {
                                    break;
                                }

                                // See if any of the possible second words is followed by a third word
                                do
                                {
                                    // If we find a third word, stop right away
                                    if (words[(wordsFound + 2) % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd) > 0)
                                    {
                                        words[wordsFound % BURMESE_LOOKAHEAD].MarkCurrent();
                                        foundBest = true;
                                        break;
                                    }
                                } while (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].BackUp(fIter));
                            }
                        } while (words[wordsFound % BURMESE_LOOKAHEAD].BackUp(fIter) && !foundBest);
                    }
                    wordLength  = words[wordsFound % BURMESE_LOOKAHEAD].AcceptMarked(fIter);
                    wordsFound += 1;
                }

                // We come here after having either found a word or not. We look ahead to the
                // next word. If it's not a dictionary word, we will combine it with the word we
                // just found (if there is one), but only if the preceding word does not exceed
                // the threshold.
                // The text iterator should now be positioned at the end of the word we found.
                if (fIter.Index < rangeEnd && wordLength < BURMESE_ROOT_COMBINE_THRESHOLD)
                {
                    // If it is a dictionary word, do nothing. If it isn't, then if there is
                    // no preceding word, or the non-word shares less than the minimum threshold
                    // of characters with a dictionary word, then scan to resynchronize
                    if (words[wordsFound % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd) <= 0 &&
                        (wordLength == 0 ||
                         words[wordsFound % BURMESE_LOOKAHEAD].LongestPrefix < BURMESE_PREFIX_COMBINE_THRESHOLD))
                    {
                        // Look for a plausible word boundary
                        int remaining = rangeEnd - (current + wordLength);
                        int pc        = fIter.Current;
                        int chars     = 0;
                        for (; ;)
                        {
                            fIter.Next();
                            uc     = fIter.Current;
                            chars += 1;
                            if (--remaining <= 0)
                            {
                                break;
                            }
                            if (fEndWordSet.Contains(pc) && fBeginWordSet.Contains(uc))
                            {
                                // Maybe. See if it's in the dictionary.
                                int candidate = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd);
                                fIter.SetIndex(current + wordLength + chars);
                                if (candidate > 0)
                                {
                                    break;
                                }
                            }
                            pc = uc;
                        }

                        // Bump the word count if there wasn't already one
                        if (wordLength <= 0)
                        {
                            wordsFound += 1;
                        }

                        // Update the length with the passed-over characters
                        wordLength += chars;
                    }
                    else
                    {
                        // Backup to where we were for next iteration
                        fIter.SetIndex(current + wordLength);
                    }
                }

                // Never stop before a combining mark.
                int currPos;
                while ((currPos = fIter.Index) < rangeEnd && fMarkSet.Contains(fIter.Current))
                {
                    fIter.Next();
                    wordLength += fIter.Index - currPos;
                }

                // Look ahead for possible suffixes if a dictionary word does not follow.
                // We do this in code rather than using a rule so that the heuristic
                // resynch continues to function. For example, one of the suffix characters
                // could be a typo in the middle of a word.
                // NOT CURRENTLY APPLICABLE TO BURMESE

                // Did we find a word on this iteration? If so, push it on the break stack
                if (wordLength > 0)
                {
                    foundBreaks.Push(current + wordLength);
                }
            }

            // Don't return a break for the end of the dictionary range if there is one there
            if (foundBreaks.Peek() >= rangeEnd)
            {
                foundBreaks.Pop();
                wordsFound -= 1;
            }

            return(wordsFound);
        }
예제 #31
0
		/// <summary>
		/// Sets the new text to be analyzed, supplied through the given
		/// <c>CharacterIterator</c>.
		/// The position is reset to the beginning of the new text; all other
		/// status information of this iterator is kept.
		/// </summary>
		/// <param name="newText">
		/// The <c>CharacterIterator</c> referring to the text to be analyzed.
		/// </param>
		public abstract void setText (CharacterIterator newText);
예제 #32
0
 /// <summary>
 /// Forwards the call to the wrapped object's <c>setText</c> method through
 /// <c>Instance.CallMethod</c>.
 /// </summary>
 /// <param name="arg0">The character iterator handed to the wrapped method.</param>
 public void setText(CharacterIterator arg0)
     => Instance.CallMethod(
         "setText",
         "(Ljava/text/CharacterIterator;)V",
         arg0);
예제 #33
0
 /// <summary>
 /// Records the end index of the supplied text in <c>length</c>.
 /// The iterator itself is not stored by this method.
 /// </summary>
 /// <param name="newText">The text whose <c>EndIndex</c> is captured.</param>
 public override void SetText(CharacterIterator newText)
     => length = newText.EndIndex;
예제 #34
0
		/// <summary>
		/// Returns the <c>LineMetrics</c> object for the specified
		/// <c>CharacterIterator</c> in the specified <c>Graphics</c> context.
		/// </summary>
		/// <remarks>
		/// This implementation is a stub: it ignores every argument and always
		/// yields <c>default(LineMetrics)</c>.
		/// </remarks>
		/// <param name="ci">The character iterator (unused by this stub).</param>
		/// <param name="beginIndex">The begin index (unused by this stub).</param>
		/// <param name="limit">The limit index (unused by this stub).</param>
		/// <param name="context">The graphics context (unused by this stub).</param>
		/// <returns>Always <c>default(LineMetrics)</c>.</returns>
		public LineMetrics getLineMetrics(CharacterIterator @ci, int @beginIndex, int @limit, Graphics @context)
			=> default(LineMetrics);
예제 #35
0
		/// <summary>
		/// Returns the bounds of the characters indexed in the specified
		/// <c>CharacterIterator</c> in the specified <c>Graphics</c> context.
		/// </summary>
		/// <remarks>
		/// This implementation is a stub: it ignores every argument and always
		/// yields <c>default(Rectangle2D)</c>.
		/// </remarks>
		/// <param name="ci">The character iterator (unused by this stub).</param>
		/// <param name="beginIndex">The begin index (unused by this stub).</param>
		/// <param name="limit">The limit index (unused by this stub).</param>
		/// <param name="context">The graphics context (unused by this stub).</param>
		/// <returns>Always <c>default(Rectangle2D)</c>.</returns>
		public Rectangle2D getStringBounds(CharacterIterator @ci, int @beginIndex, int @limit, Graphics @context)
			=> default(Rectangle2D);
예제 #36
0
		/// <summary>
		/// Replaces the text being analyzed: stores the supplied iterator in
		/// <c>this.iterator</c> and resets <c>last_boundary</c> to 0.
		/// </summary>
		/// <param name="iterator">The character iterator over the new text.</param>
		public override void setText (CharacterIterator iterator)
		{
			// The parameter shadows the member of the same name, hence the explicit "this.".
			this.iterator = iterator;
			// Reset the remembered boundary to 0 for the new text.
			last_boundary = 0;
		}
예제 #37
0
 /// <summary>
 /// Forwards the call to the wrapped object's <c>getCollationElementIterator</c>
 /// method through <c>Instance.CallMethod</c> and returns its result.
 /// </summary>
 /// <param name="arg0">The character iterator handed to the wrapped method.</param>
 /// <returns>The <c>CollationElementIterator</c> produced by the wrapped call.</returns>
 public CollationElementIterator getCollationElementIterator(CharacterIterator arg0)
     => Instance.CallMethod<CollationElementIterator>(
         "getCollationElementIterator",
         "(Ljava/text/CharacterIterator;)Ljava/text/CollationElementIterator;",
         arg0);