/// <summary>
///   Walks the break opportunities between <c>Offset</c> and <c>EndOffset - HardTrimAtEnd</c>
///   and picks the offset at which to break so that the measured width stays near
///   <paramref name="targetPosition"/>.
/// </summary>
/// <param name="targetPosition">The width the text up to the break should not exceed.</param>
/// <param name="breakEvaluator">Passed to the break iterator; a <c>true</c> result marks a break opportunity.</param>
/// <returns>
///   The last break opportunity whose width was below the target; or the first opportunity at or
///   beyond the target when no earlier one exists or when it lies within 0.5 of the target.
///   If no opportunity reaches the target, the last one seen (or the start offset) is returned.
/// </returns>
int FindBreakOffset(float targetPosition, Func <char, char, bool> breakEvaluator)
        {
            var firstOffset = Offset;
            var lastOffset  = EndOffset - HardTrimAtEnd;
            var breaks      = new BreakIterator <bool>(Node.Document.Content, breakEvaluator, firstOffset, lastOffset);

            // Best candidate so far; equal to firstOffset means "nothing found yet".
            var bestBreak = firstOffset;

            // position is advanced once for every step the iterator takes.
            for (var position = firstOffset; breaks.MoveNext(); position += 1)
            {
                if (breaks.Current == false)
                {
                    // Not a break opportunity, keep scanning.
                    continue;
                }

                var width = WidthFor(firstOffset, position);
                if (width < targetPosition)
                {
                    // Still fits: remember it and look for a later opportunity.
                    bestBreak = position;
                    continue;
                }

                // At or past the target. Accept this opportunity if there is no earlier
                // candidate, or if it overshoots/undershoots by less than half a unit;
                // otherwise fall back to the previous candidate.
                var takeThisBreak = bestBreak == firstOffset || Math.Abs(width - targetPosition) < 0.5;
                return(takeThisBreak ? position : bestBreak);
            }

            return(bestBreak);
        }
        /// <summary>
        ///   Splits <paramref name="chunk"/> at the word breaks reported by <c>rules.IsWordBreak</c>
        ///   and hands every resulting piece, including the remainder, to <c>Add</c>.
        /// </summary>
        /// <param name="chunk">The chunk to split; scanned from its trimmed start to its trimmed end offset.</param>
        void ChopText(ITextChunkView <TDocument> chunk)
        {
            var content = chunk.Node.Document.Content;
            var origin  = chunk.TrimmedStartOffset;
            var words   = new BreakIterator <WordBreakType>(content, rules.IsWordBreak, origin, chunk.TrimmedEndOffset);

            // NOTE(review): position only advances on non-break steps, and the guard below
            // always compares against the initial offset. Confirm this matches the stepping
            // contract of BreakIterator before relying on the exact split offsets.
            var position = origin;

            while (words.MoveNext())
            {
                if (words.Current != WordBreakType.WordBreak)
                {
                    position += 1;
                }
                else if (position != origin)
                {
                    // A word boundary after the start: cut the chunk here.
                    ITextChunkView <TDocument> head;
                    ITextChunkView <TDocument> tail;
                    chunk.BreakAtOffset(position, out head, out tail);

                    // Only when the split produced a second part is the head emitted;
                    // scanning then continues on the tail.
                    if (tail != null)
                    {
                        Add(head);
                        chunk = tail;
                    }
                }
            }

            // Whatever is left over is emitted as the final piece.
            Add(chunk);
        }
        /// <summary>
        ///   Filters <paramref name="text"/> through <c>rules.IsLineBreak</c>, keeping only the
        ///   characters classified as <c>LineBreakType.None</c>, and forwards the result to
        ///   <paramref name="bypass"/>. Nothing is forwarded when no characters remain.
        /// </summary>
        /// <param name="offset">The insert position passed on to <paramref name="bypass"/>.</param>
        /// <param name="text">The raw text to be filtered.</param>
        /// <param name="bypass">The receiver of the filtered text.</param>
        public void InsertAt(int offset, string text, ITextDocumentFilterChain bypass)
        {
            // Stage the unfiltered text in the scratch content buffer.
            bufferContent.Clear();
            var requiredCapacity = text.Length;
            bufferContent.EnsureCapacity(requiredCapacity);
            bufferContent.Insert(0, text);

            // Reset the output buffer that collects the surviving characters.
            buffer.Clear();
            buffer.EnsureCapacity(requiredCapacity);

            var scanner = new BreakIterator <LineBreakType>(bufferContent, rules.IsLineBreak, 0, bufferContent.Length);
            while (scanner.MoveNext())
            {
                if (scanner.Current != LineBreakType.None)
                {
                    // Anything flagged as (part of) a line break is dropped.
                    continue;
                }

                buffer.Append(scanner.CurrentChar);
            }

            if (buffer.Length <= 0)
            {
                return;
            }

            bypass.InsertAt(offset, buffer.ToString());
        }
Beispiel #4
0
        /// <summary>
        ///   Rebuilds the leaf nodes covering the range that starts at <paramref name="offset"/> and
        ///   spans <paramref name="length"/> positions of <c>Content</c>, splitting at the line breaks
        ///   reported by <c>rules.IsLineBreak</c>, and installs a new root containing them.
        /// </summary>
        /// <param name="offset">Start of the affected range within <c>Content</c>.</param>
        /// <param name="length">Size of the affected range; must be greater than zero.</param>
        /// <returns>
        ///   An <c>ElementEdit</c> built from the old and new root paths, the insert index,
        ///   the added nodes and the removed nodes.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">When <paramref name="length"/> is zero or negative.</exception>
        protected override IUndoableEdit InsertUpdate(int offset, int length)
        {
            if (length <= 0)
            {
                throw new ArgumentOutOfRangeException();
            }

            // True while a line break has been seen and the node ending there has not been emitted yet.
            var splitPending = false;

            // Both lists are reused scratch collections; start from a clean state.
            // NOTE(review): removedNodes is not filled in this method; presumably
            // FindRemovedParagraphs populates it. Confirm against that method.
            removedNodes.Clear();
            addedNodes.Clear();

            // NOTE(review): removeCandidate is only read inside the branch that assigns it.
            ITextNode     removeCandidate;
            ITextPosition startOffset;
            ITextPosition endOffset;

            int start, end;
            var insertIdx = FindRemovedParagraphs(offset, out start, out end);

            if (insertIdx != -1)
            {
                // Use the range reported by FindRemovedParagraphs as the span to rebuild.
                startOffset = Content.CreatePosition(start, Bias.Forward);
                endOffset   = Content.CreatePosition(end, Bias.Backward);
            }
            else if (rootNode.Count > 0)
            {
                // No index was reported: fall back to the span and index of the last child of the root.
                removeCandidate = rootNode[rootNode.Count - 1];
                insertIdx       = rootNode.Count - 1;
                startOffset     = Content.CreatePosition(removeCandidate.Offset, Bias.Forward);
                endOffset       = Content.CreatePosition(removeCandidate.EndOffset, Bias.Backward);
            }
            else
            {
                // Root has no children: the span is exactly the range given by the arguments.
                startOffset     = Content.CreatePosition(offset, Bias.Forward);
                endOffset       = Content.CreatePosition(offset + length, Bias.Backward);
                removeCandidate = null;
                insertIdx       = 0;
            }

            // The scan begins one position before offset (when there is one) so the step
            // preceding the range can be inspected first.
            var it = new BreakIterator <LineBreakType>(Content, rules.IsLineBreak, Math.Max(0, offset - 1), offset + length);

            if (offset > 0)
            {
                // Consume the step before the range; anything other than None there
                // means a split is already pending when the range starts.
                it.MoveNext();
                splitPending = it.Current != LineBreakType.None;
            }

            // cursor is advanced once per iterator step in the loop below, starting at offset.
            var cursor = offset;

            while (it.MoveNext())
            {
                var c = it.Current;
                if (c != LineBreakType.Continuation)
                {
                    if (splitPending)
                    {
                        // Close the node that ends at cursor and start the next one there.
                        addedNodes.Add(new ImmutableLeafTextNode(this, startOffset, Content.CreatePosition(cursor, Bias.Backward)));
                        startOffset  = Content.CreatePosition(cursor, Bias.Forward);
                        // The parameter is reused to remember where the last split happened;
                        // it is compared against cursor after the loop.
                        offset       = cursor;
                        splitPending = false;
                    }

                    if (c == LineBreakType.LineBreak)
                    {
                        splitPending = true;
                    }
                }
                cursor += 1;
            }

            // Emit the trailing node(s) only if the scan moved past the last split point.
            if (cursor != offset)
            {
                if (splitPending)
                {
                    // A break is still pending at the end of the scan: one node up to cursor,
                    // and a second node from cursor to the end of the span.
                    addedNodes.Add(new ImmutableLeafTextNode(this, startOffset, Content.CreatePosition(cursor, Bias.Backward)));
                    addedNodes.Add(new ImmutableLeafTextNode(this, Content.CreatePosition(cursor, Bias.Forward), endOffset));
                }
                else
                {
                    addedNodes.Add(new ImmutableLeafTextNode(this, startOffset, endOffset));
                }
            }

            // Swap the collected nodes into a new root, then build the edit from the old and new roots.
            var addedNodesArray = addedNodes.ToArray();
            var oldRoot         = Root;
            var newRoot         = rootNode.Replace(insertIdx, removedNodes.Count, addedNodesArray);

            ReplaceRoot(newRoot);
            return(new ElementEdit(this, TreePath.Create(oldRoot), TreePath.Create(Root), insertIdx, addedNodesArray, removedNodes.ToArray()));
        }
        /// <summary>
        ///   Creates a sentence or word break iterator for the "zh" locale, feeds it
        ///   <paramref name="text"/> and checks that forward iteration visits exactly the
        ///   <paramref name="expected"/> boundaries with the matching <paramref name="ruleStatus"/>.
        /// </summary>
        /// <param name="type">Iterator kind; only SENTENCE and WORD are handled.</param>
        /// <param name="text">The text to analyse.</param>
        /// <param name="expected">The boundary offsets in iteration order.</param>
        /// <param name="ruleStatus">The rule status expected at each boundary.</param>
        public void CanIterateForwards(BreakIterator.UBreakIteratorType type, string text, int[] expected, BreakIterator.UWordBreak[] ruleStatus)
        {
            var locale = new Locale("zh");

            BreakIterator iterator = default(BreakIterator);

            try
            {
                if (type == BreakIterator.UBreakIteratorType.SENTENCE)
                {
                    iterator = BreakIterator.CreateSentenceInstance(locale);
                }
                else if (type == BreakIterator.UBreakIteratorType.WORD)
                {
                    iterator = BreakIterator.CreateWordInstance(locale);
                }
                else
                {
                    throw new NotSupportedException("This iterator type is not supported in this test yet. [" + type + "]");
                }

                iterator.SetText(text);

                CollectionAssert.AreEqual(expected, iterator.Boundaries);

                // Step through every boundary, checking position and rule status before moving on.
                for (int index = 0, following = 1; index < expected.Length; index++, following++)
                {
                    int position = iterator.Current;
                    int status   = iterator.GetRuleStatus();

                    int wantedStatus = (int)ruleStatus[index];

                    Assert.AreEqual(expected[index], position);
                    Assert.AreEqual(wantedStatus, status);
                    CollectionAssert.AreEqual(new[] { wantedStatus }, iterator.GetRuleStatusVector());

                    int moved = iterator.MoveNext();

                    // After the final boundary the iterator must report that it is exhausted.
                    int wantedMove = following < expected.Length ? expected[following] : BreakIterator.DONE;
                    Assert.AreEqual(wantedMove, moved);
                }

                int finalIndex = expected.Length - 1;
                Assert.AreEqual(expected[finalIndex], iterator.Current);

                // Moving again at the end reports DONE and leaves the position on the last boundary.
                Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
                Assert.AreEqual(expected[finalIndex], iterator.Current);

                // Rewinding from the end lands on the first boundary.
                Assert.AreEqual(expected[0], iterator.MoveFirst());
                Assert.AreEqual(expected[0], iterator.Current);
            }
            finally
            {
                // The iterator is only set when creation succeeded.
                if (iterator != default(BreakIterator))
                {
                    iterator.Dispose();
                }
            }
        }
Beispiel #6
0
 /// <inheritdoc/>
 public bool MoveNext()
 {
     // The previous limit becomes the start of the next segment; the wrapped
     // break iterator supplies the new limit.
     _currentStart = _currentLimit;
     _currentLimit = _breakIterator.MoveNext();

     bool exhausted = _currentLimit == BreakIterator.DONE;
     return(!exhausted);
 }
Beispiel #7
0
        /// <summary>
        ///   Exercises a line break iterator over <c>LINE_TEXT</c>: forward stepping,
        ///   jumping to either end, and stepping past both ends.
        /// </summary>
        public void TestLineIteration()
        {
            var culture      = System.Globalization.CultureInfo.InvariantCulture;
            BreakIterator bi = GetLineInstance(culture);

            // Without any text the iterator sits at 0 and has nowhere to go.
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
            Assert.AreEqual(0, bi.Current);

            bi.SetText(LINE_TEXT);

            // A freshly assigned text starts at position 0.
            Assert.AreEqual(0, bi.Current);

            // First three boundaries: (Apache\t^Lucene) breaking on \t, (Lucene^(TM)),
            // (Lucene(TM) ^is a). Current must echo each boundary just reached.
            foreach (var boundary in new[] { 7, 13, 18 })
            {
                Assert.AreEqual(boundary, bi.MoveNext());
                Assert.AreEqual(boundary, bi.Current);
            }

            // Skip ahead to the start of high-performance.
            for (var skipped = 0; skipped < 2; skipped++)
            {
                bi.MoveNext();
            }

            // Next boundary (high-\n^performance).
            Assert.AreEqual(29, bi.MoveNext());

            // Last boundary (in Java.^).
            Assert.AreEqual(108, bi.MoveLast());

            // Stepping past the last boundary reports DONE and does not move.
            Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
            Assert.AreEqual(108, bi.Current);

            // MovePrevious returns the boundary it lands on, and Current agrees.
            Assert.AreEqual(103, bi.MovePrevious());
            Assert.AreEqual(103, bi.Current);

            // MoveFirst returns the boundary it lands on, and Current agrees.
            Assert.AreEqual(0, bi.MoveFirst());
            Assert.AreEqual(0, bi.Current);

            // Stepping before the first boundary reports DONE and does not move.
            Assert.AreEqual(BreakIterator.DONE, bi.MovePrevious());
            Assert.AreEqual(0, bi.Current);

            // MoveLast from the beginning.
            Assert.AreEqual(108, bi.MoveLast());
        }
Beispiel #8
0
        /// <summary>
        ///   Exercises a word break iterator over <c>TEXT</c>: forward stepping,
        ///   jumping to either end, and stepping past both ends.
        /// </summary>
        public void TestWordIteration()
        {
            var culture      = System.Globalization.CultureInfo.InvariantCulture;
            BreakIterator bi = GetWordInstance(culture);

            // Without any text the iterator sits at 0 and has nowhere to go.
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
            Assert.AreEqual(0, bi.Current);

            bi.SetText(TEXT);

            // A freshly assigned text starts at position 0.
            Assert.AreEqual(0, bi.Current);

            // Boundaries one to three: (Apache^), (^Lucene), (Lucene^).
            // Current must echo each boundary just reached.
            foreach (var boundary in new[] { 6, 7, 13 })
            {
                Assert.AreEqual(boundary, bi.MoveNext());
                Assert.AreEqual(boundary, bi.Current);
            }

            // Boundaries four to seven: (^TM), (TM^), (TM)^, (^is).
            foreach (var boundary in new[] { 14, 16, 17, 18 })
            {
                Assert.AreEqual(boundary, bi.MoveNext());
            }

            // Skip ahead towards (^high-performance).
            for (var skipped = 0; skipped < 3; skipped++)
            {
                bi.MoveNext();
            }

            // Boundary before the hyphenated word (^high-performance).
            Assert.AreEqual(23, bi.MoveNext());

            // The hyphen must not produce a break (high-performance^).
            Assert.AreEqual(39, bi.MoveNext());

            // MoveLast, then stepping past the end reports DONE and does not move.
            Assert.AreEqual(107, bi.MoveLast());
            Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
            Assert.AreEqual(107, bi.Current);

            // MoveFirst, then stepping before the start reports DONE and does not move.
            Assert.AreEqual(0, bi.MoveFirst());
            Assert.AreEqual(BreakIterator.DONE, bi.MovePrevious());
            Assert.AreEqual(0, bi.Current);
        }
Beispiel #9
0
        /// <summary>
        ///   Exercises a sentence break iterator over <c>SENTENCE_TEXT</c>: forward stepping
        ///   through every sentence end, jumping to either end, and stepping past both ends.
        /// </summary>
        public void TestSentenceIteration()
        {
            var culture      = System.Globalization.CultureInfo.InvariantCulture;
            BreakIterator bi = GetSentenceInstance(culture);

            // Without any text the iterator sits at 0 and has nowhere to go.
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
            Assert.AreEqual(0, bi.Current);

            bi.SetText(SENTENCE_TEXT);

            // A freshly assigned text starts at position 0.
            Assert.AreEqual(0, bi.Current);

            // First boundary (in Java.^): a \n inside the sentence must not break it.
            Assert.AreEqual(108, bi.MoveNext());
            Assert.AreEqual(108, bi.Current);

            // Remaining sentence ends, in order: (especially cross-platform.^),
            // (free download.^), (things easy.^), (is powerful.^), (is exciting.^),
            // (is cool.^) and finally (Lucene now?^).
            foreach (var boundary in new[] { 221, 290, 324, 344, 364, 380, 400 })
            {
                Assert.AreEqual(boundary, bi.MoveNext());
            }

            // Stepping past the last boundary reports DONE and does not move.
            Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
            Assert.AreEqual(400, bi.Current);

            // MovePrevious returns the boundary it lands on, and Current agrees.
            Assert.AreEqual(380, bi.MovePrevious());
            Assert.AreEqual(380, bi.Current);

            // MoveFirst returns the boundary it lands on, and Current agrees.
            Assert.AreEqual(0, bi.MoveFirst());
            Assert.AreEqual(0, bi.Current);

            // Stepping before the first boundary reports DONE and does not move.
            Assert.AreEqual(BreakIterator.DONE, bi.MovePrevious());
            Assert.AreEqual(0, bi.Current);

            // MoveLast from the beginning.
            Assert.AreEqual(400, bi.MoveLast());
        }
Beispiel #10
0
        /// <summary>
        ///   Exercises a word break iterator created for the "th" culture on mixed
        ///   Thai/Latin text and on a run of mixed Thai and Western digits.
        /// </summary>
        public void TestWordIterationThai()
        {
            var thai         = new System.Globalization.CultureInfo("th");
            BreakIterator bi = GetWordInstance(thai);

            // Without any text the iterator sits at 0 and has nowhere to go.
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
            Assert.AreEqual(0, bi.Current);

            bi.SetText("บริษัทMicrosoftบริการดีที่สุด");

            // A freshly assigned text starts at position 0.
            Assert.AreEqual(0, bi.Current);

            // Boundaries one to three: (บริษัท^Microsoft), (Microsoft^บริการ), (บริการ^ดี).
            // Current must echo each boundary just reached.
            foreach (var boundary in new[] { 6, 15, 21 })
            {
                Assert.AreEqual(boundary, bi.MoveNext());
                Assert.AreEqual(boundary, bi.Current);
            }

            // Boundaries four and five: (ดี^ที่สุด), (ดีที่สุด^).
            foreach (var boundary in new[] { 23, 29 })
            {
                Assert.AreEqual(boundary, bi.MoveNext());
            }

            // Stepping past the last boundary reports DONE and does not move.
            Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
            Assert.AreEqual(29, bi.Current);

            // One step back lands on (ดี^ที่สุด).
            Assert.AreEqual(23, bi.MovePrevious());

            // MoveFirst, then stepping before the start reports DONE and does not move.
            Assert.AreEqual(0, bi.MoveFirst());
            Assert.AreEqual(BreakIterator.DONE, bi.MovePrevious());
            Assert.AreEqual(0, bi.Current);

            // Numerals: Thai digits mixed with Western digits.
            bi.SetText("๑23๔๕๖7");

            // A freshly assigned text starts at position 0.
            Assert.AreEqual(0, bi.Current);

            // The whole digit run must come back as a single group.
            Assert.AreEqual(7, bi.MoveNext());
        }