/// <summary>
///   Searches the break opportunities of this chunk for the one that best matches a target width.
/// </summary>
/// <param name="targetPosition">The width (as measured by <c>WidthFor</c>) the break should stay below.</param>
/// <param name="breakEvaluator">Decides, for a pair of adjacent characters, whether a break is allowed between them.</param>
/// <returns>
///   The last break offset whose measured width is below <paramref name="targetPosition"/>. If the first
///   break that reaches the target is within 0.5 of it, or no break beyond the chunk start has been
///   recorded so far, that overshooting offset is returned instead.
/// </returns>
int FindBreakOffset(float targetPosition, Func<char, char, bool> breakEvaluator)
{
    var firstOffset = Offset;
    var limit = EndOffset - HardTrimAtEnd;
    var walker = new BreakIterator<bool>(Node.Document.Content, breakEvaluator, firstOffset, limit);

    var bestBreak = firstOffset;

    // 'position' advances by one for every element the iterator yields.
    for (var position = firstOffset; walker.MoveNext(); position += 1)
    {
        if (!walker.Current)
        {
            // Not a break opportunity; nothing to measure here.
            continue;
        }

        var measured = WidthFor(firstOffset, position);
        if (measured < targetPosition)
        {
            // Still fits: remember it and keep looking for a later candidate.
            bestBreak = position;
            continue;
        }

        // This candidate reaches or exceeds the target. Accept it when there is no
        // earlier candidate to fall back to, or when it misses the target by less than 0.5.
        var acceptOvershoot = bestBreak == firstOffset || Math.Abs(measured - targetPosition) < 0.5;
        return acceptOvershoot ? position : bestBreak;
    }

    return bestBreak;
}
/// <summary>
///   Splits a text chunk at word breaks and passes the resulting pieces to <c>Add</c>.
/// </summary>
/// <param name="chunk">The chunk to split; whatever remains after the last split is added as well.</param>
void ChopText(ITextChunkView<TDocument> chunk)
{
    var content = chunk.Node.Document.Content;
    var walker = new BreakIterator<WordBreakType>(content, rules.IsWordBreak, chunk.TrimmedStartOffset, chunk.TrimmedEndOffset);
    var position = chunk.TrimmedStartOffset;
    var anchor = chunk.TrimmedStartOffset;

    // NOTE(review): 'position' only advances on non-break iterations and 'anchor' is never
    // updated after a split - confirm against BreakIterator<T> that this is intended.
    while (walker.MoveNext())
    {
        if (walker.Current == WordBreakType.WordBreak)
        {
            if (position != anchor)
            {
                // A word ends here: cut the chunk and continue with the remainder.
                ITextChunkView<TDocument> head;
                ITextChunkView<TDocument> tail;
                chunk.BreakAtOffset(position, out head, out tail);
                if (tail != null)
                {
                    Add(head);
                    chunk = tail;
                }
            }
        }
        else
        {
            position += 1;
        }
    }

    Add(chunk);
}
/// <summary>
///   Filters the given text through the line-break rules and forwards what remains to the next filter.
/// </summary>
/// <param name="offset">The document offset passed through unchanged to <paramref name="bypass"/>.</param>
/// <param name="text">The text that is about to be inserted.</param>
/// <param name="bypass">The filter chain that receives the filtered text; not called when nothing remains.</param>
public void InsertAt(int offset, string text, ITextDocumentFilterChain bypass)
{
    // Stage the incoming text in the reusable content buffer so the iterator can walk it.
    bufferContent.Clear();
    var requiredCapacity = text.Length;
    bufferContent.EnsureCapacity(requiredCapacity);
    bufferContent.Insert(0, text);

    buffer.Clear();
    buffer.EnsureCapacity(text.Length);

    var walker = new BreakIterator<LineBreakType>(bufferContent, rules.IsLineBreak, 0, bufferContent.Length);
    while (walker.MoveNext())
    {
        if (walker.Current != LineBreakType.None)
        {
            // Anything the rules classify as other than 'None' is dropped.
            continue;
        }

        buffer.Append(walker.CurrentChar);
    }

    if (buffer.Length <= 0)
    {
        return;
    }

    bypass.InsertAt(offset, buffer.ToString());
}
/// <summary>
///   Rebuilds the leaf nodes that cover a newly inserted text range, splitting at line breaks,
///   and replaces the affected children of the root node with them.
/// </summary>
/// <param name="offset">The document offset at which text was inserted.</param>
/// <param name="length">The number of inserted characters; must be greater than zero.</param>
/// <returns>
///   An <c>ElementEdit</c> created from the previous and the new root path, the insertion index,
///   the added nodes and the removed nodes.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is zero or negative.</exception>
protected override IUndoableEdit InsertUpdate(int offset, int length)
{
    if (length <= 0)
    {
        // Name the offending argument so the failure is diagnosable from the exception alone.
        throw new ArgumentOutOfRangeException("length", length, "The inserted length must be greater than zero.");
    }

    removedNodes.Clear();
    addedNodes.Clear();

    // Determine the range [startOffset, endOffset] the rebuilt nodes have to cover.
    ITextPosition startOffset;
    ITextPosition endOffset;
    int start, end;
    // NOTE(review): presumably FindRemovedParagraphs also fills 'removedNodes' - confirm in its implementation.
    var insertIdx = FindRemovedParagraphs(offset, out start, out end);
    if (insertIdx != -1)
    {
        startOffset = Content.CreatePosition(start, Bias.Forward);
        endOffset = Content.CreatePosition(end, Bias.Backward);
    }
    else if (rootNode.Count > 0)
    {
        // No paragraph was reported: fall back to the range of the last child of the root.
        ITextNode lastParagraph = rootNode[rootNode.Count - 1];
        insertIdx = rootNode.Count - 1;
        startOffset = Content.CreatePosition(lastParagraph.Offset, Bias.Forward);
        endOffset = Content.CreatePosition(lastParagraph.EndOffset, Bias.Backward);
    }
    else
    {
        // Empty root: the new nodes cover exactly the inserted range.
        startOffset = Content.CreatePosition(offset, Bias.Forward);
        endOffset = Content.CreatePosition(offset + length, Bias.Backward);
        insertIdx = 0;
    }

    // Start one character early (when possible) so the character in front of the insertion
    // can open a pending split.
    var it = new BreakIterator<LineBreakType>(Content, rules.IsLineBreak, Math.Max(0, offset - 1), offset + length);
    var splitPending = false;
    if (offset > 0)
    {
        it.MoveNext();
        splitPending = it.Current != LineBreakType.None;
    }

    // 'segmentStart' is the offset at which the node currently being collected begins.
    var segmentStart = offset;
    var cursor = offset;
    while (it.MoveNext())
    {
        var c = it.Current;
        if (c != LineBreakType.Continuation)
        {
            if (splitPending)
            {
                // Close the node collected so far and start a new one at the cursor.
                addedNodes.Add(new ImmutableLeafTextNode(this, startOffset, Content.CreatePosition(cursor, Bias.Backward)));
                startOffset = Content.CreatePosition(cursor, Bias.Forward);
                segmentStart = cursor;
                splitPending = false;
            }

            if (c == LineBreakType.LineBreak)
            {
                splitPending = true;
            }
        }

        cursor += 1;
    }

    if (cursor != segmentStart)
    {
        if (splitPending)
        {
            // The scanned range ended on a line break: close the current node at the cursor
            // and add a second node for the rest up to endOffset.
            addedNodes.Add(new ImmutableLeafTextNode(this, startOffset, Content.CreatePosition(cursor, Bias.Backward)));
            addedNodes.Add(new ImmutableLeafTextNode(this, Content.CreatePosition(cursor, Bias.Forward), endOffset));
        }
        else
        {
            addedNodes.Add(new ImmutableLeafTextNode(this, startOffset, endOffset));
        }
    }

    var addedNodesArray = addedNodes.ToArray();
    var oldRoot = Root;
    var newRoot = rootNode.Replace(insertIdx, removedNodes.Count, addedNodesArray);
    ReplaceRoot(newRoot);
    return new ElementEdit(this, TreePath.Create(oldRoot), TreePath.Create(Root), insertIdx, addedNodesArray, removedNodes.ToArray());
}
/// <summary>
///   Walks a break iterator forwards over <paramref name="text"/> and checks every boundary and rule
///   status on the way, then checks the iterator state after it has been exhausted and rewound.
/// </summary>
/// <param name="type">The kind of iterator to create; only SENTENCE and WORD are handled.</param>
/// <param name="text">The text to iterate over.</param>
/// <param name="expected">The expected boundary offsets, in iteration order.</param>
/// <param name="ruleStatus">The expected rule status for each entry of <paramref name="expected"/>.</param>
public void CanIterateForwards(BreakIterator.UBreakIteratorType type, string text, int[] expected, BreakIterator.UWordBreak[] ruleStatus)
{
    var locale = new Locale("zh");
    BreakIterator bi = default(BreakIterator);

    try
    {
        if (type == BreakIterator.UBreakIteratorType.SENTENCE)
        {
            bi = BreakIterator.CreateSentenceInstance(locale);
        }
        else if (type == BreakIterator.UBreakIteratorType.WORD)
        {
            bi = BreakIterator.CreateWordInstance(locale);
        }
        else
        {
            throw new NotSupportedException("This iterator type is not supported in this test yet. [" + type + "]");
        }

        bi.SetText(text);

        CollectionAssert.AreEqual(expected, bi.Boundaries);

        // Check position and rule status at each boundary, then step to the next one.
        int boundaryCount = expected.Length;
        for (int index = 0; index < boundaryCount; index++)
        {
            int position = bi.Current;
            int actualStatus = bi.GetRuleStatus();
            int wantedStatus = (int)ruleStatus[index];

            Assert.AreEqual(expected[index], position);
            Assert.AreEqual(wantedStatus, actualStatus);
            CollectionAssert.AreEqual(new[] { wantedStatus }, bi.GetRuleStatusVector());

            int advancedTo = bi.MoveNext();
            int following = index + 1;

            // After the final boundary the iterator must report that it is exhausted.
            int wantedPosition = following < boundaryCount ? expected[following] : BreakIterator.DONE;
            Assert.AreEqual(wantedPosition, advancedTo);
        }

        int finalIndex = expected.Length - 1;
        Assert.AreEqual(expected[finalIndex], bi.Current);

        // Moving past the end again must not change the reported position.
        Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
        Assert.AreEqual(expected[finalIndex], bi.Current);

        // Rewinding from the end must land on the first boundary.
        Assert.AreEqual(expected[0], bi.MoveFirst());
        Assert.AreEqual(expected[0], bi.Current);
    }
    finally
    {
        if (bi != default(BreakIterator))
        {
            bi.Dispose();
        }
    }
}
/// <inheritdoc/>
public bool MoveNext()
{
    // The limit of the previous segment becomes the start of the next one.
    _currentStart = _currentLimit;
    _currentLimit = _breakIterator.MoveNext();

    var exhausted = _currentLimit == BreakIterator.DONE;
    return !exhausted;
}
public void TestLineIteration()
{
    BreakIterator bi = GetLineInstance(System.Globalization.CultureInfo.InvariantCulture);

    // With no text set the iterator sits at 0 and cannot advance.
    Assert.AreEqual(0, bi.Current);
    Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
    Assert.AreEqual(0, bi.Current);

    bi.SetText(LINE_TEXT);

    // Setting text puts the position at 0.
    Assert.AreEqual(0, bi.Current);

    // First three boundaries; Current must report the boundary MoveNext() just returned.
    //    7: Apache\t^Lucene  (breaks on \t)
    //   13: Lucene^(TM)
    //   18: Lucene(TM) ^is a
    int[] leadingBoundaries = { 7, 13, 18 };
    foreach (int boundary in leadingBoundaries)
    {
        Assert.AreEqual(boundary, bi.MoveNext());
        Assert.AreEqual(boundary, bi.Current);
    }

    // Skip ahead to the start of "high-performance".
    for (int skipped = 0; skipped < 2; skipped++)
    {
        bi.MoveNext();
    }

    // high-\n^performance
    Assert.AreEqual(29, bi.MoveNext());

    // Last boundary: in Java.^
    Assert.AreEqual(108, bi.MoveLast());

    // Moving past the last boundary reports DONE and leaves the position alone.
    Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
    Assert.AreEqual(108, bi.Current);

    // MovePrevious, and Current agreeing with it.
    Assert.AreEqual(103, bi.MovePrevious());
    Assert.AreEqual(103, bi.Current);

    // MoveFirst, and Current agreeing with it.
    Assert.AreEqual(0, bi.MoveFirst());
    Assert.AreEqual(0, bi.Current);

    // Moving before the first boundary reports DONE and leaves the position alone.
    Assert.AreEqual(BreakIterator.DONE, bi.MovePrevious());
    Assert.AreEqual(0, bi.Current);

    // MoveLast from the very beginning.
    Assert.AreEqual(108, bi.MoveLast());
}
public void TestWordIteration()
{
    BreakIterator bi = GetWordInstance(System.Globalization.CultureInfo.InvariantCulture);

    // With no text set the iterator sits at 0 and cannot advance.
    Assert.AreEqual(0, bi.Current);
    Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
    Assert.AreEqual(0, bi.Current);

    bi.SetText(TEXT);

    // Setting text puts the position at 0.
    Assert.AreEqual(0, bi.Current);

    // First three boundaries; Current must report the boundary just iterated to.
    //    6: Apache^
    //    7: ^Lucene
    //   13: Lucene^
    int[] trackedBoundaries = { 6, 7, 13 };
    foreach (int boundary in trackedBoundaries)
    {
        Assert.AreEqual(boundary, bi.MoveNext());
        Assert.AreEqual(boundary, bi.Current);
    }

    // Fourth to seventh boundary.
    //   14: ^TM
    //   16: TM^
    //   17: TM)^
    //   18: ^is
    int[] followingBoundaries = { 14, 16, 17, 18 };
    foreach (int boundary in followingBoundaries)
    {
        Assert.AreEqual(boundary, bi.MoveNext());
    }

    // Move towards (^high-performance).
    for (int skipped = 0; skipped < 3; skipped++)
    {
        bi.MoveNext();
    }

    // ^high-performance
    Assert.AreEqual(23, bi.MoveNext());

    // No break on the hyphen: high-performance^
    Assert.AreEqual(39, bi.MoveNext());

    // MoveLast()
    Assert.AreEqual(107, bi.MoveLast());

    // Going past the last boundary reports DONE and leaves the position alone.
    Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
    Assert.AreEqual(107, bi.Current);

    // MoveFirst()
    Assert.AreEqual(0, bi.MoveFirst());

    // Going before the first boundary reports DONE and leaves the position alone.
    Assert.AreEqual(BreakIterator.DONE, bi.MovePrevious());
    Assert.AreEqual(0, bi.Current);
}
public void TestSentenceIteration()
{
    BreakIterator bi = GetSentenceInstance(System.Globalization.CultureInfo.InvariantCulture);

    // With no text set the iterator sits at 0 and cannot advance.
    Assert.AreEqual(0, bi.Current);
    Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
    Assert.AreEqual(0, bi.Current);

    bi.SetText(SENTENCE_TEXT);

    // Setting text puts the position at 0.
    Assert.AreEqual(0, bi.Current);

    // First boundary (in Java.^) - no break on \n - and Current agreeing with it.
    Assert.AreEqual(108, bi.MoveNext());
    Assert.AreEqual(108, bi.Current);

    // Remaining sentence boundaries, in order:
    //   221: especially cross-platform.^
    //   290: free download.^
    //   324: things easy.^
    //   344: is powerful.^
    //   364: is exciting.^
    //   380: is cool.^
    //   400: Lucene now?^  (last boundary)
    int[] remainingBoundaries = { 221, 290, 324, 344, 364, 380, 400 };
    foreach (int boundary in remainingBoundaries)
    {
        Assert.AreEqual(boundary, bi.MoveNext());
    }

    // Moving past the last boundary reports DONE and leaves the position alone.
    Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
    Assert.AreEqual(400, bi.Current);

    // MovePrevious, and Current agreeing with it.
    Assert.AreEqual(380, bi.MovePrevious());
    Assert.AreEqual(380, bi.Current);

    // MoveFirst, and Current agreeing with it.
    Assert.AreEqual(0, bi.MoveFirst());
    Assert.AreEqual(0, bi.Current);

    // Moving before the first boundary reports DONE and leaves the position alone.
    Assert.AreEqual(BreakIterator.DONE, bi.MovePrevious());
    Assert.AreEqual(0, bi.Current);

    // MoveLast from the very beginning.
    Assert.AreEqual(400, bi.MoveLast());
}
public void TestWordIterationThai()
{
    BreakIterator bi = GetWordInstance(new System.Globalization.CultureInfo("th"));

    // With no text set the iterator sits at 0 and cannot advance.
    Assert.AreEqual(0, bi.Current);
    Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
    Assert.AreEqual(0, bi.Current);

    bi.SetText("บริษัทMicrosoftบริการดีที่สุด");

    // Setting text puts the position at 0.
    Assert.AreEqual(0, bi.Current);

    // First three boundaries; Current must report the boundary just iterated to.
    //    6: บริษัท^Microsoft
    //   15: Microsoft^บริการ
    //   21: บริการ^ดี
    int[] trackedBoundaries = { 6, 15, 21 };
    foreach (int boundary in trackedBoundaries)
    {
        Assert.AreEqual(boundary, bi.MoveNext());
        Assert.AreEqual(boundary, bi.Current);
    }

    // Fourth and fifth boundary.
    //   23: ดี^ที่สุด
    //   29: ดีที่สุด^
    int[] closingBoundaries = { 23, 29 };
    foreach (int boundary in closingBoundaries)
    {
        Assert.AreEqual(boundary, bi.MoveNext());
    }

    // Going past the last boundary reports DONE and leaves the position alone.
    Assert.AreEqual(BreakIterator.DONE, bi.MoveNext());
    Assert.AreEqual(29, bi.Current);

    // MovePrevious() (ดี^ที่สุด)
    Assert.AreEqual(23, bi.MovePrevious());

    // MoveFirst()
    Assert.AreEqual(0, bi.MoveFirst());

    // Going before the first boundary reports DONE and leaves the position alone.
    Assert.AreEqual(BreakIterator.DONE, bi.MovePrevious());
    Assert.AreEqual(0, bi.Current);

    // Numerals: a mix of Thai digits and Western (Hindu-Arabic) digits.
    bi.SetText("๑23๔๕๖7");

    // Setting text puts the position at 0.
    Assert.AreEqual(0, bi.Current);

    // The mixed digits must stay together as one group.
    Assert.AreEqual(7, bi.MoveNext());
}