/// <summary>
/// Records the bounds of the next sentence, points <c>wrapper</c> at that slice of
/// <c>m_buffer</c>, and hands the slice's text to <c>wordBreaker</c>.
/// </summary>
/// <param name="sentenceStart">Start index of the sentence (stored in <c>this.sentenceStart</c>).</param>
/// <param name="sentenceEnd">End index of the sentence (stored in <c>this.sentenceEnd</c>).</param>
protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
{
    this.sentenceStart = sentenceStart;
    this.sentenceEnd = sentenceEnd;

    int sentenceLength = sentenceEnd - sentenceStart;
    wrapper.SetText(m_buffer, sentenceStart, sentenceLength);

    // The word breaker is fed a string, so materialize the range the wrapper now exposes.
    string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
    wordBreaker.SetText(sentenceText);
}
/// <summary>
/// Records the bounds of the next sentence, points <c>wrapper</c> at that slice of
/// <c>m_buffer</c>, and hands the slice's text to <c>wordBreaker</c>. All of this
/// happens while holding <c>syncLock</c>.
/// </summary>
/// <param name="sentenceStart">Start index of the sentence (stored in <c>this.sentenceStart</c>).</param>
/// <param name="sentenceEnd">End index of the sentence (stored in <c>this.sentenceEnd</c>).</param>
protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
{
    // LUCENENET TODO: This class isn't passing thread safety checks.
    // Adding locking and extra cloning of BreakIterator seems to help, but
    // it is not a complete fix.
    lock (syncLock)
    {
        this.sentenceStart = sentenceStart;
        this.sentenceEnd = sentenceEnd;

        int sentenceLength = sentenceEnd - sentenceStart;
        wrapper.SetText(m_buffer, sentenceStart, sentenceLength);

        // The word breaker is fed a string, so materialize the range the wrapper now exposes.
        string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
        wordBreaker.SetText(sentenceText);
    }
}
/// <summary>
/// Records the bounds of the next sentence, points <c>wrapper</c> at that slice of
/// <c>m_buffer</c>, and hands the slice's text to <c>wordBreaker</c>. The work is
/// bracketed by <c>UninterruptableMonitor.Enter</c>/<c>Exit</c> on <c>syncLock</c>.
/// </summary>
/// <param name="sentenceStart">Start index of the sentence (stored in <c>this.sentenceStart</c>).</param>
/// <param name="sentenceEnd">End index of the sentence (stored in <c>this.sentenceEnd</c>).</param>
protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
{
    UninterruptableMonitor.Enter(syncLock);
    try
    {
        this.sentenceStart = sentenceStart;
        this.sentenceEnd = sentenceEnd;

        int sentenceLength = sentenceEnd - sentenceStart;
        wrapper.SetText(m_buffer, sentenceStart, sentenceLength);

        // The word breaker is fed a string, so materialize the range the wrapper now exposes.
        string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
        wordBreaker.SetText(sentenceText);
    }
    finally
    {
        // Always release, even if SetText throws.
        UninterruptableMonitor.Exit(syncLock);
    }
}
/// <summary>
/// Advances to the next token. An input token whose first character matches
/// <c>thaiPattern</c> is snapshotted into <c>clonedToken</c> and then emitted piece by
/// piece along the boundaries reported by <c>breaker</c>; an empty token, or one whose
/// first character does not match, is passed through unchanged.
/// </summary>
/// <returns>
/// <c>true</c> when a token is available on this stream's attributes; <c>false</c> when
/// the input is exhausted or the breaker reports no boundary for a freshly cloned token.
/// </returns>
public override bool IncrementToken()
{
    // Phase 1: keep draining pieces of the token captured on an earlier call.
    if (hasMoreTokensInClone)
    {
        int pieceStart = breaker.Current;
        int pieceEnd = breaker.Next();
        if (pieceEnd != BreakIterator.Done)
        {
            clonedToken.CopyTo(this);
            termAtt.CopyBuffer(clonedTermAtt.Buffer, pieceStart, pieceEnd - pieceStart);

            if (hasIllegalOffsets)
            {
                offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, clonedOffsetAtt.EndOffset);
            }
            else
            {
                offsetAtt.SetOffset(clonedOffsetAtt.StartOffset + pieceStart, clonedOffsetAtt.StartOffset + pieceEnd);
            }

            if (handlePosIncr)
            {
                posAtt.PositionIncrement = 1;
            }

            return true;
        }

        hasMoreTokensInClone = false;
    }

    // Phase 2: pull a fresh token from upstream.
    if (!m_input.IncrementToken())
    {
        return false;
    }

    // Only non-empty tokens whose first character matches thaiPattern get re-segmented.
    bool needsSegmentation = termAtt.Length != 0 && thaiPattern.IsMatch(string.Empty + termAtt[0]);
    if (!needsSegmentation)
    {
        return true;
    }

    hasMoreTokensInClone = true;

    // if length by start + end offsets doesn't match the term text then assume
    // this is a synonym and don't adjust the offsets.
    hasIllegalOffsets = (offsetAtt.EndOffset - offsetAtt.StartOffset) != termAtt.Length;

    // we lazy init the cloned token, as in ctor not all attributes may be added
    if (clonedToken != null)
    {
        this.CopyTo(clonedToken);
    }
    else
    {
        clonedToken = CloneAttributes();
        clonedTermAtt = clonedToken.GetAttribute<ICharTermAttribute>();
        clonedOffsetAtt = clonedToken.GetAttribute<IOffsetAttribute>();
    }

    // reinit CharacterIterator
    charIterator.SetText(clonedTermAtt.Buffer, 0, clonedTermAtt.Length);
    breaker.SetText(new string(charIterator.Text, charIterator.Start, charIterator.Length));

    int firstEnd = breaker.Next();
    if (firstEnd == BreakIterator.Done)
    {
        return false;
    }

    // Emit the first piece by truncating the current term in place.
    termAtt.Length = firstEnd;
    if (hasIllegalOffsets)
    {
        offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, clonedOffsetAtt.EndOffset);
    }
    else
    {
        offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, clonedOffsetAtt.StartOffset + firstEnd);
    }

    // position increment keeps as it is for first token
    return true;
}
/* run this to test if your JRE is buggy
 * public void testSentenceInstanceJREBUG() {
 *   // we use the default locale, as its randomized by LuceneTestCase
 *   BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
 *   Segment ci = new Segment();
 *   for (int i = 0; i < 10000; i++) {
 *     char text[] = TestUtil.randomUnicodeString(random).toCharArray();
 *     ci.array = text;
 *     ci.offset = 0;
 *     ci.count = text.length;
 *     consume(bi, ci);
 *   }
 * }
 */

/// <summary>
/// Exercises the basic character-iterator operations of <paramref name="ci"/>:
/// First/Last/Current/Next/Previous, index bounds, SetIndex validation, and Clone.
/// </summary>
/// <param name="ci">The iterator under test; its text is reset repeatedly via <c>SetText</c>.</param>
private void DoTests(CharArrayIterator ci)
{
    // basics
    ci.SetText("testing".ToCharArray(), 0, "testing".Length);
    assertEquals(0, ci.BeginIndex);
    assertEquals(7, ci.EndIndex);
    assertEquals(0, ci.Index);
    assertEquals('t', ci.Current());
    assertEquals('e', ci.Next());
    assertEquals('g', ci.Last());
    assertEquals('n', ci.Previous());
    assertEquals('t', ci.First());
    assertEquals(CharacterIterator.DONE, ci.Previous());

    // first()
    ci.SetText("testing".ToCharArray(), 0, "testing".Length);
    ci.Next();
    // Sets the position to getBeginIndex() and returns the character at that position.
    assertEquals('t', ci.First());
    assertEquals(ci.BeginIndex, ci.Index);
    // or DONE if the text is empty
    ci.SetText(new char[] { }, 0, 0);
    assertEquals(CharacterIterator.DONE, ci.First());

    // last()
    ci.SetText("testing".ToCharArray(), 0, "testing".Length);
    // Sets the position to getEndIndex()-1 (getEndIndex() if the text is empty)
    // and returns the character at that position.
    assertEquals('g', ci.Last());
    assertEquals(ci.Index, ci.EndIndex - 1);
    // or DONE if the text is empty
    ci.SetText(new char[] { }, 0, 0);
    assertEquals(CharacterIterator.DONE, ci.Last());
    assertEquals(ci.EndIndex, ci.Index);

    // current()
    // Gets the character at the current position (as returned by getIndex()).
    ci.SetText("testing".ToCharArray(), 0, "testing".Length);
    assertEquals('t', ci.Current());
    ci.Last();
    ci.Next();
    // or DONE if the current position is off the end of the text.
    assertEquals(CharacterIterator.DONE, ci.Current());

    // next()
    ci.SetText("te".ToCharArray(), 0, 2);
    // Increments the iterator's index by one and returns the character at the new index.
    assertEquals('e', ci.Next());
    assertEquals(1, ci.Index);
    // or DONE if the new position is off the end of the text range.
    assertEquals(CharacterIterator.DONE, ci.Next());
    assertEquals(ci.EndIndex, ci.Index);

    // setIndex()
    ci.SetText("test".ToCharArray(), 0, "test".Length);
    try
    {
        ci.SetIndex(5);
        fail();
    }
    catch (System.ArgumentException)
    {
        // expected: an out-of-range index must be rejected.
        // Catching only ArgumentException lets the failure raised by fail() (and any
        // unrelated exception type) propagate with its own message instead of being
        // re-reported as a bare assertTrue failure.
    }

    // clone()
    var text = "testing".ToCharArray();
    ci.SetText(text, 0, text.Length);
    ci.Next();
    // Direct cast: a clone of the wrong type fails here with InvalidCastException
    // rather than as a NullReferenceException on the next line.
    var ci2 = (CharArrayIterator)ci.Clone();
    assertEquals(ci.Index, ci2.Index);
    assertEquals(ci.Next(), ci2.Next());
    assertEquals(ci.Last(), ci2.Last());
}