Пример #1
0
 /// <summary>
 /// Records the bounds of the next sentence and re-targets the word breaker
 /// at the text of that sentence.
 /// </summary>
 /// <param name="sentenceStart">Index into the buffer where the sentence begins.</param>
 /// <param name="sentenceEnd">Index into the buffer where the sentence ends; only used here
 /// to store the bound and to compute the length (<c>sentenceEnd - sentenceStart</c>).</param>
 protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
 {
     // Remember which slice of the buffer is currently being segmented.
     this.sentenceStart = sentenceStart;
     this.sentenceEnd = sentenceEnd;

     // Point the wrapper at that slice of the buffer.
     int sentenceLength = sentenceEnd - sentenceStart;
     wrapper.SetText(m_buffer, sentenceStart, sentenceLength);

     // The word breaker is fed a string, so materialize the wrapped range as one.
     string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
     wordBreaker.SetText(sentenceText);
 }
Пример #2
0
 /// <summary>
 /// Records the bounds of the next sentence and re-targets the word breaker
 /// at the text of that sentence, all while holding <c>syncLock</c>.
 /// </summary>
 /// <param name="sentenceStart">Index into the buffer where the sentence begins.</param>
 /// <param name="sentenceEnd">Index into the buffer where the sentence ends; only used here
 /// to store the bound and to compute the length (<c>sentenceEnd - sentenceStart</c>).</param>
 protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
 {
     // LUCENENET TODO: This class does not pass the thread safety checks.
     // Locking (plus extra cloning of the BreakIterator) appears to help,
     // but it is not a complete fix.
     lock (syncLock)
     {
         // Remember which slice of the buffer is currently being segmented.
         this.sentenceStart = sentenceStart;
         this.sentenceEnd = sentenceEnd;

         // Point the wrapper at that slice of the buffer.
         int sentenceLength = sentenceEnd - sentenceStart;
         wrapper.SetText(m_buffer, sentenceStart, sentenceLength);

         // The word breaker is fed a string, so materialize the wrapped range as one.
         string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
         wordBreaker.SetText(sentenceText);
     }
 }
Пример #3
0
 /// <summary>
 /// Records the bounds of the next sentence and re-targets the word breaker
 /// at the text of that sentence. The whole update runs between
 /// <c>UninterruptableMonitor.Enter(syncLock)</c> and the matching <c>Exit</c>.
 /// </summary>
 /// <param name="sentenceStart">Index into the buffer where the sentence begins.</param>
 /// <param name="sentenceEnd">Index into the buffer where the sentence ends; only used here
 /// to store the bound and to compute the length (<c>sentenceEnd - sentenceStart</c>).</param>
 protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
 {
     UninterruptableMonitor.Enter(syncLock);
     try
     {
         // Remember which slice of the buffer is currently being segmented.
         this.sentenceStart = sentenceStart;
         this.sentenceEnd = sentenceEnd;

         // Point the wrapper at that slice of the buffer.
         int sentenceLength = sentenceEnd - sentenceStart;
         wrapper.SetText(m_buffer, sentenceStart, sentenceLength);

         // The word breaker is fed a string, so materialize the wrapped range as one.
         string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
         wordBreaker.SetText(sentenceText);
     }
     finally
     {
         // Always release the monitor, even if one of the SetText calls throws.
         UninterruptableMonitor.Exit(syncLock);
     }
 }
Пример #4
0
        /// <summary>
        /// Advances to the next token. A token pulled from <c>m_input</c> whose first
        /// character matches <c>thaiPattern</c> is captured and then emitted piece by
        /// piece, using the boundaries reported by <c>breaker</c>; any other token is
        /// passed through unchanged.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a token (or a piece of a captured token) has been produced;
        /// <c>false</c> when the input is exhausted or the breaker yields no boundary
        /// for a freshly captured token.
        /// </returns>
        public override bool IncrementToken()
        {
            // Phase 1: a token was captured earlier - keep emitting its remaining pieces.
            if (hasMoreTokensInClone)
            {
                int pieceStart = breaker.Current;
                int pieceEnd = breaker.Next();
                if (pieceEnd == BreakIterator.Done)
                {
                    // Captured token is used up; fall through and pull a new input token.
                    hasMoreTokensInClone = false;
                }
                else
                {
                    // Restore the captured attribute state, then narrow the term to this piece.
                    clonedToken.CopyTo(this);
                    termAtt.CopyBuffer(clonedTermAtt.Buffer, pieceStart, pieceEnd - pieceStart);

                    if (!hasIllegalOffsets)
                    {
                        offsetAtt.SetOffset(clonedOffsetAtt.StartOffset + pieceStart, clonedOffsetAtt.StartOffset + pieceEnd);
                    }
                    else
                    {
                        // Offsets did not line up with the term text: report the original span.
                        offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, clonedOffsetAtt.EndOffset);
                    }

                    if (handlePosIncr)
                    {
                        posAtt.PositionIncrement = 1;
                    }
                    return true;
                }
            }

            // Phase 2: pull the next token from the wrapped stream.
            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Only terms whose first character matches the pattern get split; the rest pass through.
            bool needsSplitting = termAtt.Length != 0 && thaiPattern.IsMatch(termAtt[0].ToString());
            if (!needsSplitting)
            {
                return true;
            }

            hasMoreTokensInClone = true;

            // When the span given by the start/end offsets differs from the term length,
            // assume this is a synonym and leave the offsets unadjusted.
            hasIllegalOffsets = (offsetAtt.EndOffset - offsetAtt.StartOffset) != termAtt.Length;

            // The clone is created lazily because not every attribute may have been
            // added yet at construction time.
            if (clonedToken != null)
            {
                this.CopyTo(clonedToken);
            }
            else
            {
                clonedToken = CloneAttributes();
                clonedTermAtt = clonedToken.GetAttribute<ICharTermAttribute>();
                clonedOffsetAtt = clonedToken.GetAttribute<IOffsetAttribute>();
            }

            // Re-initialize the character iterator and hand the term text to the breaker.
            charIterator.SetText(clonedTermAtt.Buffer, 0, clonedTermAtt.Length);
            string termText = new string(charIterator.Text, charIterator.Start, charIterator.Length);
            breaker.SetText(termText);

            int firstPieceEnd = breaker.Next();
            if (firstPieceEnd == BreakIterator.Done)
            {
                return false;
            }

            // Emit the first piece: truncate the term in place and fix up the end offset.
            termAtt.Length = firstPieceEnd;
            int firstPieceEndOffset = hasIllegalOffsets
                ? clonedOffsetAtt.EndOffset
                : clonedOffsetAtt.StartOffset + firstPieceEnd;
            offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, firstPieceEndOffset);

            // The position increment is left untouched for the first piece.
            return true;
        }
Пример #5
0
        /* run this to test if your JRE is buggy
         * public void testSentenceInstanceJREBUG() {
         * // we use the default locale, as its randomized by LuceneTestCase
         * BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
         * Segment ci = new Segment();
         * for (int i = 0; i < 10000; i++) {
         *  char text[] = TestUtil.randomUnicodeString(random).toCharArray();
         *  ci.array = text;
         *  ci.offset = 0;
         *  ci.count = text.length;
         *  consume(bi, ci);
         * }
         * }
         */

        /// <summary>
        /// Runs the shared character-iterator checks against <paramref name="ci"/>:
        /// basic navigation, <c>First</c>, <c>Last</c>, <c>Current</c>, <c>Next</c>,
        /// <c>SetIndex</c> range validation and <c>Clone</c>.
        /// </summary>
        /// <param name="ci">The iterator under test; its text is reset repeatedly via <c>SetText</c>.</param>
        private void DoTests(CharArrayIterator ci)
        {
            // basics
            ci.SetText("testing".ToCharArray(), 0, "testing".Length);
            assertEquals(0, ci.BeginIndex);
            assertEquals(7, ci.EndIndex);
            assertEquals(0, ci.Index);
            assertEquals('t', ci.Current());
            assertEquals('e', ci.Next());
            assertEquals('g', ci.Last());
            assertEquals('n', ci.Previous());
            assertEquals('t', ci.First());
            assertEquals(CharacterIterator.DONE, ci.Previous());

            // first()
            ci.SetText("testing".ToCharArray(), 0, "testing".Length);
            ci.Next();
            // Sets the position to getBeginIndex() and returns the character at that position.
            assertEquals('t', ci.First());
            assertEquals(ci.BeginIndex, ci.Index);
            // or DONE if the text is empty
            ci.SetText(new char[] { }, 0, 0);
            assertEquals(CharacterIterator.DONE, ci.First());

            // last()
            ci.SetText("testing".ToCharArray(), 0, "testing".Length);
            // Sets the position to getEndIndex()-1 (getEndIndex() if the text is empty)
            // and returns the character at that position.
            assertEquals('g', ci.Last());
            // Expected value first, so a failure message reports the two values the right way round.
            assertEquals(ci.EndIndex - 1, ci.Index);
            // or DONE if the text is empty
            ci.SetText(new char[] { }, 0, 0);
            assertEquals(CharacterIterator.DONE, ci.Last());
            assertEquals(ci.EndIndex, ci.Index);

            // current()
            // Gets the character at the current position (as returned by getIndex()).
            ci.SetText("testing".ToCharArray(), 0, "testing".Length);
            assertEquals('t', ci.Current());
            ci.Last();
            ci.Next();
            // or DONE if the current position is off the end of the text.
            assertEquals(CharacterIterator.DONE, ci.Current());

            // next()
            ci.SetText("te".ToCharArray(), 0, 2);
            // Increments the iterator's index by one and returns the character at the new index.
            assertEquals('e', ci.Next());
            assertEquals(1, ci.Index);
            // or DONE if the new position is off the end of the text range.
            assertEquals(CharacterIterator.DONE, ci.Next());
            assertEquals(ci.EndIndex, ci.Index);

            // setIndex()
            ci.SetText("test".ToCharArray(), 0, "test".Length);
            try
            {
                ci.SetIndex(5);
                fail();
            }
            catch (System.ArgumentException)
            {
                // Expected: index 5 is outside the 4-character text.
                // Only the expected exception type is caught, so the assertion raised by
                // fail() (or any unrelated exception) propagates with its own message
                // instead of being masked by a secondary assertion.
            }

            // clone()
            var text = "testing".ToCharArray();

            ci.SetText(text, 0, text.Length);
            ci.Next();
            // Direct cast rather than 'as': a clone of the wrong type fails right here
            // with a cast error instead of a NullReferenceException further down.
            var ci2 = (CharArrayIterator)ci.Clone();

            assertEquals(ci.Index, ci2.Index);
            assertEquals(ci.Next(), ci2.Next());
            assertEquals(ci.Last(), ci2.Last());
        }