Exemplo n.º 1
0
 /// <summary>
 /// Records the bounds of the next sentence and points the word breaker at
 /// the corresponding slice of <c>m_buffer</c>.
 /// </summary>
 /// <param name="sentenceStart">Offset into <c>m_buffer</c> where the sentence begins.</param>
 /// <param name="sentenceEnd">
 /// Offset used as the end bound; the slice handed to <c>wrapper</c> spans
 /// <c>sentenceEnd - sentenceStart</c> characters.
 /// </param>
 protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
 {
     int sentenceLength = sentenceEnd - sentenceStart;

     this.sentenceStart = sentenceStart;
     this.sentenceEnd = sentenceEnd;

     // Re-target the wrapper first; the string given to the word breaker is
     // built from the wrapper's own Text/Start/Length view of the slice.
     wrapper.SetText(m_buffer, sentenceStart, sentenceLength);
     string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
     wordBreaker.SetText(sentenceText);
 }
Exemplo n.º 2
0
 /// <summary>
 /// Records the bounds of the next sentence and points the word breaker at
 /// the corresponding slice of <c>m_buffer</c>, all while holding <c>syncLock</c>.
 /// </summary>
 /// <param name="sentenceStart">Offset into <c>m_buffer</c> where the sentence begins.</param>
 /// <param name="sentenceEnd">
 /// Offset used as the end bound; the slice handed to <c>wrapper</c> spans
 /// <c>sentenceEnd - sentenceStart</c> characters.
 /// </param>
 protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
 {
     // LUCENENET TODO: This class does not pass the thread safety checks.
     // Locking (together with extra cloning of the BreakIterator) appears to
     // help, but it is not a complete fix.
     lock (syncLock)
     {
         int sentenceLength = sentenceEnd - sentenceStart;

         this.sentenceStart = sentenceStart;
         this.sentenceEnd = sentenceEnd;

         // The string given to the word breaker is built from the wrapper's
         // own Text/Start/Length view, so the wrapper must be updated first.
         wrapper.SetText(m_buffer, sentenceStart, sentenceLength);
         string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
         wordBreaker.SetText(sentenceText);
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// Records the bounds of the next sentence and points the word breaker at
 /// the corresponding slice of <c>m_buffer</c>. The whole update runs between
 /// <c>UninterruptableMonitor.Enter(syncLock)</c> and the matching <c>Exit</c>.
 /// </summary>
 /// <param name="sentenceStart">Offset into <c>m_buffer</c> where the sentence begins.</param>
 /// <param name="sentenceEnd">
 /// Offset used as the end bound; the slice handed to <c>wrapper</c> spans
 /// <c>sentenceEnd - sentenceStart</c> characters.
 /// </param>
 protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
 {
     UninterruptableMonitor.Enter(syncLock);
     try
     {
         int sentenceLength = sentenceEnd - sentenceStart;

         this.sentenceStart = sentenceStart;
         this.sentenceEnd = sentenceEnd;

         // The string given to the word breaker is built from the wrapper's
         // own Text/Start/Length view, so the wrapper must be updated first.
         wrapper.SetText(m_buffer, sentenceStart, sentenceLength);
         string sentenceText = new string(wrapper.Text, wrapper.Start, wrapper.Length);
         wordBreaker.SetText(sentenceText);
     }
     finally
     {
         // Exit runs even if either SetText call throws.
         UninterruptableMonitor.Exit(syncLock);
     }
 }
Exemplo n.º 4
0
        /// <summary>
        /// Advances to the next output token.
        /// <para>
        /// While a previously captured input token still has segments left
        /// (<c>hasMoreTokensInClone</c>), the next segment reported by
        /// <c>breaker</c> is emitted. Otherwise the next token is pulled from
        /// <c>m_input</c>: an empty token, or one whose first character does not
        /// match <c>thaiPattern</c>, is passed through untouched; any other token
        /// is snapshotted into <c>clonedToken</c>, handed to <c>breaker</c>, and
        /// its first segment is emitted.
        /// </para>
        /// </summary>
        /// <returns>
        /// <c>true</c> when a token was produced; <c>false</c> when the input is
        /// exhausted or <c>breaker</c> reports no boundary for a freshly captured token.
        /// </returns>
        public override bool IncrementToken()
        {
            // Phase 1: keep draining segments of the token captured earlier.
            if (hasMoreTokensInClone)
            {
                int segmentStart = breaker.Current;
                int segmentEnd = breaker.Next();
                if (segmentEnd == BreakIterator.Done)
                {
                    // No segments left; fall through and fetch a new input token.
                    hasMoreTokensInClone = false;
                }
                else
                {
                    clonedToken.CopyTo(this);
                    termAtt.CopyBuffer(clonedTermAtt.Buffer, segmentStart, segmentEnd - segmentStart);

                    int tokenStartOffset = clonedOffsetAtt.StartOffset;
                    if (hasIllegalOffsets)
                    {
                        // Offsets do not line up with the term text: reuse the original span.
                        offsetAtt.SetOffset(tokenStartOffset, clonedOffsetAtt.EndOffset);
                    }
                    else
                    {
                        offsetAtt.SetOffset(tokenStartOffset + segmentStart, tokenStartOffset + segmentEnd);
                    }

                    if (handlePosIncr)
                    {
                        posAtt.PositionIncrement = 1;
                    }

                    return true;
                }
            }

            // Phase 2: pull the next token from upstream.
            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Only tokens that are non-empty and start with a character matching
            // thaiPattern get segmented; everything else passes through as-is.
            bool needsSegmentation = termAtt.Length != 0 && thaiPattern.IsMatch(string.Empty + termAtt[0]);
            if (!needsSegmentation)
            {
                return true;
            }

            hasMoreTokensInClone = true;

            // If the length implied by the start/end offsets does not match the term
            // text, assume this is a synonym and leave the offsets unadjusted.
            int offsetSpan = offsetAtt.EndOffset - offsetAtt.StartOffset;
            hasIllegalOffsets = offsetSpan != termAtt.Length;

            // The snapshot is created lazily because not every attribute may have
            // been added yet when the constructor ran.
            if (clonedToken != null)
            {
                this.CopyTo(clonedToken);
            }
            else
            {
                clonedToken     = CloneAttributes();
                clonedTermAtt   = clonedToken.GetAttribute <ICharTermAttribute>();
                clonedOffsetAtt = clonedToken.GetAttribute <IOffsetAttribute>();
            }

            // Point the character iterator and the breaker at the snapshotted term text.
            charIterator.SetText(clonedTermAtt.Buffer, 0, clonedTermAtt.Length);
            string termText = new string(charIterator.Text, charIterator.Start, charIterator.Length);
            breaker.SetText(termText);

            int firstBoundary = breaker.Next();
            if (firstBoundary == BreakIterator.Done)
            {
                return false;
            }

            // Emit the first segment by truncating the current term in place;
            // the position increment is left as the input set it.
            termAtt.Length = firstBoundary;

            int firstStartOffset = clonedOffsetAtt.StartOffset;
            int firstEndOffset = hasIllegalOffsets
                ? clonedOffsetAtt.EndOffset
                : firstStartOffset + firstBoundary;
            offsetAtt.SetOffset(firstStartOffset, firstEndOffset);

            return true;
        }