Пример #1
0
 /// <summary>
 /// Creates a window token tied to the given filter and attribute source,
 /// caching the source's term and offset attributes for later access.
 /// </summary>
 /// <param name="outerInstance">The <c>ShingleFilter</c> instance to associate with this token; stored as-is.</param>
 /// <param name="attSource">Attribute source from which the term and offset attributes are looked up.</param>
 public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
 {
     // Resolve the attributes once, straight from the incoming source.
     termAtt   = attSource.GetAttribute<ICharTermAttribute>();
     offsetAtt = attSource.GetAttribute<IOffsetAttribute>();

     this.attSource     = attSource;
     this.outerInstance = outerInstance;
 }
Пример #2
0
        /// <summary>
        /// Tries to extend a synonym match through <paramref name="map"/> by consuming
        /// the next token and recursing into the matching sub-map.
        /// </summary>
        /// <param name="map">The synonym map node to match from.</param>
        /// <returns>
        /// The map node returned by the recursive match if one succeeded; otherwise
        /// <paramref name="map"/> itself when it has synonyms; otherwise <c>null</c>.
        /// </returns>
        private SlowSynonymMap Match(SlowSynonymMap map)
        {
            SlowSynonymMap longest = null;

            // Only pull a token when there is a sub-map to continue into.
            AttributeSource candidate = map.Submap != null ? NextTok() : null;

            if (candidate != null)
            {
                // NextTok() handed back this filter itself: work on a copy instead.
                if (candidate == this)
                {
                    candidate = CloneAttributes();
                }

                // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
                var term = candidate.GetAttribute<ICharTermAttribute>();
                SlowSynonymMap child = map.Submap.Get(term.Buffer, 0, term.Length);

                // Recurse only if the token's text has an entry in the sub-map.
                longest = child != null ? Match(child) : null;

                if (longest == null)
                {
                    // Nothing matched through this token: push it back.
                    PushTok(candidate);
                }
                else
                {
                    matched.AddFirst(candidate);
                }
            }

            if (longest != null)
            {
                return longest;
            }

            // No longer sequence matched, so this node is the match if it has synonyms.
            return map.Synonyms != null ? map : null;
        }
Пример #3
0
            /// <summary>
            /// Accepts the given state only when its term text equals "Dogs", ignoring case.
            /// </summary>
            /// <param name="a">Attribute source whose <see cref="ICharTermAttribute"/> is inspected.</param>
            /// <returns><c>true</c> if the term text is "Dogs" in any letter casing; otherwise <c>false</c>.</returns>
            public override bool Accept(AttributeSource a)
            {
                ICharTermAttribute termAtt = a.GetAttribute<ICharTermAttribute>();

                // Ordinal comparison on purpose: a culture-sensitive comparison depends on the
                // current thread culture and may treat ignorable characters (e.g. a soft hyphen)
                // as equal, so token matching would not be deterministic across machines.
                return termAtt.ToString().Equals("Dogs", StringComparison.OrdinalIgnoreCase);
            }
Пример #4
0
 /// <summary>
 /// Creates a window token over the given attribute source, caching its
 /// term and offset attributes for later access.
 /// </summary>
 /// <param name="attSource">Attribute source from which the term and offset attributes are looked up.</param>
 public InputWindowToken(AttributeSource attSource)
 {
     // Resolve the attributes once, straight from the incoming source.
     termAtt   = attSource.GetAttribute<ICharTermAttribute>();
     offsetAtt = attSource.GetAttribute<IOffsetAttribute>();

     this.attSource = attSource;
 }
Пример #5
0
        /// <summary>
        /// Advances to the next output token, splitting an input token into several
        /// segments using <c>breaker</c> when its first character matches <c>thaiPattern</c>.
        /// </summary>
        /// <remarks>
        /// While <c>hasMoreTokensInClone</c> is set, each call emits the next segment of the
        /// previously cloned input token. Otherwise a new token is pulled from <c>m_input</c>;
        /// tokens that are empty or whose first character does not match <c>thaiPattern</c>
        /// are passed through unchanged.
        /// </remarks>
        /// <returns>
        /// <c>true</c> if a token was produced; <c>false</c> if the input is exhausted or the
        /// breaker reports no boundary for a freshly read token.
        /// </returns>
        public override bool IncrementToken()
        {
            // Phase 1: keep emitting segments of the token captured on an earlier call.
            if (hasMoreTokensInClone)
            {
                // The segment spans from the breaker's current position to its next boundary.
                int start = breaker.Current;
                int end   = breaker.Next();
                if (end != BreakIterator.Done)
                {
                    // Restore the captured attribute state, then overwrite the term text
                    // with just the [start, end) slice of the captured term buffer.
                    clonedToken.CopyTo(this);
                    termAtt.CopyBuffer(clonedTermAtt.Buffer, start, end - start);
                    if (hasIllegalOffsets)
                    {
                        // Offsets were judged unreliable: reuse the captured token's offsets as-is.
                        offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, clonedOffsetAtt.EndOffset);
                    }
                    else
                    {
                        // Shift the segment bounds by the captured token's start offset.
                        offsetAtt.SetOffset(clonedOffsetAtt.StartOffset + start, clonedOffsetAtt.StartOffset + end);
                    }
                    if (handlePosIncr)
                    {
                        posAtt.PositionIncrement = 1;
                    }
                    return(true);
                }
                // Breaker is exhausted for the captured token; fall through to read new input.
                hasMoreTokensInClone = false;
            }

            // Phase 2: read the next token from the wrapped input.
            if (!m_input.IncrementToken())
            {
                return(false);
            }

            // Pass through empty terms and terms whose first character does not match thaiPattern.
            if (termAtt.Length == 0 || !thaiPattern.IsMatch(string.Empty + termAtt[0]))
            {
                return(true);
            }

            hasMoreTokensInClone = true;

            // if length by start + end offsets doesn't match the term text then assume
            // this is a synonym and don't adjust the offsets.
            hasIllegalOffsets = offsetAtt.EndOffset - offsetAtt.StartOffset != termAtt.Length;

            // we lazy init the cloned token, as in ctor not all attributes may be added
            if (clonedToken == null)
            {
                clonedToken     = CloneAttributes();
                clonedTermAtt   = clonedToken.GetAttribute <ICharTermAttribute>();
                clonedOffsetAtt = clonedToken.GetAttribute <IOffsetAttribute>();
            }
            else
            {
                // Reuse the existing clone: overwrite it with the current attribute state.
                this.CopyTo(clonedToken);
            }

            // reinit CharacterIterator
            charIterator.SetText(clonedTermAtt.Buffer, 0, clonedTermAtt.Length);
            breaker.SetText(new string(charIterator.Text, charIterator.Start, charIterator.Length));
            int end2 = breaker.Next();

            if (end2 != BreakIterator.Done)
            {
                // Emit the first segment by shortening the current term to end2 characters.
                termAtt.Length = end2;
                if (hasIllegalOffsets)
                {
                    offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, clonedOffsetAtt.EndOffset);
                }
                else
                {
                    offsetAtt.SetOffset(clonedOffsetAtt.StartOffset, clonedOffsetAtt.StartOffset + end2);
                }
                // position increment keeps as it is for first token
                return(true);
            }
            // The breaker found no boundary in the new token: report no token.
            return(false);
        }
Пример #6
0
            /// <summary>
            /// Accepts the given state only when its term text equals "The",
            /// compared ordinally and ignoring case.
            /// </summary>
            /// <param name="a">Attribute source whose <see cref="ICharTermAttribute"/> is inspected.</param>
            /// <returns><c>true</c> if the term text is "The" in any letter casing; otherwise <c>false</c>.</returns>
            public override bool Accept(AttributeSource a)
            {
                string text = a.GetAttribute<ICharTermAttribute>().ToString();

                return text.Equals("The", StringComparison.OrdinalIgnoreCase);
            }
Пример #7
0
		/// <summary>
		/// Creates a window token tied to the given filter and attribute source,
		/// caching the source's term and offset attributes for later access.
		/// </summary>
		/// <param name="outerInstance">The <c>ShingleFilter</c> instance to associate with this token; stored as-is.</param>
		/// <param name="attSource">Attribute source from which the term and offset attributes are looked up.</param>
		public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
		{
			// Resolve the attributes once, straight from the incoming source.
			termAtt = attSource.GetAttribute(typeof(CharTermAttribute));
			offsetAtt = attSource.GetAttribute(typeof(OffsetAttribute));

			this.attSource = attSource;
			this.outerInstance = outerInstance;
		}