示例#1
0
        /// <summary>
        /// Creates the analyzer from explicit stop words, a stem exclusion set and a
        /// stem override dictionary.
        /// </summary>
        /// <param name="matchVersion">Compatibility version; also selects how the override dictionary is stored.</param>
        /// <param name="stopwords">Stop words; stored as an unmodifiable copy.</param>
        /// <param name="stemExclusionTable">Terms excluded from stemming; stored as an unmodifiable copy.</param>
        /// <param name="stemOverrideDict">
        /// Term-to-stem overrides. When it is empty, or <paramref name="matchVersion"/> is before
        /// <c>LUCENE_31</c>, an unmodifiable copy is kept in <c>origStemdict</c>; otherwise the
        /// entries are compiled into <c>stemdict</c> via <c>StemmerOverrideFilter.Builder</c>.
        /// </param>
        /// <exception cref="Exception">Wraps an <see cref="IOException"/> thrown while building the stem dictionary.</exception>
        public DutchAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<string> stemOverrideDict)
        {
            this.matchVersion = matchVersion;
            this.stoptable    = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stopwords));
            this.excltable    = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionTable));
#pragma warning disable 612, 618
            if (stemOverrideDict.Count == 0 || !matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                this.stemdict     = null;
                this.origStemdict = CharArrayMap.UnmodifiableMap(CharArrayMap.Copy(matchVersion, stemOverrideDict));
            }
            else
            {
                this.origStemdict = null;
                // we don't need to ignore case here since we lowercase in this analyzer anyway
                StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
                CharArrayMap<string>.EntryIterator iter = (CharArrayMap<string>.EntryIterator)stemOverrideDict.EntrySet().GetEnumerator();
                while (iter.HasNext)
                {
                    // Build the key from exactly the characters of this entry. Stringifying the
                    // whole backing array of a reused scratch CharsRef would append stale
                    // characters left over from a previously copied, longer key.
                    char[] nextKey = iter.NextKey();
                    builder.Add(new string(nextKey), iter.CurrentValue);
                }
                try
                {
                    this.stemdict = builder.Build();
                }
                catch (IOException ex)
                {
                    throw new Exception("can not build stem dict", ex);
                }
            }
        }
示例#2
0
        /// <summary>
        /// Advances the stream by one token. While captures are pending for the
        /// current input token, each call emits one capture group at position
        /// increment 0; otherwise the next token is pulled from the wrapped input.
        /// </summary>
        /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
        public override bool IncrementToken()
        {
            // Pending captures from the token buffered on an earlier call.
            if (currentMatcher != -1 && NextCapture())
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(state != null);
                }
                ClearAttributes();
                RestoreState(state);

                var pending = matchers[currentMatcher].Groups[currentGroup[currentMatcher]];

                posAttr.PositionIncrement = 0;
                charTermAttr.CopyBuffer(spare.Chars, pending.Index, pending.Length);
                currentGroup[currentMatcher]++;
                return true;
            }

            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Keep a private copy of the term text and the attribute state so later
            // calls can emit further captures from the same input token.
            spare.CopyChars(charTermAttr.Buffer, 0, charTermAttr.Length);
            state = CaptureState();

            // Forget every match and group position belonging to the previous token.
            for (int slot = 0; slot < matchers.Length; slot++)
            {
                matchers[slot]     = null;
                currentGroup[slot] = -1;
            }

            if (preserveOriginal)
            {
                // Emit the token unchanged now; captures follow on subsequent calls.
                currentMatcher = 0;
                return true;
            }

            if (NextCapture())
            {
                var first = matchers[currentMatcher].Groups[currentGroup[currentMatcher]];

                if (first.Index == 0)
                {
                    // The capture is a prefix of the term already in the buffer:
                    // shortening the term avoids a copy.
                    charTermAttr.Length = first.Length;
                }
                else
                {
                    charTermAttr.CopyBuffer(spare.Chars, first.Index, first.Length);
                }
                currentGroup[currentMatcher]++;
            }
            return true;
        }
示例#3
0
 /// <summary>
 /// Copying one CharsRef into another must replace the target's visible
 /// content with the source's content.
 /// </summary>
 public virtual void TestCopyCharsRef()
 {
     // Target initially views "bcd" inside a four-character backing array.
     CharsRef target = new CharsRef("abcd".ToCharArray(), 1, 3);
     // Source views all four characters of its own array.
     CharsRef source = new CharsRef("bcde".ToCharArray(), 0, 4);

     target.CopyChars(source);

     Assert.AreEqual("bcde", target.ToString());
 }
示例#4
0
 /// <summary>
 /// Copies a random tail slice of a random string into a fresh CharsRef and
 /// checks that the CharsRef renders exactly that slice.
 /// </summary>
 public virtual void TestCopy()
 {
     for (int remaining = AtLeast(10); remaining > 0; remaining--)
     {
         CharsRef target = new CharsRef();
         char[] text = TestUtil.RandomRealisticUnicodeString(Random(), 1, 100).ToCharArray();

         // The slice runs from a random start index to the end of the array.
         int start = Random().Next(text.Length);
         int count = text.Length - start;
         string expected = new string(text, start, count);

         target.CopyChars(text, start, count);

         Assert.AreEqual(expected, target.ToString());
     }
 }
 /// <summary>
 /// Creates the analyzer from explicit stop words, a stem exclusion set and a
 /// stem override dictionary.
 /// </summary>
 /// <param name="matchVersion">Compatibility version; also selects how the override dictionary is stored.</param>
 /// <param name="stopwords">Stop words; stored as an unmodifiable copy.</param>
 /// <param name="stemExclusionTable">Terms excluded from stemming; stored as an unmodifiable copy.</param>
 /// <param name="stemOverrideDict">
 /// Term-to-stem overrides. When it is empty, or <paramref name="matchVersion"/> is before
 /// <c>LUCENE_31</c>, an unmodifiable copy is kept in <c>origStemdict</c>; otherwise the
 /// entries are compiled into <c>stemdict</c> via <c>StemmerOverrideFilter.Builder</c>.
 /// </param>
 /// <exception cref="Exception">Wraps an <see cref="IOException"/> thrown while building the stem dictionary.</exception>
 public DutchAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<string> stemOverrideDict)
 {
     this.matchVersion = matchVersion;
     this.stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stopwords));
     this.excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionTable));
     #pragma warning disable 612, 618
     if (stemOverrideDict.Count == 0 || !matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
     #pragma warning restore 612, 618
     {
         this.stemdict = null;
         this.origStemdict = CharArrayMap.UnmodifiableMap(CharArrayMap.Copy(matchVersion, stemOverrideDict));
     }
     else
     {
         this.origStemdict = null;
         // we don't need to ignore case here since we lowercase in this analyzer anyway
         StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
         CharArrayMap<string>.EntryIterator iter = (CharArrayMap<string>.EntryIterator)stemOverrideDict.EntrySet().GetEnumerator();
         while (iter.HasNext)
         {
             // Build the key from exactly the characters of this entry. Stringifying the
             // whole backing array of a reused scratch CharsRef would append stale
             // characters left over from a previously copied, longer key.
             char[] nextKey = iter.NextKey();
             builder.Add(new string(nextKey), iter.CurrentValue);
         }
         try
         {
             this.stemdict = builder.Build();
         }
         catch (IOException ex)
         {
             throw new Exception("can not build stem dict", ex);
         }
     }
 }
示例#6
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances the stream by one token. If a matcher is active and
        /// <c>nextCapture()</c> succeeds, the saved state is restored and the current
        /// capture group is emitted with a position increment of 0. Otherwise the
        /// next token is pulled from <c>input</c>, buffered in <c>spare</c>, and the
        /// matchers are reset against it.
        /// </summary>
        /// <returns><c>true</c> if a token was produced; <c>false</c> when <c>input</c> has no more tokens.</returns>
        // NOTE(review): this is unfinished converter output. It mixes Java-style members
        // (clearAttributes, restoreState, copyBuffer, buffer(), length(), start/end, reset)
        // with .NET-style ones (CopyChars, CaptureState, PositionIncrement, Length).
        // Confirm which of these actually exist before relying on this block.
        public override bool incrementToken()
        {
            // Emit a pending capture for the token buffered by an earlier call.
            // nextCapture() is defined elsewhere; presumably it advances
            // currentMatcher/currentGroup to the next available group — TODO confirm.
            if (currentMatcher != -1 && nextCapture())
            {
                Debug.Assert(state != null);
                clearAttributes();
                restoreState(state);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
                int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
                int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);

                // Captures are emitted at the same position as the restored token.
                posAttr.PositionIncrement = 0;
                charTermAttr.copyBuffer(spare.chars, start, end - start);
                currentGroup[currentMatcher]++;
                return(true);
            }

            // No pending capture: fetch the next token from the wrapped input.
            if (!input.incrementToken())
            {
                return(false);
            }

            char[] buffer = charTermAttr.buffer();
            int    length = charTermAttr.length();

            // Keep a copy of the term text and the attribute state for later captures.
            spare.CopyChars(buffer, 0, length);
            state = CaptureState();

            // Point every matcher at the new term text and clear its group position.
            for (int i = 0; i < matchers.Length; i++)
            {
                matchers[i].reset(spare);
                currentGroup[i] = -1;
            }

            if (preserveOriginal)
            {
                // Return the token unchanged now; setting currentMatcher to 0 lets the
                // capture branch at the top run on the next call.
                currentMatcher = 0;
            }
            else if (nextCapture())
            {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
                int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
                int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);

                // if we start at 0 we can simply set the length and save the copy
                if (start == 0)
                {
                    charTermAttr.Length = end;
                }
                else
                {
                    charTermAttr.copyBuffer(spare.chars, start, end - start);
                }
                currentGroup[currentMatcher]++;
            }
            return(true);
        }