/// <summary>
/// Wraps <paramref name="input"/>, registering the payload and term attributes
/// this filter updates; the running position counter starts at zero.
/// </summary>
public SimplePayloadFilter(TokenStream input)
    : base(input)
{
    PayloadAttr = input.AddAttribute<IPayloadAttribute>();
    TermAttr = input.AddAttribute<ICharTermAttribute>();
    Pos = 0;
}
/// <summary>
/// Builds a filter that removes terms shorter than <paramref name="min"/> or
/// longer than <paramref name="max"/> characters from the token stream.
/// </summary>
public LengthFilter(TokenStream in_Renamed, int min, int max)
    : base(in_Renamed)
{
    termAtt = AddAttribute<ITermAttribute>();
    this.max = max;
    this.min = min;
}
/// <summary>
/// Builds a filter that removes terms shorter than <paramref name="min"/> or
/// longer than <paramref name="max"/> characters from the token stream.
/// </summary>
public LengthFilter(TokenStream in_Renamed, int min, int max)
    : base(in_Renamed)
{
    // Pre-generics attribute API: request by runtime type and cast.
    termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
    this.max = max;
    this.min = min;
}
/// <summary>
/// Tokenizes <paramref name="text"/> with <c>MyAnalyzer</c> and concatenates the
/// characters of every token, skipping single-character tokens that are exactly
/// the <c>MyFilter.Separator</c> character.
/// </summary>
/// <param name="text">Raw input text to tokenize.</param>
/// <returns>The concatenated characters of all surviving tokens.</returns>
private string GetTokenizerText(string text)
{
    StringBuilder result = new StringBuilder();
    MyAnalyzer ma = new MyAnalyzer(AnalyzerFactory.stopWords);
    Lucene.Net.Analysis.TokenStream ts = ma.TokenStream("", new System.IO.StringReader(text));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = ts.Next()) != null)
        {
            int len = token.TermLength();
            char[] buff = token.TermBuffer();
            // A lone separator character is dropped; everything else is appended.
            if (len != 1 || buff[0] != MyFilter.Separator)
            {
                result.Append(buff, 0, len);
            }
        }
    }
    finally
    {
        // BUGFIX: close the stream even when tokenization throws (was leaked on error).
        ts.Close();
    }
    return result.ToString();
}
/// <summary>
/// Renders each token of <paramref name="tokenStream"/> as one line containing its
/// term text and padded start/end offsets, and counts the tokens seen.
/// </summary>
/// <param name="tokenStream">Stream to enumerate; consumed by this call.</param>
/// <param name="numberOfTokens">Receives the number of tokens produced.</param>
/// <returns>The multi-line textual view of the stream.</returns>
public string GetTokenView(TokenStream tokenStream, out int numberOfTokens)
{
    var sb = new StringBuilder();
    numberOfTokens = 0;
    var termAttr = tokenStream.GetAttribute<ITermAttribute>();
    var offsetAttr = tokenStream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.IOffsetAttribute>();
    while (tokenStream.IncrementToken())
    {
        // Build the line piecewise instead of concatenating one big string per token.
        sb.Append(termAttr.Term)
          .Append(" Start: ").Append(offsetAttr.StartOffset.ToString().PadLeft(5))
          .Append(" End: ").Append(offsetAttr.EndOffset.ToString().PadLeft(5))
          .Append("\r\n");
        numberOfTokens++;
    }
    return sb.ToString();
}
/// <summary>
/// Advances <paramref name="input"/> by one token and copies its term buffer,
/// offsets and type into <paramref name="reusableToken"/>, allocating a fresh
/// <c>Token</c> when none is supplied.
/// </summary>
/// <returns>The populated token, or null when the stream is null or exhausted.</returns>
public static Token NextToken(TokenStream input, Token reusableToken)
{
    if (input == null || !input.IncrementToken())
    {
        return null;
    }

    var termAtt = input.GetAttribute<ITermAttribute>();
    var offsetAtt = input.GetAttribute<IOffsetAttribute>();
    var typeAtt = input.GetAttribute<ITypeAttribute>();

    Token token = reusableToken ?? new Token();
    token.Clear();
    if (termAtt != null)
    {
        token.SetTermBuffer(termAtt.TermBuffer(), 0, termAtt.TermLength());
    }
    if (offsetAtt != null)
    {
        token.StartOffset = offsetAtt.StartOffset;
        token.EndOffset = offsetAtt.EndOffset;
    }
    if (typeAtt != null)
    {
        token.Type = typeAtt.Type;
    }
    return token;
}
/// <summary>
/// Wraps <paramref name="in"/> and captures a seed from <paramref name="random"/>;
/// registers the position-increment and position-length attributes it manipulates.
/// </summary>
public MockHoleInjectingTokenFilter(Random random, TokenStream @in)
    : base(@in)
{
    PosIncAtt = AddAttribute<IPositionIncrementAttribute>();
    PosLenAtt = AddAttribute<IPositionLengthAttribute>();
    RandomSeed = random.Next();
}
/// <summary>
/// Wraps <paramref name="in"/>, keeping the RNG used to vary payload lengths and
/// a reusable <c>BytesRef</c> over the shared byte buffer.
/// </summary>
public MockVariableLengthPayloadFilter(Random random, TokenStream @in)
    : base(@in)
{
    this.PayloadAtt = AddAttribute<IPayloadAttribute>();
    this.Payload = new BytesRef(Bytes);
    this.Random = random;
}
/// <summary>
/// Wraps <paramref name="stream"/> and registers the term, offset and
/// position-increment attributes this filter rewrites.
/// </summary>
public CamelCaseFilter(TokenStream stream)
    : base(stream)
{
    _positionIncrementAttribute = AddAttribute<IPositionIncrementAttribute>();
    _offsetAttribute = AddAttribute<IOffsetAttribute>();
    _termAttribute = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Demo: tokenizes a Chinese string with the StandardAnalyzer and prints each
/// term to the console, then waits for a key press.
/// </summary>
public void v()
{
    Lucene.Net.Analysis.Standard.StandardAnalyzer a =
        new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    string s = "我日中华人民共和国";
    System.IO.StringReader reader = new System.IO.StringReader(s);
    Lucene.Net.Analysis.TokenStream ts = a.TokenStream(s, reader);
    // Fetch the term attribute once; the same instance is refreshed by each
    // IncrementToken() call, so there is no need to re-fetch it in the loop.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (ts.IncrementToken())
    {
        Console.WriteLine(ita.Term);
    }
    // BUGFIX: the original called ts.CloneAttributes() here, cloning attribute
    // state and discarding the copy; the stream should be closed instead.
    ts.Close();
    reader.Close();
    a.Close();
    Console.ReadKey();
}
/// <summary>
/// Tokenizes <paramref name="text"/> with <c>MyAnalyzer</c> and returns the total
/// number of characters across all tokens, not counting single-character tokens
/// that are exactly the <c>MyFilter.Separator</c> character.
/// </summary>
/// <param name="text">Raw input text to tokenize.</param>
/// <returns>The summed character count of all surviving tokens.</returns>
private int GetTokenizerLength(string text)
{
    int result = 0;
    MyAnalyzer ma = new MyAnalyzer(AnalyzerFactory.stopWords);
    Lucene.Net.Analysis.TokenStream ts = ma.TokenStream("", new System.IO.StringReader(text));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = ts.Next()) != null)
        {
            int len = token.TermLength();
            if (len == 1)
            {
                // A lone separator character contributes nothing.
                if (token.TermBuffer()[0] != MyFilter.Separator)
                {
                    result++;
                }
            }
            else
            {
                result += len;
            }
        }
    }
    finally
    {
        // BUGFIX: close the stream even when tokenization throws (was leaked on error).
        ts.Close();
    }
    return result;
}
/// <summary>
/// Wraps <paramref name="_in"/> with a French stemming stage, registering the
/// term attribute whose buffer the stemmer rewrites.
/// </summary>
public FrenchStemFilter(TokenStream _in)
    : base(_in)
{
    termAtt = AddAttribute<ITermAttribute>();
    stemmer = new FrenchStemmer();
}
/// <summary>
/// Demo test: tokenizes a Chinese string with the StandardAnalyzer, printing each
/// term and a final "over" marker.
/// </summary>
public void TestMethod1()
{
    Lucene.Net.Analysis.Standard.StandardAnalyzer a =
        new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    string s = "我日中华人民共和国";
    System.IO.StringReader reader = new System.IO.StringReader(s);
    Lucene.Net.Analysis.TokenStream ts = a.TokenStream(s, reader);
    // Fetch the term attribute once; the same instance is refreshed by each
    // IncrementToken() call, so there is no need to re-fetch it in the loop.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (ts.IncrementToken())
    {
        Console.WriteLine(ita.Term);
    }
    Console.WriteLine("over");
    // BUGFIX: the original called ts.CloneAttributes() here, cloning attribute
    // state and discarding the copy; the stream should be closed instead.
    ts.Close();
    reader.Close();
    a.Close();
}
/// <summary>
/// Creates a new MockTokenFilter over <paramref name="input"/>.
/// </summary>
/// <param name="input">TokenStream to filter.</param>
/// <param name="filter">DFA representing the terms that should be removed.</param>
public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter)
    : base(input)
{
    PosIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    TermAtt = AddAttribute<ICharTermAttribute>();
    this.Filter = filter;
}
/// <summary>
/// Wraps <paramref name="input"/>, keeping a per-token-type suffix table and
/// registering the term and type attributes consulted when appending suffixes.
/// </summary>
public AddSuffixFilter(TokenStream input, Dictionary<string, char[]> _suffixByTokenType)
    : base(input)
{
    this.suffixByTokenType = _suffixByTokenType;
    typeAtt = AddAttribute<ITypeAttribute>();
    termAtt = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Wraps <paramref name="input"/>, recording whether e-mail hostnames should be
/// tokenized and the callback invoked for discovered links.
/// </summary>
public NoiseEmailHostFilter (TokenStream input, bool tokenize_email_hostname, LinkCallback link_call_back)
    : base (input)
{
    this.link_call_back = link_call_back;
    this.tokenize_email_hostname = tokenize_email_hostname;
    this.token_stream = input;
}
/// <summary>
/// Wraps <paramref name="input"/>, registering the term, offset and
/// position-increment attributes via the pre-generics attribute API.
/// </summary>
public SingleCharTokenizer(TokenStream input)
    : base(input)
{
    _positionIncrementAttribute = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    _offsetAttribute = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    _termAttribute = (TermAttribute)AddAttribute(typeof(TermAttribute));
    _input = input;
}
/// <summary>Adds term frequencies found by tokenizing text from the reader into the map.</summary>
/// <param name="r">A source of text to be tokenized.</param>
/// <param name="termFreqMap">A map of terms and their frequencies.</param>
/// <param name="fieldName">Used by the analyzer for any special per-field analysis.</param>
protected void AddTermFrequencies(System.IO.TextReader r, System.Collections.IDictionary termFreqMap, System.String fieldName)
{
    TokenStream ts = analyzer.TokenStream(fieldName, r);
    var termAtt = ts.AddAttribute<ITermAttribute>();
    int tokenCount = 0;
    while (ts.IncrementToken())
    {
        System.String word = termAtt.Term;
        // Stop reading once the parse budget is exhausted.
        if (++tokenCount > maxNumTokensParsed)
        {
            break;
        }
        if (IsNoiseWord(word))
        {
            continue;
        }
        // First sighting inserts a fresh counter; later sightings increment it.
        Int cnt = (Int)termFreqMap[word];
        if (cnt == null)
        {
            termFreqMap[word] = new Int();
        }
        else
        {
            cnt.x++;
        }
    }
}
/// <summary>Adds term frequencies found by tokenizing text from the reader into the map.</summary>
/// <param name="r">A source of text to be tokenized.</param>
/// <param name="termFreqMap">A map of terms and their frequencies.</param>
/// <param name="fieldName">Used by the analyzer for any special per-field analysis.</param>
private void AddTermFrequencies(StreamReader r, IDictionary termFreqMap, String fieldName)
{
    Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(fieldName, r);
    int tokenCount = 0;
    Lucene.Net.Analysis.Token token;
    while ((token = ts.Next()) != null)
    {
        String word = token.TermText();
        // Stop reading once the parse budget is exhausted.
        if (++tokenCount > maxNumTokensParsed)
        {
            break;
        }
        if (IsNoiseWord(word))
        {
            continue;
        }
        // First sighting inserts a fresh counter; later sightings increment it.
        Int cnt = (Int)termFreqMap[word];
        if (cnt == null)
        {
            termFreqMap[word] = new Int();
        }
        else
        {
            cnt.x++;
        }
    }
}
/// <summary>
/// Asserts that <paramref name="ts"/> produces exactly the terms in
/// <paramref name="output"/>, and — for each non-null expectation array — the
/// matching start/end offsets, types and position increments, then that the
/// stream reports end-of-stream, End() leaves the expected final offset, and
/// closes the stream. Any null expectation array is simply not checked.
/// </summary>
public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int? finalOffset)
{
    Assert.IsNotNull(output);
    // Attribute used to verify that every filter in the chain calls ClearAttributes().
    ICheckClearAttributesAttribute checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();
    Assert.IsTrue(ts.HasAttribute<ITermAttribute>(), "has no TermAttribute");
    ITermAttribute termAtt = ts.GetAttribute<ITermAttribute>();
    // Each optional attribute is looked up only when its expectations were supplied.
    IOffsetAttribute offsetAtt = null;
    if (startOffsets != null || endOffsets != null || finalOffset != null)
    {
        Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
        offsetAtt = ts.GetAttribute<IOffsetAttribute>();
    }
    ITypeAttribute typeAtt = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
        typeAtt = ts.GetAttribute<ITypeAttribute>();
    }
    IPositionIncrementAttribute posIncrAtt = null;
    if (posIncrements != null)
    {
        Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
        posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
    }
    ts.Reset();
    for (int i = 0; i < output.Length; i++)
    {
        // extra safety to enforce, that the state is not preserved and also assign bogus values
        ts.ClearAttributes();
        termAtt.SetTermBuffer("bogusTerm");
        if (offsetAtt != null)
            offsetAtt.SetOffset(14584724, 24683243);
        if (typeAtt != null)
            typeAtt.Type = "bogusType";
        if (posIncrAtt != null)
            posIncrAtt.PositionIncrement = 45987657;
        // reset it, because we called clearAttribute() before
        checkClearAtt.GetAndResetClearCalled();
        Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
        Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain");
        Assert.AreEqual(output[i], termAtt.Term, "term " + i);
        if (startOffsets != null)
            Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset, "startOffset " + i);
        if (endOffsets != null)
            Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset, "endOffset " + i);
        if (types != null)
            Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
        if (posIncrements != null)
            Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
    }
    Assert.IsFalse(ts.IncrementToken(), "end of stream");
    ts.End();
    if (finalOffset.HasValue)
        Assert.AreEqual(finalOffset, offsetAtt.EndOffset, "finalOffset ");
    ts.Close();
}
/// <summary>
/// Wraps <paramref name="input"/> with a synonym-injecting stage driven by
/// <paramref name="engine"/>; pending synonyms are buffered on a stack.
/// </summary>
public SynonymFilter(TokenStream input, ISynonymEngine engine)
    : base(input)
{
    synonymStack = new Stack<string>();
    this.engine = engine;
    // BUGFIX: was an 'as' cast with no null check — a failed cast would have
    // surfaced later as a NullReferenceException. A direct cast fails fast here.
    this.termAttr = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this.posIncrAttr = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
}
/// <summary>
/// Wraps <paramref name="input"/>, allocating a reusable token and registering
/// the term, offset and type attributes this filter reads.
/// </summary>
public CutLeterDigitFilter(TokenStream input)
    : base(input)
{
    typeAtt = AddAttribute<ITypeAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    termAtt = AddAttribute<ITermAttribute>();
    reusableToken = new Token();
}
/// <summary>
/// Writes every term of <paramref name="stream"/> to the console, one bracketed
/// term per line.
/// </summary>
private static void DisplayTokens(TokenStream stream)
{
    var term = (TermAttribute) stream.AddAttribute(typeof(TermAttribute));
    while (stream.IncrementToken())
    {
        Console.WriteLine("[{0}] ", term.Term());
    }
}
/// <summary>
/// Wraps <paramref name="input"/> with a synonym-injecting stage driven by
/// <paramref name="synonymEngine"/>; pending synonyms are buffered on a stack.
/// </summary>
/// <param name="input">The upstream token stream.</param>
/// <param name="synonymEngine">Provides synonyms for each incoming term.</param>
public SynonymFilter(TokenStream input, ISynonymEngine synonymEngine)
    : base(input)
{
    this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    this.termAtt = AddAttribute<ITermAttribute>();
    this.engine = synonymEngine;
    synonymStack = new Stack<String>();
}
/// <summary>
/// Delegates tokenization to the wrapped analyzer, then filters out the literal
/// stop word "stop" (ignoring case) from the resulting stream.
/// </summary>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    TokenStream inner = a.TokenStream(fieldName, reader);
    var stopSet = new CharArraySet(new List<string> { "stop" }, true);
    return new StopFilter(enablePositionIncrements, inner, stopSet);
}
/// <summary>
/// Wraps <paramref name="tokenStream"/>, pre-encoding <paramref name="sectionId"/>
/// as the payload that will be attached to tokens.
/// </summary>
public SectionTokenStream(TokenStream tokenStream, int sectionId)
    : base(tokenStream)
{
    _payload = EncodeIntPayload(sectionId);
    // The generic AddAttribute<T> call failed for this attribute, so the
    // instance is created directly and registered through AddAttributeImpl.
    _payloadAtt = new PayloadAttribute();
    AddAttributeImpl(_payloadAtt);
}
/// <summary>
/// Wraps <paramref name="input"/>, remembering the field name and zeroing the
/// position/index counters; registers the attributes the filter updates.
/// </summary>
public PayloadFilter(TokenStream input, System.String fieldName)
    : base(input)
{
    termAttr = input.AddAttribute<ITermAttribute>();
    payloadAttr = input.AddAttribute<IPayloadAttribute>();
    posIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
    i = 0;
    pos = 0;
    this.fieldName = fieldName;
}
/// <summary>
/// Wraps <paramref name="in_Renamed"/> with a synonym-injecting stage driven by
/// <paramref name="engine"/>; pending synonyms are buffered on a stack.
/// </summary>
public SynonymFilter(TokenStream in_Renamed, ISynonymEngine engine)
    : base(in_Renamed)
{
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this.engine = engine;
    synonymStack = new Stack<string>();
}
/// <summary>
/// Sole constructor. Wraps <paramref name="input"/>, keeping the stop-word set
/// and registering the term, position-increment, keyword and offset attributes.
/// </summary>
public SuggestStopFilter(TokenStream input, CharArraySet stopWords)
    : base(input)
{
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
    this.keywordAtt = AddAttribute<IKeywordAttribute>();
    this.posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.stopWords = stopWords;
}
/// <summary>
/// Wraps <paramref name="input"/>, remembering the field name and zeroing the
/// position/index counters; registers the attributes the filter updates.
/// </summary>
public MockPayloadFilter(TokenStream input, string fieldName)
    : base(input)
{
    TermAttr = input.AddAttribute<ICharTermAttribute>();
    PayloadAttr = input.AddAttribute<IPayloadAttribute>();
    PosIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
    i = 0;
    Pos = 0;
    this.FieldName = fieldName;
}
/// <summary>
/// Wraps <paramref name="input"/> with an acronym-expanding stage; expansions are
/// queued in <c>_tokenSet</c> and already-seen tokens tracked in <c>_recognizedTokens</c>.
/// </summary>
public ExpandAcronymsFilter(TokenStream input, IAcronymExpansionProvider acronymExpansionProvider)
    : base(input)
{
    _recognizedTokens = new HashSet<string>();
    _tokenSet = new Queue<string>();
    _positionIncrementAttribute = AddAttribute<IPositionIncrementAttribute>();
    _termAttribute = AddAttribute<ITermAttribute>();
    _acronymExpansionProvider = acronymExpansionProvider;
}
/// <summary>
/// Writes each term of <paramref name="stream"/> to the trace listeners
/// (only when compiled with the LuceneV303 symbol).
/// </summary>
public static void DisplayTokens(TokenStream stream)
{
    // BUGFIX: the original requested AddAttribute<TermAttribute> (the concrete
    // class) and noted it "should work" but didn't — Lucene.Net 3.x requires the
    // attribute *interface* here, i.e. ITermAttribute.
    var term = stream.AddAttribute<ITermAttribute>();
    while (stream.IncrementToken())
    {
#if LuceneV303
        Trace.WriteLine("[" + term.Term + "] ");
#endif
    }
}
/// <summary>
/// Wraps <paramref name="in"/> with a stage that attaches fixed-length random
/// payloads of <paramref name="length"/> bytes.
/// </summary>
/// <exception cref="System.ArgumentException">If <paramref name="length"/> is negative.</exception>
public MockFixedLengthPayloadFilter(Random random, TokenStream @in, int length)
    : base(@in)
{
    if (length < 0)
    {
        throw new System.ArgumentException("length must be >= 0");
    }
    this.PayloadAtt = AddAttribute<IPayloadAttribute>();
    this.Bytes = new byte[length];
    this.Payload = new BytesRef(Bytes);
    this.Random = random;
}
/// <summary>
/// Wraps <paramref name="input"/> with an expansion stage driven by
/// <paramref name="expander"/>; when <paramref name="emitSource"/> is true the
/// original token is emitted alongside its expansions.
/// </summary>
/// <exception cref="ArgumentNullException">If <paramref name="expander"/> is null.</exception>
public ExpanderFilter(TokenStream input, [NotNull] Func<String, IEnumerable<Expansion>> expander, Boolean emitSource = true)
    : base(input)
{
    if (expander == null)
    {
        throw new ArgumentNullException("expander");
    }
    _typeAttr = AddAttribute<ITypeAttribute>();
    _posAttr = AddAttribute<IPositionIncrementAttribute>();
    _termAttr = AddAttribute<ITermAttribute>();
    _emitSource = emitSource;
    _expander = expander;
}
/// <summary>
/// Wraps <paramref name="input"/> with a synonym-injecting stage driven by
/// <paramref name="engine"/>; pending synonyms are buffered on a stack.
/// </summary>
/// <exception cref="ArgumentNullException">If <paramref name="engine"/> is null.</exception>
public SynonymFilter (TokenStream input, SynonymEngine engine)
    : base(input)
{
    // BUGFIX: the exception previously reported the parameter as "synonymEngine",
    // which is not this constructor's parameter name; it is "engine".
    if (engine == null)
        throw new ArgumentNullException("engine");

    synonymStack = new Stack<string>();
    this.engine = engine;
    this.termAtt = (TermAttribute)AddAttribute<ITermAttribute>();
    this.posIncrAtt = (PositionIncrementAttribute)AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Chains three streams: an inner InjectablePrefixAwareTokenFilter joins
/// <paramref name="prefix"/> and <paramref name="input"/> (tokens adjusted by
/// UpdateInputToken), and an outer one appends <paramref name="suffix"/>
/// (tokens adjusted by UpdateSuffixToken). The base class is constructed over
/// <paramref name="suffix"/>; the composed chain is kept in <c>_suffix</c>.
/// </summary>
public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix)
    : base(suffix)
{
    _suffix =
        new InjectablePrefixAwareTokenFilter(
            // Inner filter: prefix followed by the main input stream.
            new InjectablePrefixAwareTokenFilter(prefix, input)
            {
                UpdateAction = UpdateInputToken
            },
            suffix)
        {
            UpdateAction = UpdateSuffixToken
        };
}
/// <summary>
/// Consumes <paramref name="ts"/> and asserts that each token's first payload
/// byte equals its 1-based position in the stream.
/// </summary>
internal virtual void VerifyPayload(TokenStream ts)
{
    IPayloadAttribute payloadAtt = ts.GetAttribute<IPayloadAttribute>();
    for (byte expected = 1; ts.IncrementToken(); expected++)
    {
        Assert.AreEqual(expected, payloadAtt.Payload.ToByteArray()[0]);
    }
}
/// <summary>
/// Consumes <paramref name="ts"/> via the reusable-Token API and asserts that
/// each token's first payload byte equals its 1-based position in the stream.
/// </summary>
internal virtual void VerifyPayload(TokenStream ts)
{
    Token t = new Token();
    byte expected = 1;
    while (true)
    {
        t.Clear();
        t = ts.Next(t);
        if (t == null)
        {
            break;
        }
        Assert.AreEqual(expected, t.GetPayload().ToByteArray()[0]);
        expected++;
    }
}
/// <summary>
/// Tokenization demo: runs <paramref name="keyword"/> through the analyzer and
/// returns the term texts, each preceded by a space.
/// </summary>
/// <param name="keyword">Text to tokenize (also passed as the field name).</param>
/// <returns>The space-prefixed concatenation of all term texts.</returns>
public string Token(string keyword)
{
    var ret = new StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(keyword);
    Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader);
    Lucene.Net.Analysis.Token token;
    while ((token = ts.Next()) != null)
    {
        // StringBuilder instead of string += in a loop.
        ret.Append(' ').Append(token.TermText());
    }
    // BUGFIX: the original called ts.CloneAttributes() here, cloning attribute
    // state and discarding the copy; the stream should be closed instead.
    ts.Close();
    reader.Close();
    analyzer.Close();
    return ret.ToString();
}
/// <summary>
/// Tokenization demo: runs <paramref name="keyword"/> through the analyzer and
/// returns the term texts, each followed by a '|' separator.
/// </summary>
/// <param name="keyword">Text to tokenize (also passed as the field name).</param>
/// <returns>The '|'-terminated concatenation of all term texts.</returns>
public string Token(string keyword)
{
    var ret = new StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(keyword);
    Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader);
    // Fetch the term attribute once; the same instance is refreshed by each
    // IncrementToken() call, so there is no need to re-fetch it in the loop.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (ts.IncrementToken())
    {
        ret.Append(ita.Term).Append('|');
    }
    // BUGFIX: the original called ts.CloneAttributes() here, cloning attribute
    // state and discarding the copy; the stream should be closed instead.
    ts.Close();
    reader.Close();
    analyzer.Close();
    return ret.ToString();
}
/// <summary>
/// Tokenizes <paramref name="words"/> with the given analyzer and returns the
/// term texts, each followed by a '|' separator.
/// </summary>
/// <param name="words">Content to segment (also passed as the field name).</param>
/// <param name="analyzer">Analyzer performing the segmentation; closed on return.</param>
/// <returns>The '|'-terminated concatenation of all term texts.</returns>
private string cutWords(string words, Analyzer analyzer)
{
    var result = new StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(words);
    Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(words, reader);
    // Fetch the term attribute once; the same instance is refreshed by each
    // IncrementToken() call, so there is no need to re-fetch it in the loop.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (ts.IncrementToken())
    {
        result.Append(ita.Term).Append('|');
    }
    // BUGFIX: the original called ts.CloneAttributes() here, cloning attribute
    // state and discarding the copy; the stream should be closed instead.
    ts.Close();
    reader.Close();
    analyzer.Close();
    return result.ToString();
}
/// <summary>
/// Segments <paramref name="content"/> with the PanGu analyzer and returns the
/// resulting terms in stream order.
/// </summary>
/// <param name="content">Text to segment (also passed as the field name).</param>
/// <returns>The list of term texts.</returns>
public static List<string> SplitWords(string content)
{
    List<string> strList = new List<string>();
    // PanGu analyzer: Chinese word-segmentation algorithm.
    using (Analyzer analyzer = new PanGuAnalyzer())
    {
        using (System.IO.StringReader reader = new System.IO.StringReader(content))
        {
            Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(content, reader);
            // Fetch the term attribute once; it is refreshed by each IncrementToken().
            var ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            while (ts.IncrementToken())
            {
                strList.Add(ita.Term);
            }
            // BUGFIX: the original called ts.CloneAttributes() here, cloning
            // attribute state and discarding the copy; close the stream instead.
            ts.Close();
        }
    }
    return strList;
}
/// <summary>
/// Constructs the filter over the given input stream; all state lives in the base class.
/// </summary>
public LowerCaseFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
}
/// <summary>
/// Convenience overload: delegates to the main constructor using the default
/// position-increment setting (ENABLE_POSITION_INCREMENTS_DEFAULT).
/// </summary>
public StopFilter(TokenStream input, ICollection<string> stopWords, bool ignoreCase)
    : this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, ignoreCase)
{
}
/// <summary>
/// Convenience overload: delegates to the main constructor using the default
/// position-increment setting and case-sensitive matching (ignoreCase = false).
/// </summary>
public StopFilter(TokenStream in_Renamed, ICollection<string> stopWords)
    : this(ENABLE_POSITION_INCREMENTS_DEFAULT, in_Renamed, stopWords, false)
{
}
/// <summary>
/// Wraps <paramref name="input"/>, capturing a seed from <paramref name="random"/>
/// and registering the term attribute this filter reads.
/// </summary>
public MockGraphTokenFilter(Random random, TokenStream input)
    : base(input)
{
    TermAtt = AddAttribute<ICharTermAttribute>();
    Seed = random.Next();
}
// Position-increment attribute registered in the constructor below.
internal readonly IPositionIncrementAttribute PosIncAtt;

/// <summary>
/// Wraps <paramref name="in"/> and registers the term and position-increment
/// attributes this filter consults.
/// </summary>
public RemoveATokens(TokenStream @in)
    : base(@in)
{
    PosIncAtt = AddAttribute<IPositionIncrementAttribute>();
    TermAtt = AddAttribute<ICharTermAttribute>();
}
/// <summary>
/// Creates a new <see cref="TokenStreamComponents"/> instance.
/// </summary>
/// <param name="source">the analyzer's tokenizer</param>
/// <param name="result">the analyzer's resulting token stream (the end of the filter chain)</param>
public TokenStreamComponents(Tokenizer source, TokenStream result)
{
    this.m_source = source;
    this.m_sink = result;
}
/// <summary>
/// Convenience overload: delegates to the string-array constructor with
/// case-sensitive matching (ignoreCase = false).
/// </summary>
public StopFilter(bool enablePositionIncrements, TokenStream input, System.String[] stopWords)
    : this(enablePositionIncrements, input, stopWords, false)
{
}
/// <summary>
/// Convenience overload: delegates to the main constructor using the default
/// position-increment setting (ENABLE_POSITION_INCREMENTS_DEFAULT).
/// </summary>
public StopFilter(TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase)
    : this(ENABLE_POSITION_INCREMENTS_DEFAULT, in_Renamed, stopWords, ignoreCase)
{
}
/// <summary>
/// Constructs the caching filter over the given input stream; all state lives in the base class.
/// </summary>
public CachingTokenFilter(TokenStream input)
    : base(input)
{
}
/// <summary>
/// Wraps <paramref name="in"/>, drawing a seed from <paramref name="random"/> and
/// keeping a private RNG re-created from that seed so runs are reproducible.
/// </summary>
public MockRandomLookaheadTokenFilter(Random random, TokenStream @in)
    : base(@in)
{
    long seed = random.Next();
    this.Seed = seed;
    this.random = new Random((int)seed);
}
/// <summary>
/// Constructs the filter over the given input stream; all state lives in the base class.
/// </summary>
public NeverPeeksLookaheadTokenFilter(TokenStream input)
    : base(input)
{
}
/// <summary>
/// Builds a filter that removes terms shorter than <paramref name="min"/> or
/// longer than <paramref name="max"/> characters from the token stream.
/// </summary>
public LengthFilter(TokenStream in_Renamed, int min, int max)
    : base(in_Renamed)
{
    // Pre-generics attribute API: request by runtime type and cast.
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this.max = max;
    this.min = min;
}
/// <summary>
/// Creates a new <see cref="TokenStreamComponents"/> instance whose sink is the
/// tokenizer itself (no additional filters in the chain).
/// </summary>
/// <param name="source">the analyzer's tokenizer</param>
public TokenStreamComponents(Tokenizer source)
{
    this.m_source = source;
    this.m_sink = source;
}
/// <summary>
/// Convenience overload: delegates to the main constructor using the default
/// position-increment setting and case-sensitive matching (ignoreCase = false).
/// </summary>
public StopFilter(TokenStream input, System.String[] stopWords)
    : this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, false)
{
}
/// <summary>Construct a token stream filtering the given input. The wrapped stream
/// is kept in <c>input</c> for subclasses to consume.</summary>
protected internal TokenFilter(TokenStream input)
{
    this.input = input;
}
/// <summary>
/// Constructs a filter which removes words from the input TokenStream that are
/// named in the Set. Delegates to the main constructor with case-sensitive
/// matching (ignoreCase = false).
/// </summary>
/// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
/// <param name="in_Renamed">Input stream</param>
/// <param name="stopWords">The set of Stop Words.</param>
/// <seealso cref="MakeStopSet(System.String[])"/>
public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, ICollection<string> stopWords)
    : this(enablePositionIncrements, in_Renamed, stopWords, false)
{
}
/// <summary>
/// Main string-array constructor: builds the stop set from
/// <paramref name="stopWords"/> (honoring <paramref name="ignoreCase"/>), records
/// the position-increment setting, and runs shared initialization.
/// </summary>
public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, string[] stopWords, bool ignoreCase)
    : base(in_Renamed)
{
    this.enablePositionIncrements = enablePositionIncrements;
    this.stopWords = (CharArraySet)MakeStopSet(stopWords, ignoreCase);
    // Init() must run after the fields above are assigned.
    Init();
}
/// <summary>
/// Constructs a tee filter over <paramref name="input"/>; tokens are also fed to
/// the given <paramref name="sink"/> tokenizer.
/// </summary>
public TeeTokenFilter(TokenStream input, SinkTokenizer sink)
    : base(input)
{
    this.sink = sink;
}