/// <summary>
/// Demo: tokenizes a Chinese string with StandardAnalyzer (LUCENE_30) and
/// prints each term, then "over".
/// NOTE: the first argument of TokenStream() is a field name; passing the
/// content string works only because StandardAnalyzer ignores the field name.
/// </summary>
public void TestMethod1()
{
    Lucene.Net.Analysis.Standard.StandardAnalyzer a =
        new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    string s = "我日中华人民共和国";
    System.IO.StringReader reader = new System.IO.StringReader(s);
    Lucene.Net.Analysis.TokenStream ts = a.TokenStream(s, reader);
    // The attribute instance is reused for every token; fetch it once.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (ts.IncrementToken())
    {
        Console.WriteLine(ita.Term);
    }
    Console.WriteLine("over");
    // BUGFIX: the original called ts.CloneAttributes() here, which clones the
    // attribute state and discards the result; the intent was to release the
    // stream.
    ts.Close();
    reader.Close();
    a.Close();
}
/// <summary>
/// Console demo: tokenizes a Chinese string with StandardAnalyzer (LUCENE_30),
/// prints each term, then waits for a key press.
/// </summary>
public void v()
{
    Lucene.Net.Analysis.Standard.StandardAnalyzer a =
        new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    string s = "我日中华人民共和国";
    System.IO.StringReader reader = new System.IO.StringReader(s);
    // NOTE: the first TokenStream() argument is a field name; passing the
    // content string works only because StandardAnalyzer ignores it.
    Lucene.Net.Analysis.TokenStream ts = a.TokenStream(s, reader);
    // The attribute instance is reused for every token; fetch it once.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (ts.IncrementToken())
    {
        Console.WriteLine(ita.Term);
    }
    // BUGFIX: the original called ts.CloneAttributes(), which clones the
    // attribute state and discards the result; the intent was to release the
    // stream. (Dead commented-out CJKAnalyzer code removed.)
    ts.Close();
    reader.Close();
    a.Close();
    Console.ReadKey();
}
/// <summary>
/// Advances <paramref name="input"/> one token and copies the new token's
/// term buffer, offsets and type into <paramref name="reusableToken"/>
/// (allocating a fresh Token when null). Returns null when the input stream
/// is null or exhausted.
/// </summary>
public static Token NextToken(TokenStream input, Token reusableToken)
{
    if (input == null) return null;
    if (!input.IncrementToken()) return null;
    // BUGFIX: GetAttribute<T>() throws when the attribute is absent, so the
    // original null checks below could never fire; probe with HasAttribute<T>()
    // first so optional attributes are genuinely optional.
    ITermAttribute termAtt = input.HasAttribute<ITermAttribute>() ? input.GetAttribute<ITermAttribute>() : null;
    IOffsetAttribute offsetAtt = input.HasAttribute<IOffsetAttribute>() ? input.GetAttribute<IOffsetAttribute>() : null;
    ITypeAttribute typeAtt = input.HasAttribute<ITypeAttribute>() ? input.GetAttribute<ITypeAttribute>() : null;
    if (reusableToken == null)
    {
        reusableToken = new Token();
    }
    reusableToken.Clear();
    if (termAtt != null)
        reusableToken.SetTermBuffer(termAtt.TermBuffer(), 0, termAtt.TermLength());
    if (offsetAtt != null)
    {
        reusableToken.StartOffset = offsetAtt.StartOffset;
        reusableToken.EndOffset = offsetAtt.EndOffset;
    }
    if (typeAtt != null)
        reusableToken.Type = typeAtt.Type;
    return reusableToken;
}
// NOTE(review): the TokenStream obtained below is never closed and the loop
// may exit early via maxNumTokensParsed — presumably the analyzer owns stream
// cleanup here; confirm whether ts.Close() should be called in a finally.
// Noise words are skipped before counting; Int appears to be a mutable int
// box whose x field holds (count - 1) — verify against its declaration.
/// <summary> Adds term frequencies found by tokenizing text from reader into the Map words</summary> /// <param name="r">a source of text to be tokenized /// </param> /// <param name="termFreqMap">a Map of terms and their frequencies /// </param> /// <param name="fieldName">Used by analyzer for any special per-field analysis /// </param> protected void AddTermFrequencies(System.IO.TextReader r, System.Collections.IDictionary termFreqMap, System.String fieldName) { TokenStream ts = analyzer.TokenStream(fieldName, r); var termAtt = ts.AddAttribute <ITermAttribute>(); int tokenCount = 0; while (ts.IncrementToken()) { // for every token System.String word = termAtt.Term; tokenCount++; if (tokenCount > maxNumTokensParsed) { break; } if (IsNoiseWord(word)) { continue; } // increment frequency Int cnt = (Int)termFreqMap[word]; if (cnt == null) { termFreqMap[word] = new Int(); } else { cnt.x++; } } }
/// <summary>
/// Renders each token of <paramref name="tokenStream"/> as one line of
/// "term Start: &lt;s&gt; End: &lt;e&gt;" (offsets right-padded to 5 chars) and
/// reports the token count via <paramref name="numberOfTokens"/>.
/// </summary>
public string GetTokenView(TokenStream tokenStream, out int numberOfTokens)
{
    var sb = new StringBuilder();
    numberOfTokens = 0;
    var termAttr = tokenStream.GetAttribute<ITermAttribute>();
    // The original named this 'startOffset' although it is the whole offset attribute.
    var offsetAttr = tokenStream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.IOffsetAttribute>();
    while (tokenStream.IncrementToken())
    {
        // Chain Append calls instead of allocating one concatenated string per token.
        sb.Append(termAttr.Term)
          .Append(" Start: ")
          .Append(offsetAttr.StartOffset.ToString().PadLeft(5))
          .Append(" End: ")
          .Append(offsetAttr.EndOffset.ToString().PadLeft(5))
          .Append("\r\n");
        numberOfTokens++;
    }
    // Dead commented-out Token/Next() code removed.
    return sb.ToString();
}
/// <summary>Writes every term produced by the stream to the console as "[term] ".</summary>
private static void DisplayTokens(TokenStream stream)
{
    // The attribute instance is shared; its value changes on each IncrementToken().
    var termAttr = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
    bool more = stream.IncrementToken();
    while (more)
    {
        Console.WriteLine("[{0}] ", termAttr.Term());
        more = stream.IncrementToken();
    }
}
/// <summary>
/// Tokenization test helper: segments <paramref name="keyword"/> with the
/// instance analyzer and returns the terms joined by '|' (each term followed
/// by a trailing '|'; empty string when no terms are produced).
/// </summary>
/// <param name="keyword">Text to tokenize.</param>
/// <returns>Pipe-delimited terms.</returns>
public string Token(string keyword)
{
    // Build with StringBuilder instead of string += in a loop.
    var sb = new System.Text.StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(keyword);
    Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader);
    // The attribute instance is reused; read Term after each IncrementToken().
    var ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (ts.IncrementToken())
    {
        sb.Append(ita.Term).Append('|');
    }
    // BUGFIX: was ts.CloneAttributes(), which clones and discards attribute
    // state; the intent was to release the stream.
    ts.Close();
    reader.Close();
    analyzer.Close();
    return sb.ToString();
}
/// <summary>Traces every term produced by the stream (emits only on LuceneV303 builds).</summary>
public static void DisplayTokens(TokenStream stream)
{
    // error in Lucene.Net? should work, look in source code why not
    // source: Lucene in Action, page ??
    var termAttr = stream.AddAttribute<TermAttribute>();
    while (stream.IncrementToken())
    {
#if LuceneV303
        Trace.WriteLine("[" + termAttr.Term + "] ");
#endif
    }
}
/// <summary>
/// Segmentation helper: tokenizes <paramref name="words"/> with the given
/// analyzer and returns the terms joined by '|' (each term followed by a
/// trailing '|'; empty string when no terms are produced).
/// </summary>
/// <param name="words">Text to segment.</param>
/// <param name="analyzer">Analyzer used for tokenization.</param>
private string cutWords(string words, Analyzer analyzer)
{
    // Build with StringBuilder instead of string += in a loop.
    var sb = new System.Text.StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(words);
    Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(words, reader);
    // The attribute instance is reused; read Term after each IncrementToken().
    var ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (ts.IncrementToken())
    {
        sb.Append(ita.Term).Append('|');
    }
    // BUGFIX: was ts.CloneAttributes(), which clones and discards attribute
    // state; the intent was to release the stream.
    ts.Close();
    reader.Close();
    analyzer.Close();
    return sb.ToString();
}
/// <summary>
/// Consumes the stream and asserts that the first payload byte of each token
/// counts 1, 2, 3, ... in token order.
/// </summary>
internal virtual void VerifyPayload(TokenStream ts)
{
    IPayloadAttribute payloadAtt = ts.GetAttribute<IPayloadAttribute>();
    byte expected = 1;
    while (ts.IncrementToken())
    {
        Assert.AreEqual(expected, payloadAtt.Payload.ToByteArray()[0]);
        expected++;
    }
}
/// <summary>
/// Verifies the default stop analyzer removes all words from the invalid set.
/// </summary>
public virtual void TestDefaults()
{
    Assert.IsTrue(stop != null);
    var reader = new System.IO.StringReader("This is a test of the english stop analyzer");
    TokenStream stream = stop.TokenStream("test", reader);
    Assert.IsTrue(stream != null);
    var termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    // Every emitted term must have survived the stop-word filter.
    while (stream.IncrementToken())
    {
        Assert.IsFalse(inValidTokens.Contains(termAtt.Term()));
    }
}
/// <summary>
/// Thread body: for each (term -> expected BytesRef) mapping, analyzes the
/// term with a fresh TokenStream, asserts that exactly one token is produced
/// and that its byte image equals the expected BytesRef. IO failures inside
/// the per-term block are captured so the stream is still disposed
/// (DisposeWhileHandlingException); IO failures anywhere else are wrapped and
/// rethrown. The catch order (capture, then dispose in finally) is deliberate.
/// </summary>
public override void Run() { try { foreach (var mapping in this.map) { string term = mapping.Key; BytesRef expected = mapping.Value; Exception priorException = null; // LUCENENET: No need to cast to IOExcpetion TokenStream ts = this.analyzer.GetTokenStream("fake", new StringReader(term)); try { ITermToBytesRefAttribute termAtt = ts.AddAttribute <ITermToBytesRefAttribute>(); BytesRef bytes = termAtt.BytesRef; ts.Reset(); Assert.IsTrue(ts.IncrementToken()); termAtt.FillBytesRef(); Assert.AreEqual(expected, bytes); Assert.IsFalse(ts.IncrementToken()); ts.End(); } catch (Exception e) when(e.IsIOException()) { priorException = e; } finally { IOUtils.DisposeWhileHandlingException(priorException, ts); } } } catch (Exception e) when(e.IsIOException()) { throw RuntimeException.Create(e); } }
/// <summary>
/// Thread body: for each (term -> expected BytesRef) mapping, analyzes the
/// term, asserts exactly one token whose byte image equals the expected
/// BytesRef, and disposes the stream even when an IO error was captured.
/// </summary>
public override void Run()
{
    try
    {
        foreach (KeyValuePair<string, BytesRef> mapping in Map)
        {
            string term = mapping.Key;
            BytesRef expected = mapping.Value;
            IOException priorException = null;
            // BUGFIX: the original used new StreamReader(term), which treats
            // the term as a FILE PATH; StringReader reads the term text itself
            // (matching the parallel implementation elsewhere in this file).
            TokenStream ts = Analyzer.TokenStream("fake", new StringReader(term));
            try
            {
                ITermToBytesRefAttribute termAtt = ts.AddAttribute<ITermToBytesRefAttribute>();
                BytesRef bytes = termAtt.BytesRef;
                ts.Reset();
                Assert.IsTrue(ts.IncrementToken());
                termAtt.FillBytesRef();
                Assert.AreEqual(expected, bytes);
                Assert.IsFalse(ts.IncrementToken());
                ts.End();
            }
            catch (IOException e)
            {
                // Remember the failure so disposal can report it properly.
                priorException = e;
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(priorException, ts);
            }
        }
    }
    catch (IOException)
    {
        // BUGFIX: 'throw (Exception)e;' rethrew through the variable, which
        // resets the stack trace; a bare 'throw;' preserves it.
        throw;
    }
}
/// <summary>
/// Asserts that the stream yields exactly the expected Tokens, in order.
/// </summary>
private void CheckTokens(TokenStream stream)
{
    ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();
    int seen = 0;
    // Compare every produced term against the expected array, in order.
    while (stream.IncrementToken())
    {
        Assert.IsTrue(seen < Tokens.Length);
        Assert.AreEqual(Tokens[seen], termAtt.ToString());
        seen++;
    }
    // No expected token may be missing.
    Assert.AreEqual(Tokens.Length, seen);
}
/// <summary>
/// Asserts that the stream yields exactly the expected tokens, in order
/// (old-style non-generic attribute API).
/// </summary>
private void checkTokens(TokenStream stream)
{
    var termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    Assert.IsNotNull(termAtt);
    int seen = 0;
    while (stream.IncrementToken())
    {
        // Must not produce more tokens than expected...
        Assert.IsTrue(seen < tokens.Length);
        // ...and each one must match, in order.
        Assert.AreEqual(tokens[seen], termAtt.Term());
        seen++;
    }
    Assert.AreEqual(tokens.Length, seen);
}
/// <summary>
/// Consumes the stream and asserts that the first payload byte of each token
/// counts 1, 2, 3, ... in token order.
/// </summary>
internal virtual void VerifyPayload(TokenStream ts)
{
    IPayloadAttribute payloadAtt = ts.GetAttribute<IPayloadAttribute>();
    for (byte expected = 1; ts.IncrementToken(); expected++)
    {
        Assert.AreEqual(expected, payloadAtt.Payload.ToByteArray()[0]);
    }
}
/// <summary>
/// Regression test for LUCENE-3042: a stream must be fully consumable and
/// then reusable via AssertAnalyzesTo on the same analyzer.
/// </summary>
public void TestLUCENE_3042()
{
    const string testString = "t";
    Analyzer analyzer = new MockAnalyzer(Random);
    using (TokenStream stream = analyzer.GetTokenStream("dummy", testString))
    {
        stream.Reset();
        // Drain the stream; only successful consumption matters here.
        while (stream.IncrementToken())
        {
        }
        stream.End();
    }
    AssertAnalyzesTo(analyzer, testString, new[] { "t" });
}
/// <summary>
/// Fuzz test: analyzes many random HTML-ish strings through a MockCharFilter
/// and consumes each stream completely; validation happens inside the stream.
/// </summary>
public void TestForwardOffsets()
{
    int iterations = AtLeast(1000);
    for (int i = 0; i < iterations; i++)
    {
        string text = TestUtil.RandomHtmlishString(Random, 20);
        var reader = new StringReader(text);
        var charfilter = new MockCharFilter(reader, 2);
        var analyzer = new MockAnalyzer(Random);
        using TokenStream ts = analyzer.GetTokenStream("bogus", charfilter);
        ts.Reset();
        // Consume every token.
        while (ts.IncrementToken())
        {
        }
        ts.End();
    }
}
/// <summary>
/// Segments <paramref name="content"/> with the PanGu Chinese analyzer and
/// returns the list of terms in stream order.
/// </summary>
public static List<string> SplitWords(string content)
{
    List<string> strList = new List<string>();
    using (Analyzer analyzer = new PanGuAnalyzer()) // PanGu Chinese segmentation analyzer
    {
        using (System.IO.StringReader reader = new System.IO.StringReader(content))
        {
            Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(content, reader);
            // The attribute instance is reused; fetch it once instead of per token.
            var ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            while (ts.IncrementToken())
            {
                strList.Add(ita.Term);
            }
            // BUGFIX: was ts.CloneAttributes(), which clones and discards
            // attribute state; the intent was to release the stream.
            ts.Close();
        }
    }
    return strList;
}
/// <summary>
/// Verifies a custom stop set: emitted terms are never stop words and, by
/// default, the stop tokenizer does not apply position increments.
/// </summary>
public virtual void TestStopList()
{
    // Old Hashtable-based API: the stop set maps each word to itself.
    var stopWordsSet = new System.Collections.Hashtable();
    foreach (string word in new[] { "good", "test", "analyzer" })
    {
        stopWordsSet.Add(word, word);
    }
    StopAnalyzer newStop = new StopAnalyzer(stopWordsSet);
    var reader = new System.IO.StringReader("This is a good test of the english stop analyzer");
    TokenStream stream = newStop.TokenStream("test", reader);
    Assert.IsNotNull(stream);
    var termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    var posIncrAtt = (PositionIncrementAttribute)stream.AddAttribute(typeof(PositionIncrementAttribute));
    while (stream.IncrementToken())
    {
        System.String text = termAtt.Term();
        Assert.IsFalse(stopWordsSet.Contains(text));
        // By default the stop tokenizer does not apply increments.
        Assert.AreEqual(1, posIncrAtt.GetPositionIncrement());
    }
}
/// <summary>
/// Verifies a custom stop set against the 2.4 StopAnalyzer: emitted terms are
/// never stop words and the stop tokenizer applies no position increments.
/// </summary>
public virtual void TestStopList()
{
    var stopWordsSet = Support.Compatibility.SetFactory.CreateHashSet<string>();
    foreach (var word in new[] { "good", "test", "analyzer" })
    {
        stopWordsSet.Add(word);
    }
    StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
    var reader = new System.IO.StringReader("This is a good test of the english stop analyzer");
    TokenStream stream = newStop.TokenStream("test", reader);
    Assert.IsNotNull(stream);
    ITermAttribute termAtt = stream.GetAttribute<ITermAttribute>();
    IPositionIncrementAttribute posIncrAtt = stream.AddAttribute<IPositionIncrementAttribute>();
    while (stream.IncrementToken())
    {
        System.String text = termAtt.Term;
        Assert.IsFalse(stopWordsSet.Contains(text));
        // in 2.4 stop tokenizer does not apply increments.
        Assert.AreEqual(1, posIncrAtt.PositionIncrement);
    }
}
/// <summary>
/// Consumes the stream with the attribute-based API: only the token
/// "tokenstream" may carry the single-byte proper-noun payload, and every
/// term must match the expected results array in order.
/// </summary>
private static void ConsumeStreamNewAPI(TokenStream stream)
{
    stream.Reset();
    var payloadAtt = (PayloadAttribute)stream.AddAttribute(typeof(PayloadAttribute));
    var termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
    int index = 0;
    while (stream.IncrementToken())
    {
        System.String term = termAtt.Term();
        Payload p = payloadAtt.GetPayload();
        bool isProperNoun = p != null
            && p.GetData().Length == 1
            && p.GetData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION;
        if (isProperNoun)
        {
            Assert.IsTrue("tokenstream".Equals(term), "only TokenStream is a proper noun");
        }
        else
        {
            Assert.IsFalse("tokenstream".Equals(term), "all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)");
        }
        Assert.AreEqual(results[index], term);
        index++;
    }
}
/// <summary>Advances the stream one token and asserts its term equals <paramref name="expected"/>.</summary>
internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
{
    bool advanced = stream.IncrementToken();
    Assert.IsTrue(advanced);
    Assert.AreEqual(expected, termAtt.Term());
}
/// <summary>
/// Builds a frequency view of the stream: "term [count] " for each distinct
/// term, ordered alphabetically, and reports the distinct-term count via
/// <paramref name="numberOfTokens"/>.
/// </summary>
public string GetView(TokenStream tokenStream, out int numberOfTokens)
{
    var sb = new StringBuilder();
    var termDictionary = new Dictionary<string, int>();
    var termAttr = tokenStream.GetAttribute<ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        // Single-lookup frequency update (was Keys.Contains plus two indexer hits).
        string term = termAttr.Term;
        termDictionary.TryGetValue(term, out int count);
        termDictionary[term] = count + 1;
    }
    foreach (var item in termDictionary.OrderBy(x => x.Key))
    {
        sb.Append(item.Key).Append(" [").Append(item.Value).Append("] ");
    }
    numberOfTokens = termDictionary.Count;
    return sb.ToString();
}
/// <summary>
/// Consumes <paramref name="ts"/> and asserts that each produced token matches
/// the expected terms, start/end offsets, types and position increments (any
/// expected array may be null to skip that check), that ClearAttributes() is
/// called correctly between tokens (bogus values are planted first so stale
/// state is detected), that the stream ends after exactly output.Length
/// tokens, and — when finalOffset is given — that End() leaves that EndOffset.
/// </summary>
public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int? finalOffset) { Assert.IsNotNull(output); ICheckClearAttributesAttribute checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>(); Assert.IsTrue(ts.HasAttribute<ITermAttribute>(), "has no TermAttribute"); ITermAttribute termAtt = ts.GetAttribute<ITermAttribute>(); IOffsetAttribute offsetAtt = null; if (startOffsets != null || endOffsets != null || finalOffset != null) { Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute"); offsetAtt = ts.GetAttribute<IOffsetAttribute>(); } ITypeAttribute typeAtt = null; if (types != null) { Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute"); typeAtt = ts.GetAttribute<ITypeAttribute>(); } IPositionIncrementAttribute posIncrAtt = null; if (posIncrements != null) { Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute"); posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>(); } ts.Reset(); for (int i = 0; i < output.Length; i++) { // extra safety to enforce, that the state is not preserved and also assign bogus values ts.ClearAttributes(); termAtt.SetTermBuffer("bogusTerm"); if (offsetAtt != null) offsetAtt.SetOffset(14584724, 24683243); if (typeAtt != null) typeAtt.Type = "bogusType"; if (posIncrAtt != null) posIncrAtt.PositionIncrement = 45987657; checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist"); Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain"); Assert.AreEqual(output[i], termAtt.Term, "term " + i); if (startOffsets != null) Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset, "startOffset " + i); if (endOffsets != null) Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset, 
"endOffset " + i); if (types != null) Assert.AreEqual(types[i], typeAtt.Type, "type " + i); if (posIncrements != null) Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i); } Assert.IsFalse(ts.IncrementToken(), "end of stream"); ts.End(); if (finalOffset.HasValue) Assert.AreEqual(finalOffset, offsetAtt.EndOffset, "finalOffset "); ts.Close(); }
/// <summary>
/// Consumes the wrapped TokenStream and emits a GraphViz "dot" description of
/// the token graph: one node per token position, one arc per token labelled
/// with its term (plus the matching input-text slice or offsets when an
/// OffsetAttribute is present). Position gaps (posInc > 1) become dotted
/// arcs, a first posInc of 0 is corrected to 1 with a warning, and a final
/// node/arc is appended when at least one token was seen.
/// </summary>
public virtual void ToDot() { @in.Reset(); WriteHeader(); // TODO: is there some way to tell dot that it should // make the "main path" a straight line and have the // non-sausage arcs not affect node placement... int pos = -1; int lastEndPos = -1; while (@in.IncrementToken()) { bool isFirst = pos == -1; int posInc = PosIncAtt.PositionIncrement; if (isFirst && posInc == 0) { // TODO: hmm are TS's still allowed to do this...? Console.Error.WriteLine("WARNING: first posInc was 0; correcting to 1"); posInc = 1; } if (posInc > 0) { // New node: pos += posInc; WriteNode(pos, Convert.ToString(pos)); } if (posInc > 1) { // Gap! WriteArc(lastEndPos, pos, null, "dotted"); } if (isFirst) { WriteNode(-1, null); WriteArc(-1, pos, null, null); } string arcLabel = TermAtt.ToString(); if (OffsetAtt != null) { int startOffset = OffsetAtt.StartOffset; int endOffset = OffsetAtt.EndOffset; //System.out.println("start=" + startOffset + " end=" + endOffset + " len=" + inputText.length()); if (InputText != null) { arcLabel += " / " + InputText.Substring(startOffset, endOffset - startOffset); } else { arcLabel += " / " + startOffset + "-" + endOffset; } } WriteArc(pos, pos + PosLengthAtt.PositionLength, arcLabel, null); lastEndPos = pos + PosLengthAtt.PositionLength; } @in.End(); if (lastEndPos != -1) { // TODO: should we output any final text (from end // offsets) on this arc...? WriteNode(-2, null); WriteArc(lastEndPos, -2, null, null); } WriteTrailer(); }
/// <summary>
/// Advances the stream one token and asserts its term, position increment,
/// payload-encoded boost (1.0f when no payload) and offsets.
/// </summary>
private static void AssertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset)
{
    var termAtt = ts.AddAttribute<ITermAttribute>();
    var posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>();
    var payloadAtt = ts.AddAttribute<IPayloadAttribute>();
    var offsetAtt = ts.AddAttribute<IOffsetAttribute>();

    Assert.IsTrue(ts.IncrementToken());
    Assert.AreEqual(text, termAtt.Term);
    Assert.AreEqual(positionIncrement, posIncrAtt.PositionIncrement);
    // A missing payload means the default boost of 1.0f.
    float actualBoost = payloadAtt.Payload == null
        ? 1f
        : PayloadHelper.DecodeFloat(payloadAtt.Payload.GetData());
    Assert.AreEqual(boost, actualBoost, 0);
    Assert.AreEqual(startOffset, offsetAtt.StartOffset);
    Assert.AreEqual(endOffset, offsetAtt.EndOffset);
}
/// <summary>Asserts the stream has another token whose term matches <paramref name="expected"/>.</summary>
internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
{
    // First make sure a token exists, then compare its term text.
    Assert.IsTrue(stream.IncrementToken());
    string actual = termAtt.Term();
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Consumes the stream: only "tokenstream" may carry the single-byte
/// proper-noun payload; all terms must match the expected results in order.
/// </summary>
private static void ConsumeStreamNewAPI(TokenStream stream)
{
    stream.Reset();
    var payloadAtt = (PayloadAttribute)stream.AddAttribute(typeof(PayloadAttribute));
    var termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
    for (int i = 0; stream.IncrementToken(); i++)
    {
        System.String term = termAtt.Term();
        Payload p = payloadAtt.GetPayload();
        if (p != null && p.GetData().Length == 1 && p.GetData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION)
        {
            Assert.IsTrue("tokenstream".Equals(term), "only TokenStream is a proper noun");
        }
        else
        {
            Assert.IsFalse("tokenstream".Equals(term), "all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)");
        }
        Assert.AreEqual(results[i], term);
    }
}
// assert-methods start here
/// <summary>Advances the stream one token and asserts its term text.</summary>
private static void AssertNext(TokenStream ts, String text)
{
    var term = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
    bool hasToken = ts.IncrementToken();
    Assert.IsTrue(hasToken);
    Assert.AreEqual(text, term.Term());
}
// NOTE(review): this builds one automaton state chain per token (bytes, or
// code points when unicodeArcs is true), joins positions via POS_SEP
// transitions, and patches "holes" (posInc > 1, e.g. from StopFilter) with
// AddHoles. The construction depends on the exact statement order (posData is
// reused across iterations; FreeBefore trims the rolling buffer), so the code
// is left byte-identical below.
/// <summary> /// Pulls the graph (including <see cref="IPositionLengthAttribute"/> /// from the provided <see cref="TokenStream"/>, and creates the corresponding /// automaton where arcs are bytes (or Unicode code points /// if unicodeArcs = true) from each term. /// </summary> public virtual Automaton ToAutomaton(TokenStream @in) { var a = new Automaton(); bool deterministic = true; var posIncAtt = @in.AddAttribute <IPositionIncrementAttribute>(); var posLengthAtt = @in.AddAttribute <IPositionLengthAttribute>(); var offsetAtt = @in.AddAttribute <IOffsetAttribute>(); var termBytesAtt = @in.AddAttribute <ITermToBytesRefAttribute>(); BytesRef term = termBytesAtt.BytesRef; @in.Reset(); // Only temporarily holds states ahead of our current // position: RollingBuffer <Position> positions = new Positions(); int pos = -1; Position posData = null; int maxOffset = 0; while (@in.IncrementToken()) { int posInc = posIncAtt.PositionIncrement; if (!preservePositionIncrements && posInc > 1) { posInc = 1; } Debug.Assert(pos > -1 || posInc > 0); if (posInc > 0) { // New node: pos += posInc; posData = positions.Get(pos); Debug.Assert(posData.leaving == null); if (posData.arriving == null) { // No token ever arrived to this position if (pos == 0) { // OK: this is the first token posData.leaving = a.GetInitialState(); } else { // this means there's a hole (eg, StopFilter // does this): posData.leaving = new State(); AddHoles(a.GetInitialState(), positions, pos); } } else { posData.leaving = new State(); posData.arriving.AddTransition(new Transition(POS_SEP, posData.leaving)); if (posInc > 1) { // A token spanned over a hole; add holes // "under" it: AddHoles(a.GetInitialState(), positions, pos); } } positions.FreeBefore(pos); } else { // note: this isn't necessarily true. its just that we aren't surely det. // we could optimize this further (e.g. buffer and sort synonyms at a position) // but thats probably overkill. 
this is cheap and dirty deterministic = false; } int endPos = pos + posLengthAtt.PositionLength; termBytesAtt.FillBytesRef(); BytesRef termUTF8 = ChangeToken(term); int[] termUnicode = null; Position endPosData = positions.Get(endPos); if (endPosData.arriving == null) { endPosData.arriving = new State(); } State state = posData.leaving; int termLen = termUTF8.Length; if (unicodeArcs) { string utf16 = termUTF8.Utf8ToString(); termUnicode = new int[utf16.CodePointCount(0, utf16.Length)]; termLen = termUnicode.Length; for (int cp, i = 0, j = 0; i < utf16.Length; i += Character.CharCount(cp)) { termUnicode[j++] = cp = Character.CodePointAt(utf16, i); } } else { termLen = termUTF8.Length; } for (int byteIDX = 0; byteIDX < termLen; byteIDX++) { State nextState = byteIDX == termLen - 1 ? endPosData.arriving : new State(); int c; if (unicodeArcs) { c = termUnicode[byteIDX]; } else { c = termUTF8.Bytes[termUTF8.Offset + byteIDX] & 0xff; } state.AddTransition(new Transition(c, nextState)); state = nextState; } maxOffset = Math.Max(maxOffset, offsetAtt.EndOffset); } @in.End(); State endState = null; if (offsetAtt.EndOffset > maxOffset) { endState = new State(); endState.Accept = true; } pos++; while (pos <= positions.MaxPos) { posData = positions.Get(pos); if (posData.arriving != null) { if (endState != null) { posData.arriving.AddTransition(new Transition(POS_SEP, endState)); } else { posData.arriving.Accept = true; } } pos++; } //toDot(a); a.IsDeterministic = deterministic; return(a); }
/// <summary>Advances the stream one token and asserts its term text and offsets.</summary>
private static void AssertNext(TokenStream ts, String text, int startOffset, int endOffset)
{
    var term = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
    var offsets = (OffsetAttribute)ts.AddAttribute(typeof(OffsetAttribute));
    // The stream must yield one more token matching term and offsets.
    Assert.IsTrue(ts.IncrementToken());
    Assert.AreEqual(text, term.Term());
    Assert.AreEqual(startOffset, offsets.StartOffset());
    Assert.AreEqual(endOffset, offsets.EndOffset());
}
/// <summary>
/// Advances the stream one token and asserts its term, position increment,
/// payload-encoded boost (defaulting to 1.0f without a payload) and offsets
/// (old non-generic attribute API).
/// </summary>
private static void AssertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset)
{
    var term = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
    var posIncr = (PositionIncrementAttribute)ts.AddAttribute(typeof(PositionIncrementAttribute));
    var payload = (PayloadAttribute)ts.AddAttribute(typeof(PayloadAttribute));
    var offsets = (OffsetAttribute)ts.AddAttribute(typeof(OffsetAttribute));

    Assert.IsTrue(ts.IncrementToken());
    Assert.AreEqual(text, term.Term());
    Assert.AreEqual(positionIncrement, posIncr.GetPositionIncrement());
    // No payload means the default boost of 1.0f.
    float actualBoost = payload.GetPayload() == null ? 1f : PayloadHelper.DecodeFloat(payload.GetPayload().GetData());
    Assert.AreEqual(boost, actualBoost, 0);
    Assert.AreEqual(startOffset, offsets.StartOffset());
    Assert.AreEqual(endOffset, offsets.EndOffset());
}
/// <summary>
/// Graph-aware stream verifier: consumes <paramref name="ts"/> and asserts
/// terms, offsets, types, position increments/lengths and keyword flags
/// against the expected arrays (null array = skip that check); plants bogus
/// attribute values before each IncrementToken to prove ClearAttributes()
/// runs; validates graph offset consistency and forward-only startOffsets
/// when offsetsAreCorrect; re-checks attribute clearing around End(); and
/// finally verifies finalOffset/finalPosInc before disposing the stream.
/// </summary>
// offsetsAreCorrect also validates: // - graph offsets are correct (all tokens leaving from // pos X have the same startOffset; all tokens // arriving to pos Y have the same endOffset) // - offsets only move forwards (startOffset >= // lastStartOffset) public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int?finalOffset, int?finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect) { Assert.IsNotNull(output); var checkClearAtt = ts.AddAttribute <ICheckClearAttributesAttribute>(); ICharTermAttribute termAtt = null; if (output.Length > 0) { Assert.IsTrue(ts.HasAttribute <ICharTermAttribute>(), "has no CharTermAttribute"); termAtt = ts.GetAttribute <ICharTermAttribute>(); } IOffsetAttribute offsetAtt = null; if (startOffsets != null || endOffsets != null || finalOffset != null) { Assert.IsTrue(ts.HasAttribute <IOffsetAttribute>(), "has no OffsetAttribute"); offsetAtt = ts.GetAttribute <IOffsetAttribute>(); } ITypeAttribute typeAtt = null; if (types != null) { Assert.IsTrue(ts.HasAttribute <ITypeAttribute>(), "has no TypeAttribute"); typeAtt = ts.GetAttribute <ITypeAttribute>(); } IPositionIncrementAttribute posIncrAtt = null; if (posIncrements != null || finalPosInc != null) { Assert.IsTrue(ts.HasAttribute <IPositionIncrementAttribute>(), "has no PositionIncrementAttribute"); posIncrAtt = ts.GetAttribute <IPositionIncrementAttribute>(); } IPositionLengthAttribute posLengthAtt = null; if (posLengths != null) { Assert.IsTrue(ts.HasAttribute <IPositionLengthAttribute>(), "has no PositionLengthAttribute"); posLengthAtt = ts.GetAttribute <IPositionLengthAttribute>(); } IKeywordAttribute keywordAtt = null; if (keywordAtts != null) { Assert.IsTrue(ts.HasAttribute <IKeywordAttribute>(), "has no KeywordAttribute"); keywordAtt = ts.GetAttribute <IKeywordAttribute>(); } // Maps position to the start/end offset: IDictionary <int?, int?> posToStartOffset = new 
Dictionary <int?, int?>(); IDictionary <int?, int?> posToEndOffset = new Dictionary <int?, int?>(); ts.Reset(); int pos = -1; int lastStartOffset = 0; for (int i = 0; i < output.Length; i++) { // extra safety to enforce, that the state is not preserved and also assign bogus values ts.ClearAttributes(); termAtt.SetEmpty().Append("bogusTerm"); if (offsetAtt != null) { offsetAtt.SetOffset(14584724, 24683243); } if (typeAtt != null) { typeAtt.Type = "bogusType"; } if (posIncrAtt != null) { posIncrAtt.PositionIncrement = 45987657; } if (posLengthAtt != null) { posLengthAtt.PositionLength = 45987653; } if (keywordAtt != null) { keywordAtt.Keyword = (i & 1) == 0; } bool reset = checkClearAtt.AndResetClearCalled; // reset it, because we called clearAttribute() before Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist"); Assert.IsTrue(reset, "ClearAttributes() was not called correctly in TokenStream chain"); Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString()); if (startOffsets != null) { Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset(), "startOffset " + i); } if (endOffsets != null) { Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset(), "endOffset " + i); } if (types != null) { Assert.AreEqual(types[i], typeAtt.Type, "type " + i); } if (posIncrements != null) { Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i); } if (posLengths != null) { Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i); } if (keywordAtts != null) { Assert.AreEqual(keywordAtts[i], keywordAtt.Keyword, "keywordAtt " + i); } // we can enforce some basic things about a few attributes even if the caller doesn't check: if (offsetAtt != null) { int startOffset = offsetAtt.StartOffset(); int endOffset = offsetAtt.EndOffset(); if (finalOffset != null) { Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset"); 
Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset); } if (offsetsAreCorrect) { Assert.IsTrue(offsetAtt.StartOffset() >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset); lastStartOffset = offsetAtt.StartOffset(); } if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null) { // Validate offset consistency in the graph, ie // all tokens leaving from a certain pos have the // same startOffset, and all tokens arriving to a // certain pos have the same endOffset: int posInc = posIncrAtt.PositionIncrement; pos += posInc; int posLength = posLengthAtt.PositionLength; if (!posToStartOffset.ContainsKey(pos)) { // First time we've seen a token leaving from this position: posToStartOffset[pos] = startOffset; //System.out.println(" + s " + pos + " -> " + startOffset); } else { // We've seen a token leaving from this position // before; verify the startOffset is the same: //System.out.println(" + vs " + pos + " -> " + startOffset); Assert.AreEqual((int)posToStartOffset[pos], startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt); } int endPos = pos + posLength; if (!posToEndOffset.ContainsKey(endPos)) { // First time we've seen a token arriving to this position: posToEndOffset[endPos] = endOffset; //System.out.println(" + e " + endPos + " -> " + endOffset); } else { // We've seen a token arriving to this position // before; verify the endOffset is the same: //System.out.println(" + ve " + endPos + " -> " + endOffset); Assert.AreEqual((int)posToEndOffset[endPos], endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt); } } } if (posIncrAtt != null) { if (i == 0) { Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1"); } else { Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0"); } } if (posLengthAtt != null) { 
Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1"); } } if (ts.IncrementToken()) { Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt); } // repeat our extra safety checks for End() ts.ClearAttributes(); if (termAtt != null) { termAtt.SetEmpty().Append("bogusTerm"); } if (offsetAtt != null) { offsetAtt.SetOffset(14584724, 24683243); } if (typeAtt != null) { typeAtt.Type = "bogusType"; } if (posIncrAtt != null) { posIncrAtt.PositionIncrement = 45987657; } if (posLengthAtt != null) { posLengthAtt.PositionLength = 45987653; } var reset_ = checkClearAtt.AndResetClearCalled; // reset it, because we called clearAttribute() before ts.End(); Assert.IsTrue(checkClearAtt.AndResetClearCalled, "super.End()/ClearAttributes() was not called correctly in End()"); if (finalOffset != null) { Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset(), "finalOffset"); } if (offsetAtt != null) { Assert.IsTrue(offsetAtt.EndOffset() >= 0, "finalOffset must be >= 0"); } if (finalPosInc != null) { Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc"); } ts.Dispose(); }
/// <summary>
/// Consumes <paramref name="ts"/> and asserts that the produced tokens exactly match the
/// expected <paramref name="output"/> terms, and (when the corresponding array is non-null)
/// the expected offsets, types, and position increments. Also verifies that
/// ClearAttributes() is called correctly by the stream chain before each token, that the
/// stream reports end-of-stream after the last expected token, and (optionally) the final
/// offset reported after End().
/// </summary>
/// <param name="ts">the token stream under test; Reset() is called first, Close() last</param>
/// <param name="output">expected term texts, one per token (must not be null)</param>
/// <param name="startOffsets">expected start offsets, or null to skip offset checks</param>
/// <param name="endOffsets">expected end offsets, or null to skip offset checks</param>
/// <param name="types">expected token types, or null to skip type checks</param>
/// <param name="posIncrements">expected position increments, or null to skip</param>
/// <param name="finalOffset">expected offset after End(), or null to skip</param>
public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int? finalOffset)
{
    Assert.IsNotNull(output);
    // CheckClearAttributesAttribute records whether ClearAttributes() was invoked on the chain.
    CheckClearAttributesAttribute checkClearAtt = (CheckClearAttributesAttribute)ts.AddAttribute(typeof(CheckClearAttributesAttribute));

    // A term attribute is always required; the others are only fetched when the caller
    // supplied expectations for them.
    Assert.IsTrue(ts.HasAttribute(typeof(TermAttribute)), "has no TermAttribute");
    TermAttribute termAtt = (TermAttribute)ts.GetAttribute(typeof(TermAttribute));

    OffsetAttribute offsetAtt = null;
    if (startOffsets != null || endOffsets != null || finalOffset != null)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(OffsetAttribute)), "has no OffsetAttribute");
        offsetAtt = (OffsetAttribute)ts.GetAttribute(typeof(OffsetAttribute));
    }

    TypeAttribute typeAtt = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(TypeAttribute)), "has no TypeAttribute");
        typeAtt = (TypeAttribute)ts.GetAttribute(typeof(TypeAttribute));
    }

    PositionIncrementAttribute posIncrAtt = null;
    if (posIncrements != null)
    {
        Assert.IsTrue(ts.HasAttribute(typeof(PositionIncrementAttribute)), "has no PositionIncrementAttribute");
        posIncrAtt = (PositionIncrementAttribute)ts.GetAttribute(typeof(PositionIncrementAttribute));
    }

    ts.Reset();
    for (int i = 0; i < output.Length; i++)
    {
        // extra safety to enforce, that the state is not preserved and also assign bogus values:
        // if IncrementToken() fails to overwrite these, the equality asserts below will fail.
        ts.ClearAttributes();
        termAtt.SetTermBuffer("bogusTerm");
        if (offsetAtt != null)
        {
            offsetAtt.SetOffset(14584724, 24683243);
        }
        if (typeAtt != null)
        {
            typeAtt.SetType("bogusType");
        }
        if (posIncrAtt != null)
        {
            posIncrAtt.SetPositionIncrement(45987657);
        }

        checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before

        Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
        // The chain must have called ClearAttributes() during IncrementToken().
        Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain");

        Assert.AreEqual(output[i], termAtt.Term(), "term " + i);
        if (startOffsets != null)
        {
            Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset(), "startOffset " + i);
        }
        if (endOffsets != null)
        {
            Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset(), "endOffset " + i);
        }
        if (types != null)
        {
            Assert.AreEqual(types[i], typeAtt.Type(), "type " + i);
        }
        if (posIncrements != null)
        {
            Assert.AreEqual(posIncrements[i], posIncrAtt.GetPositionIncrement(), "posIncrement " + i);
        }
    }
    // No tokens beyond the expected count may remain.
    Assert.IsFalse(ts.IncrementToken(), "end of stream");
    ts.End();
    if (finalOffset.HasValue)
    {
        // offsetAtt is guaranteed non-null here: finalOffset != null forced its retrieval above.
        Assert.AreEqual(finalOffset, offsetAtt.EndOffset(), "finalOffset ");
    }
    ts.Close();
}
/// <summary>
/// Drains <paramref name="stream"/> and asserts its term texts match the expected
/// <c>tokens</c> array element-for-element, including the total count.
/// </summary>
private void checkTokens(TokenStream stream)
{
    TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
    Assert.IsNotNull(termAtt);

    int seen = 0;
    for (; stream.IncrementToken(); seen++)
    {
        // Guard against the stream producing more tokens than expected before indexing.
        Assert.IsTrue(seen < tokens.Length);
        Assert.AreEqual(tokens[seen], termAtt.Term());
    }
    // The stream must also not produce fewer tokens than expected.
    Assert.AreEqual(tokens.Length, seen);
}
/// <summary>
/// Advances <paramref name="stream"/> by one token and asserts the term text equals
/// <paramref name="expected"/> and the payload bytes equal <paramref name="expectPay"/>
/// (both null, or byte-for-byte identical).
/// </summary>
void AssertTermEquals(String expected, TokenStream stream, ITermAttribute termAtt, IPayloadAttribute payAtt, byte[] expectPay)
{
    Assert.True(stream.IncrementToken());
    Assert.AreEqual(expected, termAtt.Term);

    Payload payload = payAtt.Payload;
    if (payload == null)
    {
        // No payload on the token: the expectation must be null too.
        Assert.True(expectPay == null, "expectPay is not null and it should be");
        return;
    }

    Assert.True(payload.Length == expectPay.Length, payload.Length + " does not equal: " + expectPay.Length);
    int index = 0;
    while (index < expectPay.Length)
    {
        Assert.True(expectPay[index] == payload.ByteAt(index), expectPay[index] + " does not equal: " + payload.ByteAt(index));
        index++;
    }
}
/// <summary>
/// Drains <paramref name="stream"/> and asserts its term texts match the expected
/// <c>Tokens</c> array element-for-element, including the total count.
/// </summary>
private void CheckTokens(TokenStream stream)
{
    ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();

    int position;
    for (position = 0; stream.IncrementToken(); position++)
    {
        // Bounds check first so an over-producing stream fails cleanly instead of throwing.
        Assert.IsTrue(position < Tokens.Length);
        Assert.AreEqual(Tokens[position], termAtt.ToString());
    }
    // Ensure the stream produced exactly as many tokens as expected.
    Assert.AreEqual(Tokens.Length, position);
}
/// <summary> Not an explicit test, just useful to print out some info on performance.
/// Compares the cost of re-analyzing two fields per document against analyzing one
/// field once through a TeeSinkTokenFilter with a modulo sink, for several token
/// counts and modulo values, and verifies both approaches accumulate the same
/// position-increment totals.
/// </summary>
/// <throws> Exception </throws>
public virtual void Performance()
{
    int[] tokCount = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
    int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
    for (int k = 0; k < tokCount.Length; k++)
    {
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();
        System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
        for (int i = 0; i < tokCount[k]; i++)
        {
            buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
        }

        // make sure we produce the same tokens
        TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
        TokenStream sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, 100));
        teeStream.ConsumeAllTokens();
        TokenStream stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), 100);
        ITermAttribute tfTok = stream.AddAttribute<ITermAttribute>();
        ITermAttribute sinkTok = sink.AddAttribute<ITermAttribute>();
        for (int i = 0; stream.IncrementToken(); i++)
        {
            Assert.IsTrue(sink.IncrementToken());
            Assert.IsTrue(tfTok.Equals(sinkTok) == true, tfTok + " is not equal to " + sinkTok + " at token: " + i);
        }

        // simulate two fields, each being analyzed once, for 20 documents
        for (int j = 0; j < modCounts.Length; j++)
        {
            int tfPos = 0;
            // Stopwatch is the correct tool for elapsed-time measurement:
            // DateTime.Now tick arithmetic has low resolution and is skewed by
            // system clock adjustments.
            System.Diagnostics.Stopwatch watch = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < 20; i++)
            {
                stream = new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString())));
                IPositionIncrementAttribute posIncrAtt = stream.GetAttribute<IPositionIncrementAttribute>();
                while (stream.IncrementToken())
                {
                    tfPos += posIncrAtt.PositionIncrement;
                }
                stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                posIncrAtt = stream.GetAttribute<IPositionIncrementAttribute>();
                while (stream.IncrementToken())
                {
                    tfPos += posIncrAtt.PositionIncrement;
                }
            }
            watch.Stop();
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + watch.ElapsedMilliseconds + " ms");

            int sinkPos = 0;
            // simulate one field with one sink
            watch = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < 20; i++)
            {
                teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
                sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                IPositionIncrementAttribute posIncrAtt = teeStream.GetAttribute<IPositionIncrementAttribute>();
                while (teeStream.IncrementToken())
                {
                    sinkPos += posIncrAtt.PositionIncrement;
                }
                //System.out.println("Modulo--------");
                posIncrAtt = sink.GetAttribute<IPositionIncrementAttribute>();
                while (sink.IncrementToken())
                {
                    sinkPos += posIncrAtt.PositionIncrement;
                }
            }
            watch.Stop();
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + watch.ElapsedMilliseconds + " ms");
            // Both strategies must have observed identical position-increment totals.
            Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
        }
        System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
    }
}
// offsetsAreCorrect also validates:
// - graph offsets are correct (all tokens leaving from
//   pos X have the same startOffset; all tokens
//   arriving to pos Y have the same endOffset)
// - offsets only move forwards (startOffset >=
//   lastStartOffset)
/// <summary>
/// Consumes <paramref name="ts"/> and asserts the produced tokens match all of the
/// supplied expectation arrays (any of which may be null to skip that check):
/// terms, start/end offsets, types, position increments, position lengths, and
/// keyword flags. Also validates ClearAttributes() bookkeeping, end-of-stream,
/// End()'s final offset / position increment, and (when offsetsAreCorrect) the
/// token-graph offset invariants described above.
/// </summary>
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect)
{
    // LUCENENET: Bug fix: NUnit throws an exception when something fails.
    // This causes Dispose() to be skipped and it pollutes other tests indicating false negatives.
    // Added this try-finally block to fix this.
    try
    {
        Assert.IsNotNull(output);
        var checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();

        // Each attribute is fetched only when the caller supplied expectations that need it.
        ICharTermAttribute termAtt = null;
        if (output.Length > 0)
        {
            Assert.IsTrue(ts.HasAttribute<ICharTermAttribute>(), "has no CharTermAttribute");
            termAtt = ts.GetAttribute<ICharTermAttribute>();
        }

        IOffsetAttribute offsetAtt = null;
        if (startOffsets != null || endOffsets != null || finalOffset != null)
        {
            Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
            offsetAtt = ts.GetAttribute<IOffsetAttribute>();
        }

        ITypeAttribute typeAtt = null;
        if (types != null)
        {
            Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
            typeAtt = ts.GetAttribute<ITypeAttribute>();
        }

        IPositionIncrementAttribute posIncrAtt = null;
        if (posIncrements != null || finalPosInc != null)
        {
            Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
            posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
        }

        IPositionLengthAttribute posLengthAtt = null;
        if (posLengths != null)
        {
            Assert.IsTrue(ts.HasAttribute<IPositionLengthAttribute>(), "has no PositionLengthAttribute");
            posLengthAtt = ts.GetAttribute<IPositionLengthAttribute>();
        }

        IKeywordAttribute keywordAtt = null;
        if (keywordAtts != null)
        {
            Assert.IsTrue(ts.HasAttribute<IKeywordAttribute>(), "has no KeywordAttribute");
            keywordAtt = ts.GetAttribute<IKeywordAttribute>();
        }

        // Maps position to the start/end offset:
        IDictionary<int?, int?> posToStartOffset = new Dictionary<int?, int?>();
        IDictionary<int?, int?> posToEndOffset = new Dictionary<int?, int?>();

        ts.Reset();
        int pos = -1;
        int lastStartOffset = 0;
        for (int i = 0; i < output.Length; i++)
        {
            // extra safety to enforce, that the state is not preserved and also assign bogus values
            ts.ClearAttributes();
            termAtt.SetEmpty().Append("bogusTerm");
            if (offsetAtt != null)
            {
                offsetAtt.SetOffset(14584724, 24683243);
            }
            if (typeAtt != null)
            {
                typeAtt.Type = "bogusType";
            }
            if (posIncrAtt != null)
            {
                posIncrAtt.PositionIncrement = 45987657;
            }
            if (posLengthAtt != null)
            {
                posLengthAtt.PositionLength = 45987653;
            }
            if (keywordAtt != null)
            {
                keywordAtt.Keyword = (i & 1) == 0;
            }

            bool reset = checkClearAtt.AndResetClearCalled; // reset it, because we called clearAttribute() before
            Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
            Assert.IsTrue(reset, "ClearAttributes() was not called correctly in TokenStream chain");

            Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString());
            if (startOffsets != null)
            {
                Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset(), "startOffset " + i);
            }
            if (endOffsets != null)
            {
                Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset(), "endOffset " + i);
            }
            if (types != null)
            {
                Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
            }
            if (posIncrements != null)
            {
                Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
            }
            if (posLengths != null)
            {
                Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i);
            }
            if (keywordAtts != null)
            {
                Assert.AreEqual(keywordAtts[i], keywordAtt.Keyword, "keywordAtt " + i);
            }

            // we can enforce some basic things about a few attributes even if the caller doesn't check:
            if (offsetAtt != null)
            {
                int startOffset = offsetAtt.StartOffset();
                int endOffset = offsetAtt.EndOffset();
                if (finalOffset != null)
                {
                    Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset");
                    Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset);
                }

                if (offsetsAreCorrect)
                {
                    Assert.IsTrue(offsetAtt.StartOffset() >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
                    lastStartOffset = offsetAtt.StartOffset();
                }

                if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null)
                {
                    // Validate offset consistency in the graph, ie
                    // all tokens leaving from a certain pos have the
                    // same startOffset, and all tokens arriving to a
                    // certain pos have the same endOffset:
                    int posInc = posIncrAtt.PositionIncrement;
                    pos += posInc;

                    int posLength = posLengthAtt.PositionLength;

                    if (!posToStartOffset.ContainsKey(pos))
                    {
                        // First time we've seen a token leaving from this position:
                        posToStartOffset[pos] = startOffset;
                        //System.out.println("  + s " + pos + " -> " + startOffset);
                    }
                    else
                    {
                        // We've seen a token leaving from this position
                        // before; verify the startOffset is the same:
                        //System.out.println("  + vs " + pos + " -> " + startOffset);
                        Assert.AreEqual((int)posToStartOffset[pos], startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                    }

                    int endPos = pos + posLength;

                    if (!posToEndOffset.ContainsKey(endPos))
                    {
                        // First time we've seen a token arriving to this position:
                        posToEndOffset[endPos] = endOffset;
                        //System.out.println("  + e " + endPos + " -> " + endOffset);
                    }
                    else
                    {
                        // We've seen a token arriving to this position
                        // before; verify the endOffset is the same:
                        //System.out.println("  + ve " + endPos + " -> " + endOffset);
                        Assert.AreEqual((int)posToEndOffset[endPos], endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                    }
                }
            }

            if (posIncrAtt != null)
            {
                if (i == 0)
                {
                    Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1");
                }
                else
                {
                    Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0");
                }
            }
            if (posLengthAtt != null)
            {
                Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1");
            }
        }

        if (ts.IncrementToken())
        {
            Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt);
        }

        // repeat our extra safety checks for End()
        ts.ClearAttributes();
        if (termAtt != null)
        {
            termAtt.SetEmpty().Append("bogusTerm");
        }
        if (offsetAtt != null)
        {
            offsetAtt.SetOffset(14584724, 24683243);
        }
        if (typeAtt != null)
        {
            typeAtt.Type = "bogusType";
        }
        if (posIncrAtt != null)
        {
            posIncrAtt.PositionIncrement = 45987657;
        }
        if (posLengthAtt != null)
        {
            posLengthAtt.PositionLength = 45987653;
        }

        var reset_ = checkClearAtt.AndResetClearCalled; // reset it, because we called clearAttribute() before

        ts.End();
        Assert.IsTrue(checkClearAtt.AndResetClearCalled, "super.End()/ClearAttributes() was not called correctly in End()");

        if (finalOffset != null)
        {
            Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset(), "finalOffset");
        }
        if (offsetAtt != null)
        {
            Assert.IsTrue(offsetAtt.EndOffset() >= 0, "finalOffset must be >= 0");
        }
        if (finalPosInc != null)
        {
            Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc");
        }

        ts.Dispose();
    }
    catch (Exception)
    {
        // Cleanup path for any failure above so the stream is not left open.
        // NOTE(review): if ClearAttributes()/End()/Dispose() themselves throw here,
        // that new exception replaces the original assertion failure — consider
        // guarding each cleanup call; confirm against callers' expectations.
        //ts.Reset();
        ts.ClearAttributes();
        ts.End();
        ts.Dispose();
        throw;
    }
}