Example #1
0
        /// <summary>
        /// Indexes a single document and asks <c>PayloadSpanUtil</c> for the payloads
        /// matching a <c>TermQuery</c> on the term "rr".
        /// The test makes no assertions; the payloads are only printed when <c>DEBUG</c> is set.
        /// </summary>
        public virtual void  TestPayloadSpanUtil()
        {
            RAMDirectory    directory = new RAMDirectory();
            PayloadAnalyzer analyzer  = new PayloadAnalyzer(this);
            IndexWriter     writer    = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.SetSimilarity(similarity);
            Document doc = new Document();

            doc.Add(new Field(PayloadHelper.FIELD, "xx rr yy mm  pp", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Close();

            IndexSearcher searcher = new IndexSearcher(directory, true);
            try
            {
                IndexReader     reader = searcher.IndexReader;
                PayloadSpanUtil psu    = new PayloadSpanUtil(reader);

                System.Collections.Generic.ICollection<byte[]> payloads = psu.GetPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
                if (DEBUG)
                {
                    System.Console.Out.WriteLine("Num payloads:" + payloads.Count);
                    foreach (byte[] bytes in payloads)
                    {
                        System.Console.Out.WriteLine(System.Text.Encoding.UTF8.GetString(bytes));
                    }
                }
            }
            finally
            {
                // Release the searcher and the directory even if the lookup throws;
                // the original version never closed either of them.
                searcher.Close();
                directory.Close();
            }
        }
        /// <summary>
        /// Indexes one document analyzed by <c>MockPayloadAnalyzer</c>, then checks
        /// (1) the frequency and positions reported for the term "a",
        /// (2) the number of payloads and spans produced by a <c>SpanNearQuery</c> over "a" and "k", and
        /// (3) the payloads returned by <c>PayloadSpanUtil</c> for that same query.
        /// </summary>
        public virtual void TestPayloadsPos0()
        {
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, new MockPayloadAnalyzer());

            Document document = new Document();
            document.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            indexWriter.AddDocument(document);

            IndexReader writerReader = indexWriter.Reader;
            AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(writerReader);

            // --- Term positions for "a" ---
            DocsAndPositionsEnum positions = atomicReader.TermPositionsEnum(new Term("content", "a"));
            Assert.IsTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            // The term is expected four times, at these positions in this order.
            Assert.AreEqual(4, positions.Freq());
            foreach (int expectedPosition in new int[] { 0, 1, 3, 6 })
            {
                Assert.AreEqual(expectedPosition, positions.NextPosition());
            }

            // No second document contains "a".
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, positions.NextDoc());

            IndexSearcher searcher = NewSearcher(writerReader);

            SpanQuery[] clauses = new SpanQuery[]
            {
                new SpanTermQuery(new Term("content", "a")),
                new SpanTermQuery(new Term("content", "k"))
            };
            SpanNearQuery nearQuery = new SpanNearQuery(clauses, 30, false);

            // --- Payloads reachable through the spans ---
            if (VERBOSE)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }

            int payloadCount = 0;
            bool payloadSpanAtZero = false;
            Search.Spans.Spans payloadSpans = MultiSpansWrapper.Wrap(searcher.TopReaderContext, nearQuery);
            while (payloadSpans.Next())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("doc " + payloadSpans.Doc() + ": span " + payloadSpans.Start() + " to " + payloadSpans.End());
                }

                var spanPayloads = payloadSpans.Payload;
                if (payloadSpans.Start() == 0)
                {
                    payloadSpanAtZero = true;
                }

                foreach (var payload in spanPayloads)
                {
                    payloadCount++;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString((byte[])(Array)payload));
                    }
                }
            }
            Assert.IsTrue(payloadSpanAtZero);
            Assert.AreEqual(5, payloadCount);

            // --- Plain span enumeration ---
            int spanCount = 0;
            bool spanAtZero = false;
            Search.Spans.Spans plainSpans = MultiSpansWrapper.Wrap(searcher.TopReaderContext, nearQuery);
            while (plainSpans.Next())
            {
                spanCount++;
                if (plainSpans.Start() == 0)
                {
                    spanAtZero = true;
                }
            }
            Assert.AreEqual(4, spanCount);
            Assert.IsTrue(spanAtZero);

            // --- PayloadSpanUtil over the same query ---
            PayloadSpanUtil payloadUtil = new PayloadSpanUtil(searcher.TopReaderContext);
            var utilPayloads = payloadUtil.GetPayloadsForQuery(nearQuery);
            int utilPayloadCount = utilPayloads.Count;
            bool sawPosZeroPayload = false;
            foreach (var payload in utilPayloads)
            {
                string text = Encoding.UTF8.GetString(payload);
                if (text.Equals("pos: 0"))
                {
                    sawPosZeroPayload = true;
                }
            }
            Assert.AreEqual(5, utilPayloadCount);
            Assert.IsTrue(sawPosZeroPayload);

            indexWriter.Dispose();
            searcher.IndexReader.Dispose();
            directory.Dispose();
        }
Example #3
0
        /// <summary>
        /// Indexes a single document and retrieves, via <c>PayloadSpanUtil</c>, the payloads
        /// matching a <c>TermQuery</c> on the term "rr". Nothing is asserted; the payloads are
        /// written to the console only when <c>VERBOSE</c> is set.
        /// </summary>
        public virtual void TestPayloadSpanUtil()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
                Random(),
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer(this)).SetSimilarity(Similarity));

            Document document = new Document();
            document.Add(NewTextField(PayloadHelper.FIELD, "xx rr yy mm  pp", Field.Store.YES));
            indexWriter.AddDocument(document);

            // Grab the reader before the writer is disposed.
            IndexReader indexReader = indexWriter.Reader;
            indexWriter.Dispose();

            IndexSearcher indexSearcher = NewSearcher(indexReader);
            PayloadSpanUtil payloadUtil = new PayloadSpanUtil(indexSearcher.TopReaderContext);

            Term term = new Term(PayloadHelper.FIELD, "rr");
            var found = payloadUtil.GetPayloadsForQuery(new TermQuery(term));
            if (VERBOSE)
            {
                Console.WriteLine("Num payloads:" + found.Count);
                foreach (var payload in found)
                {
                    Console.WriteLine(Encoding.UTF8.GetString((byte[])(Array)payload));
                }
            }

            indexReader.Dispose();
            dir.Dispose();
        }
		/// <summary>
		/// Indexes a single document and asks <c>PayloadSpanUtil</c> for the payloads
		/// matching a <c>TermQuery</c> on the term "rr".
		/// The test makes no assertions; the payloads are only printed when <c>DEBUG</c> is set.
		/// </summary>
		public virtual void  TestPayloadSpanUtil()
		{
			RAMDirectory directory = new RAMDirectory();
			PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
			IndexWriter writer = new IndexWriter(directory, analyzer, true);
			writer.SetSimilarity(similarity);
			Document doc = new Document();
			doc.Add(new Field(PayloadHelper.FIELD, "xx rr yy mm  pp", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(directory);
			try
			{
				IndexReader reader = searcher.GetIndexReader();
				PayloadSpanUtil psu = new PayloadSpanUtil(reader);
				
				System.Collections.Generic.ICollection<byte[]> payloads = psu.GetPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
				if (DEBUG)
				{
					System.Console.Out.WriteLine("Num payloads:" + payloads.Count);
					foreach (byte[] bytes in payloads)
					{
						System.Console.Out.WriteLine(System.Text.Encoding.UTF8.GetString(bytes));
					}
				}
			}
			finally
			{
				// Release the searcher and the directory even if the lookup throws;
				// the original version never closed either of them.
				searcher.Close();
				directory.Close();
			}
		}
		/// <summary>
		/// Runs two passes over a freshly built single-document index analyzed by
		/// <c>TestPayloadAnalyzer</c>.
		/// Pass 0 checks the positions of the term "a", then the span and payload counts for a
		/// <c>SpanNearQuery</c> over "a" and "k", then the payloads returned by <c>PayloadSpanUtil</c>.
		/// Pass 1 calls <c>SetAllowMinus1Position()</c> on the writer and only checks that the first
		/// reported position of "a" is <c>Int32.MaxValue</c>.
		/// </summary>
		public virtual void  TestPayloadsPos0()
		{
			for (int x = 0; x < 2; x++)
			{
				Directory dir = new MockRAMDirectory();
				IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
				if (x == 1)
				{
					writer.SetAllowMinus1Position();
				}
				Document doc = new Document();
				// A StringReader feeds the text directly; no intermediate byte stream is needed.
				doc.Add(new Field("content", new System.IO.StringReader("a a b c d e a f g h i j a b k k")));
				writer.AddDocument(doc);
				
				IndexReader r = writer.GetReader();
				
				TermPositions tp = r.TermPositions(new Term("content", "a"));
				Assert.IsTrue(tp.Next());
				// "a" occurs 4 times
				Assert.AreEqual(4, tp.Freq());
				int expected = (x == 1) ? System.Int32.MaxValue : 0;
				Assert.AreEqual(expected, tp.NextPosition());
				if (x == 1)
				{
					// The second pass stops here. Close everything before moving on:
					// the original 'continue' skipped the cleanup at the bottom of the loop.
					writer.Close();
					r.Close();
					dir.Close();
					continue;
				}
				Assert.AreEqual(1, tp.NextPosition());
				Assert.AreEqual(3, tp.NextPosition());
				Assert.AreEqual(6, tp.NextPosition());
				
				// only one doc has "a"
				Assert.IsFalse(tp.Next());
				
				IndexSearcher is_Renamed = new IndexSearcher(r);
				
				SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
				SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
				SpanQuery[] sqs = new SpanQuery[]{stq1, stq2};
				SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
				
				// Payloads reachable through the spans.
				int count = 0;
				bool sawZero = false;
				Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.GetIndexReader());
				while (pspans.Next())
				{
					System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload();
					sawZero |= pspans.Start() == 0;
					foreach (byte[] payload in payloads)
					{
						count++;
					}
				}
				Assert.AreEqual(5, count);
				Assert.IsTrue(sawZero);
				
				// Plain span enumeration.
				Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.GetIndexReader());
				count = 0;
				sawZero = false;
				while (spans.Next())
				{
					count++;
					sawZero |= spans.Start() == 0;
				}
				Assert.AreEqual(4, count);
				Assert.IsTrue(sawZero);
				
				// PayloadSpanUtil over the same query.
				sawZero = false;
				PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.GetIndexReader());
				System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq);
				count = pls.Count;
				foreach (byte[] payload in pls)
				{
					System.String s = System.Text.Encoding.UTF8.GetString(payload);
					sawZero |= s.Equals("pos: 0");
				}
				Assert.AreEqual(5, count);
				Assert.IsTrue(sawZero);
				writer.Close();
				is_Renamed.GetIndexReader().Close();
				dir.Close();
			}
		}
        /// <summary>
        /// Builds a single-document index analyzed by <c>TestPayloadAnalyzer</c>, then checks
        /// (1) the frequency and positions reported for the term "a",
        /// (2) the number of payloads and spans produced by a <c>SpanNearQuery</c> over "a" and "k", and
        /// (3) the payloads returned by <c>PayloadSpanUtil</c> for that same query.
        /// </summary>
        public virtual void TestPayloadsPos0()
        {
            Directory directory = new MockRAMDirectory();
            IndexWriter indexWriter = new IndexWriter(directory, new TestPayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            // Stage the text in an in-memory stream, then rewind the stream so the
            // reader handed to the field starts at the beginning.
            System.IO.MemoryStream buffer = new System.IO.MemoryStream();
            System.IO.StreamWriter bufferWriter = new System.IO.StreamWriter(buffer);
            bufferWriter.Write("a a b c d e a f g h i j a b k k");
            bufferWriter.Flush();
            buffer.Position = 0;

            Document document = new Document();
            document.Add(new Field("content", new System.IO.StreamReader(buffer)));
            indexWriter.AddDocument(document);

            IndexReader indexReader = indexWriter.GetReader();

            // --- Term positions for "a" ---
            TermPositions positions = indexReader.TermPositions(new Term("content", "a"));
            Assert.IsTrue(positions.Next());

            // The term is expected four times, at these positions in this order.
            Assert.AreEqual(4, positions.Freq);
            foreach (int expectedPosition in new int[] { 0, 1, 3, 6 })
            {
                Assert.AreEqual(expectedPosition, positions.NextPosition());
            }

            // No second document contains "a".
            Assert.IsFalse(positions.Next());

            IndexSearcher searcher = new IndexSearcher(indexReader);

            SpanQuery[] clauses = new SpanQuery[]
            {
                new SpanTermQuery(new Term("content", "a")),
                new SpanTermQuery(new Term("content", "k"))
            };
            SpanNearQuery nearQuery = new SpanNearQuery(clauses, 30, false);

            // --- Payloads reachable through the spans ---
            int payloadCount = 0;
            bool payloadSpanAtZero = false;
            Lucene.Net.Search.Spans.Spans payloadSpans = nearQuery.GetSpans(searcher.IndexReader);
            while (payloadSpans.Next())
            {
                System.Collections.Generic.ICollection<byte[]> spanPayloads = payloadSpans.GetPayload();
                if (payloadSpans.Start() == 0)
                {
                    payloadSpanAtZero = true;
                }
                foreach (byte[] payload in spanPayloads)
                {
                    payloadCount++;
                }
            }
            Assert.AreEqual(5, payloadCount);
            Assert.IsTrue(payloadSpanAtZero);

            // --- Plain span enumeration ---
            int spanCount = 0;
            bool spanAtZero = false;
            Lucene.Net.Search.Spans.Spans plainSpans = nearQuery.GetSpans(searcher.IndexReader);
            while (plainSpans.Next())
            {
                spanCount++;
                if (plainSpans.Start() == 0)
                {
                    spanAtZero = true;
                }
            }
            Assert.AreEqual(4, spanCount);
            Assert.IsTrue(spanAtZero);

            // --- PayloadSpanUtil over the same query ---
            PayloadSpanUtil payloadUtil = new PayloadSpanUtil(searcher.IndexReader);
            System.Collections.Generic.ICollection<byte[]> utilPayloads = payloadUtil.GetPayloadsForQuery(nearQuery);
            int utilPayloadCount = utilPayloads.Count;
            bool sawPosZeroPayload = false;
            foreach (byte[] payload in utilPayloads)
            {
                System.String text = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(payload));
                if (text.Equals("pos: 0"))
                {
                    sawPosZeroPayload = true;
                }
            }
            Assert.AreEqual(5, utilPayloadCount);
            Assert.IsTrue(sawPosZeroPayload);

            indexWriter.Close();
            searcher.IndexReader.Close();
            directory.Close();
        }