예제 #1
0
        /// <summary>
        /// Checks the collections returned by <c>reader.GetFieldNames</c> for several
        /// <c>IndexReader.FieldOption</c> values against the field lists kept in <c>DocHelper</c>.
        /// </summary>
        public virtual void  TestGetFieldNameVariations()
        {
            // ALL: the size must match DocHelper.all and every name must be known (or empty).
            System.Collections.Generic.ICollection <string> result = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            Assert.IsTrue(result != null);
            Assert.IsTrue(result.Count == DocHelper.all.Count);
            foreach (string s in result)
            {
                Assert.IsTrue(DocHelper.nameValues.Contains(s) == true || s.Equals(""));
            }

            // INDEXED: the size must match DocHelper.indexed and every name must be listed there (or empty).
            result = reader.GetFieldNames(IndexReader.FieldOption.INDEXED);
            Assert.IsTrue(result != null);
            Assert.IsTrue(result.Count == DocHelper.indexed.Count);
            foreach (string s in result)
            {
                Assert.IsTrue(DocHelper.indexed.Contains(s) == true || s.Equals(""));
            }

            // UNINDEXED: only the count is checked.
            result = reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED);
            Assert.IsTrue(result != null);
            Assert.IsTrue(result.Count == DocHelper.unindexed.Count);

            // Get all indexed fields that are storing term vectors
            result = reader.GetFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
            Assert.IsTrue(result != null);
            Assert.IsTrue(result.Count == DocHelper.termvector.Count);

            // Indexed fields without term vectors: only the count is checked.
            result = reader.GetFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR);
            Assert.IsTrue(result != null);
            Assert.IsTrue(result.Count == DocHelper.notermvector.Count);
        }
예제 #2
0
        /// <summary> Test field norms.</summary>
        /// <param name="fieldNames">Names of the fields whose norms are read.</param>
        /// <param name="reader">Segment reader the norms are read from.</param>
        /// <returns>A status object holding the number of fields processed and,
        /// if an exception was caught, that exception in its <c>error</c> member.</returns>
        private Status.FieldNormStatus TestFieldNorms(System.Collections.Generic.ICollection <string> fieldNames, SegmentReader reader)
        {
            Status.FieldNormStatus status = new Status.FieldNormStatus();

            try
            {
                // Test Field Norms
                if (infoStream != null)
                {
                    infoStream.Write("    test: field norms.........");
                }

                // A single scratch buffer sized by MaxDoc() is reused for every field.
                byte[] b = new byte[reader.MaxDoc()];
                foreach (string fieldName in fieldNames)
                {
                    reader.Norms(fieldName, b, 0);
                    ++status.totFields;
                }

                Msg("OK [" + status.totFields + " fields]");
            }
            catch (System.Exception e)
            {
                // Any failure is recorded on the status object instead of being propagated.
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }
예제 #3
0
        /// <summary>
        /// Indexes one document into a RAM directory and asks <c>PayloadSpanUtil</c> for the
        /// payloads matching a term query on "rr"; the payloads are only printed when DEBUG is set.
        /// </summary>
        public virtual void  TestPayloadSpanUtil()
        {
            RAMDirectory    directory = new RAMDirectory();
            PayloadAnalyzer analyzer  = new PayloadAnalyzer(this);
            IndexWriter     writer    = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.SetSimilarity(similarity);
            Document doc = new Document();

            doc.Add(new Field(PayloadHelper.FIELD, "xx rr yy mm  pp", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Close();

            IndexSearcher searcher = new IndexSearcher(directory, true);

            IndexReader     reader = searcher.IndexReader;
            PayloadSpanUtil psu    = new PayloadSpanUtil(reader);

            System.Collections.Generic.ICollection <byte[]> payloads = psu.GetPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
            if (DEBUG)
            {
                System.Console.Out.WriteLine("Num payloads:" + payloads.Count);
            }

            // The payloads are still enumerated when DEBUG is off, as before; only the printing is conditional.
            foreach (byte[] bytes in payloads)
            {
                if (DEBUG)
                {
                    System.Console.Out.WriteLine(new System.String(System.Text.UTF8Encoding.UTF8.GetChars(bytes)));
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Walks <paramref name="spans"/> and asserts the number of spans seen and, for each span,
        /// payload availability, payload count, payload length and first payload byte.
        /// </summary>
        /// <param name="spans">Spans to enumerate; must not be null.</param>
        /// <param name="expectedNumSpans">Number of spans the enumeration must yield.</param>
        /// <param name="expectedNumPayloads">Payload count expected per span; 0 means no payload may be available.</param>
        /// <param name="expectedPayloadLength">Expected length of every payload array.</param>
        /// <param name="expectedFirstByte">Expected value of the first byte of every payload.</param>
        private void  CheckSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads, int expectedPayloadLength, int expectedFirstByte)
        {
            Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
            //each position match should have a span associated with it, since there is just one underlying term query, there should
            //only be one entry in the span
            int seen = 0;

            while (spans.Next() == true)
            {
                //if we expect payloads, then isPayloadAvailable should be true
                if (expectedNumPayloads > 0)
                {
                    Assert.IsTrue(spans.IsPayloadAvailable() == true, "isPayloadAvailable is not returning the correct value: " + spans.IsPayloadAvailable() + " and it should be: " + (expectedNumPayloads > 0));
                }
                else
                {
                    Assert.IsTrue(spans.IsPayloadAvailable() == false, "isPayloadAvailable should be false");
                }
                //See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
                if (spans.IsPayloadAvailable())
                {
                    System.Collections.Generic.ICollection <byte[]> payload = spans.GetPayload();
                    Assert.IsTrue(payload.Count == expectedNumPayloads, "payload Size: " + payload.Count + " is not: " + expectedNumPayloads);
                    foreach (byte[] thePayload in payload)
                    {
                        Assert.IsTrue(thePayload.Length == expectedPayloadLength, "payload[0] Size: " + thePayload.Length + " is not: " + expectedPayloadLength);
                        Assert.IsTrue(thePayload[0] == expectedFirstByte, thePayload[0] + " does not equal: " + expectedFirstByte);
                    }
                }
                seen++;
            }
            Assert.IsTrue(seen == expectedNumSpans, seen + " does not equal: " + expectedNumSpans);
        }
예제 #5
0
 /// <summary>Calls the single-name <c>DecRef</c> overload for every entry in <paramref name="files"/>.</summary>
 /// <param name="files">File names to pass to <c>DecRef</c>, in enumeration order.</param>
 internal void  DecRef(System.Collections.Generic.ICollection <string> files)
 {
     // foreach also disposes the enumerator, which the manual MoveNext loop did not.
     foreach (string file in files)
     {
         DecRef(file);
     }
 }
예제 #6
0
 /// <summary>Passes every element of <paramref name="collection"/> to <c>Add</c>.</summary>
 /// <param name="collection">Elements to add, in enumeration order.</param>
 /// <returns>Always <c>true</c>.</returns>
 public virtual bool AddAll(System.Collections.Generic.ICollection <E> collection)
 {
     // foreach also disposes the enumerator, which the manual MoveNext loop did not.
     foreach (E item in collection)
     {
         Add(item);
     }
     return true;
 }
예제 #7
0
 /// <summary>
 /// Registers every name in <paramref name="names"/> with <paramref name="fInfos"/> as an indexed
 /// field, forwarding the given term-vector, payload and omit-TF flags.
 /// </summary>
 /// <param name="reader">Reader queried via <c>HasNorms</c> for each field; the negated result is passed to <c>fInfos.Add</c>.</param>
 /// <param name="fInfos">Field-info collection that receives the fields.</param>
 /// <param name="names">Field names to add.</param>
 /// <param name="storeTermVectors">Forwarded unchanged to <c>fInfos.Add</c>.</param>
 /// <param name="storePositionWithTermVector">Forwarded unchanged to <c>fInfos.Add</c>.</param>
 /// <param name="storeOffsetWithTermVector">Forwarded unchanged to <c>fInfos.Add</c>.</param>
 /// <param name="storePayloads">Forwarded unchanged to <c>fInfos.Add</c>.</param>
 /// <param name="omitTFAndPositions">Forwarded unchanged to <c>fInfos.Add</c>.</param>
 private void AddIndexed(IndexReader reader, FieldInfos fInfos, System.Collections.Generic.ICollection <string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
 {
     // foreach also disposes the enumerator, which the manual MoveNext loop did not.
     foreach (string field in names)
     {
         fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads, omitTFAndPositions);
     }
 }
예제 #8
0
 /// <summary>Calls the single-name <c>DecRef</c> overload for every entry in <paramref name="files"/>.</summary>
 /// <param name="files">File names to pass to <c>DecRef</c>, in enumeration order.</param>
 internal void  DecRef(System.Collections.Generic.ICollection <string> files)
 {
     // The former Hashtable special case was removed: the parameter is a collection of strings,
     // so its enumerator yields strings, and casting them to DictionaryEntry could only throw.
     foreach (string file in files)
     {
         DecRef(file);
     }
 }
예제 #9
0
 /// <summary> Assumes the fields are not storing term vectors.
 /// </summary>
 /// <param name="names">The names of the fields
 /// </param>
 /// <param name="isIndexed">Whether the fields are indexed or not
 /// </param>
 /// <seealso cref="Add(string, bool)"/>
 public void Add(System.Collections.Generic.ICollection <string> names, bool isIndexed)
 {
     // NOTE(review): locks on the instance itself; kept as-is because other members
     // presumably synchronize on the same monitor - confirm before changing the lock object.
     lock (this)
     {
         foreach (string name in names)
         {
             Add(name, isIndexed);
         }
     }
 }
예제 #10
0
 /// <summary>Deletes the specified files, but only if they are new
 /// (have not yet been incref'd).
 /// </summary>
 /// <param name="files">Candidate file names; a name is deleted only when <c>refCounts</c> has no entry for it.</param>
 internal void DeleteNewFiles(System.Collections.Generic.ICollection <string> files)
 {
     foreach (string fileName in files)
     {
         // A file that already has a refCounts entry is left alone.
         if (refCounts.ContainsKey(fileName))
         {
             continue;
         }
         DeleteFile(fileName);
     }
 }
예제 #11
0
 /// <summary>Deletes the specified files, but only if they are new
 /// (have not yet been incref'd).
 /// </summary>
 /// <param name="files">Candidate file names; a name is deleted only when <c>refCounts</c> has no entry for it.</param>
 internal void DeleteNewFiles(System.Collections.Generic.ICollection <string> files)
 {
     foreach (string fileName in files)
     {
         // A file that already has a refCounts entry is left alone.
         if (refCounts.ContainsKey(fileName))
         {
             continue;
         }

         // Log the deletion only when an info stream is configured.
         if (infoStream != null)
         {
             Message("delete new file \"" + fileName + "\"");
         }
         DeleteFile(fileName);
     }
 }
예제 #12
0
        /// <summary>
        /// Indexes a single document, runs an ordered zero-slop span-near query for "a" followed by "k",
        /// and asserts that exactly two distinct payload strings ("a:Noise:10" and "k:Noise:11") are collected.
        /// </summary>
        public virtual void TestShrinkToAfterShortestMatch3()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new TestPayloadAnalyzer(this), IndexWriter.MaxFieldLength.LIMITED);
            Document     doc       = new Document();

            doc.Add(new Field("content", new System.IO.StreamReader(new System.IO.MemoryStream(System.Text.Encoding.ASCII.GetBytes("j k a l f k k p a t a k l k t a")))));
            writer.AddDocument(doc);
            writer.Close();

            IndexSearcher is_Renamed = new IndexSearcher(directory, true);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));

            SpanQuery[]   sqs   = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq   = new SpanNearQuery(sqs, 0, true);
            Spans         spans = snq.GetSpans(is_Renamed.IndexReader);

            TopDocs topDocs = is_Renamed.Search(snq, 1);

            // Distinct payload strings, stored as keys of the table (see the Contains checks below).
            System.Collections.Hashtable payloadSet = new System.Collections.Hashtable();
            for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
            {
                while (spans.Next())
                {
                    System.Collections.Generic.ICollection <byte[]> payloads = spans.GetPayload();

                    foreach (byte[] payload in payloads)
                    {
                        CollectionsHelper.AddIfNotContains(payloadSet, new System.String(System.Text.UTF8Encoding.UTF8.GetChars(payload)));
                    }
                }
            }
            Assert.AreEqual(2, payloadSet.Count);
            if (DEBUG)
            {
                // Enumerate the keys: enumerating the Hashtable itself yields DictionaryEntry
                // objects, so the dump would not show the plain payload strings.
                foreach (object match in payloadSet.Keys)
                {
                    System.Console.Out.WriteLine("match:" + match);
                }
            }
            Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
            Assert.IsTrue(payloadSet.Contains("k:Noise:11"));
        }
예제 #13
0
        /// <summary> Return a query that will return docs like the passed file.
        ///
        /// </summary>
        /// <param name="f">File whose contents are read (using the system default encoding) to build the query.
        /// </param>
        /// <returns> a query that will return docs like the passed file.
        /// </returns>
        public Query Like(System.IO.FileInfo f)
        {
            if (fieldNames == null)
            {
                // gather list of valid fields from lucene
                System.Collections.Generic.ICollection <string> fields = ir.GetFieldNames(IndexReader.FieldOption.INDEXED);
                fieldNames = new System.String[fields.Count];
                int index = 0;
                foreach (string field in fields)
                {
                    fieldNames[index++] = field;
                }
            }

            // NOTE(review): the reader is handed to the reader-based overload and is not closed here;
            // presumably that overload (or its caller) owns it - confirm.
            return Like(new System.IO.StreamReader(f.FullName, System.Text.Encoding.Default));
        }
예제 #14
0
        /// <summary> Return a query that will return docs like the passed lucene document ID.
        ///
        /// </summary>
        /// <param name="docNum">the documentID of the lucene doc to generate the "More Like This" query for.
        /// </param>
        /// <returns> a query that will return docs like the passed lucene document ID.
        /// </returns>
        public Query Like(int docNum)
        {
            if (fieldNames == null)
            {
                // gather list of valid fields from lucene
                System.Collections.Generic.ICollection <string> fields = ir.GetFieldNames(IndexReader.FieldOption.INDEXED);
                fieldNames = new System.String[fields.Count];
                int index = 0;
                foreach (string field in fields)
                {
                    fieldNames[index++] = field;
                }
            }

            return CreateQuery(RetrieveTerms(docNum));
        }
예제 #15
0
        /// <summary>
        /// Writes the files returned by <c>GetMergedFiles()</c> into a compound file named
        /// <paramref name="fileName"/> in <c>directory</c>.
        /// </summary>
        /// <param name="fileName">Name of the compound file to create.</param>
        /// <returns>The collection of file names that were added to the compound file.</returns>
        public /*internal*/ System.Collections.Generic.ICollection <string> CreateCompoundFile(System.String fileName)
        {
            System.Collections.Generic.ICollection <string> files = GetMergedFiles();
            CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);

            // Now merge all added files
            foreach (string file in files)
            {
                cfsWriter.AddFile(file);
            }

            // Perform the merge
            cfsWriter.Close();

            return files;
        }
예제 #16
0
        /// <summary>Add an IndexReader whose stored fields will not be returned.  This can
        /// accelerate search when stored fields are only needed from a subset of
        /// the IndexReaders.
        /// </summary>
        /// <param name="reader">Reader to add; its <see cref="IndexReader.MaxDoc()"/> and
        /// <c>NumDocs()</c> must match those of the first reader added.</param>
        /// <param name="ignoreStoredFields">When <c>true</c>, the reader is not added to the
        /// list of readers used for stored fields.</param>
        /// <exception cref="System.ArgumentException">if not all indexes contain the same number
        /// of documents, or not all indexes have the same value of <see cref="IndexReader.MaxDoc()"/></exception>
        /// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
        public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
        {
            EnsureOpen();

            // The first reader added defines the reference values every later reader is checked against.
            if (readers.Count == 0)
            {
                this.maxDoc       = reader.MaxDoc();
                this.numDocs      = reader.NumDocs();
                this.hasDeletions = reader.HasDeletions();
            }

            if (reader.MaxDoc() != maxDoc)
            {
                // check compatibility
                throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
            }
            if (reader.NumDocs() != numDocs)
            {
                throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
            }

            System.Collections.Generic.ICollection <string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            readerToFields[reader] = fields;
            foreach (string field in fields)
            {
                // update fieldToReader map; a field stays with the first reader that supplied it
                if (fieldToReader[field] == null)
                {
                    fieldToReader[field] = reader;
                }
            }

            if (!ignoreStoredFields)
            {
                storedFieldReaders.Add(reader);                 // add to storedFieldReaders
            }
            readers.Add(reader);

            if (incRefReaders)
            {
                reader.IncRef();
            }
            // Record the incRefReaders flag alongside this reader.
            decrefOnClose.Add(incRefReaders);
        }
 /// <summary>Calls <c>ReadFile</c> for every file name listed by the given commit point.</summary>
 /// <param name="dir">Directory passed to <c>ReadFile</c> for each file.</param>
 /// <param name="cp">Commit point whose <c>GetFileNames()</c> result is enumerated.</param>
 private void  CopyFiles(Directory dir, IndexCommit cp)
 {
     // While we hold the snapshot, and nomatter how long
     // we take to do the backup, the IndexWriter will
     // never delete the files in the snapshot:
     System.Collections.Generic.ICollection <string> files = cp.GetFileNames();
     foreach (string fileName in files)
     {
         // NOTE: in a real backup you would not use
         // readFile; you would need to use something else
         // that copies the file to a backup location.  This
         // could even be a spawned shell process (eg "tar",
         // "zip") that takes the list of files and builds a
         // backup.
         ReadFile(dir, fileName);
     }
 }
예제 #18
0
        /// <summary>
        /// Walks <paramref name="spans"/>, asserting the payload count of each span against
        /// <paramref name="numPayloads"/> and the total number of spans against <paramref name="numSpans"/>.
        /// </summary>
        /// <param name="spans">Spans to enumerate.</param>
        /// <param name="numSpans">Number of spans the enumeration must yield.</param>
        /// <param name="numPayloads">Expected payload count per span, indexed by the order in which spans are
        /// returned; may be empty when no span is expected to carry payloads.</param>
        private void  CheckSpans(Spans spans, int numSpans, int[] numPayloads)
        {
            int cnt = 0;

            while (spans.Next() == true)
            {
                if (DEBUG)
                {
                    System.Console.Out.WriteLine("\nSpans Dump --");
                }
                if (spans.IsPayloadAvailable())
                {
                    System.Collections.Generic.ICollection <byte[]> payload = spans.GetPayload();
                    if (DEBUG)
                    {
                        System.Console.Out.WriteLine("payloads for span:" + payload.Count);
                    }
                    foreach (byte[] bytes in payload)
                    {
                        if (DEBUG)
                        {
                            System.Console.Out.WriteLine("doc:" + spans.Doc() + " s:" + spans.Start() + " e:" + spans.End() + " " + new System.String(System.Text.UTF8Encoding.UTF8.GetChars(bytes)));
                        }
                    }

                    Assert.AreEqual(numPayloads[cnt], payload.Count);
                }
                else
                {
                    // Only consult numPayloads when it has entries. The failure message is built
                    // eagerly, so indexing an empty array inside the assert call would throw
                    // IndexOutOfRangeException before the assertion itself could run.
                    if (numPayloads.Length > 0)
                    {
                        Assert.IsFalse(numPayloads[cnt] > 0, "Expected spans:" + numPayloads[cnt] + " found: 0");
                    }
                }
                cnt++;
            }

            Assert.AreEqual(numSpans, cnt);
        }
예제 #19
0
        /// <summary>
        /// Merges the two given segments into a new segment named <paramref name="merged"/> in the
        /// directory of <paramref name="si1"/>.
        /// </summary>
        /// <param name="si1">First source segment; its directory receives the merged segment.</param>
        /// <param name="si2">Second source segment.</param>
        /// <param name="merged">Name of the merged segment.</param>
        /// <param name="useCompoundFile">When <c>true</c>, a "<c>merged</c>.cfs" compound file is created and
        /// the files returned by <c>CreateCompoundFile</c> are deleted afterwards.</param>
        /// <returns>A <c>SegmentInfo</c> for the merged segment whose document count is the sum of both inputs.</returns>
        private SegmentInfo Merge(SegmentInfo si1, SegmentInfo si2, System.String merged, bool useCompoundFile)
        {
            SegmentReader r1 = SegmentReader.Get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
            SegmentReader r2 = SegmentReader.Get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

            SegmentMerger merger = new SegmentMerger(si1.dir, merged);

            merger.Add(r1);
            merger.Add(r2);
            merger.Merge();
            merger.CloseReaders();

            if (useCompoundFile)
            {
                // Delete the files returned by CreateCompoundFile now that the compound file exists.
                System.Collections.Generic.ICollection <string> filesToDelete = merger.CreateCompoundFile(merged + ".cfs");
                foreach (string file in filesToDelete)
                {
                    si1.dir.DeleteFile(file);
                }
            }

            return new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, useCompoundFile, true);
        }
예제 #20
0
 /// <summary>Creates an iterator over <c>collection</c> by wrapping its enumerator.</summary>
 /// <returns>A new <c>EnumeratorWrapper</c> around a fresh enumerator of <c>collection</c>.</returns>
 public IIterator <T> Iterator()
 {
     return new EnumeratorWrapper <T>(collection.GetEnumerator());
 }
예제 #21
0
        /// <summary>
        /// Indexes a single document and checks term positions, span enumeration and payload
        /// retrieval for it. The loop runs twice: the second pass (x == 1) calls
        /// <c>SetAllowMinus1Position</c> on the writer and only checks the first reported position.
        /// </summary>
        public virtual void  TestPayloadsPos0()
        {
            for (int x = 0; x < 2; x++)
            {
                Directory   dir    = new MockRAMDirectory();
                IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                if (x == 1)
                {
                    writer.SetAllowMinus1Position();
                }
                Document doc = new Document();
                System.IO.MemoryStream ms = new System.IO.MemoryStream();
                System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
                sw.Write("a a b c d e a f g h i j a b k k");
                // flush to stream & reset its position so it can be read
                sw.Flush();
                ms.Position = 0;
                doc.Add(new Field("content", new System.IO.StreamReader(ms)));
                writer.AddDocument(doc);

                IndexReader r = writer.GetReader();

                TermPositions tp = r.TermPositions(new Term("content", "a"));
                Assert.IsTrue(tp.Next());
                // "a" occurs 4 times
                Assert.AreEqual(4, tp.Freq());
                int expected = (x == 1) ? System.Int32.MaxValue : 0;
                Assert.AreEqual(expected, tp.NextPosition());
                if (x == 1)
                {
                    // The second pass stops here. Release the writer, reader and directory
                    // first, in the same order as the end of the full pass; previously this
                    // early exit skipped the cleanup.
                    writer.Close();
                    r.Close();
                    dir.Close();
                    continue;
                }
                Assert.AreEqual(1, tp.NextPosition());
                Assert.AreEqual(3, tp.NextPosition());
                Assert.AreEqual(6, tp.NextPosition());

                // only one doc has "a"
                Assert.IsFalse(tp.Next());

                IndexSearcher is_Renamed = new IndexSearcher(r);

                SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
                SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
                SpanQuery[]   sqs  = new SpanQuery[] { stq1, stq2 };
                SpanNearQuery snq  = new SpanNearQuery(sqs, 30, false);

                // Pass 1: count every payload returned by the spans.
                int  count   = 0;
                bool sawZero = false;
                Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.GetIndexReader());
                while (pspans.Next())
                {
                    System.Collections.Generic.ICollection <byte[]> payloads = pspans.GetPayload();
                    sawZero |= pspans.Start() == 0;
                    foreach (byte[] payload in payloads)
                    {
                        count++;
                    }
                }
                Assert.AreEqual(5, count);
                Assert.IsTrue(sawZero);

                // Pass 2: count the spans themselves.
                Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.GetIndexReader());
                count   = 0;
                sawZero = false;
                while (spans.Next())
                {
                    count++;
                    sawZero |= spans.Start() == 0;
                }
                Assert.AreEqual(4, count);
                Assert.IsTrue(sawZero);

                // Pass 3: fetch the payloads through PayloadSpanUtil and look for the "pos: 0" payload.
                sawZero = false;
                PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.GetIndexReader());
                System.Collections.Generic.ICollection <byte[]> pls = psu.GetPayloadsForQuery(snq);
                count = pls.Count;
                foreach (byte[] payload in pls)
                {
                    System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(payload));
                    sawZero |= s.Equals("pos: 0");
                }
                Assert.AreEqual(5, count);
                Assert.IsTrue(sawZero);
                writer.Close();
                is_Renamed.GetIndexReader().Close();
                dir.Close();
            }
        }