Document() public method

Returns the stored fields of the nth Document in this index.

NOTE: for performance reasons, this method does not check whether the requested document is deleted, so asking for a deleted document may yield unspecified results. This check is usually unnecessary, but when it matters you can call IsDeleted(int) with the requested document ID to verify that the document has not been deleted.

public Document ( int n ) : Lucene.Net.Documents.Document

n	int	the number of the document whose stored fields to fetch
return	Lucene.Net.Documents.Document	the stored fields of document n
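
Putting the note above into practice, the sketch below guards each Document(n) call with IsDeleted(n) while scanning every document number up to MaxDoc, then reads the stored fields. This is a minimal illustration written against the Lucene.Net 3.x API that most of the examples on this page use (IsDeleted, MaxDoc, Close); the method name, the printing loop, and the choice of a read-only reader are assumptions for the sketch, not part of the method's contract.

        // Minimal sketch; assumes: using Lucene.Net.Documents; using Lucene.Net.Index;
        private static void PrintStoredFields(Lucene.Net.Store.Directory dir)
        {
            IndexReader reader = IndexReader.Open(dir, true); // true = read-only
            try
            {
                for (int n = 0; n < reader.MaxDoc; n++)
                {
                    // Document(n) does not check deletions, so skip deleted docs
                    // explicitly, as the note above recommends.
                    if (reader.IsDeleted(n))
                        continue;

                    Document doc = reader.Document(n);
                    foreach (IFieldable field in doc.GetFields())
                    {
                        System.Console.WriteLine(field.Name + " = " + field.StringValue);
                    }
                }
            }
            finally
            {
                reader.Close();
            }
        }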
Example #1
        public virtual void  TestCompressionTools()
        {
            IFieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
            IFieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);

            Document doc = new Document();

            doc.Add(binaryFldCompressed);
            doc.Add(stringFldCompressed);

            /* add the doc to a ram index */
            MockRAMDirectory dir    = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);

            writer.AddDocument(doc, null);
            writer.Close();

            /* open a reader and fetch the document */
            IndexReader reader        = IndexReader.Open((Directory)dir, false, null);
            Document    docFromReader = reader.Document(0, null);

            Assert.IsTrue(docFromReader != null);

            /* fetch the binary compressed field and compare its content with the original one */
            System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed", null))));
            Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));
            Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed", null)).Equals(binaryValCompressed));

            reader.Close();
            dir.Close();
        }
Example #2
        public override IComparable Generate(IndexReader reader, int doc)
        {
            var ravenDoc = reader.Document(doc);

            var payingTagField = ravenDoc.GetField("PayingTag_" + CustomTagId);
            var queriedPayingTag = payingTagField != null && Boolean.Parse(payingTagField.StringValue);
            var tagValue = Int32.Parse(ravenDoc.GetField("TagId").StringValue);
            var pointsValue = Int32.Parse(ravenDoc.GetField("Points").StringValue);

            CustomerDocumentOrderWithRandomEffect.OrderCategory cat;

            if (tagValue == CustomTagId && queriedPayingTag )
            {
                cat = CustomerDocumentOrderWithRandomEffect.OrderCategory.TagAndPaying;
            }
            else if (queriedPayingTag)
            {
                cat = CustomerDocumentOrderWithRandomEffect.OrderCategory.OnlyPaying;
            }
            else if (tagValue == CustomTagId )
            {
                cat = CustomerDocumentOrderWithRandomEffect.OrderCategory.OnlyTag;
            }
            else
            {
                cat = CustomerDocumentOrderWithRandomEffect.OrderCategory.NoneOfTheAbove;
            }

            return new CustomerDocumentOrderWithRandomEffect()
            {
                Category = cat,
                Points = pointsValue
            };
        }
Example #3
        private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader, IState state)
        {
            int docCount = 0;
            int maxDoc   = reader.MaxDoc;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc;)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(j))
                        {
                            j++;
                            break;
                        }
                    }while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs, state);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs, state);
                    docCount += numDocs;
                    checkAbort.Work(300 * numDocs, state);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(j, state);
                    fieldsWriter.AddDocument(doc, state);
                    docCount++;
                    checkAbort.Work(300, state);
                }
            }
            return(docCount);
        }
Example #4
        public virtual void  TestSkipToFirsttimeMiss()
        {
            DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);

            dq.Add(Tq("id", "d1"));
            dq.Add(Tq("dek", "DOES_NOT_EXIST"));

            QueryUtils.Check(dq, s);

            Weight dw     = dq.Weight(s, null);
            Scorer ds     = dw.Scorer(r, true, false, null);
            bool   skipOk = ds.Advance(3, null) != DocIdSetIterator.NO_MORE_DOCS;

            if (skipOk)
            {
                Assert.Fail("firsttime skipTo found a match? ... " + r.Document(ds.DocID(), null).Get("id", null));
            }
        }
Example #5
        public virtual void  DoTestDocument()
        {
            sis.Read(dir);
            IndexReader reader = OpenReader();

            Assert.IsTrue(reader != null);
            Document newDoc1 = reader.Document(0);

            Assert.IsTrue(newDoc1 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - DocHelper.unstored.Count);
            Document newDoc2 = reader.Document(1);

            Assert.IsTrue(newDoc2 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - DocHelper.unstored.Count);
            TermFreqVector vector = reader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);

            Assert.IsTrue(vector != null);
            TestSegmentReader.CheckNorms(reader);
        }
Example #6
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestIndexedBit()
        {
            Directory         dir        = NewDirectory();
            RandomIndexWriter w          = new RandomIndexWriter(Random(), dir, ClassEnvRule.similarity, ClassEnvRule.timeZone);
            Document          doc        = new Document();
            FieldType         onlyStored = new FieldType();

            onlyStored.IsStored = true;
            doc.Add(new Field("field", "value", onlyStored));
            doc.Add(new StringField("field2", "value", Field.Store.YES));
            w.AddDocument(doc);
            IndexReader r = w.Reader;

            w.Dispose();
            Assert.IsFalse(r.Document(0).GetField("field").FieldType.IsIndexed);
            Assert.IsTrue(r.Document(0).GetField("field2").FieldType.IsIndexed);
            r.Dispose();
            dir.Dispose();
        }
Example #7
        private void DoTestHits(ScoreDoc[] hits, int expectedCount, IndexReader reader)
        {
            int hitCount = hits.Length;

            Assert.AreEqual(expectedCount, hitCount, "wrong number of hits");
            for (int i = 0; i < hitCount; i++)
            {
                reader.Document(hits[i].Doc);
                reader.GetTermVectors(hits[i].Doc);
            }
        }
Example #8
        private void CheckContents(IndexReader ir, string indexname)
        {
            IBits liveDocs = MultiFields.GetLiveDocs(ir);

            for (int i = 0; i < ir.MaxDoc; i++)
            {
                if (liveDocs == null || liveDocs.Get(i))
                {
                    assertEquals(indexname, ir.Document(i).Get("indexname"));
                }
            }
        }
Example #9
        public virtual void Test()
        {
            Assert.IsTrue(Dir != null);
            Assert.IsTrue(FieldInfos != null);
            IndexReader reader = DirectoryReader.Open(Dir);
            Document    doc    = reader.Document(0);

            Assert.IsTrue(doc != null);
            Assert.IsTrue(doc.GetField(DocHelper.TEXT_FIELD_1_KEY) != null);

            Field field = (Field)doc.GetField(DocHelper.TEXT_FIELD_2_KEY);

            Assert.IsTrue(field != null);
            Assert.IsTrue(field.FieldType.StoreTermVectors);

            Assert.IsFalse(field.FieldType.OmitNorms);
            Assert.IsTrue(field.FieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

            field = (Field)doc.GetField(DocHelper.TEXT_FIELD_3_KEY);
            Assert.IsTrue(field != null);
            Assert.IsFalse(field.FieldType.StoreTermVectors);
            Assert.IsTrue(field.FieldType.OmitNorms);
            Assert.IsTrue(field.FieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

            field = (Field)doc.GetField(DocHelper.NO_TF_KEY);
            Assert.IsTrue(field != null);
            Assert.IsFalse(field.FieldType.StoreTermVectors);
            Assert.IsFalse(field.FieldType.OmitNorms);
            Assert.IsTrue(field.FieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_ONLY);

            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY);

            reader.Document(0, visitor);
            IList <IndexableField> fields = visitor.Document.Fields;

            Assert.AreEqual(1, fields.Count);
            Assert.AreEqual(DocHelper.TEXT_FIELD_3_KEY, fields[0].Name);
            reader.Dispose();
        }
Example #10
            internal virtual void  loadDoc(IndexReader ir)
            {
                // beware of deleted docs in the future
                Document doc = ir.Document(rand.Next(ir.MaxDoc), new AnonymousClassFieldSelector(this), null);

                var fields = doc.GetFields();

                for (int i = 0; i < fields.Count; i++)
                {
                    IFieldable f = fields[i];
                    Enclosing_Instance.ValidateField(f);
                }
            }
Example #11
        public virtual void TestTermVectorCorruption()
        {
            // LUCENENET specific - log the current locking strategy used and HResult values
            // for assistance troubleshooting problems on Linux/macOS
            LogNativeFSFactoryDebugInfo();

            Directory dir = NewDirectory();

            for (int iter = 0; iter < 2; iter++)
            {
                IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));

                Document  document   = new Document();
                FieldType customType = new FieldType();
                customType.IsStored = true;

                Field storedField = NewField("stored", "stored", customType);
                document.Add(storedField);
                writer.AddDocument(document);
                writer.AddDocument(document);

                document = new Document();
                document.Add(storedField);
                FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
                customType2.StoreTermVectors         = true;
                customType2.StoreTermVectorPositions = true;
                customType2.StoreTermVectorOffsets   = true;
                Field termVectorField = NewField("termVector", "termVector", customType2);

                document.Add(termVectorField);
                writer.AddDocument(document);
                writer.ForceMerge(1);
                writer.Dispose();

                IndexReader reader = DirectoryReader.Open(dir);
                for (int i = 0; i < reader.NumDocs; i++)
                {
                    reader.Document(i);
                    reader.GetTermVectors(i);
                }
                reader.Dispose();

                writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));

                Directory[] indexDirs = new Directory[] { new MockDirectoryWrapper(Random, new RAMDirectory(dir, NewIOContext(Random))) };
                writer.AddIndexes(indexDirs);
                writer.ForceMerge(1);
                writer.Dispose();
            }
            dir.Dispose();
        }
Example #12
        // append fields from storedFieldReaders
        public override Document Document(int n)
        {
            Document result = new Document();

            for (int i = 0; i < storedFieldReaders.Count; i++)
            {
                IndexReader reader = (IndexReader)storedFieldReaders[i];
                foreach (Field field in reader.Document(n).Fields())
                {
                    result.Add(field);
                }
            }
            return(result);
        }
Example #13
        public virtual void TestTermVectorCorruption()
        {
            Directory dir = NewDirectory();

            for (int iter = 0; iter < 2; iter++)
            {
                IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));

                Document  document   = new Document();
                FieldType customType = new FieldType();
                customType.Stored = true;

                Field storedField = NewField("stored", "stored", customType);
                document.Add(storedField);
                writer.AddDocument(document);
                writer.AddDocument(document);

                document = new Document();
                document.Add(storedField);
                FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
                customType2.StoreTermVectors         = true;
                customType2.StoreTermVectorPositions = true;
                customType2.StoreTermVectorOffsets   = true;
                Field termVectorField = NewField("termVector", "termVector", customType2);

                document.Add(termVectorField);
                writer.AddDocument(document);
                writer.ForceMerge(1);
                writer.Dispose();

                IndexReader reader = DirectoryReader.Open(dir);
                for (int i = 0; i < reader.NumDocs; i++)
                {
                    reader.Document(i);
                    reader.GetTermVectors(i);
                }
                reader.Dispose();

                writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));

                Directory[] indexDirs = new Directory[] { new MockDirectoryWrapper(Random(), new RAMDirectory(dir, NewIOContext(Random()))) };
                writer.AddIndexes(indexDirs);
                writer.ForceMerge(1);
                writer.Dispose();
            }
            dir.Dispose();
        }
Example #14
        static void MakeLatestVersionLookupPerReader(IDictionary<string, Tuple<NuGetVersion, string, int>> lookup, IndexReader reader, string readerName, bool includePrerelease, bool includeUnlisted)
        {
            for (int n = 0; n < reader.MaxDoc; n++)
            {
                if (reader.IsDeleted(n))
                {
                    continue;
                }

                Document document = reader.Document(n);

                NuGetVersion version = GetVersion(document);

                if (version == null)
                {
                    continue;
                }

                bool isListed = GetListed(document);

                if (isListed || includeUnlisted)
                {
                    if (!version.IsPrerelease || includePrerelease)
                    {
                        string id = GetId(document);

                        if (id == null)
                        {
                            continue;
                        }

                        Tuple<NuGetVersion, string, int> existingVersion;
                        if (lookup.TryGetValue(id, out existingVersion))
                        {
                            if (version > existingVersion.Item1)
                            {
                                lookup[id] = Tuple.Create(version, readerName, n);
                            }
                        }
                        else
                        {
                            lookup.Add(id, Tuple.Create(version, readerName, n));
                        }
                    }
                }
            }
        }
Example #15
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
            // in  merge mode, we use this FieldSelector
            FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example #16
        public virtual void  TestBinaryFieldInIndex()
        {
            IFieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES);
            IFieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);

            // binary fields with store off are not allowed
            Assert.Throws <ArgumentException>(
                () => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO));

            Document doc = new Document();

            doc.Add(binaryFldStored);

            doc.Add(stringFldStored);

            /* test for field count */
            Assert.AreEqual(2, doc.fields_ForNUnit.Count);

            /* add the doc to a ram index */
            MockRAMDirectory dir    = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);

            writer.AddDocument(doc, null);
            writer.Close();

            /* open a reader and fetch the document */
            IndexReader reader        = IndexReader.Open((Directory)dir, false, null);
            Document    docFromReader = reader.Document(0, null);

            Assert.IsTrue(docFromReader != null);

            /* fetch the binary stored field and compare its content with the original one */
            System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored", null)));
            Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored));

            /* fetch the string field and compare its content with the original one */
            System.String stringFldStoredTest = docFromReader.Get("stringStored", null);
            Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored));

            /* delete the document from index */
            reader.DeleteDocument(0, null);
            Assert.AreEqual(0, reader.NumDocs());

            reader.Close();
            dir.Close();
        }
Example #17
        public virtual void TestCloseInnerReader()
        {
            Directory       dir1 = GetDir1(Random);
            CompositeReader ir1  = DirectoryReader.Open(dir1);

            Assert.AreEqual(1, ir1.GetSequentialSubReaders()[0].RefCount);

            // with overlapping
            ParallelCompositeReader pr = new ParallelCompositeReader(true, new CompositeReader[] { ir1 }, new CompositeReader[] { ir1 });

            IndexReader psub = pr.GetSequentialSubReaders()[0];

            Assert.AreEqual(1, psub.RefCount);

            ir1.Dispose();

            Assert.AreEqual(1, psub.RefCount, "refCount of synthetic subreader should be unchanged");
            try
            {
                psub.Document(0);
                Assert.Fail("Subreader should be already closed because inner reader was closed!");
            }
#pragma warning disable 168
            catch (ObjectDisposedException e)
#pragma warning restore 168
            {
                // pass
            }

            try
            {
                pr.Document(0);
                Assert.Fail("ParallelCompositeReader should be already closed because inner reader was closed!");
            }
#pragma warning disable 168
            catch (ObjectDisposedException e)
#pragma warning restore 168
            {
                // pass
            }

            // noop:
            pr.Dispose();
            Assert.AreEqual(0, psub.RefCount);
            dir1.Dispose();
        }
Example #18
        private static void removeAllDuplicateAndDeletedFiles(IndexableFileInfo[] fileInfos, string LuceneIndexDir, IndexCreationMode indexCreationMode)
        {
            if (indexCreationMode != IndexCreationMode.AppendToExistingIndex)
            {
                return;
            }

            IndexReader reader = IndexReader.Open(LuceneIndexDir);

            try
            {
                int numDocs = reader.NumDocs();
                for (int i = 0; i < numDocs; i++)
                {
                    Document docToCheck         = reader.Document(i);
                    bool     removeDocFromIndex = true;
                    string   filenameField      = docToCheck.GetField("filename").StringValue();
                    string   lastModified       = (docToCheck.GetField("LastModified").StringValue());

                    foreach (IndexableFileInfo fi in fileInfos)
                    {
                        if (String.Compare(fi.Filename, filenameField, true) == 0 && DateTools.DateToString(fi.LastModified, DateTools.Resolution.SECOND) == lastModified)
                        {
                            removeDocFromIndex = false;
                            break;
                        }
                    } // foreach

                    if (removeDocFromIndex)
                    {
                        reader.DeleteDocument(i);
                        if (!reader.HasDeletions())
                        {
                            throw new Exception("error: deletion failed!!");
                        }
                    }
                } // for each lucene doc
            }
            finally
            {
                reader.Close();
            }
            LuceneIndexer indexer = new LuceneIndexer(LuceneIndexDir, indexCreationMode); // open up the index again

            indexer.CloseIndexWriter(OptimizeMode.DoOptimization);                        // just to optimize the index (which removes deleted items).
        }
Example #19
        void ProcessReader(IndexReader indexReader, string readerName, ref int perIndexDocumentNumber)
        {
            for (int perReaderDocumentNumber = 0; perReaderDocumentNumber < indexReader.MaxDoc; perReaderDocumentNumber++)
            {
                if (indexReader.IsDeleted(perReaderDocumentNumber))
                {
                    ProcessDocument(indexReader, readerName, perReaderDocumentNumber, perIndexDocumentNumber, null, isDelete: true);
                }
                else
                {
                    Document document = indexReader.Document(perReaderDocumentNumber);
                    ProcessDocument(indexReader, readerName, perReaderDocumentNumber, perIndexDocumentNumber, document, isDelete: false);
                }

                perIndexDocumentNumber++;
            }
        }
Example #20
            public DeleteThreads(TestIndexWriterReader enclosingInstance, IndexWriter mainWriter)
            {
                InitBlock(enclosingInstance);
                this.mainWriter = mainWriter;
                IndexReader reader = mainWriter.GetReader();
                int         maxDoc = reader.MaxDoc();

                random = Enclosing_Instance.NewRandom();
                int iter = random.Next(maxDoc);

                for (int x = 0; x < iter; x++)
                {
                    int           doc = random.Next(iter);
                    System.String id  = reader.Document(doc).Get("id");
                    toDeleteTerms.Add(new Term("id", id));
                }
            }
Example #21
        public virtual void  DoTest(int[] docs)
        {
            if (dataset.Count == 0)
            {
                for (int i = 0; i < data.Length; i++)
                {
                    dataset.Add(data[i], data[i]);
                }
            }

            Directory   dir    = MakeIndex();
            IndexReader reader = IndexReader.Open(dir);

            for (int i = 0; i < docs.Length; i++)
            {
                Document d = reader.Document(docs[i], SELECTOR);
                d.Get(MAGIC_FIELD);

                System.Collections.IList fields = d.GetFields();
                for (System.Collections.IEnumerator fi = fields.GetEnumerator(); fi.MoveNext();)
                {
                    Fieldable f = null;
                    try
                    {
                        f = (Fieldable)fi.Current;
                        System.String fname = f.Name();
                        System.String fval  = f.StringValue();
                        Assert.IsNotNull(docs[i] + " FIELD: " + fname, fval);
                        System.String[] vals = fval.Split('#');
                        if (!dataset.Contains(vals[0]) || !dataset.Contains(vals[1]))
                        {
                            Assert.Fail("FIELD:" + fname + ",VAL:" + fval);
                        }
                    }
                    catch (System.Exception e)
                    {
                        throw new Exception(docs[i] + " WTF: " + f.Name(), e);
                    }
                }
            }
            reader.Close();
        }
Example #22
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example #23
        private bool VerifyIndex(Directory directory, int startAt)
        {
            bool        fail   = false;
            IndexReader reader = DirectoryReader.Open(directory);

            int max = reader.MaxDoc;

            for (int i = 0; i < max; i++)
            {
                Document temp = reader.Document(i);
                //System.out.println("doc "+i+"="+temp.GetField("count").StringValue);
                //compare the index doc number to the value that it should be
                if (!temp.GetField("count").GetStringValue().Equals((i + startAt) + "", StringComparison.Ordinal))
                {
                    fail = true;
                    Console.WriteLine("Document " + (i + startAt) + " is returning document " + temp.GetField("count").GetStringValue());
                }
            }
            reader.Dispose();
            return(fail);
        }
Example #24
        private void  CheckExpecteds(System.Collections.BitArray expecteds)
        {
            IndexReader r = IndexReader.Open(dir);

            //Perhaps not the most efficient approach but meets our needs here.
            for (int i = 0; i < r.MaxDoc(); i++)
            {
                if (!r.IsDeleted(i))
                {
                    System.String sval = r.Document(i).Get(FIELD_RECORD_ID);
                    if (sval != null)
                    {
                        int val = System.Int32.Parse(sval);
                        Assert.IsTrue(expecteds.Get(val), "Did not expect document #" + val);
                        expecteds.Set(val, false);
                    }
                }
            }
            r.Close();
            Assert.AreEqual(0, SupportClass.BitSetSupport.Cardinality(expecteds), "Should have 0 docs remaining ");
        }
Example #25
        public override IEnumerable<Row> Execute(IEnumerable<Row> rows)
        {
            if (_indexDirectory == null)
                yield break;

            try {
                _reader = IndexReader.Open(_indexDirectory, true);
            }
            catch (Exception) {
                Warn("Failed to open lucene index in {0}.", _indexDirectory.Directory.FullName);
                yield break;
            }

            var docCount = _reader.NumDocs();
            Info("Found {0} documents in lucene index.", docCount);

            for (var i = 0; i < docCount; i++) {

                if (_reader.IsDeleted(i))
                    continue;

                var doc = _reader.Document(i);
                var row = new Row();
                foreach (var field in doc.GetFields().Where(field => field.IsStored)) {
                    switch (field.Name) {
                        case "dropped":
                            row[field.Name] = Convert.ToBoolean(field.StringValue);
                            break;
                        default:
                            row[field.Name] = field.StringValue;
                            break;
                    }

                }
                yield return row;

            }
        }
Example #26
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        // LUCENE-1727: make sure doc fields are stored in order
        public virtual void TestStoredFieldsOrder()
        {
            Directory   d   = NewDirectory();
            IndexWriter w   = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document    doc = new Document();

            FieldType customType = new FieldType();

            customType.IsStored = true;
            doc.Add(NewField("zzz", "a b c", customType));
            doc.Add(NewField("aaa", "a b c", customType));
            doc.Add(NewField("zzz", "1 2 3", customType));
            w.AddDocument(doc);
            IndexReader r    = w.GetReader();
            Document    doc2 = r.Document(0);
            IEnumerator <IIndexableField> it = doc2.Fields.GetEnumerator();

            Assert.IsTrue(it.MoveNext());
            Field f = (Field)it.Current;

            Assert.AreEqual(f.Name, "zzz");
            Assert.AreEqual(f.GetStringValue(), "a b c");

            Assert.IsTrue(it.MoveNext());
            f = (Field)it.Current;
            Assert.AreEqual(f.Name, "aaa");
            Assert.AreEqual(f.GetStringValue(), "a b c");

            Assert.IsTrue(it.MoveNext());
            f = (Field)it.Current;
            Assert.AreEqual(f.Name, "zzz");
            Assert.AreEqual(f.GetStringValue(), "1 2 3");
            Assert.IsFalse(it.MoveNext());
            r.Dispose();
            w.Dispose();
            d.Dispose();
        }
Example #27
        private void CheckExpecteds(BitArray expecteds)
        {
            IndexReader r = DirectoryReader.Open(Dir);

            //Perhaps not the most efficient approach but meets our
            //needs here.
            IBits liveDocs = MultiFields.GetLiveDocs(r);

            for (int i = 0; i < r.MaxDoc; i++)
            {
                if (liveDocs == null || liveDocs.Get(i))
                {
                    string sval = r.Document(i).Get(FIELD_RECORD_ID);
                    if (sval != null)
                    {
                        int val = Convert.ToInt32(sval);
                        Assert.IsTrue(expecteds.SafeGet(val), "Did not expect document #" + val);
                        expecteds.SafeSet(val, false);
                    }
                }
            }
            r.Dispose();
            Assert.AreEqual(0, expecteds.Cardinality(), "Should have 0 docs remaining ");
        }
Example #28
		public override IComparable Generate(IndexReader reader, int doc)
		{
			var document = reader.Document(doc);
			var name = document.GetField("Name").StringValue;
			return name.Substring(name.Length - len, len);
		}
Example #29
        public virtual void  TestUpdateDocument()
        {
            bool optimize = true;

            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            // create the index
            CreateIndexNoClose(!optimize, "index1", writer);

            // writer.flush(false, true, true);

            // get a reader
            IndexReader r1 = writer.GetReader();

            Assert.IsTrue(r1.IsCurrent());

            System.String id10 = r1.Document(10).GetField("id").StringValue();

            Document newDoc = r1.Document(10);

            newDoc.RemoveField("id");
            newDoc.Add(new Field("id", System.Convert.ToString(8000), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.UpdateDocument(new Term("id", id10), newDoc);
            Assert.IsFalse(r1.IsCurrent());

            IndexReader r2 = writer.GetReader();

            Assert.IsTrue(r2.IsCurrent());
            Assert.AreEqual(0, Count(new Term("id", id10), r2));
            Assert.AreEqual(1, Count(new Term("id", System.Convert.ToString(8000)), r2));

            r1.Close();
            writer.Close();
            Assert.IsTrue(r2.IsCurrent());

            IndexReader r3 = IndexReader.Open(dir1);

            Assert.IsTrue(r3.IsCurrent());
            Assert.IsTrue(r2.IsCurrent());
            Assert.AreEqual(0, Count(new Term("id", id10), r3));
            Assert.AreEqual(1, Count(new Term("id", System.Convert.ToString(8000)), r3));

            writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();

            doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            Assert.IsTrue(r2.IsCurrent());
            Assert.IsTrue(r3.IsCurrent());

            writer.Close();

            Assert.IsFalse(r2.IsCurrent());
            Assert.IsTrue(!r3.IsCurrent());

            r2.Close();
            r3.Close();

            dir1.Close();
        }
Example #30
 private void DoTestHits(ScoreDoc[] hits, int expectedCount, IndexReader reader)
 {
     int hitCount = hits.Length;
     Assert.AreEqual(expectedCount, hitCount, "wrong number of hits");
     for (int i = 0; i < hitCount; i++)
     {
         reader.Document(hits[i].Doc);
         reader.GetTermVectors(hits[i].Doc);
     }
 }
Example #31
        /// <summary>
        /// Executes the search query build by the parser or another source.
        /// </summary>
        /// <param name="query">Query to execute on the index</param>
        /// <param name="indexReader">The reader used to read from the index</param>
        /// <returns>Returns the found search results</returns>
        private IEnumerable<SearchHit> PerformSearchQuery(Query query, IndexReader indexReader)
        {
            var foundItems = new List<SearchHit>();

            IndexSearcher searcher = new IndexSearcher(indexReader);
            var results = searcher.Search(query, Int16.MaxValue);

            foreach (var scoreDoc in results.ScoreDocs)
            {
                var document = indexReader.Document(scoreDoc.Doc);

                // Collect the search results from the index.
                foundItems.Add(new SearchHit()
                {
                    Id = Int32.Parse(document.Get("id")),
                    Name = document.Get("recipe-name"),
                    Description = document.Get("recipe-description")
                });
            }

            return foundItems;
        }
Example #32
 public override Document Document(int n, FieldSelector fieldSelector)
 {
     EnsureOpen();
     return(in_Renamed.Document(n, fieldSelector));
 }
Example #33
 //convenience method
 public static TokenStream GetTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
 {
     Document doc = reader.Document(docId);
     return GetTokenStream(doc, field, analyzer);
 }
Example #34
		// There are two ways we can determine the max_results
		// most recent items:  
		//
		// One is to instantiate Lucene documents for each of
		// the document IDs in primary_matches.  This is a
		// fairly expensive operation.
		//
		// The other is to walk through the list of all
		// document IDs in descending time order.  This is
		// a less expensive operation, but adds up over time
		// on large data sets.
		//
		// We can walk about 2.5 docs for every Document we
		// instantiate.  So what we'll do, if we have more
		// matches than available hits, is walk (m * 1.25)
		// docs to see if we can fill out the top 100 hits.
		// If not, we'll fall back to creating documents
		// for all of them.

		private static ArrayList ScanRecentDocs (IndexReader	    primary_reader,
						    IndexReader		    secondary_reader,
						    BetterBitArray	    primary_matches,
						    Dictionary<int, Hit>    hits_by_id,
						    int			    max_results,
						    ref int		    total_number_of_matches,
						    HitFilter		    hit_filter,
						    string		    index_name)
		{
			Stopwatch a = new Stopwatch ();
			a.Start ();

			TermDocs docs = primary_reader.TermDocs ();
			TermEnum enumerator = primary_reader.Terms (new Term ("InvertedTimestamp", String.Empty));
			ArrayList results = new ArrayList (max_results);
			int docs_found = 0;
			int docs_walked = 0;
			int hit_filter_removed = 0;
			int max_docs = (int) (primary_matches.TrueCount * 1.25);

			Term term;
			TermDocs secondary_term_docs = null;
			if (secondary_reader != null)
				secondary_term_docs = secondary_reader.TermDocs ();

			do {
				term = enumerator.Term ();
			
				if (term.Field () != "InvertedTimestamp")
					break;

				docs.Seek (enumerator);

				while (docs.Next ()
				       && docs_found < max_results
				       && docs_walked < max_docs) {
					int doc_id = docs.Doc ();

					if (primary_matches.Get (doc_id)) {
						Document doc = primary_reader.Document (doc_id);
						Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs);

						// If we have a HitFilter, apply it.
						if (hit_filter != null && ! hit_filter (hit)) {
							if (Debug)
								Log.Debug ("Filtered out {0}", hit.Uri);
							hit_filter_removed ++;
							continue;
						}
						hits_by_id [doc_id] = hit;
						// Add the result, last modified first
						results.Add (hit);
						docs_found++;
					}
			
					docs_walked++;
				}
			} while (enumerator.Next ()
				 && docs_found < max_results
				 && docs_walked < max_docs);

			docs.Close ();
			if (secondary_term_docs != null)
				secondary_term_docs.Close ();

			// If we've found all the docs we can return in a subset!
			// Fantastic, we've probably short circuited a slow search.
			if (docs_found != max_results) {
				// Otherwise bad luck! Not all docs found
				// Start afresh - this time traversing all results
				results = null;
			} else {
				// Adjust total_number_of_matches. We need to do this to avoid scenarios like the following:
				// max_hits = 100. Matched 100 results. But hit filter removed 30. So 70 results will be returned.
				// We want to avoid saying "Showing top 70 of 100". Note that since we are not passing
				// every document in the index through the hit_filter, when we say "Showing top 100 of 1234", the
				// 1234 could actually be much less. But since max_hits was 100, that will not mislead the user.
				total_number_of_matches -= hit_filter_removed;
			}

			a.Stop ();
			if (Debug) {
				Log.Debug (">>> {0}: Walked {1} items, populated an enum with {2} items in {3}", index_name, docs_walked, docs_found, a);
				
				if (docs_found == max_results)
					Log.Debug (">>> {0}: Successfully short circuited timestamp ordering!", index_name);
			}

			return results;
		}
Example #35
        private void CompareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults)
        {
            assertEquals(results.TotalHits, joinResults.TotalHits);
            assertEquals(results.ScoreDocs.Length, joinResults.ScoreDocs.Length);
            for (int hitCount = 0; hitCount < results.ScoreDocs.Length; hitCount++)
            {
                ScoreDoc hit = results.ScoreDocs[hitCount];
                ScoreDoc joinHit = joinResults.ScoreDocs[hitCount];
                Document doc1 = r.Document(hit.Doc);
                Document doc2 = joinR.Document(joinHit.Doc);
                assertEquals("hit " + hitCount + " differs", doc1.Get("childID"), doc2.Get("childID"));
                // don't compare scores -- they are expected to differ


                assertTrue(hit is FieldDoc);
                assertTrue(joinHit is FieldDoc);

                FieldDoc hit0 = (FieldDoc)hit;
                FieldDoc joinHit0 = (FieldDoc)joinHit;
                assertArrayEquals(hit0.Fields, joinHit0.Fields);
            }
        }
Example #36
        private int CheckDbAndIndex(DbDataReader dbreader, IndexReader ixreader, List<Difference> result)
        {
            var versionId = dbreader.GetInt32(0);
            var dbNodeTimestamp = dbreader.GetInt64(1);
            var dbVersionTimestamp = dbreader.GetInt64(2);

            var termDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.VersionId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(versionId)));
            Lucene.Net.Documents.Document doc = null;
            int docid = -1;
            if (termDocs.Next())
            {
                docid = termDocs.Doc();
                doc = ixreader.Document(docid);
                var indexNodeTimestamp = ParseLong(doc.Get(LucObject.FieldName.NodeTimestamp));
                var indexVersionTimestamp = ParseLong(doc.Get(LucObject.FieldName.VersionTimestamp));
                var nodeId = ParseInt(doc.Get(LucObject.FieldName.NodeId));
                var version = doc.Get(LucObject.FieldName.Version);
                var p = doc.Get(LucObject.FieldName.Path);
                if (termDocs.Next())
                {
                    result.Add(new Difference(IndexDifferenceKind.MoreDocument)
                    {
                        DocId = docid,
                        NodeId = nodeId,
                        VersionId = versionId,
                        Version = version,
                        Path = p,
                        DbNodeTimestamp = dbNodeTimestamp,
                        DbVersionTimestamp = dbVersionTimestamp,
                        IxNodeTimestamp = indexNodeTimestamp,
                        IxVersionTimestamp = indexVersionTimestamp,
                    });
                }
                if (dbVersionTimestamp != indexVersionTimestamp)
                {
                    result.Add(new Difference(IndexDifferenceKind.DifferentVersionTimestamp)
                    {
                        DocId = docid,
                        VersionId = versionId,
                        DbNodeTimestamp = dbNodeTimestamp,
                        DbVersionTimestamp = dbVersionTimestamp,
                        IxNodeTimestamp = indexNodeTimestamp,
                        IxVersionTimestamp = indexVersionTimestamp,
                        NodeId = nodeId,
                        Version = version,
                        Path = p
                    });
                }
                if (dbNodeTimestamp != indexNodeTimestamp)
                {
                    var ok = false;
                    var isLastDraft = doc.Get(LucObject.FieldName.IsLastDraft);
                    if (isLastDraft != BooleanIndexHandler.YES)
                    {
                        var latestDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.NodeId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(nodeId)));
                        Lucene.Net.Documents.Document latestDoc = null;
                        while (latestDocs.Next())
                        {
                            var latestdocid = latestDocs.Doc();
                            var d = ixreader.Document(latestdocid);
                            if (d.Get(LucObject.FieldName.IsLastDraft) != BooleanIndexHandler.YES)
                                continue;
                            latestDoc = d;
                            break;
                        }
                        var latestPath = latestDoc.Get(LucObject.FieldName.Path);
                        if (latestPath == p)
                            ok = true;
                    }
                    if (!ok)
                    {
                        result.Add(new Difference(IndexDifferenceKind.DifferentNodeTimestamp)
                        {
                            DocId = docid,
                            VersionId = versionId,
                            DbNodeTimestamp = dbNodeTimestamp,
                            DbVersionTimestamp = dbVersionTimestamp,
                            IxNodeTimestamp = indexNodeTimestamp,
                            IxVersionTimestamp = indexVersionTimestamp,
                            NodeId = nodeId,
                            Version = version,
                            Path = p
                        });
                    }
                }
            }
            else
            {
                result.Add(new Difference(IndexDifferenceKind.NotInIndex)
                {
                    DocId = docid,
                    VersionId = versionId,
                    DbNodeTimestamp = dbNodeTimestamp,
                    DbVersionTimestamp = dbVersionTimestamp,
                });
            }
            return docid;
        }
Example #37
		private static Hit CreateHit ( Document primary_doc,
					IndexReader secondary_reader,
					TermDocs term_docs,
					FieldSelector fields)
		{
			Hit hit = DocumentToHit (primary_doc);

			if (secondary_reader == null)
				return hit;

			// Get the stringified version of the URI
			// exactly as it comes out of the index.
			Term term = new Term ("Uri", primary_doc.Get ("Uri"));
			term_docs.Seek (term);

			// Move to the first (and only) matching term doc
			term_docs.Next ();
			Document secondary_doc =
				(fields == null) ?
				secondary_reader.Document (term_docs.Doc ()) :
				secondary_reader.Document (term_docs.Doc (), fields);

			// If we are using the secondary index, now we need to
			// merge the properties from the secondary index
			AddPropertiesToHit (hit, secondary_doc, false);

			return hit;
		}
Example #38
 private void CheckContents(IndexReader ir, string indexname)
 {
     Bits liveDocs = MultiFields.GetLiveDocs(ir);
     for (int i = 0; i < ir.MaxDoc; i++)
     {
         if (liveDocs == null || liveDocs.Get(i))
         {
             assertEquals(indexname, ir.Document(i).Get("indexname"));
         }
     }
 }
Example #39
		private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int docCount = 0;
			int maxDoc = reader.MaxDoc();
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int j = 0; j < maxDoc; )
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						++j;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = j, numDocs = 0;
					do 
					{
						j++;
						numDocs++;
						if (j >= maxDoc)
							break;
						if (reader.IsDeleted(j))
						{
							j++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
					docCount += numDocs;
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int j = 0; j < maxDoc; j++)
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						continue;
					}
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(j, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					docCount++;
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
Example #40
        private void CompareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<int> joinResults)
        {
            // results is 'complete'; joinResults is a subset
            int resultUpto = 0;
            int joinGroupUpto = 0;

            ScoreDoc[] hits = results.ScoreDocs;
            IGroupDocs<int>[] groupDocs = joinResults.Groups;

            while (joinGroupUpto < groupDocs.Length)
            {
                IGroupDocs<int> group = groupDocs[joinGroupUpto++];
                ScoreDoc[] groupHits = group.ScoreDocs;
                assertNotNull(group.GroupValue);
                Document parentDoc = joinR.Document(group.GroupValue);
                string parentID = parentDoc.Get("parentID");
                //System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc);
                assertNotNull(parentID);
                assertTrue(groupHits.Length > 0);
                for (int hitIDX = 0; hitIDX < groupHits.Length; hitIDX++)
                {
                    Document nonJoinHit = r.Document(hits[resultUpto++].Doc);
                    Document joinHit = joinR.Document(groupHits[hitIDX].Doc);
                    assertEquals(parentID, nonJoinHit.Get("parentID"));
                    assertEquals(joinHit.Get("childID"), nonJoinHit.Get("childID"));
                }

                if (joinGroupUpto < groupDocs.Length)
                {
                    // Advance non-join hit to the next parentID:
                    //System.out.println("  next joingroupUpto=" + joinGroupUpto + " gd.Length=" + groupDocs.Length + " parentID=" + parentID);
                    while (true)
                    {
                        assertTrue(resultUpto < hits.Length);
                        if (!parentID.Equals(r.Document(hits[resultUpto].Doc).Get("parentID")))
                        {
                            break;
                        }
                        resultUpto++;
                    }
                }
            }
        }
Example #41
		private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int maxDoc = reader.MaxDoc();
			int docCount = 0;
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				while (docCount < maxDoc)
				{
					int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
					docCount += len;
					checkAbort.Work(300 * len);
				}
			}
			else
			{
				for (; docCount < maxDoc; docCount++)
				{
					// NOTE: it's very important to first assign to doc and then pass it to
					// fieldsWriter.AddDocument; see LUCENE-1282
					Document doc = reader.Document(docCount, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
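For reference, a sketch of just the chunking arithmetic from the bulk-copy branch above: copy in runs of at most MAX_RAW_MERGE_DOCS, never past maxDoc. The constant's value here is a stand-in for illustration, not necessarily the one SegmentMerger uses.

        const int MAX_RAW_MERGE_DOCS = 4192; // stand-in value
        int maxDoc = 10000;                  // pretend segment size
        int docCount = 0;
        while (docCount < maxDoc)
        {
            // never request more raw documents than remain in the segment
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            // ... bulk-copy 'len' raw documents starting at 'docCount' ...
            docCount += len;
        }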
Example #42
0
        private void assertCompressedFields29(Directory dir, bool shouldStillBeCompressed)
        {
            int count               = 0;
            // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
            int TEXT_PLAIN_LENGTH   = TEXT_TO_COMPRESS.Length * 2;
            int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.Length;

            IndexReader reader = IndexReader.Open(dir, true);

            try
            {
                // look into sub readers and check if raw merge is on/off
                var readers = new System.Collections.Generic.List <IndexReader>();
                ReaderUtil.GatherSubReaders(readers, reader);
                foreach (IndexReader ir in readers)
                {
                    FieldsReader fr = ((SegmentReader)ir).GetFieldsReader();
                    Assert.IsTrue(shouldStillBeCompressed != fr.CanReadRawDocs(),
                                  "for a 2.9 index, FieldsReader.CanReadRawDocs() must be false, and the other way round for a trunk index");
                }

                // test that decompression works correctly
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    if (!reader.IsDeleted(i))
                    {
                        Document d = reader.Document(i);
                        if (d.Get("content3") != null)
                        {
                            continue;
                        }
                        count++;
                        IFieldable compressed = d.GetFieldable("compressed");
                        if (int.Parse(d.Get("id")) % 2 == 0)
                        {
                            Assert.IsFalse(compressed.IsBinary);
                            Assert.AreEqual(TEXT_TO_COMPRESS, compressed.StringValue,
                                            "incorrectly decompressed string");
                        }
                        else
                        {
                            Assert.IsTrue(compressed.IsBinary);
                            Assert.IsTrue(BINARY_TO_COMPRESS.SequenceEqual(compressed.GetBinaryValue()),
                                          "incorrectly decompressed binary");
                        }
                    }
                }

                // check that the field was decompressed after optimize
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    if (!reader.IsDeleted(i))
                    {
                        Document d = reader.Document(i, new AnonymousFieldSelector());
                        if (d.Get("content3") != null)
                        {
                            continue;
                        }
                        count++;
                        // read the 4-byte size prefix from the binary value; Java writes
                        // big-endian streams, so reverse the bytes before BitConverter.ToInt32:
                        byte[] encodedSize    = d.GetFieldable("compressed").GetBinaryValue().Take(4).Reverse().ToArray();
                        int    actualSize     = BitConverter.ToInt32(encodedSize, 0);
                        int    compressedSize = int.Parse(d.Get("compressedSize"));
                        bool   binary         = int.Parse(d.Get("id")) % 2 > 0;
                        int    shouldSize     = shouldStillBeCompressed
                                             ? compressedSize
                                             : (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
                        Assert.AreEqual(shouldSize, actualSize, "size incorrect");
                        if (!shouldStillBeCompressed)
                        {
                            Assert.IsFalse(compressedSize == actualSize,
                                           "uncompressed field should have a different size than the one recorded in the index");
                        }
                    }
                }
                Assert.AreEqual(34 * 2, count, "correct number of tests");
            }
            finally
            {
                reader.Dispose();
            }
        }
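The size decoding above, isolated as a sketch: Java writes the 4-byte length prefix big-endian, while BitConverter follows the host's byte order (little-endian on most .NET platforms), hence the reversal.

        using System;
        using System.Linq;

        byte[] stored = { 0x00, 0x00, 0x01, 0x2C }; // hypothetical field bytes, big-endian prefix
        byte[] prefix = stored.Take(4).ToArray();
        if (BitConverter.IsLittleEndian)
            Array.Reverse(prefix);                  // big-endian on disk -> native order
        int size = BitConverter.ToInt32(prefix, 0); // 300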
Example #43
0
		protected override Field[] GetFields(IndexReader reader, int docId, string fieldName)
		{
			var field = ContentFieldName ?? fieldName;
			var doc = reader.Document(docId, new MapFieldSelector(new[] {field}));
			return doc.GetFields(field); // according to Document class javadoc, this never returns null
		}
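A usage sketch for the selector pattern above, assuming the Lucene.Net 3.0.3 FieldSelector API (the "title" field name is hypothetical): only the selected field is loaded from the fields file; everything else is skipped.

        using Lucene.Net.Documents;
        using Lucene.Net.Index;

        static string GetStoredTitle(IndexReader reader, int docId)
        {
            var selector = new MapFieldSelector(new[] { "title" }); // hypothetical field name
            Document doc = reader.Document(docId, selector);
            return doc.Get("title"); // null if the field was not stored
        }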
Example #44
0
        public virtual void  searchIndex(System.String dirName, System.String oldName)
        {
            //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
            //Query query = parser.parse("handle:1");

            dirName = FullDir(dirName);

            Directory     dir      = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
            IndexSearcher searcher = new IndexSearcher(dir, true);
            IndexReader   reader   = searcher.IndexReader;

            _TestUtil.CheckIndex(dir);

            for (int i = 0; i < 35; i++)
            {
                if (!reader.IsDeleted(i))
                {
                    Document d      = reader.Document(i);
                    var      fields = d.GetFields();
                    if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
                    {
                        if (d.GetField("content3") == null)
                        {
                            int numFields = oldName.StartsWith("29.") ? 7 : 5;
                            Assert.AreEqual(numFields, fields.Count);
                            Field f = d.GetField("id");
                            Assert.AreEqual("" + i, f.StringValue);

                            f = (Field)d.GetField("utf8");
                            Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                            f = (Field)d.GetField("autf8");
                            Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                            f = (Field)d.GetField("content2");
                            Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                            f = (Field)d.GetField("fie\u2C77ld");
                            Assert.AreEqual("field with non-ascii name", f.StringValue);
                        }
                    }
                }
                else
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, i);
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #21 since its norm was
            // increased:
            Document d2 = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first");

            TestHits(hits, 34, searcher.IndexReader);

            if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
            {
                // Test on indices >= 2.3
                hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
            }

            searcher.Close();
            dir.Close();
        }
Example #45
0
        /// <summary>
        /// checks that stored fields of all documents are the same
        /// </summary>
        public void AssertStoredFieldsEquals(string info, IndexReader leftReader, IndexReader rightReader)
        {
            Debug.Assert(leftReader.MaxDoc == rightReader.MaxDoc);
            for (int i = 0; i < leftReader.MaxDoc; i++)
            {
                Document leftDoc = leftReader.Document(i);
                Document rightDoc = rightReader.Document(i);

                // TODO: I think this is bogus because we don't document what the order should be
                // from these iterators, etc. I think the codec/IndexReader should be free to order this stuff
                // in whatever way it wants (e.g. maybe it packs related fields together or something)
                // To fix this, we sort the fields in both documents by name, but
                // we still assume that all instances with same name are in order:
                Comparison<IndexableField> comp = (a, b) => String.Compare(a.Name(), b.Name(), StringComparison.Ordinal);
                leftDoc.Fields.Sort(comp);
                rightDoc.Fields.Sort(comp);

                var leftIterator = leftDoc.GetEnumerator();
                var rightIterator = rightDoc.GetEnumerator();
                while (leftIterator.MoveNext())
                {
                    Assert.IsTrue(rightIterator.MoveNext(), info);
                    AssertStoredFieldEquals(info, leftIterator.Current, rightIterator.Current);
                }
                Assert.IsFalse(rightIterator.MoveNext(), info);
            }
        }
Example #46
0
		public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
		{
			Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
			bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());
			
			int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping
			
			TermDocs termDocs1 = r1.TermDocs();
			TermDocs termDocs2 = r2.TermDocs();
			
			// create mapping from id2 space to id1 space based on idField
			idField = StringHelper.Intern(idField);
			TermEnum termEnum = r1.Terms(new Term(idField, ""));
			do 
			{
				Term term = termEnum.Term();
				if (term == null || (System.Object) term.Field() != (System.Object) idField)
					break;
				
				termDocs1.Seek(termEnum);
				if (!termDocs1.Next())
				{
					// This doc is deleted and wasn't replaced
					termDocs2.Seek(termEnum);
					Assert.IsFalse(termDocs2.Next());
					continue;
				}
				
				int id1 = termDocs1.Doc();
				Assert.IsFalse(termDocs1.Next());
				
				termDocs2.Seek(termEnum);
				Assert.IsTrue(termDocs2.Next());
				int id2 = termDocs2.Doc();
				Assert.IsFalse(termDocs2.Next());
				
				r2r1[id2] = id1;
				
				// verify stored fields are equivalent
				try
				{
					VerifyEquals(r1.Document(id1), r2.Document(id2));
				}
				catch (System.Exception)
				{
					System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
					System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
					System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
					throw; // rethrow without resetting the stack trace
				}
				
				try
				{
					// verify term vectors are equivalent        
					VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
				}
				catch (System.Exception)
				{
					System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
					TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
					System.Console.Out.WriteLine("  d1=" + tv1);
					if (tv1 != null)
						for (int i = 0; i < tv1.Length; i++)
						{
							System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
						}
					
					TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
					System.Console.Out.WriteLine("  d2=" + tv2);
					if (tv2 != null)
						for (int i = 0; i < tv2.Length; i++)
						{
							System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
						}
					
					throw; // rethrow without resetting the stack trace
				}
			}
			while (termEnum.Next());
			
			termEnum.Close();
			
			// Verify postings
			TermEnum termEnum1 = r1.Terms(new Term("", ""));
			TermEnum termEnum2 = r2.Terms(new Term("", ""));
			
			// pack both doc and freq into a single element for easy sorting
			long[] info1 = new long[r1.NumDocs()];
			long[] info2 = new long[r2.NumDocs()];
			
			for (; ; )
			{
				Term term1, term2;
				
				// iterate until we get some docs
				int len1;
				for (; ; )
				{
					len1 = 0;
					term1 = termEnum1.Term();
					if (term1 == null)
						break;
					termDocs1.Seek(termEnum1);
					while (termDocs1.Next())
					{
						int d1 = termDocs1.Doc();
						int f1 = termDocs1.Freq();
						info1[len1] = (((long) d1) << 32) | f1;
						len1++;
					}
					if (len1 > 0)
						break;
					if (!termEnum1.Next())
						break;
				}
				
				// iterate until we get some docs
				int len2;
				for (; ; )
				{
					len2 = 0;
					term2 = termEnum2.Term();
					if (term2 == null)
						break;
					termDocs2.Seek(termEnum2);
					while (termDocs2.Next())
					{
						int d2 = termDocs2.Doc();
						int f2 = termDocs2.Freq();
						info2[len2] = (((long) r2r1[d2]) << 32) | f2;
						len2++;
					}
					if (len2 > 0)
						break;
					if (!termEnum2.Next())
						break;
				}
				
				if (!hasDeletes)
					Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
				
				Assert.AreEqual(len1, len2);
				if (len1 == 0)
					break; // no more terms
				
				Assert.AreEqual(term1, term2);
				
				// sort info2 to get it into ascending docid
				System.Array.Sort(info2, 0, len2);
				
				// now compare
				for (int i = 0; i < len1; i++)
				{
					Assert.AreEqual(info1[i], info2[i]);
				}
				
				termEnum1.Next();
				termEnum2.Next();
			}
		}
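The doc/freq packing used above, isolated: the docid occupies the high 32 bits, so an ordinary numeric sort on the packed longs orders postings by document first. The helper names here are invented for illustration.

        // Pack a docid and a frequency into one long (docid in the high 32 bits).
        static long Pack(int doc, int freq)
        {
            return (((long) doc) << 32) | (uint) freq; // (uint) avoids sign extension
        }

        static int UnpackDoc(long packed)
        {
            return (int) (packed >> 32);
        }

        static int UnpackFreq(long packed)
        {
            return (int) (packed & 0xFFFFFFFFL);
        }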
Example #47
0
        // Runs test, with multiple threads, using the specific
        // failure to trigger an IOException
        public virtual void TestMultipleThreadsFailure(Func <IConcurrentMergeScheduler> newScheduler, Failure failure)
        {
            int NUM_THREADS = 3;

            for (int iter = 0; iter < 2; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                MockDirectoryWrapper dir = NewMockDirectory();
                var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                             .SetMaxBufferedDocs(2)
                             .SetMergeScheduler(newScheduler())
                             .SetMergePolicy(NewLogMergePolicy(4));
                IndexWriter writer    = new IndexWriter(dir, config);
                var         scheduler = config.MergeScheduler as IConcurrentMergeScheduler;
                if (scheduler != null)
                {
                    scheduler.SetSuppressExceptions();
                }

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i] = new IndexerThread(writer, true, NewField);
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                Thread.Sleep(10);

                dir.FailOn(failure);
                failure.SetDoFail();

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Join();
                    Assert.IsTrue(threads[i].Error == null, "hit unexpected exception");
                }

                bool success = false;
                try
                {
                    writer.Dispose(false);
                    success = true;
                }
                catch (IOException)
                {
                    failure.ClearDoFail();
                    writer.Dispose(false);
                }
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: success=" + success);
                }

                if (success)
                {
                    IndexReader reader  = DirectoryReader.Open(dir);
                    IBits       delDocs = MultiFields.GetLiveDocs(reader);
                    for (int j = 0; j < reader.MaxDoc; j++)
                    {
                        if (delDocs == null || !delDocs.Get(j))
                        {
                            reader.Document(j);
                            reader.GetTermVectors(j);
                        }
                    }
                    reader.Dispose();
                }

                dir.Dispose();
            }
        }
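In the 4.x API this test uses there is no IsDeleted(int); deletions surface as a live-docs bitset, and a null bitset means the reader has no deletions. A minimal sketch, assuming the Lucene.Net 4.8 API:

        using Lucene.Net.Documents;
        using Lucene.Net.Index;
        using Lucene.Net.Util;

        static void VisitLiveDocuments(IndexReader reader)
        {
            IBits liveDocs = MultiFields.GetLiveDocs(reader); // null => no deletions
            for (int j = 0; j < reader.MaxDoc; j++)
            {
                if (liveDocs != null && !liveDocs.Get(j))
                    continue; // deleted

                Document doc = reader.Document(j); // safe: j is live
                // ... read stored fields from 'doc' as needed ...
            }
        }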
Example #48
0
		private static ArrayList   FindRecentResults (IndexReader	    primary_reader,
							      IndexReader	    secondary_reader,
							      BetterBitArray	    primary_matches,
							      Dictionary<int, Hit>  hits_by_id,
							      int		    max_results,
							      ref int		    total_number_of_matches,
							      HitFilter		    hit_filter,
							      string		    index_name)
		{
			Stopwatch b = new Stopwatch ();
			b.Start ();
			
			int count = 0;
			Document doc;

			ArrayList all_docs = null;
			TopScores top_docs = null;
			TermDocs term_docs = null;

			if (primary_matches.TrueCount > max_results)
				top_docs = new TopScores (max_results);
			else
				all_docs = new ArrayList (primary_matches.TrueCount);

			if (secondary_reader != null)
				term_docs = secondary_reader.TermDocs ();

			for (int match_index = primary_matches.Count; ; match_index --) {
				// Walk across the matches backwards, since newer
				// documents are more likely to be at the end of
				// the index.
				match_index = primary_matches.GetPreviousTrueIndex (match_index);
				if (match_index < 0)
					break;

				count++;

				doc = primary_reader.Document (match_index, fields_timestamp_uri);

				// Check the timestamp --- if we have already reached our
				// limit, we might be able to reject it immediately.
				string timestamp_str;
				long timestamp_num = 0;

				timestamp_str = doc.Get ("Timestamp");
				if (timestamp_str == null) {
					Logger.Log.Warn ("No timestamp on {0}!", GetUriFromDocument (doc));
				} else {
					timestamp_num = Int64.Parse (doc.Get ("Timestamp"));
					if (top_docs != null && ! top_docs.WillAccept (timestamp_num))
						continue;
				}

				// Get the actual hit now
				// doc was created with only 2 fields, so first get the complete lucene document
				// for the primary hit. Also run our hit_filter now, if we have one: since we
				// insist on returning the max_results most recent hits, any filtering should
				// happen now and not later.
				Hit hit = CreateHit (primary_reader.Document (match_index), secondary_reader, term_docs);
				if (hit_filter != null && ! hit_filter (hit)) {
					if (Debug)
						Log.Debug ("Filtered out {0}", hit.Uri);
					total_number_of_matches --;
					continue;
				}

				hits_by_id [match_index] = hit;

				// Add the document to the appropriate data structure.
				// We use the timestamp_num as the score, so high
				// scores correspond to more-recent timestamps.
				if (all_docs != null)
					all_docs.Add (hit);
				else
					top_docs.Add (timestamp_num, hit);
			}

			if (term_docs != null)
				term_docs.Close ();

			b.Stop ();

			if (Debug)
				Log.Debug (">>> {0}: Instantiated and scanned {1} documents in {2}", index_name, count, b);

			if (all_docs != null) {
				// Sort results before sending
				all_docs.Sort ();
				return all_docs;
			} else {
				return top_docs.TopScoringObjects;
			}
		}
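The two-phase load in FindRecentResults, reduced to a sketch (Lucene.Net FieldSelector API; the field name, helper name, and cutoff are hypothetical): pay for a one-field projection while filtering, and for the full document only once a hit survives.

        using Lucene.Net.Documents;
        using Lucene.Net.Index;

        static Document LoadIfRecent(IndexReader reader, int matchIndex, long cutoff)
        {
            // cheap pass: only "Timestamp" is read from the store
            Document quick = reader.Document(matchIndex, new MapFieldSelector(new[] { "Timestamp" }));
            string ts = quick.Get("Timestamp");
            if (ts == null || long.Parse(ts) < cutoff)
                return null; // rejected without paying for the full document

            return reader.Document(matchIndex); // all stored fields
        }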
Example #49
0
        public virtual void SearchIndex(Directory dir, string oldName)
        {
            //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
            //Query query = parser.parse("handle:1");

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            TestUtil.CheckIndex(dir);

            // true if this is a 4.0+ index
            bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
            // true if this is a 4.2+ index
            bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

            Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

            Bits liveDocs = MultiFields.GetLiveDocs(reader);

            for (int i = 0; i < 35; i++)
            {
                if (liveDocs.Get(i))
                {
                    Document d = reader.Document(i);
                    IList <IndexableField> fields = d.Fields;
                    bool isProxDoc = d.GetField("content3") == null;
                    if (isProxDoc)
                    {
                        int numFields = is40Index ? 7 : 5;
                        Assert.AreEqual(numFields, fields.Count);
                        IndexableField f = d.GetField("id");
                        Assert.AreEqual("" + i, f.StringValue);

                        f = d.GetField("utf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("autf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("content2");
                        Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                        f = d.GetField("fie\u2C77ld");
                        Assert.AreEqual("field with non-ascii name", f.StringValue);
                    }

                    Fields tfvFields = reader.GetTermVectors(i);
                    Assert.IsNotNull(tfvFields, "i=" + i);
                    Terms tfv = tfvFields.Terms("utf8");
                    Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
                }
                else
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, i);
                }
            }

            if (is40Index)
            {
                // check docvalues fields
                NumericDocValues   dvByte               = MultiDocValues.GetNumericValues(reader, "dvByte");
                BinaryDocValues    dvBytesDerefFixed    = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
                BinaryDocValues    dvBytesDerefVar      = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
                SortedDocValues    dvBytesSortedFixed   = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
                SortedDocValues    dvBytesSortedVar     = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
                BinaryDocValues    dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
                BinaryDocValues    dvBytesStraightVar   = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
                NumericDocValues   dvDouble             = MultiDocValues.GetNumericValues(reader, "dvDouble");
                NumericDocValues   dvFloat              = MultiDocValues.GetNumericValues(reader, "dvFloat");
                NumericDocValues   dvInt       = MultiDocValues.GetNumericValues(reader, "dvInt");
                NumericDocValues   dvLong      = MultiDocValues.GetNumericValues(reader, "dvLong");
                NumericDocValues   dvPacked    = MultiDocValues.GetNumericValues(reader, "dvPacked");
                NumericDocValues   dvShort     = MultiDocValues.GetNumericValues(reader, "dvShort");
                SortedSetDocValues dvSortedSet = null;
                if (is42Index)
                {
                    dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
                }

                for (int i = 0; i < 35; i++)
                {
                    int id = Convert.ToInt32(reader.Document(i).Get("id"));
                    Assert.AreEqual(id, dvByte.Get(i));

                    sbyte[]  bytes       = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
                    BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
                    BytesRef scratch     = new BytesRef();

                    dvBytesDerefFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesDerefVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);

                    Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
                    Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
                    Assert.AreEqual(id, dvInt.Get(i));
                    Assert.AreEqual(id, dvLong.Get(i));
                    Assert.AreEqual(id, dvPacked.Get(i));
                    Assert.AreEqual(id, dvShort.Get(i));
                    if (is42Index)
                    {
                        dvSortedSet.Document = i;
                        long ord = dvSortedSet.NextOrd();
                        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                        dvSortedSet.LookupOrd(ord, scratch);
                        Assert.AreEqual(expectedRef, scratch);
                    }
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #0
            Document doc = searcher.IndexReader.Document(hits[0].Doc);

            assertEquals("didn't get the right document first", "0", doc.Get("id"));

            DoTestHits(hits, 34, searcher.IndexReader);

            if (is40Index)
            {
                hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);

                hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);
            }

            hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);

            reader.Dispose();
        }
Example #50
0
 protected virtual Field[] GetFields(IndexReader reader, int docId, String fieldName)
 {
     // according to the javadoc, doc.GetFields(fieldName) cannot be used with lazy-loaded fields
     Document doc = reader.Document(docId, new MapFieldSelector(new String[] { fieldName }));
     return doc.GetFields(fieldName); // according to Document class javadoc, this never returns null
 }
Example #51
0
        public virtual void DoTestNumbers(bool withPayloads)
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random);

            iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
            RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc);

            FieldType ft = new FieldType(TextField.TYPE_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random.NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random.NextBoolean();
                ft.StoreTermVectorPositions = Random.NextBoolean();
            }

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("numbers", English.Int32ToEnglish(i), ft));
                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
                doc.Add(new StringField("id", "" + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader reader = w.GetReader();

            w.Dispose();

            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

            foreach (string term in terms)
            {
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
                int doc;
                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int    freq          = dp.Freq;
                    for (int i = 0; i < freq; i++)
                    {
                        dp.NextPosition();
                        int start = dp.StartOffset;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(start >= 0);
                        }
                        int end = dp.EndOffset;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(end >= 0 && end >= start);
                        }
                        // check that the offsets correspond to the term in the src text
                        Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal));
                        if (withPayloads)
                        {
                            // check that we have a payload and it starts with "pos"
                            Assert.IsNotNull(dp.GetPayload());
                            BytesRef payload = dp.GetPayload();
                            Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                        } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
                    }
                }
            }

            // check we can skip correctly
            int numSkippingTests = AtLeast(50);

            for (int j = 0; j < numSkippingTests; j++)
            {
                int num = TestUtil.NextInt32(Random, 100, Math.Min(numDocs - 1, 999));
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
                int doc = dp.Advance(num);
                Assert.AreEqual(num, doc);
                int freq = dp.Freq;
                for (int i = 0; i < freq; i++)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    dp.NextPosition();
                    int start = dp.StartOffset;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(start >= 0);
                    }
                    int end = dp.EndOffset;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(end >= 0 && end >= start);
                    }
                    // check that the offsets correspond to the term in the src text
                    Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal));
                    if (withPayloads)
                    {
                        // check that we have a payload and it starts with "pos"
                        Assert.IsNotNull(dp.GetPayload());
                        BytesRef payload = dp.GetPayload();
                        Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                    } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
                }
            }

            // check that other fields (without offsets) work correctly

            for (int i = 0; i < numDocs; i++)
            {
                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
                Assert.AreEqual(i, dp.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
            }

            reader.Dispose();
            dir.Dispose();
        }
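One pitfall the offset checks above navigate: Lucene's StartOffset/EndOffset form a half-open [start, end) range into the source text, while C#'s Substring takes a length, hence the end - start. A tiny illustration:

        string src = "one two three";
        int start = 4, end = 7;                          // offsets of "two"
        string term = src.Substring(start, end - start); // "two"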
Example #52
0
 protected virtual String[] GetFieldValues(IndexReader reader, int docId, String fieldName)
 {
     Document doc = reader.Document(docId, new MapFieldSelector(new String[] { fieldName }));
     return doc.GetValues(fieldName); // according to Document class javadoc, this never returns null
 }
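GetValues matters when a field name is multi-valued. A small sketch, assuming the Lucene.Net 3.0.3 API: store two fields under the same name, then read them back in order.

        using Lucene.Net.Documents;

        Document doc = new Document();
        doc.Add(new Field("tag", "lucene", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("tag", "search", Field.Store.YES, Field.Index.NOT_ANALYZED));
        string[] tags = doc.GetValues("tag"); // { "lucene", "search" }, never null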
Example #53
0
            private void ApplyFacetValueHit(FacetValue facetValue, Facet value, int docId, ParsedRange parsedRange, IndexReader indexReader)
            {
                facetValue.Hits++;
                if (IndexQuery.IsDistinct == false &&
                    (value.Aggregation == FacetAggregation.Count || value.Aggregation == FacetAggregation.None))
                {
                    return;
                }
                FacetValueState set;
                if (matches.TryGetValue(facetValue, out set) == false)
                {
                    matches[facetValue] = set = new FacetValueState
                    {
                        Docs = new HashSet<int>(),
                        Facet = value,
                        Range = parsedRange
                    };
                }
	            if (IndexQuery.IsDistinct)
	            {
					if(IndexQuery.FieldsToFetch.Length == 0)
						throw new InvalidOperationException("Cannot process distinct facet query without specifying which fields to distinct upon.");

		            if (set.AlreadySeen == null)
			            set.AlreadySeen = new HashSet<StringCollectionValue>();

		            var document = indexReader.Document(docId);
		            var fields = new List<string>();
					foreach (var fieldName in IndexQuery.FieldsToFetch)
		            {
						foreach (var field in document.GetFields(fieldName))
						{
							if (field.StringValue == null)
								continue;
				            fields.Add(field.StringValue);
			            }
		            }
		            if (fields.Count == 0)
			            throw new InvalidOperationException("Cannot apply distinct facet on [" + string.Join(", ", IndexQuery.FieldsToFetch) +
							"], did you forget to store them in the index? ");

		            if (set.AlreadySeen.Add(new StringCollectionValue(fields)) == false)
		            {
			            facetValue.Hits--;// already seen, cancel this
			            return;
		            }
	            }
                set.Docs.Add(docId);
            }
Example #54
0
		public static void  AssertIndexEquals(IndexReader index1, IndexReader index2)
		{
			Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
			Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
			Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
			Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");
			
			// check field names
			System.Collections.Generic.ICollection<string> fieldsNames1 = index1.GetFieldNames(FieldOption.ALL);
			System.Collections.Generic.ICollection<string> fieldsNames2 = index2.GetFieldNames(FieldOption.ALL);

            System.Collections.Generic.ICollection<IFieldable> fields1 = null;
            System.Collections.Generic.ICollection<IFieldable> fields2 = null;

            Assert.AreEqual(fieldsNames1.Count, fieldsNames2.Count, "IndexReaders have different numbers of fields.");
            System.Collections.IEnumerator it1 = fieldsNames1.GetEnumerator();
            System.Collections.IEnumerator it2 = fieldsNames2.GetEnumerator();
			while (it1.MoveNext() && it2.MoveNext())
			{
				Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
			}
			
			// check norms
            it1 = fieldsNames1.GetEnumerator();
			while (it1.MoveNext())
			{
				System.String curField = (System.String) it1.Current;
				byte[] norms1 = index1.Norms(curField);
				byte[] norms2 = index2.Norms(curField);
				if (norms1 != null && norms2 != null)
				{
					Assert.AreEqual(norms1.Length, norms2.Length);
					for (int i = 0; i < norms1.Length; i++)
					{
						Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
					}
				}
				else
				{
					Assert.AreSame(norms1, norms2);
				}
			}
			
			// check deletions
			for (int i = 0; i < index1.MaxDoc; i++)
			{
				Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
			}
			
			// check stored fields
			for (int i = 0; i < index1.MaxDoc; i++)
			{
				if (!index1.IsDeleted(i))
				{
					Document doc1 = index1.Document(i);
					Document doc2 = index2.Document(i);
					fields1 = doc1.GetFields();
					fields2 = doc2.GetFields();
					Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
					it1 = fields1.GetEnumerator();
					it2 = fields2.GetEnumerator();
					while (it1.MoveNext() && it2.MoveNext())
					{
						Field curField1 = (Field) it1.Current;
						Field curField2 = (Field) it2.Current;
						Assert.AreEqual(curField1.Name, curField2.Name, "Different fields names for doc " + i + ".");
						Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
					}
				}
			}
			
			// check dictionary and posting lists
			TermEnum enum1 = index1.Terms();
			TermEnum enum2 = index2.Terms();
			TermPositions tp1 = index1.TermPositions();
			TermPositions tp2 = index2.TermPositions();
			while (enum1.Next())
			{
				Assert.IsTrue(enum2.Next());
				Assert.AreEqual(enum1.Term, enum2.Term, "Different term in dictionary.");
				tp1.Seek(enum1.Term);
				tp2.Seek(enum1.Term);
				while (tp1.Next())
				{
					Assert.IsTrue(tp2.Next());
					Assert.AreEqual(tp1.Doc, tp2.Doc, "Different doc id in posting list of term " + enum1.Term + ".");
					Assert.AreEqual(tp1.Freq, tp2.Freq, "Different term frequency in posting list of term " + enum1.Term + ".");
					for (int i = 0; i < tp1.Freq; i++)
					{
						Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in posting list of term " + enum1.Term + ".");
					}
					}
				}
			}
		}