/// <summary>
/// Builds <paramref name="ndocs"/> documents, each holding a stored text term,
/// a stored payload, three numeric doc-values weights and one or more stored contexts.
/// </summary>
/// <param name="ndocs">Number of documents to generate.</param>
/// <returns>Map from each document's term text ("field_" + index) to the document.</returns>
private IDictionary<string, Document> GenerateIndexDocuments(int ndocs)
 {
     IDictionary<string, Document> docs = new HashMap<string, Document>();
     for (int i = 0; i < ndocs; i++)
     {
         Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
         Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
         Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
         Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
         Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
         Document doc = new Document();
         doc.Add(field);
         doc.Add(payload);
         doc.Add(weight1);
         doc.Add(weight2);
         doc.Add(weight3);
         doc.Add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0")));
         // AtLeast(3) is re-evaluated on every pass, so the number of extra contexts varies.
         for (int j = 1; j < AtLeast(3); j++)
         {
             // Each context gets its own field instance. Re-adding a single mutated
             // instance would make every entry alias the same object and value.
             doc.Add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_" + j)));
         }
         docs.Put(field.StringValue, doc);
     }
     return docs;
 }
Esempio n. 2
0
        /// <summary>
        /// Creates the test directory, indexes every row of <c>documents</c> through a single
        /// reusable document, then opens <c>reader</c> and <c>searcher</c> over the result.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();

            dir = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, config);

            // One reusable, unstored field per column of a row in 'documents'.
            // The array order is the column order: columns[c] receives row[c].
            Field[] columns =
            {
                new StringField("id", "", Field.Store.NO),
                new StringField("byte", "", Field.Store.NO),
                new StringField("double", "", Field.Store.NO),
                new StringField("float", "", Field.Store.NO),
                new StringField("int", "", Field.Store.NO),
                new StringField("long", "", Field.Store.NO),
                new StringField("short", "", Field.Store.NO),
                new StringField("string", "", Field.Store.NO),
                new TextField("text", "", Field.Store.NO)
            };

            Document reusableDoc = new Document();
            foreach (Field column in columns)
            {
                reusableDoc.Add(column);
            }

            foreach (string[] row in documents)
            {
                // Overwrite the field values in place and index the same document again.
                for (int c = 0; c < columns.Length; c++)
                {
                    columns[c].StringValue = row[c];
                }
                indexWriter.AddDocument(reusableDoc);
            }

            // The reader is taken from the writer before the writer is disposed.
            reader = indexWriter.Reader;
            searcher = NewSearcher(reader);
            indexWriter.Dispose();
        }
        /// <summary>
        /// Indexes documents whose <c>FloatTestField</c> carries a random float both as its
        /// stored text and as its boost, then asserts that each document's norm value,
        /// reinterpreted as float bits, equals the float parsed back from the stored text.
        /// The similarity in use is <c>MySimProvider</c>, which is defined elsewhere.
        /// </summary>
        public virtual void TestFloatNorms()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Similarity provider = new MySimProvider(this);
            config.SetSimilarity(provider);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            LineFileDocs docs = new LineFileDocs(Random());
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                Document doc = docs.NextDoc();
                float nextFloat = (float)Random().NextDouble();
                // Cast to a double to get more precision output to the string.
                // Format with the invariant culture so the text written here and the parse
                // below agree regardless of the current thread culture.
                string storedValue = ((double)nextFloat).ToString(System.Globalization.CultureInfo.InvariantCulture);
                Field f = new TextField(FloatTestField, storedValue, Field.Store.YES);
                f.Boost = nextFloat;

                doc.Add(f);
                writer.AddDocument(doc);
                // Remove the field again so the next pass starts without it.
                doc.RemoveField(FloatTestField);
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.Dispose();
            AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues norms = open.GetNormValues(FloatTestField);
            Assert.IsNotNull(norms);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                Document document = open.Document(i);
                // Parse with the same (invariant) culture that was used when writing.
                float expected = Convert.ToSingle(document.Get(FloatTestField), System.Globalization.CultureInfo.InvariantCulture);
                Assert.AreEqual(expected, Number.IntBitsToFloat((int)norms.Get(i)), 0.0f);
            }
            open.Dispose();
            dir.Dispose();
            docs.Dispose();
        }
        /// <summary>
        /// Builds the shared index: picks a random single-character stopword, indexes at least
        /// 1000 documents with an "id" and a random "field" value, issues numDocs / 20 delete
        /// requests against randomly chosen ids, and opens two searchers over one reader.
        /// </summary>
        public void BeforeClass()
        {
            Random rnd = Random();
            Directory = NewDirectory();
            Stopword = "" + RandomChar();
            Analyzer = new MockAnalyzer(
                rnd,
                MockTokenizer.WHITESPACE,
                false,
                new CharacterRunAutomaton(BasicAutomata.MakeString(Stopword)));
            RandomIndexWriter indexWriter = new RandomIndexWriter(rnd, Directory, Analyzer, ClassEnvRule.Similarity, ClassEnvRule.TimeZone);

            // A single document instance is reused; only the field values change per pass.
            Field idField = new StringField("id", "", Field.Store.NO);
            Field contentField = new TextField("field", "", Field.Store.NO);
            Document reusableDoc = new Document();
            reusableDoc.Add(idField);
            reusableDoc.Add(contentField);

            // index some docs
            int numDocs = AtLeast(1000);
            int docId = 0;
            while (docId < numDocs)
            {
                idField.StringValue = Convert.ToString(docId);
                contentField.StringValue = RandomFieldContents();
                indexWriter.AddDocument(reusableDoc);
                docId++;
            }

            // delete some docs: the id is drawn first, then the choice of delete flavor
            for (int remaining = numDocs / 20; remaining > 0; remaining--)
            {
                Term victim = new Term("id", Convert.ToString(rnd.Next(numDocs)));
                bool deleteByTerm = rnd.NextBoolean();
                if (deleteByTerm)
                {
                    indexWriter.DeleteDocuments(victim);
                }
                else
                {
                    indexWriter.DeleteDocuments(new TermQuery(victim));
                }
            }

            // Both searchers wrap the same reader, taken before the writer is disposed.
            Reader = indexWriter.Reader;
            S1 = NewSearcher(Reader);
            S2 = NewSearcher(Reader);
            indexWriter.Dispose();
        }
Esempio n. 5
0
 // TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs
 /// <summary>
 /// Fills <paramref name="dir"/> with at least 100 line-file documents, each temporarily
 /// extended with a <c>ByteTestField</c> whose stored text and boost are the same random
 /// integer drawn from [0, 255). The similarity in use is <c>MySimProvider</c>.
 /// </summary>
 /// <param name="dir">Directory the index is written to.</param>
 public virtual void BuildIndex(Directory dir)
 {
     Random random = Random();

     MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());
     mockAnalyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

     IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
     writerConfig.SetSimilarity(new MySimProvider(this));

     RandomIndexWriter indexWriter = new RandomIndexWriter(random, dir, writerConfig);
     LineFileDocs lineDocs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

     int total = AtLeast(100);
     int added = 0;
     while (added < total)
     {
         Document current = lineDocs.NextDoc();
         int boost = Random().Next(255);
         Field boosted = new TextField(ByteTestField, "" + boost, Field.Store.YES)
         {
             Boost = boost
         };
         current.Add(boosted);
         indexWriter.AddDocument(current);
         // Remove the field again so the next pass starts without it.
         current.RemoveField(ByteTestField);
         if (Rarely())
         {
             indexWriter.Commit();
         }
         added++;
     }

     indexWriter.Commit();
     indexWriter.Dispose();
     lineDocs.Dispose();
 }
Esempio n. 6
0
        /// <summary>
        /// Exercises the setters of a <c>TextField</c> that was constructed from a reader.
        /// Boost, ReaderValue and TokenStream are assigned directly; every other value type
        /// goes through a <c>TrySet*</c> helper defined elsewhere.
        /// NOTE(review): presumably each helper checks that the setter is rejected for this
        /// field kind; confirm against the helper definitions.
        /// </summary>
        public virtual void TestTextFieldReader()
        {
            Field readerBacked = new TextField("foo", new StringReader("bar"));

            readerBacked.Boost = 5f;

            // Statement order is kept exactly; the helpers are opaque from here.
            TrySetByteValue(readerBacked);
            TrySetBytesValue(readerBacked);
            TrySetBytesRefValue(readerBacked);
            TrySetDoubleValue(readerBacked);
            TrySetIntValue(readerBacked);
            TrySetFloatValue(readerBacked);
            TrySetLongValue(readerBacked);
            readerBacked.ReaderValue = new StringReader("foobar");
            TrySetShortValue(readerBacked);
            TrySetStringValue(readerBacked);
            readerBacked.TokenStream = new CannedTokenStream(new Token("foo", 0, 3));

            // After all of the above the reader value is still present and the boost unchanged.
            Assert.IsNotNull(readerBacked.ReaderValue);
            Assert.AreEqual(5f, readerBacked.Boost, 0f);
        }
        /// <summary>
        /// Generates <paramref name="ndocs"/> randomized documents, some of them deliberately
        /// incomplete. A document is recorded as invalid when its term field is missing, or
        /// when its payload field is missing while <paramref name="requiresPayload"/> is true.
        /// </summary>
        /// <param name="ndocs">Number of documents to generate.</param>
        /// <param name="requiresPayload">Whether a missing payload makes a document invalid.</param>
        /// <param name="requiresContexts">
        /// When true the context branch is always entered; a document that still ends up
        /// without contexts is not marked invalid.
        /// </param>
        /// <returns>Pair(list of invalid document terms, map of document term -> document).</returns>
        private KeyValuePair<List<string>, IDictionary<string, Document>> GenerateIndexDocuments(int ndocs, bool requiresPayload, bool requiresContexts)
        {
            List<string> invalidTerms = new List<string>();
            IDictionary<string, Document> termToDoc = new HashMap<string, Document>();

            for (int i = 0; i < ndocs; i++)
            {
                Document doc = new Document();

                // usually have valid term field in document
                Field termField = Usually()
                    ? new TextField(FIELD_NAME, "field_" + i, Field.Store.YES)
                    : null;
                bool invalid = termField == null;
                if (!invalid)
                {
                    doc.Add(termField);
                }

                // even if payload is not required usually have it
                if (requiresPayload || Usually())
                {
                    // usually have valid payload field in document
                    if (Usually())
                    {
                        doc.Add(new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i)));
                    }
                    else if (requiresPayload)
                    {
                        invalid = true;
                    }
                }

                // we should allow entries without context, so nothing is flagged when none are added
                if ((requiresContexts || Usually()) && Usually())
                {
                    // AtLeast(2) is re-evaluated on every pass of the loop.
                    for (int j = 0; j < AtLeast(2); j++)
                    {
                        doc.Add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_" + j)));
                    }
                }

                // usually have valid weight field in document
                if (Usually())
                {
                    Field weight;
                    if (Rarely())
                    {
                        // rarely the weight is a stored double instead of numeric doc values
                        weight = new StoredField(WEIGHT_FIELD_NAME, 100d + i);
                    }
                    else
                    {
                        weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
                    }
                    doc.Add(weight);
                }

                // A document without a term field is keyed by a synthetic "invalid_" term.
                string term = (termField != null) ? termField.StringValue : "invalid_" + i;
                if (invalid)
                {
                    invalidTerms.Add(term);
                }

                termToDoc.Put(term, doc);
            }

            return new KeyValuePair<List<string>, IDictionary<string, Document>>(invalidTerms, termToDoc);
        }
Esempio n. 8
0
 /// <summary>
 /// Indexes three documents through one reused field whose token stream alternates between
 /// streams without a payload attribute and a stream carrying a payload, then verifies that
 /// the payload of the "withPayload" term is read back as "test".
 /// The writer is configured with a null analyzer; every field value is supplied as a
 /// pre-built token stream.
 /// </summary>
 public virtual void TestMixupDocs()
 {
     Directory dir = NewDirectory();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
     iwc.SetMergePolicy(NewLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
     Document doc = new Document();
     Field field = new TextField("field", "", Field.Store.NO);

     // First document: no payload attribute on the stream.
     // The check uses the attribute interface, the same type the positive check below queries.
     TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     doc.Add(field);
     writer.AddDocument(doc);

     // Second document: a single canned token that carries a payload.
     Token withPayload = new Token("withPayload", 0, 11);
     withPayload.Payload = new BytesRef("test");
     ts = new CannedTokenStream(withPayload);
     Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     writer.AddDocument(doc);

     // Third document: again no payload attribute.
     ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     writer.AddDocument(doc);

     DirectoryReader reader = writer.Reader;
     AtomicReader sr = SlowCompositeReaderWrapper.Wrap(reader);
     DocsAndPositionsEnum de = sr.TermPositionsEnum(new Term("field", "withPayload"));
     de.NextDoc();
     de.NextPosition();
     Assert.AreEqual(new BytesRef("test"), de.Payload);
     writer.Dispose();
     reader.Dispose();
     dir.Dispose();
 }
Esempio n. 9
0
 /// <summary>
 /// Adds three same-named fields to a single document, of which only the middle one has a
 /// token stream carrying a payload, then verifies that the payload of the "withPayload"
 /// term is read back as "test" from the only segment.
 /// </summary>
 public virtual void TestMixupMultiValued()
 {
     Directory dir = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
     Document doc = new Document();

     // First value: no payload attribute on the stream.
     // The check uses the attribute interface, the same type the positive check below queries.
     Field field = new TextField("field", "", Field.Store.NO);
     TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     doc.Add(field);

     // Second value: a single canned token that carries a payload.
     Field field2 = new TextField("field", "", Field.Store.NO);
     Token withPayload = new Token("withPayload", 0, 11);
     withPayload.Payload = new BytesRef("test");
     ts = new CannedTokenStream(withPayload);
     Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
     field2.TokenStream = ts;
     doc.Add(field2);

     // Third value: again no payload attribute.
     Field field3 = new TextField("field", "", Field.Store.NO);
     ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field3.TokenStream = ts;
     doc.Add(field3);

     writer.AddDocument(doc);
     DirectoryReader reader = writer.Reader;
     SegmentReader sr = GetOnlySegmentReader(reader);
     DocsAndPositionsEnum de = sr.TermPositionsEnum(new Term("field", "withPayload"));
     de.NextDoc();
     de.NextPosition();
     Assert.AreEqual(new BytesRef("test"), de.Payload);
     writer.Dispose();
     reader.Dispose();
     dir.Dispose();
 }
Esempio n. 10
0
 /// <summary>
 /// Builds one document for <paramref name="id"/> containing an id field, a docs-enum term,
 /// a positions field fed by <paramref name="positions"/>, numeric/binary/sorted doc values
 /// (plus sorted-set values when <c>DefaultCodecSupportsSortedSet()</c> is true), a boosted
 /// norms field and a term-vectors field.
 /// </summary>
 /// <param name="id">Document id; also the source of most field values.</param>
 /// <param name="positions">Token stream for the positions field; its id is set to <paramref name="id"/>.</param>
 /// <returns>The populated document.</returns>
 private static Document Doc(int id, PositionsTokenStream positions)
 {
     string idText = id.ToString();
     Document result = new Document();

     result.Add(new StringField(ID_FIELD, idText, Field.Store.YES));
     result.Add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Field.Store.NO));

     positions.SetId(id);
     bool offsetsUnsupported = DoesntSupportOffsets.contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD));
     if (!offsetsUnsupported)
     {
         result.Add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE));
     }
     else
     {
         // codec doesnt support offsets: just index positions for the field
         result.Add(new Field(DOC_POSITIONS_FIELD, positions, TextField.TYPE_NOT_STORED));
     }

     result.Add(new NumericDocValuesField(NUMERIC_DV_FIELD, id));

     // The boost is the id's bit pattern reinterpreted as a float.
     TextField normsField = new TextField(NORMS_FIELD, idText, Field.Store.NO);
     normsField.Boost = Number.IntBitsToFloat(id);
     result.Add(normsField);

     result.Add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(idText)));
     result.Add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(idText)));
     if (DefaultCodecSupportsSortedSet())
     {
         // two values per document: the id and id + 1
         result.Add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(idText)));
         result.Add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef((id + 1).ToString())));
     }

     result.Add(new Field(TERM_VECTORS_FIELD, idText, TERM_VECTORS_TYPE));
     return result;
 }