FieldType describes the properties of a field.
Inheritance: IndexableFieldType
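The pattern that recurs throughout the examples below is: copy one of the predefined types, customize its properties, optionally freeze it, and pass it to a Field. A minimal sketch of that idiom (assuming the Lucene.Net 4.x API these examples target):

    // Start from a predefined type and customize the copy.
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.StoreTermVectors = true;
    ft.StoreTermVectorPositions = true;
    ft.StoreTermVectorOffsets = true;
    ft.Freeze(); // optional: prevents further changes once the type is shared

    Document doc = new Document();
    doc.Add(new Field("body", "some analyzed text", ft));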
Example #1
 /// <summary>
 /// Constructs a <see cref="NumericConfig"/> object.
 /// </summary>
 /// <param name="precisionStep">the precision used to index the numeric values</param>
 /// <param name="format">the <see cref="NumberFormat"/> used to parse a <see cref="string"/> to an <see cref="object"/> representing a .NET numeric type.</param>
 /// <param name="type">the numeric type used to index the numeric values</param>
 /// <seealso cref="NumericConfig.PrecisionStep"/>
 /// <seealso cref="NumericConfig.NumberFormat"/>
 /// <seealso cref="Type"/>
 public NumericConfig(int precisionStep, NumberFormat format,
     FieldType.NumericType type)
 {
     PrecisionStep = precisionStep;
     NumberFormat = format;
     Type = type;
 }
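A hedged usage sketch for this constructor; GetNumberFormat() is a hypothetical helper standing in for whatever NumberFormat the query parser configuration actually uses, and the precision step of 4 is illustrative:

    // Illustrative only: GetNumberFormat() is a hypothetical helper.
    NumberFormat format = GetNumberFormat();
    NumericConfig config = new NumericConfig(4, format, FieldType.NumericType.LONG);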
Example #2
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            Document doc = new Document();
            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.OmitNorms = true;
            Field field = NewField("field", "", customType);
            doc.Add(field);

            NumberFormatInfo df = new NumberFormatInfo();
            df.NumberDecimalDigits = 0;

            //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
            for (int i = 0; i < 1000; i++)
            {
                field.StringValue = i.ToString(df);
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
        public virtual void TestBasic()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            Field f = NewField("foo", "this is a test test", ft);
            doc.Add(f);
            for (int i = 0; i < 100; i++)
            {
                w.AddDocument(doc);
            }

            IndexReader reader = w.Reader;
            w.Dispose();

            Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test")));

            DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);
            while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.AreEqual(2, de.Freq());
            }

            reader.Dispose();
            dir.Dispose();
        }
Example #4
        public override void SetUp()
        {
            base.SetUp();
            _dir = NewDirectory();
            _indexWriter = new RandomIndexWriter(Random(), _dir, new MockAnalyzer(Random()), Similarity, TimeZone);

            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.StoreTermVectors = true;
            ft.StoreTermVectorOffsets = true;
            ft.StoreTermVectorPositions = true;

            Analyzer analyzer = new MockAnalyzer(Random());

            Document doc;
            for (int i = 0; i < 100; i++)
            {
                doc = new Document();
                doc.Add(new Field(_idFieldName, Random().toString(), ft));
                doc.Add(new Field(_textFieldName, new StringBuilder(Random().toString()).append(Random().toString()).append(
                    Random().toString()).toString(), ft));
                doc.Add(new Field(_classFieldName, Random().toString(), ft));
                _indexWriter.AddDocument(doc, analyzer);
            }

            _indexWriter.Commit();

            _originalIndex = SlowCompositeReaderWrapper.Wrap(_indexWriter.Reader);
        }
        public virtual void RunTest(Random random, Directory directory)
        {
            IndexWriter writer = new IndexWriter(directory, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, ANALYZER).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2)).SetMergePolicy(NewLogMergePolicy()));

            for (int iter = 0; iter < NUM_ITER; iter++)
            {
                int iterFinal = iter;

                ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 1000;

                FieldType customType = new FieldType(StringField.TYPE_STORED);
                customType.OmitNorms = true;

                for (int i = 0; i < 200; i++)
                {
                    Document d = new Document();
                    d.Add(NewField("id", Convert.ToString(i), customType));
                    d.Add(NewField("contents", English.IntToEnglish(i), customType));
                    writer.AddDocument(d);
                }

                ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 4;

                ThreadClass[] threads = new ThreadClass[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    int iFinal = i;
                    IndexWriter writerFinal = writer;
                    threads[i] = new ThreadAnonymousInnerClassHelper(this, iterFinal, customType, iFinal, writerFinal);
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Join();
                }

                Assert.IsTrue(!Failed);

                int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)));

                Assert.AreEqual(expectedDocCount, writer.NumDocs(), "index=" + writer.SegString() + " numDocs=" + writer.NumDocs() + " maxDoc=" + writer.MaxDoc + " config=" + writer.Config);
                Assert.AreEqual(expectedDocCount, writer.MaxDoc, "index=" + writer.SegString() + " numDocs=" + writer.NumDocs() + " maxDoc=" + writer.MaxDoc + " config=" + writer.Config);

                writer.Dispose();
                writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, ANALYZER).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(2));

                DirectoryReader reader = DirectoryReader.Open(directory);
                Assert.AreEqual(1, reader.Leaves.Count, "reader=" + reader);
                Assert.AreEqual(expectedDocCount, reader.NumDocs);
                reader.Dispose();
            }
            writer.Dispose();
        }
Example #6
        public virtual void TestDoubleOffsetCounting()
        {
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorOffsets = true;
            Field f = NewField("field", "abcd", customType);
            doc.Add(f);
            doc.Add(f);
            Field f2 = NewField("field", "", customType);
            doc.Add(f2);
            doc.Add(f);
            w.AddDocument(doc);
            w.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            Terms vector = r.GetTermVectors(0).Terms("field");
            Assert.IsNotNull(vector);
            TermsEnum termsEnum = vector.Iterator(null);
            Assert.IsNotNull(termsEnum.Next());
            Assert.AreEqual("", termsEnum.Term().Utf8ToString());

            // Token "" occurred once
            Assert.AreEqual(1, termsEnum.TotalTermFreq());

            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(8, dpEnum.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            // Token "abcd" occurred three times
            Assert.AreEqual(new BytesRef("abcd"), termsEnum.Next());
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.AreEqual(3, termsEnum.TotalTermFreq());

            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset());
            Assert.AreEqual(4, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(4, dpEnum.StartOffset());
            Assert.AreEqual(8, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(12, dpEnum.EndOffset());

            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            Assert.IsNull(termsEnum.Next());
            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestBasic()
        {
            Directory dir = NewDirectory();

            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);
            Document doc = new Document();

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random().NextBoolean())
            {
                ft.StoreTermVectors = true;
                ft.StoreTermVectorPositions = Random().NextBoolean();
                ft.StoreTermVectorOffsets = Random().NextBoolean();
            }
            Token[] tokens = new Token[] { MakeToken("a", 1, 0, 6), MakeToken("b", 1, 8, 9), MakeToken("a", 1, 9, 17), MakeToken("c", 1, 19, 50) };
            doc.Add(new Field("content", new CannedTokenStream(tokens), ft));

            w.AddDocument(doc);
            IndexReader r = w.Reader;
            w.Dispose();

            DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("a"));
            Assert.IsNotNull(dp);
            Assert.AreEqual(0, dp.NextDoc());
            Assert.AreEqual(2, dp.Freq());
            Assert.AreEqual(0, dp.NextPosition());
            Assert.AreEqual(0, dp.StartOffset());
            Assert.AreEqual(6, dp.EndOffset());
            Assert.AreEqual(2, dp.NextPosition());
            Assert.AreEqual(9, dp.StartOffset());
            Assert.AreEqual(17, dp.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());

            dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("b"));
            Assert.IsNotNull(dp);
            Assert.AreEqual(0, dp.NextDoc());
            Assert.AreEqual(1, dp.Freq());
            Assert.AreEqual(1, dp.NextPosition());
            Assert.AreEqual(8, dp.StartOffset());
            Assert.AreEqual(9, dp.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());

            dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("c"));
            Assert.IsNotNull(dp);
            Assert.AreEqual(0, dp.NextDoc());
            Assert.AreEqual(1, dp.Freq());
            Assert.AreEqual(3, dp.NextPosition());
            Assert.AreEqual(19, dp.StartOffset());
            Assert.AreEqual(50, dp.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestForceMergeDeletes()
        {
            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));
            Document document = new Document();

            FieldType customType = new FieldType();
            customType.Stored = true;

            FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);
            customType1.Tokenized = false;
            customType1.StoreTermVectors = true;
            customType1.StoreTermVectorPositions = true;
            customType1.StoreTermVectorOffsets = true;

            Field idField = NewStringField("id", "", Field.Store.NO);
            document.Add(idField);
            Field storedField = NewField("stored", "stored", customType);
            document.Add(storedField);
            Field termVectorField = NewField("termVector", "termVector", customType1);
            document.Add(termVectorField);
            for (int i = 0; i < 10; i++)
            {
                idField.StringValue = "" + i;
                writer.AddDocument(document);
            }
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);
            Assert.AreEqual(10, ir.MaxDoc);
            Assert.AreEqual(10, ir.NumDocs);
            ir.Dispose();

            IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            writer = new IndexWriter(dir, dontMergeConfig);
            writer.DeleteDocuments(new Term("id", "0"));
            writer.DeleteDocuments(new Term("id", "7"));
            writer.Dispose();

            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(8, ir.NumDocs);
            ir.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            Assert.AreEqual(8, writer.NumDocs());
            Assert.AreEqual(10, writer.MaxDoc);
            writer.ForceMergeDeletes();
            Assert.AreEqual(8, writer.NumDocs());
            writer.Dispose();
            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(8, ir.MaxDoc);
            Assert.AreEqual(8, ir.NumDocs);
            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestPostings()
        {
            Directory dir = NewFSDirectory(CreateTempDir("postings"));
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetCodec(Codec.ForName("Lucene40"));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            Document doc = new Document();

            // id field
            FieldType idType = new FieldType(StringField.TYPE_NOT_STORED);
            idType.StoreTermVectors = true;
            Field idField = new Field("id", "", idType);
            doc.Add(idField);

            // title field: short text field
            FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED);
            titleType.StoreTermVectors = true;
            titleType.StoreTermVectorPositions = true;
            titleType.StoreTermVectorOffsets = true;
            titleType.IndexOptions = IndexOptions();
            Field titleField = new Field("title", "", titleType);
            doc.Add(titleField);

            // body field: long text field
            FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED);
            bodyType.StoreTermVectors = true;
            bodyType.StoreTermVectorPositions = true;
            bodyType.StoreTermVectorOffsets = true;
            bodyType.IndexOptions = IndexOptions();
            Field bodyField = new Field("body", "", bodyType);
            doc.Add(bodyField);

            int numDocs = AtLeast(1000);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                titleField.StringValue = FieldValue(1);
                bodyField.StringValue = FieldValue(3);
                iw.AddDocument(doc);
                if (Random().Next(20) == 0)
                {
                    iw.DeleteDocuments(new Term("id", Convert.ToString(i)));
                }
            }
            if (Random().NextBoolean())
            {
                // delete 1-100% of docs
                iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)]));
            }
            iw.Dispose();
            dir.Dispose(); // checkindex
        }
Example #10
        public virtual void TestMixedMerge()
        {
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy(2)));
            Document d = new Document();

            // this field will have norms
            Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO);
            d.Add(f1);

            // this field will NOT have norms
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.OmitNorms = true;
            Field f2 = NewField("f2", "this field has NO norms in all docs", customType);
            d.Add(f2);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // now add another document that has norms for field f2 but not for f1,
            // and verify that the SegmentMerger keeps things consistent
            d = new Document();

            // Reversed: f1 now omits norms, f2 has them
            d.Add(NewField("f1", "this field has norms", customType));

            d.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

            reader.Dispose();
            ram.Dispose();
        }
Example #11
 /// <summary>
 /// Create a new mutable FieldType with all of the properties from <code>ref</code>
 /// </summary>
 public FieldType(FieldType @ref)
 {
     this.Indexed_Renamed = @ref.Indexed;
     this.Stored_Renamed = @ref.Stored;
     this.Tokenized_Renamed = @ref.Tokenized;
     this.StoreTermVectors_Renamed = @ref.StoreTermVectors;
     this.StoreTermVectorOffsets_Renamed = @ref.StoreTermVectorOffsets;
     this.StoreTermVectorPositions_Renamed = @ref.StoreTermVectorPositions;
     this.StoreTermVectorPayloads_Renamed = @ref.StoreTermVectorPayloads;
     this.OmitNorms_Renamed = @ref.OmitNorms;
     this._indexOptions = @ref.IndexOptions;
     this.docValueType = @ref.DocValueType;
     this.numericType = @ref.NumericTypeValue;
     // Do not copy frozen!
 }
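The copy constructor is the idiom most of the examples above rely on. Note the "Do not copy frozen!" line: the copy is mutable even when the source type is frozen, so the predefined types can be cloned and adjusted. A short sketch:

    // TextField.TYPE_STORED is frozen; copying it yields a mutable clone.
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.OmitNorms = true;
    Field field = new Field("title", "some title", customType);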
Example #12
 public virtual void TestBogusTermVectors()
 {
     Directory dir = NewDirectory();
     IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
     Document doc = new Document();
     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.StoreTermVectors = true;
     ft.StoreTermVectorOffsets = true;
     Field field = new Field("foo", "", ft);
     field.TokenStream = new CannedTokenStream(new Token("bar", 5, 10), new Token("bar", 1, 4));
     doc.Add(field);
     iw.AddDocument(doc);
     iw.Dispose();
     dir.Dispose(); // checkindex
 }
Example #13
        public virtual void TestBinaryFieldInIndex()
        {
            FieldType ft = new FieldType();
            ft.Stored = true;
            IndexableField binaryFldStored = new StoredField("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(BinaryValStored));
            IndexableField stringFldStored = new Field("stringStored", BinaryValStored, ft);

            Documents.Document doc = new Documents.Document();

            doc.Add(binaryFldStored);

            doc.Add(stringFldStored);

            // test for field count
            Assert.AreEqual(2, doc.Fields.Count);

            // add the doc to a ram index
            Directory dir = NewDirectory();
            Random r = Random();
            RandomIndexWriter writer = new RandomIndexWriter(r, dir);
            writer.AddDocument(doc);

            // open a reader and fetch the document
            IndexReader reader = writer.Reader;
            Documents.Document docFromReader = reader.Document(0);
            Assert.IsTrue(docFromReader != null);

            // fetch the binary stored field and compare its content with the original one
            BytesRef bytes = docFromReader.GetBinaryValue("binaryStored");
            Assert.IsNotNull(bytes);

            string binaryFldStoredTest = Encoding.UTF8.GetString((byte[])(Array)bytes.Bytes).Substring(bytes.Offset, bytes.Length);
            //new string(bytes.Bytes, bytes.Offset, bytes.Length, IOUtils.CHARSET_UTF_8);
            Assert.IsTrue(binaryFldStoredTest.Equals(BinaryValStored));

            // fetch the string field and compare its content with the original one
            string stringFldStoredTest = docFromReader.Get("stringStored");
            Assert.IsTrue(stringFldStoredTest.Equals(BinaryValStored));

            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestAddIndexes()
        {
            Directory dir1 = NewDirectory();
            Directory dir2 = NewDirectory();
            IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document d1 = new Document();
            d1.Add(new TextField("f1", "first field", Field.Store.YES));
            d1.Add(new TextField("f2", "second field", Field.Store.YES));
            writer.AddDocument(d1);

            writer.Dispose();
            writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document d2 = new Document();
            FieldType customType2 = new FieldType(TextField.TYPE_STORED);
            customType2.StoreTermVectors = true;
            d2.Add(new TextField("f2", "second field", Field.Store.YES));
            d2.Add(new Field("f1", "first field", customType2));
            d2.Add(new TextField("f3", "third field", Field.Store.YES));
            d2.Add(new TextField("f4", "fourth field", Field.Store.YES));
            writer.AddDocument(d2);

            writer.Dispose();

            writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
            writer.AddIndexes(dir2);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir1);
            Assert.AreEqual(2, sis.Size());

            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            // make sure the ordering of the "external" segment is preserved
            Assert.AreEqual("f2", fis2.FieldInfo(0).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(1).Name);
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
            Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

            dir1.Dispose();
            dir2.Dispose();
        }
Example #15
        public virtual void TestBinaryField()
        {
            Documents.Document doc = new Documents.Document();

            FieldType ft = new FieldType();
            ft.Stored = true;
            IndexableField stringFld = new Field("string", BinaryVal, ft);
            IndexableField binaryFld = new StoredField("binary", BinaryVal.GetBytes(Encoding.UTF8));
            IndexableField binaryFld2 = new StoredField("binary", BinaryVal2.GetBytes(Encoding.UTF8));

            doc.Add(stringFld);
            doc.Add(binaryFld);

            Assert.AreEqual(2, doc.Fields.Count);

            Assert.IsTrue(binaryFld.BinaryValue != null);
            Assert.IsTrue(binaryFld.FieldType.Stored);
            Assert.IsFalse(binaryFld.FieldType.Indexed);

            string binaryTest = doc.GetBinaryValue("binary").Utf8ToString();
            Assert.IsTrue(binaryTest.Equals(BinaryVal));

            string stringTest = doc.Get("string");
            Assert.IsTrue(binaryTest.Equals(stringTest));

            doc.Add(binaryFld2);

            Assert.AreEqual(3, doc.Fields.Count);

            BytesRef[] binaryTests = doc.GetBinaryValues("binary");

            Assert.AreEqual(2, binaryTests.Length);

            binaryTest = binaryTests[0].Utf8ToString();
            string binaryTest2 = binaryTests[1].Utf8ToString();

            Assert.IsFalse(binaryTest.Equals(binaryTest2));

            Assert.IsTrue(binaryTest.Equals(BinaryVal));
            Assert.IsTrue(binaryTest2.Equals(BinaryVal2));

            doc.RemoveField("string");
            Assert.AreEqual(2, doc.Fields.Count);

            doc.RemoveFields("binary");
            Assert.AreEqual(0, doc.Fields.Count);
        }
Example #16
        public virtual void Test()
        {
            IndexWriter w = new IndexWriter(Dir, NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            try
            {
                FieldType ft = new FieldType();
                ft.Indexed = true;
                ft.Stored = Random().NextBoolean();
                ft.Freeze();

                Document doc = new Document();
                if (Random().NextBoolean())
                {
                    // totally ok short field value
                    doc.Add(new Field(TestUtil.RandomSimpleString(Random(), 1, 10), TestUtil.RandomSimpleString(Random(), 1, 10), ft));
                }
                // problematic field
                string name = TestUtil.RandomSimpleString(Random(), 1, 50);
                string value = TestUtil.RandomSimpleString(Random(), MinTestTermLength, MaxTestTermLegnth);
                Field f = new Field(name, value, ft);
                if (Random().NextBoolean())
                {
                    // totally ok short field value
                    doc.Add(new Field(TestUtil.RandomSimpleString(Random(), 1, 10), TestUtil.RandomSimpleString(Random(), 1, 10), ft));
                }
                doc.Add(f);

                try
                {
                    w.AddDocument(doc);
                    Assert.Fail("Did not get an exception from adding a monster term");
                }
                catch (System.ArgumentException e)
                {
                    string maxLengthMsg = Convert.ToString(IndexWriter.MAX_TERM_LENGTH);
                    string msg = e.Message;
                    Assert.IsTrue(msg.Contains("immense term"), "IllegalArgumentException didn't mention 'immense term': " + msg);
                    Assert.IsTrue(msg.Contains(maxLengthMsg), "IllegalArgumentException didn't mention max length (" + maxLengthMsg + "): " + msg);
                    Assert.IsTrue(msg.Contains(name), "IllegalArgumentException didn't mention field name (" + name + "): " + msg);
                }
            }
            finally
            {
                w.Dispose();
            }
        }
        public virtual void TestPositionsSimple()
        {
            Directory directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            for (int i = 0; i < 39; i++)
            {
                Document doc = new Document();
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.OmitNorms = true;
                doc.Add(NewField(FieldName, "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10", customType));
                writer.AddDocument(doc);
            }
            IndexReader reader = writer.Reader;
            writer.Dispose();

            int num = AtLeast(13);
            for (int i = 0; i < num; i++)
            {
                BytesRef bytes = new BytesRef("1");
                IndexReaderContext topReaderContext = reader.Context;
                foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves)
                {
                    DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null);
                    Assert.IsNotNull(docsAndPosEnum);
                    if (atomicReaderContext.Reader.MaxDoc == 0)
                    {
                        continue;
                    }
                    int advance = docsAndPosEnum.Advance(Random().Next(atomicReaderContext.Reader.MaxDoc));
                    do
                    {
                        string msg = "Advanced to: " + advance + " current doc: " + docsAndPosEnum.DocID(); // TODO: + " usePayloads: " + usePayload;
                        Assert.AreEqual(4, docsAndPosEnum.Freq(), msg);
                        Assert.AreEqual(0, docsAndPosEnum.NextPosition(), msg);
                        Assert.AreEqual(4, docsAndPosEnum.Freq(), msg);
                        Assert.AreEqual(10, docsAndPosEnum.NextPosition(), msg);
                        Assert.AreEqual(4, docsAndPosEnum.Freq(), msg);
                        Assert.AreEqual(20, docsAndPosEnum.NextPosition(), msg);
                        Assert.AreEqual(4, docsAndPosEnum.Freq(), msg);
                        Assert.AreEqual(30, docsAndPosEnum.NextPosition(), msg);
                    } while (docsAndPosEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                }
            }
            reader.Dispose();
            directory.Dispose();
        }
 private Document NewDocument()
 {
     Document doc = new Document();
     foreach (FieldInfo.IndexOptions option in Enum.GetValues(typeof(FieldInfo.IndexOptions)))
     {
         var ft = new FieldType(TextField.TYPE_NOT_STORED)
         {
             StoreTermVectors = true,
             StoreTermVectorOffsets = true,
             StoreTermVectorPositions = true,
             StoreTermVectorPayloads = true,
             IndexOptions = option
         };
         // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now)
         doc.Add(new Field(option.ToString(), "", ft));
     }
     return doc;
 }
        public virtual void TestMixupDocs()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            Document doc = new Document();
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorPayloads = true;
            customType.StoreTermVectorOffsets = Random().NextBoolean();
            Field field = new Field("field", "", customType);
            TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
            field.TokenStream = ts;
            doc.Add(field);
            writer.AddDocument(doc);

            Token withPayload = new Token("withPayload", 0, 11);
            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
            field.TokenStream = ts;
            writer.AddDocument(doc);

            ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
            field.TokenStream = ts;
            writer.AddDocument(doc);

            DirectoryReader reader = writer.Reader;
            Terms terms = reader.GetTermVector(1, "field");
            Debug.Assert(terms != null);
            TermsEnum termsEnum = terms.Iterator(null);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(0, de.NextPosition());
            Assert.AreEqual(new BytesRef("test"), de.Payload);
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
Example #20
        public virtual void TestSimpleCase()
        {
            string[] keywords = new string[] { "1", "2" };
            string[] unindexed = new string[] { "Netherlands", "Italy" };
            string[] unstored = new string[] { "Amsterdam has lots of bridges", "Venice has lots of canals" };
            string[] text = new string[] { "Amsterdam", "Venice" };

            Directory dir = NewDirectory();
            IndexWriter modifier = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)).SetMaxBufferedDeleteTerms(1));

            FieldType custom1 = new FieldType();
            custom1.Stored = true;
            for (int i = 0; i < keywords.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", keywords[i], Field.Store.YES));
                doc.Add(NewField("country", unindexed[i], custom1));
                doc.Add(NewTextField("contents", unstored[i], Field.Store.NO));
                doc.Add(NewTextField("city", text[i], Field.Store.YES));
                modifier.AddDocument(doc);
            }
            modifier.ForceMerge(1);
            modifier.Commit();

            Term term = new Term("city", "Amsterdam");
            int hitCount = GetHitCount(dir, term);
            Assert.AreEqual(1, hitCount);
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now delete by term=" + term);
            }
            modifier.DeleteDocuments(term);
            modifier.Commit();

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now getHitCount");
            }
            hitCount = GetHitCount(dir, term);
            Assert.AreEqual(0, hitCount);

            modifier.Dispose();
            dir.Dispose();
        }
Example #21
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostings"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                            .SetRAMBufferSizeMB(256.0)
                            .SetMergeScheduler(scheduler)
                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);

            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;
            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.OmitNorms = true;
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            Field field = new Field("field", new MyTokenStream(), ft);
            doc.Add(field);

            int numDocs = (int.MaxValue / 26) + 1;
            for (int i = 0; i < numDocs; i++)
            {
                w.AddDocument(doc);
                if (VERBOSE && i % 100000 == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            dir.Dispose();
        }
        public virtual void TestDeletePartiallyWrittenFilesIfAbort()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            iwConf.SetCodec(CompressingCodec.RandomInstance(Random()));
            // disable CFS because this test checks file names
            iwConf.SetMergePolicy(NewLogMergePolicy(false));
            iwConf.SetUseCompoundFile(false);
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            Document validDoc = new Document();
            validDoc.Add(new IntField("id", 0, Field.Store.YES));
            iw.AddDocument(validDoc);
            iw.Commit();

            // make sure that writeField will fail, triggering an abort
            Document invalidDoc = new Document();
            FieldType fieldType = new FieldType();
            fieldType.Stored = true;
            invalidDoc.Add(new FieldAnonymousInnerClassHelper(this, fieldType));

            try
            {
                iw.AddDocument(invalidDoc);
                iw.Commit();
            }
            finally
            {
                int counter = 0;
                foreach (string fileName in dir.ListAll())
                {
                    if (fileName.EndsWith(".fdt") || fileName.EndsWith(".fdx"))
                    {
                        counter++;
                    }
                }
                // Exactly one .fdt and one .fdx file must have been found
                Assert.AreEqual(2, counter);
                iw.Dispose();
                dir.Dispose();
            }
        }
 public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     IndexWriter writer = new IndexWriter(Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
     //writer.setNoCFSRatio(0.0);
     //writer.infoStream = System.out;
     FieldType customType = new FieldType(TextField.TYPE_STORED);
     customType.Tokenized = false;
     customType.StoreTermVectors = true;
     for (int i = 0; i < NumDocs; i++)
     {
         Documents.Document doc = new Documents.Document();
         Field fld = NewField("field", English.IntToEnglish(i), customType);
         doc.Add(fld);
         writer.AddDocument(doc);
     }
     writer.Dispose();
 }
Example #24
 public void BeforeClass()
 {
     Directory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true)).SetMergePolicy(NewLogMergePolicy()));
     //writer.setNoCFSRatio(1.0);
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++)
     {
         Document doc = new Document();
         FieldType ft = new FieldType(TextField.TYPE_STORED);
         int mod3 = i % 3;
         int mod2 = i % 2;
         if (mod2 == 0 && mod3 == 0)
         {
             ft.StoreTermVectors = true;
             ft.StoreTermVectorOffsets = true;
             ft.StoreTermVectorPositions = true;
         }
         else if (mod2 == 0)
         {
             ft.StoreTermVectors = true;
             ft.StoreTermVectorPositions = true;
         }
         else if (mod3 == 0)
         {
             ft.StoreTermVectors = true;
             ft.StoreTermVectorOffsets = true;
         }
         else
         {
             ft.StoreTermVectors = true;
         }
         doc.Add(new Field("field", English.IntToEnglish(i), ft));
         //test no term vectors too
         doc.Add(new TextField("noTV", English.IntToEnglish(i), Field.Store.YES));
         writer.AddDocument(doc);
     }
     Reader = writer.Reader;
     writer.Dispose();
 }
Example #25
        public virtual void TestChangeGaps()
        {
            // LUCENE-5324: check that it is possible to change the wrapper's gaps
            int positionGap = Random().Next(1000);
            int offsetGap = Random().Next(1000);
            Analyzer @delegate = new MockAnalyzer(Random());
            Analyzer a = new AnalyzerWrapperAnonymousInnerClassHelper2(this, @delegate.Strategy, positionGap, offsetGap, @delegate);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), NewDirectory());
            Document doc = new Document();
            FieldType ft = new FieldType();
            ft.Indexed = true;
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            ft.Tokenized = true;
            ft.StoreTermVectors = true;
            ft.StoreTermVectorPositions = true;
            ft.StoreTermVectorOffsets = true;
            doc.Add(new Field("f", "a", ft));
            doc.Add(new Field("f", "a", ft));
            writer.AddDocument(doc, a);
            AtomicReader reader = GetOnlySegmentReader(writer.Reader);
            Fields fields = reader.GetTermVectors(0);
            Terms terms = fields.Terms("f");
            TermsEnum te = terms.Iterator(null);
            Assert.AreEqual(new BytesRef("a"), te.Next());
            DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null);
            Assert.AreEqual(0, dpe.NextDoc());
            Assert.AreEqual(2, dpe.Freq());
            Assert.AreEqual(0, dpe.NextPosition());
            Assert.AreEqual(0, dpe.StartOffset());
            int endOffset = dpe.EndOffset();
            Assert.AreEqual(1 + positionGap, dpe.NextPosition());
            Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset());
            Assert.AreEqual(null, te.Next());
            reader.Dispose();
            writer.Dispose();
            writer.w.Directory.Dispose();
        }
Example #26
        public virtual void TestBinary()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            BytesRef bytes = new BytesRef(2);
            BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);

            for (int i = 0; i < 256; i++)
            {
                bytes.Bytes[0] = (byte)i;
                bytes.Bytes[1] = unchecked((byte)(255 - i));
                bytes.Length = 2;
                Document doc = new Document();
                FieldType customType = new FieldType();
                customType.Stored = true;
                doc.Add(new Field("id", "" + i, customType));
                doc.Add(new TextField("bytes", tokenStream));
                iw.AddDocument(doc);
            }

            IndexReader ir = iw.Reader;
            iw.Dispose();

            IndexSearcher @is = NewSearcher(ir);

            for (int i = 0; i < 256; i++)
            {
                bytes.Bytes[0] = (byte)i;
                bytes.Bytes[1] = unchecked((byte)(255 - i));
                bytes.Length = 2;
                TopDocs docs = @is.Search(new TermQuery(new Term("bytes", bytes)), 5);
                Assert.AreEqual(1, docs.TotalHits);
                Assert.AreEqual("" + i, @is.Doc(docs.ScoreDocs[0].Doc).Get("id"));
            }

            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestNoOrds()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.StoreTermVectors = true;
            doc.Add(new Field("foo", "this is a test", ft));
            iw.AddDocument(doc);
            AtomicReader ir = GetOnlySegmentReader(iw.Reader);
            Terms terms = ir.GetTermVector(0, "foo");
            Assert.IsNotNull(terms);
            TermsEnum termsEnum = terms.Iterator(null);
            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("this")));
            try
            {
                termsEnum.Ord();
                Assert.Fail();
            }
            catch (System.NotSupportedException expected)
            {
                // expected exception
            }

            try
            {
                termsEnum.SeekExact(0);
                Assert.Fail();
            }
            catch (System.NotSupportedException expected)
            {
                // expected exception
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public void BeforeClass()
        {
            string[] data = new string[] { "A 1 2 3 4 5 6", "Z       4 5 6", null, "B   2   4 5 6", "Y     3   5 6", null, "C     3     6", "X       4 5 6" };

            Small = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Small, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)).SetMergePolicy(NewLogMergePolicy()));

            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.Tokenized = false;
            for (int i = 0; i < data.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewField("id", Convert.ToString(i), customType)); // Field.Keyword("id",String.valueOf(i)));
                doc.Add(NewField("all", "all", customType)); // Field.Keyword("all","all"));
                if (null != data[i])
                {
                    doc.Add(NewTextField("data", data[i], Field.Store.YES)); // Field.Text("data",data[i]));
                }
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            writer.Dispose();
        }
Example #29
 /// <summary>
 /// Expert: allows you to customize the <see cref="FieldType"/>. </summary>
 /// <param name="name"> field name </param>
 /// <param name="value"> 64-bit long value </param>
 /// <param name="type"> customized field type: must have a <see cref="FieldType.NumericTypeValue"/>
 ///        of <see cref="FieldType.NumericType.LONG"/>. </param>
 /// <exception cref="System.ArgumentException"> if the field name or type is null, or
 ///        if the field type does not have a LONG <see cref="FieldType.NumericTypeValue"/> </exception>
 public LongField(string name, long value, FieldType type)
     : base(name, type)
 {
     if (type.NumericTypeValue != Documents.FieldType.NumericType.LONG)
     {
         throw new System.ArgumentException("type.numericType() must be LONG but got " + type.NumericTypeValue);
     }
     FieldsData = Convert.ToInt64(value);
 }
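A hedged usage sketch; it assumes LongField.TYPE_NOT_STORED is one of the predefined numeric field types, which already carries NumericType.LONG and therefore passes the check above:

    // Copy the predefined type so the NumericType check in the constructor passes.
    FieldType longType = new FieldType(LongField.TYPE_NOT_STORED);
    longType.Stored = true; // illustrative customization
    Field timestamp = new LongField("timestamp", 1234567890L, longType);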
Example #30
        public static Field NewField(Random random, string name, string value, FieldType type)
        {
            name = new string(name.ToCharArray());
            if (Usually(random) || !type.Indexed)
            {
                // most of the time, don't modify the params
                return new Field(name, value, type);
            }

            // TODO: once all core & test codecs can index
            // offsets, sometimes randomly turn on offsets if we are
            // already indexing positions...

            FieldType newType = new FieldType(type);
            if (!newType.Stored && random.NextBoolean())
            {
                newType.Stored = true; // randomly store it
            }

            if (!newType.StoreTermVectors && random.NextBoolean())
            {
                newType.StoreTermVectors = true;
                if (!newType.StoreTermVectorOffsets)
                {
                    newType.StoreTermVectorOffsets = random.NextBoolean();
                }
                if (!newType.StoreTermVectorPositions)
                {
                    newType.StoreTermVectorPositions = random.NextBoolean();

                    if (newType.StoreTermVectorPositions && !newType.StoreTermVectorPayloads && !OLD_FORMAT_IMPERSONATION_IS_ACTIVE)
                    {
                        newType.StoreTermVectorPayloads = random.NextBoolean();
                    }
                }
            }

            // TODO: we need to do this, but smarter, ie, most of
            // the time we set the same value for a given field but
            // sometimes (rarely) we change it up:
            /*
                if (newType.OmitsNorms()) {
                  newType.setOmitNorms(random.NextBoolean());
                }
            */

            return new Field(name, value, newType);
        }
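Usage mirrors the plain Field constructor, with a Random up front so the test framework can randomly mutate the type; a one-line sketch (assuming a test context where Random() is available):

    Field f = NewField(Random(), "body", "some analyzed text", TextField.TYPE_STORED);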
Example #31
        /// <summary>
        /// Add documents.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="facetWriter">The facet index writer.</param>
        /// <param name="directoryInfo">The directory information where all the files that are to be added are located.</param>
        /// <param name="files">The list of files that are to be added.</param>
        /// <param name="documents">The supported documents search filter, used to indicate what files are to be added.</param>
        /// <param name="facetField">The facet field information.</param>
        /// <param name="config">The facet configuration information.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter,
                                 DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents, FacetField facetField, FacetsConfig config)
        {
            FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = false,
                Stored       = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };
            FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = documents.TokenizeContent,
                Stored       = documents.StoreContent,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // For each file.
            for (int i = 0; i < files.Length; i++)
            {
                // If the file exists
                if (File.Exists(files[i]))
                {
                    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();

                    try
                    {
                        FileInfo fileInfo = new FileInfo(files[i]);
                        string   file     = files[i].Replace(directoryInfo.Root.FullName, "").ToLower();

                        Lucene.Net.Documents.Field path     = new Field("path", file.ToLower().Replace("\\", "/"), pathFieldType);
                        Lucene.Net.Documents.Field modified = new Field("modified", fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString(), pathFieldType);

                        // Add the fields.
                        document.Add(facetField);
                        document.Add(path);
                        document.Add(modified);

                        // Create the stream reader.
                        OpenDocument(files[i]);
                        string content = Nequeo.Xml.Document.ExtractContent(_xDocument);

                        // If content exists.
                        if (!String.IsNullOrEmpty(content))
                        {
                            // Split the white spaces from the text.
                            string[] words = content.Words();

                            // If words exist.
                            if (words != null && words.Length > 0)
                            {
                                // Add the query for each word.
                                for (int j = 0; j < words.Length; j++)
                                {
                                    // Format the word.
                                    string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                    // If a word exists.
                                    if (!String.IsNullOrEmpty(word))
                                    {
                                        Lucene.Net.Documents.Field contentField = new Field("facetcontent", word, contentFieldType);
                                        document.Add(contentField);
                                    }
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(config.Build(facetWriter, document));
                        _document.Close();

                        // Commit after a set number of documents.
                        documents.TotalDocumentSize += fileInfo.Length;
                        if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
                        {
                            // Commit the index.
                            writer.Commit();
                            facetWriter.Commit();
                            documents.TotalDocumentSize = 0;
                        }
                    }
                    catch (Exception)
                    {
                        throw;
                    }
                    finally
                    {
                        CloseDocument();
                    }
                }
            }
        }
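A hedged call sketch; indexer, writer, facetWriter, and the SupportedDocumentExtension filter are assumed to come from the surrounding Nequeo indexing setup, and the paths are illustrative:

    // Hypothetical wiring; FacetsConfig and FacetField are the Lucene.Net facet types.
    FacetsConfig config = new FacetsConfig();
    indexer.AddDocuments(writer, facetWriter,
        new DirectoryInfo(@"C:\docs"),
        System.IO.Directory.GetFiles(@"C:\docs", "*.xml"),
        documents, // SupportedDocumentExtension filter
        new FacetField("doctype", "xml"),
        config);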
Example #32
        /// <summary>
        /// Add text to the existing index.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="facetWriter">The facet index writer.</param>
        /// <param name="addTextData">The text data to add.</param>
        /// <param name="config">The facet configuration information.</param>
        public void AddText(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, Dictionary <FacetField, AddTextData[]> addTextData, FacetsConfig config)
        {
            long totalTextLength           = 0;
            long maxTextLengthBeforeCommit = 30000000L;

            // For each text facet.
            foreach (KeyValuePair <FacetField, AddTextData[]> item in addTextData)
            {
                // If text exists.
                if (item.Value != null && item.Value.Length > 0)
                {
                    // Field type for the text name.
                    FieldType nameFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = true,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // Field type for the complete text (stored flag set per entry below).
                    FieldType completeFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = true,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // Field type for the individual words (indexed, not stored).
                    FieldType textFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = false,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // For each text.
                    foreach (AddTextData data in item.Value)
                    {
                        // Store the complete text only when the item requests it.
                        completeFieldType.Stored = data.StoreText;

                        // Create the document.
                        Lucene.Net.Documents.Document document     = new Lucene.Net.Documents.Document();
                        Lucene.Net.Documents.Field    textName     = new Field("textname", data.Name.ToLower(), nameFieldType);
                        Lucene.Net.Documents.Field    textComplete = new Field("textcomplete", data.Text.ToLower(), completeFieldType);

                        document.Add(item.Key);
                        document.Add(textName);
                        document.Add(textComplete);

                        // Split the text into individual words on whitespace.
                        string[] words = data.Text.Words();

                        // If words exist.
                        if (words != null && words.Length > 0)
                        {
                            // Add a field for each word.
                            for (int j = 0; j < words.Length; j++)
                            {
                                // Format the word.
                                string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                // If a word exists.
                                if (!String.IsNullOrEmpty(word))
                                {
                                    Lucene.Net.Documents.Field textData = new Field("facetcontent", word, textFieldType);
                                    document.Add(textData);
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(config.Build(facetWriter, document));

                        // Commit after a set number of documents.
                        totalTextLength += (long)data.Text.Length;
                        if (totalTextLength > maxTextLengthBeforeCommit)
                        {
                            // Commit the index.
                            writer.Commit();
                            facetWriter.Commit();
                            totalTextLength = 0;
                        }
                    }
                }
            }

            // Commit the index.
            writer.Commit();
            facetWriter.Commit();
        }
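
A minimal calling sketch for this overload. The indexer instance, the open IndexWriter and DirectoryTaxonomyWriter pair, and the sample facet and text values are assumptions for illustration; only the AddText signature and the AddTextData members (Name, Text, StoreText) come from the example above.

// Hypothetical usage; writer, facetWriter, indexer, and the sample values are placeholders.
FacetsConfig config = new FacetsConfig();
Dictionary<FacetField, AddTextData[]> addTextData = new Dictionary<FacetField, AddTextData[]>()
{
    {
        new FacetField("category", "notes"),
        new AddTextData[]
        {
            new AddTextData() { Name = "note1", Text = "Some sample text to index.", StoreText = true }
        }
    }
};

// AddText commits both writers when it finishes; dispose them afterwards.
indexer.AddText(writer, facetWriter, addTextData, config);
writer.Dispose();
facetWriter.Dispose();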
Example #33
0
        /// <summary>
        /// Add text to the existing index.
        /// </summary>
        /// <param name="directoryIndexInfo">The directory infomation where the index files are located.</param>
        /// <param name="addTextData">The text data to add.</param>
        public void AddText(DirectoryInfo directoryIndexInfo, AddTextData[] addTextData)
        {
            Lucene.Net.Index.IndexWriter writer    = null;
            Lucene.Net.Store.Directory   directory = null;
            long totalTextLength           = 0;
            long maxTextLengthBeforeCommit = 30000000L;

            try
            {
                // If text exists.
                if (addTextData != null && addTextData.Length > 0)
                {
                    // Create the analyzer.
                    SimpleAnalyzer   simpleAnalyzer   = new Analyzer.SimpleAnalyzer();
                    StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

                    // Create the index writer.
                    directory = FSDirectory.Open(directoryIndexInfo);
                    IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
                    indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);

                    // Open the existing index in append mode.
                    writer = new IndexWriter(directory, indexConfig);

                    // Field type for the text name: indexed, not tokenized, stored.
                    FieldType nameFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = true,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // Field type for the complete text; whether it is stored is set per item below.
                    FieldType completeFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = true,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // Field type for the individual words: indexed but not stored.
                    FieldType textFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = false,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // For each text.
                    foreach (AddTextData data in addTextData)
                    {
                        // Store the complete text only when the item requests it.
                        completeFieldType.Stored = data.StoreText;

                        // Create the document.
                        Lucene.Net.Documents.Document document     = new Lucene.Net.Documents.Document();
                        Lucene.Net.Documents.Field    textName     = new Field("textname", data.Name.ToLower(), nameFieldType);
                        Lucene.Net.Documents.Field    textComplete = new Field("textcomplete", data.Text.ToLower(), completeFieldType);
                        document.Add(textName);
                        document.Add(textComplete);

                        // Split the text into individual words on whitespace.
                        string[] words = data.Text.Words();

                        // If words exist.
                        if (words != null && words.Length > 0)
                        {
                            // Add a field for each word.
                            for (int j = 0; j < words.Length; j++)
                            {
                                // Format the word.
                                string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                // If a word exists.
                                if (!String.IsNullOrEmpty(word))
                                {
                                    Lucene.Net.Documents.Field textData = new Field("text", word, textFieldType);
                                    document.Add(textData);
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(document.Fields);

                        // Commit after a set number of documents.
                        totalTextLength += (long)data.Text.Length;
                        if (totalTextLength > maxTextLengthBeforeCommit)
                        {
                            // Commit the index.
                            writer.Commit();
                            totalTextLength = 0;
                        }
                    }

                    // Commit the index.
                    writer.Commit();
                }
            }
            finally
            {
                if (writer != null)
                {
                    writer.Dispose();
                }

                if (directory != null)
                {
                    directory.Dispose();
                }
            }
        }
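
A short usage sketch for the directory-based overload; the index path, the sample data, and the indexer instance are placeholders rather than part of the original example. Because this overload opens and disposes its own writer, the caller has no cleanup to do.

// Hypothetical usage; the path and the data values are placeholders.
DirectoryInfo indexDir = new DirectoryInfo(@"C:\index\text");
AddTextData[] data = new AddTextData[]
{
    new AddTextData() { Name = "report", Text = "Words to add to the index.", StoreText = false }
};
indexer.AddText(indexDir, data);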
Example #34
0
        /// <summary>
        /// Add documents.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="directoryInfo">The directory information where all the files that are to be added are located.</param>
        /// <param name="files">The list of files that are to be added.</param>
        /// <param name="documents">The supported documents search filter, used to indicate what files are to be added.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents)
        {
            // A RichTextBox is used only to convert RTF file content to plain text.
            System.Windows.Forms.RichTextBox textbox = new System.Windows.Forms.RichTextBox();

            FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = false,
                Stored       = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };
            FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = documents.TokenizeContent,
                Stored       = documents.StoreContent,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // For each file.
            for (int i = 0; i < files.Length; i++)
            {
                // If the file exists.
                if (File.Exists(files[i]))
                {
                    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
                    System.IO.StreamReader        stream   = null;

                    try
                    {
                        FileInfo fileInfo = new FileInfo(files[i]);
                        string   file     = files[i].Replace(directoryInfo.Root.FullName, "").ToLower();

                        Lucene.Net.Documents.Field path     = new Field("path", file.ToLower().Replace("\\", "/"), pathFieldType);
                        Lucene.Net.Documents.Field modified = new Field("modified", fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString(), pathFieldType);

                        // Add the fields.
                        document.Add(path);
                        document.Add(modified);

                        // Read the RTF content and extract the plain text.
                        stream = new StreamReader(files[i]);
                        string contentRtf = stream.ReadToEnd();
                        textbox.Rtf = contentRtf;
                        string content = textbox.Text;

                        // If content exists.
                        if (!String.IsNullOrEmpty(content))
                        {
                            // Split the text into individual words on whitespace.
                            string[] words = content.Words();

                            // If words exist.
                            if (words != null && words.Length > 0)
                            {
                                // Add a field for each word.
                                for (int j = 0; j < words.Length; j++)
                                {
                                    // Format the word.
                                    string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                    // If a word exists.
                                    if (!String.IsNullOrEmpty(word))
                                    {
                                        Lucene.Net.Documents.Field contentField = new Field("content", word, contentFieldType);
                                        document.Add(contentField);
                                    }
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(document.Fields);

                        // Commit after a set number of documents.
                        documents.TotalDocumentSize += fileInfo.Length;
                        if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
                        {
                            writer.Commit();
                            documents.TotalDocumentSize = 0;
                        }
                    }
                    finally
                    {
                        if (stream != null)
                        {
                            stream.Dispose();
                        }
                    }
                }
            }
        }
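
A usage sketch for AddDocuments, assuming an already-open IndexWriter. The paths, the file search, and the SupportedDocumentExtension initializer are illustrative; the method reads the filter's TokenizeContent, StoreContent, TotalDocumentSize, and MaxDocumentSizePerCommit members, but whether they can be set through an initializer is an assumption here.

// Hypothetical usage; the paths, writer, indexer, and filter construction are placeholders.
DirectoryInfo rootInfo = new DirectoryInfo(@"C:\documents");
string[] rtfFiles = System.IO.Directory.GetFiles(rootInfo.FullName, "*.rtf", SearchOption.AllDirectories);

SupportedDocumentExtension documents = new SupportedDocumentExtension()
{
    TokenizeContent = true,
    StoreContent = false,
    MaxDocumentSizePerCommit = 30000000L
};

indexer.AddDocuments(writer, rootInfo, rtfFiles, documents);

// AddDocuments only commits in size-based batches, so commit any remainder here.
writer.Commit();

Note that the RichTextBox-based RTF conversion requires a Windows Forms reference and may need to run on an STA thread.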