public override void WriteField(FieldInfo info, IIndexableField field)
        {
            fieldsStream.WriteVInt32(info.Number);
            int      bits = 0;
            BytesRef bytes;
            string   @string;

            // TODO: maybe a field should serialize itself?
            // this way we don't bake into indexer all these
            // specific encodings for different fields?  and apps
            // can customize...

            // LUCENENET specific - To avoid boxing/unboxing, we don't
            // call GetNumericValue(). Instead, we check the field.NumericType and then
            // call the appropriate conversion method.
            if (field.NumericType != NumericFieldType.NONE)
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    bits |= FIELD_IS_NUMERIC_INT;
                    break;

                case NumericFieldType.INT64:
                    bits |= FIELD_IS_NUMERIC_LONG;
                    break;

                case NumericFieldType.SINGLE:
                    bits |= FIELD_IS_NUMERIC_FLOAT;
                    break;

                case NumericFieldType.DOUBLE:
                    bits |= FIELD_IS_NUMERIC_DOUBLE;
                    break;

                default:
                    throw new ArgumentException("cannot store numeric type " + field.NumericType);
                }

                @string = null;
                bytes   = null;
            }
            else
            {
                bytes = field.GetBinaryValue();
                if (bytes != null)
                {
                    bits   |= FIELD_IS_BINARY;
                    @string = null;
                }
                else
                {
                    @string = field.GetStringValue();
                    if (@string == null)
                    {
                        throw new ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            fieldsStream.WriteByte((byte)(sbyte)bits);

            if (bytes != null)
            {
                fieldsStream.WriteVInt32(bytes.Length);
                fieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                fieldsStream.WriteString(@string); // already fetched above
            }
            else
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    fieldsStream.WriteInt32(field.GetInt32Value().Value);
                    break;

                case NumericFieldType.INT64:
                    fieldsStream.WriteInt64(field.GetInt64Value().Value);
                    break;

                case NumericFieldType.SINGLE:
                    fieldsStream.WriteInt32(BitConversion.SingleToInt32Bits(field.GetSingleValue().Value));
                    break;

                case NumericFieldType.DOUBLE:
                    fieldsStream.WriteInt64(BitConversion.DoubleToInt64Bits(field.GetDoubleValue().Value));
                    break;

                default:
                    throw new InvalidOperationException("Cannot get here");
                }
            }
        }
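A reader-side sketch that mirrors the flags written above may help. It assumes this codec's FIELD_IS_BINARY, FIELD_IS_NUMERIC_* constants plus a combined FIELD_IS_NUMERIC_MASK are in scope, along with J2N's BitConversion; it illustrates the on-disk layout and is not the shipped reader.

// Illustrative decode of a single stored-field value (assumed constants in scope).
private static object ReadFieldValue(Lucene.Net.Store.IndexInput fieldsStream)
{
    int bits = fieldsStream.ReadByte() & 0xFF;
    if ((bits & FIELD_IS_BINARY) != 0)
    {
        int length = fieldsStream.ReadVInt32();
        var bytes = new byte[length];
        fieldsStream.ReadBytes(bytes, 0, length);
        return new BytesRef(bytes);
    }
    switch (bits & FIELD_IS_NUMERIC_MASK)
    {
        case FIELD_IS_NUMERIC_INT:    return fieldsStream.ReadInt32();
        case FIELD_IS_NUMERIC_LONG:   return fieldsStream.ReadInt64();
        case FIELD_IS_NUMERIC_FLOAT:  return BitConversion.Int32BitsToSingle(fieldsStream.ReadInt32());
        case FIELD_IS_NUMERIC_DOUBLE: return BitConversion.Int64BitsToDouble(fieldsStream.ReadInt64());
        default:                      return fieldsStream.ReadString(); // plain string field
    }
}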
        public virtual void SearchIndex(Directory dir, string oldName)
        {
            //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
            //Query query = parser.parse("handle:1");

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            TestUtil.CheckIndex(dir);

            // true if this is a 4.0+ index
            bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
            // true if this is a 4.2+ index
            bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

            Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

            IBits liveDocs = MultiFields.GetLiveDocs(reader);

            for (int i = 0; i < 35; i++)
            {
                if (liveDocs.Get(i))
                {
                    Document d = reader.Document(i);
                    IList <IIndexableField> fields = d.Fields;
                    bool isProxDoc = d.GetField("content3") == null;
                    if (isProxDoc)
                    {
                        int numFields = is40Index ? 7 : 5;
                        Assert.AreEqual(numFields, fields.Count);
                        IIndexableField f = d.GetField("id");
                        Assert.AreEqual("" + i, f.GetStringValue());

                        f = d.GetField("utf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue());

                        f = d.GetField("autf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue());

                        f = d.GetField("content2");
                        Assert.AreEqual("here is more content with aaa aaa aaa", f.GetStringValue());

                        f = d.GetField("fie\u2C77ld");
                        Assert.AreEqual("field with non-ascii name", f.GetStringValue());
                    }

                    Fields tfvFields = reader.GetTermVectors(i);
                    Assert.IsNotNull(tfvFields, "i=" + i);
                    Terms tfv = tfvFields.GetTerms("utf8");
                    Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
                }
                else
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, i);
                }
            }

            if (is40Index)
            {
                // check docvalues fields
                NumericDocValues   dvByte               = MultiDocValues.GetNumericValues(reader, "dvByte");
                BinaryDocValues    dvBytesDerefFixed    = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
                BinaryDocValues    dvBytesDerefVar      = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
                SortedDocValues    dvBytesSortedFixed   = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
                SortedDocValues    dvBytesSortedVar     = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
                BinaryDocValues    dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
                BinaryDocValues    dvBytesStraightVar   = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
                NumericDocValues   dvDouble             = MultiDocValues.GetNumericValues(reader, "dvDouble");
                NumericDocValues   dvFloat              = MultiDocValues.GetNumericValues(reader, "dvFloat");
                NumericDocValues   dvInt       = MultiDocValues.GetNumericValues(reader, "dvInt");
                NumericDocValues   dvLong      = MultiDocValues.GetNumericValues(reader, "dvLong");
                NumericDocValues   dvPacked    = MultiDocValues.GetNumericValues(reader, "dvPacked");
                NumericDocValues   dvShort     = MultiDocValues.GetNumericValues(reader, "dvShort");
                SortedSetDocValues dvSortedSet = null;
                if (is42Index)
                {
                    dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
                }

                for (int i = 0; i < 35; i++)
                {
                    int id = Convert.ToInt32(reader.Document(i).Get("id"));
                    Assert.AreEqual(id, dvByte.Get(i));

                    sbyte[]  bytes       = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
                    BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
                    BytesRef scratch     = new BytesRef();

                    dvBytesDerefFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesDerefVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);

                    Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
                    Assert.AreEqual((float)id, Number.Int32BitsToSingle((int)dvFloat.Get(i)), 0F);
                    Assert.AreEqual(id, dvInt.Get(i));
                    Assert.AreEqual(id, dvLong.Get(i));
                    Assert.AreEqual(id, dvPacked.Get(i));
                    Assert.AreEqual(id, dvShort.Get(i));
                    if (is42Index)
                    {
                        dvSortedSet.SetDocument(i);
                        long ord = dvSortedSet.NextOrd();
                        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                        dvSortedSet.LookupOrd(ord, scratch);
                        Assert.AreEqual(expectedRef, scratch);
                    }
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #0
            Document doc = searcher.IndexReader.Document(hits[0].Doc);

            assertEquals("didn't get the right document first", "0", doc.Get("id"));

            DoTestHits(hits, 34, searcher.IndexReader);

            if (is40Index)
            {
                hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);

                hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);
            }

            hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);

            reader.Dispose();
        }
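The utf8/autf8 assertions above deliberately use supplementary characters, each written as a UTF-16 surrogate pair. A quick standalone check of what those escapes decode to:

string s = "Lu\uD834\uDD1Ece\uD834\uDD60ne";
Console.WriteLine(s.Length);                                // 10 UTF-16 units, 8 code points
Console.WriteLine(char.IsSurrogatePair(s, 2));              // True: s[2..3] form one code point
Console.WriteLine(char.ConvertToUtf32(s, 2).ToString("X")); // 1D11E (musical symbol G clef)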
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary <string, Document> docs = res.Value;
            List <string> invalidDocTerms       = res.Key;
            Random        rand       = Random;
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                IIndexableField f2 = doc.GetField(FIELD_NAME);
                if (rand.NextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.GetStringValue()))
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.Count];
            for (int i = 0; i < termsToDel.Count; i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                assertTrue(toDel != null);
                docs.Remove(termToDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertEquals(ir.NumDocs, docs.Count);
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                var      field = f.Utf8ToString();
                Document doc   = docs[field];
                docs.Remove(field);
                assertTrue(f.Equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? weightField.GetInt64ValueOrDefault() : 0);
                assertEquals(inputIterator.Payload, null);
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(docs.Count == 0);

            ir.Dispose();
            dir.Dispose();
        }
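Outside the test, a DocumentDictionary like the one iterated by hand above typically feeds a suggester. A hedged sketch of that next step using the Lucene.Net.Search.Suggest API; the open reader and the "title"/"weight" field names are assumptions for illustration:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Search.Suggest;
using Lucene.Net.Search.Suggest.Analyzing;
using Lucene.Net.Util;

// reader: an open DirectoryReader over an index with "title" and "weight" fields.
var dictionary = new DocumentDictionary(reader, "title", "weight");
var suggester = new AnalyzingSuggester(new StandardAnalyzer(LuceneVersion.LUCENE_48));
suggester.Build(dictionary);
foreach (var hit in suggester.DoLookup("luc", false, 5))
{
    Console.WriteLine($"{hit.Key} ({hit.Value})"); // suggestion text and weight
}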
        internal override bool Start(IIndexableField[] fields, int count)
        {
            doVectors         = false;
            doVectorPositions = false;
            doVectorOffsets   = false;
            doVectorPayloads  = false;
            hasPayloads       = false;

            for (int i = 0; i < count; i++)
            {
                IIndexableField field = fields[i];
                if (field.IndexableFieldType.IsIndexed)
                {
                    if (field.IndexableFieldType.StoreTermVectors)
                    {
                        doVectors          = true;
                        doVectorPositions |= field.IndexableFieldType.StoreTermVectorPositions;
                        doVectorOffsets   |= field.IndexableFieldType.StoreTermVectorOffsets;
                        if (doVectorPositions)
                        {
                            doVectorPayloads |= field.IndexableFieldType.StoreTermVectorPayloads;
                        }
                        else if (field.IndexableFieldType.StoreTermVectorPayloads)
                        {
                            // TODO: move this check somewhere else, and impl the other missing ones
                            throw new System.ArgumentException("cannot index term vector payloads without term vector positions (field=\"" + field.Name + "\")");
                        }
                    }
                    else
                    {
                        if (field.IndexableFieldType.StoreTermVectorOffsets)
                        {
                            throw new System.ArgumentException("cannot index term vector offsets when term vectors are not indexed (field=\"" + field.Name + "\")");
                        }
                        if (field.IndexableFieldType.StoreTermVectorPositions)
                        {
                            throw new System.ArgumentException("cannot index term vector positions when term vectors are not indexed (field=\"" + field.Name + "\")");
                        }
                        if (field.IndexableFieldType.StoreTermVectorPayloads)
                        {
                            throw new System.ArgumentException("cannot index term vector payloads when term vectors are not indexed (field=\"" + field.Name + "\")");
                        }
                    }
                }
                else
                {
                    if (field.IndexableFieldType.StoreTermVectors)
                    {
                        throw new System.ArgumentException("cannot index term vectors when field is not indexed (field=\"" + field.Name + "\")");
                    }
                    if (field.IndexableFieldType.StoreTermVectorOffsets)
                    {
                        throw new System.ArgumentException("cannot index term vector offsets when field is not indexed (field=\"" + field.Name + "\")");
                    }
                    if (field.IndexableFieldType.StoreTermVectorPositions)
                    {
                        throw new System.ArgumentException("cannot index term vector positions when field is not indexed (field=\"" + field.Name + "\")");
                    }
                    if (field.IndexableFieldType.StoreTermVectorPayloads)
                    {
                        throw new System.ArgumentException("cannot index term vector payloads when field is not indexed (field=\"" + field.Name + "\")");
                    }
                }
            }

            if (doVectors)
            {
                termsWriter.hasVectors = true;
                if (termsHashPerField.bytesHash.Count != 0)
                {
                    // Only necessary if previous doc hit a
                    // non-aborting exception while writing vectors in
                    // this field:
                    termsHashPerField.Reset();
                }
            }

            // TODO: only if needed for performance
            //perThread.postingsCount = 0;

            return doVectors;
        }
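For reference, a minimal FieldType sketch that satisfies every check in Start(): each StoreTermVector* option requires term vectors on an indexed field, and payloads additionally require positions.

using Lucene.Net.Documents;

var vectorType = new FieldType(TextField.TYPE_STORED)
{
    StoreTermVectors = true,
    StoreTermVectorPositions = true,
    StoreTermVectorOffsets = true,
    StoreTermVectorPayloads = true // legal only because positions are stored
};
vectorType.Freeze();
var doc = new Document();
doc.Add(new Field("body", "some indexed text", vectorType));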
Example #5
        public override void WriteField(FieldInfo info, IIndexableField field)
        {
            Write(FIELD);
            Write(info.Number.ToString(CultureInfo.InvariantCulture));
            NewLine();

            Write(NAME);
            Write(field.Name);
            NewLine();

            Write(TYPE);

            // LUCENENET specific - To avoid boxing/unboxing, we don't
            // call GetNumericValue(). Instead, we check the field.NumericType and then
            // call the appropriate conversion method.
            if (field.NumericType != NumericFieldType.NONE)
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    Write(TYPE_INT);
                    NewLine();

                    Write(VALUE);
                    Write(field.GetStringValue(CultureInfo.InvariantCulture));
                    NewLine();
                    break;

                case NumericFieldType.INT64:
                    Write(TYPE_LONG);
                    NewLine();

                    Write(VALUE);
                    Write(field.GetStringValue(CultureInfo.InvariantCulture));
                    NewLine();
                    break;

                case NumericFieldType.SINGLE:
                    Write(TYPE_FLOAT);
                    NewLine();

                    Write(VALUE);
                    // LUCENENET: Need to specify the "R" for round-trip: http://stackoverflow.com/a/611564
                    Write(field.GetStringValue("R", CultureInfo.InvariantCulture));
                    NewLine();
                    break;

                case NumericFieldType.DOUBLE:
                    Write(TYPE_DOUBLE);
                    NewLine();

                    Write(VALUE);
                    // LUCENENET: Need to specify the "R" for round-trip: http://stackoverflow.com/a/611564
                    Write(field.GetStringValue("R", CultureInfo.InvariantCulture));
                    NewLine();
                    break;

                default:
                    throw new ArgumentException("cannot store numeric type " + field.NumericType);
                }
            }
            else
            {
                BytesRef bytes = field.GetBinaryValue();
                if (bytes != null)
                {
                    Write(TYPE_BINARY);
                    NewLine();

                    Write(VALUE);
                    Write(bytes);
                    NewLine();
                }
                else if (field.GetStringValue() == null)
                {
                    throw new ArgumentException("field " + field.Name +
                                                " is stored but does not have binaryValue, stringValue nor numericValue");
                }
                else
                {
                    Write(TYPE_STRING);
                    NewLine();

                    Write(VALUE);
                    Write(field.GetStringValue());
                    NewLine();
                }
            }
        }
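For reference, a stored numeric field written through this method comes out as human-readable lines, roughly as below (a sketch; the exact prefixes are the FIELD/NAME/TYPE/VALUE constants of the SimpleText format):

field 3
  name id
  type int
  value 42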
Example #6
        public override void WriteField(FieldInfo info, IIndexableField field)
        {
            FieldsStream.WriteVInt32(info.Number);
            int      bits = 0;
            BytesRef bytes;
            string   @string;
            // TODO: maybe a field should serialize itself?
            // this way we don't bake into indexer all these
            // specific encodings for different fields?  and apps
            // can customize...

            object number = field.GetNumericValue();

            if (number != null)
            {
                if (number is sbyte? || number is short? || number is int?)
                {
                    bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT;
                }
                else if (number is long?)
                {
                    bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG;
                }
                else if (number is float?)
                {
                    bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT;
                }
                else if (number is double?)
                {
                    bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE;
                }
                else
                {
                    throw new System.ArgumentException("cannot store numeric type " + number.GetType());
                }
                @string = null;
                bytes   = null;
            }
            else
            {
                bytes = field.GetBinaryValue();
                if (bytes != null)
                {
                    bits   |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY;
                    @string = null;
                }
                else
                {
                    @string = field.GetStringValue();
                    if (@string == null)
                    {
                        throw new System.ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            FieldsStream.WriteByte((byte)(sbyte)bits);

            if (bytes != null)
            {
                FieldsStream.WriteVInt32(bytes.Length);
                FieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                FieldsStream.WriteString(@string); // already fetched above
            }
            else
            {
                if (number is sbyte? || number is short? || number is int?)
                {
                    // A direct (int) unbox throws InvalidCastException when the
                    // box holds an sbyte or short, so convert instead.
                    FieldsStream.WriteInt32(Convert.ToInt32(number));
                }
                else if (number is long?)
                {
                    FieldsStream.WriteInt64((long)number);
                }
                else if (number is float?)
                {
                    FieldsStream.WriteInt32(Number.SingleToInt32Bits((float)number));
                }
                else if (number is double?)
                {
                    FieldsStream.WriteInt64(BitConverter.DoubleToInt64Bits((double)number));
                }
                else
                {
                    Debug.Assert(false);
                }
            }
        }
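One C# detail behind the `number is sbyte?` checks above: Nullable&lt;T&gt; boxes as its underlying type (or as a null reference), so against a boxed value `is T?` and `is T` test the same thing. A quick illustration:

object boxed = (int?)42;          // Nullable<int> boxes as a plain boxed int
Console.WriteLine(boxed is int);  // True
Console.WriteLine(boxed is int?); // True: identical runtime check once boxed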
Example #7
        public override void ProcessFields(IIndexableField[] fields, int count)
        {
            fieldState.Reset();

            bool doInvert = consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                IIndexableField     field     = fields[i];
                IIndexableFieldType fieldType = field.IndexableFieldType;

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (fieldType.IsIndexed && doInvert)
                {
                    bool analyzed = fieldType.IsTokenized && docState.analyzer != null;

                    // if the field omits norms, the boost cannot be indexed.
                    if (fieldType.OmitNorms && field.Boost != 1.0f)
                    {
                        throw new NotSupportedException("You cannot set an index-time boost: norms are omitted for field '" + field.Name + "'");
                    }

                    // only bother checking offsets if something will consume them.
                    // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
                    bool checkOffsets    = fieldType.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    int  lastStartOffset = 0;

                    if (i > 0)
                    {
                        fieldState.Position += analyzed ? docState.analyzer.GetPositionIncrementGap(fieldInfo.Name) : 0;
                    }

                    /*
                     * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
                     * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
                     * but rather a finally that takes note of the problem.
                     */

                    bool succeededInProcessingField = false;

                    TokenStream stream = field.GetTokenStream(docState.analyzer);
                    // reset the TokenStream to the first token
                    stream.Reset();

                    try
                    {
                        bool hasMoreTokens = stream.IncrementToken();

                        fieldState.AttributeSource = stream;

                        IOffsetAttribute            offsetAttribute  = fieldState.AttributeSource.AddAttribute <IOffsetAttribute>();
                        IPositionIncrementAttribute posIncrAttribute = fieldState.AttributeSource.AddAttribute <IPositionIncrementAttribute>();

                        if (hasMoreTokens)
                        {
                            consumer.Start(field);

                            do
                            {
                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                int posIncr = posIncrAttribute.PositionIncrement;
                                if (posIncr < 0)
                                {
                                    throw new ArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.Name + "'");
                                }
                                if (fieldState.Position == 0 && posIncr == 0)
                                {
                                    throw new ArgumentException("first position increment must be > 0 (got 0) for field '" + field.Name + "'");
                                }
                                int position = fieldState.Position + posIncr;
                                if (position > 0)
                                {
                                    // NOTE: confusing: this "mirrors" the
                                    // position++ we do below
                                    position--;
                                }
                                else if (position < 0)
                                {
                                    throw new ArgumentException("position overflow for field '" + field.Name + "'");
                                }

                                // position is legal, we can safely place it in fieldState now.
                                // not sure if anything will use fieldState after non-aborting exc...
                                fieldState.Position = position;

                                if (posIncr == 0)
                                {
                                    fieldState.NumOverlap++;
                                }

                                if (checkOffsets)
                                {
                                    int startOffset = fieldState.Offset + offsetAttribute.StartOffset;
                                    int endOffset   = fieldState.Offset + offsetAttribute.EndOffset;
                                    if (startOffset < 0 || endOffset < startOffset)
                                    {
                                        throw new ArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.Name + "'");
                                    }
                                    if (startOffset < lastStartOffset)
                                    {
                                        throw new ArgumentException("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.Name + "'");
                                    }
                                    lastStartOffset = startOffset;
                                }

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        docState.docWriter.SetAborting();
                                    }
                                }
                                fieldState.Length++;
                                fieldState.Position++;
                            } while (stream.IncrementToken());
                        }
                        // trigger streams to perform end-of-stream operations
                        stream.End();
                        // TODO: maybe add some safety? then again, its already checked
                        // when we come back around to the field...
                        fieldState.Position += posIncrAttribute.PositionIncrement;
                        fieldState.Offset   += offsetAttribute.EndOffset;

                        if (docState.maxTermPrefix != null)
                        {
                            string msg = "Document contains at least one immense term in field=\"" + fieldInfo.Name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'";
                            if (docState.infoStream.IsEnabled("IW"))
                            {
                                docState.infoStream.Message("IW", "ERROR: " + msg);
                            }
                            docState.maxTermPrefix = null;
                            throw new ArgumentException(msg);
                        }

                        /* if success was false above there is an exception coming through and we won't get here.*/
                        succeededInProcessingField = true;
                    }
                    finally
                    {
                        if (!succeededInProcessingField)
                        {
                            IOUtils.DisposeWhileHandlingException(stream);
                        }
                        else
                        {
                            stream.Dispose();
                        }
                        if (!succeededInProcessingField && docState.infoStream.IsEnabled("DW"))
                        {
                            docState.infoStream.Message("DW", "An exception was thrown while processing field " + fieldInfo.Name);
                        }
                    }

                    fieldState.Offset += analyzed ? docState.analyzer.GetOffsetGap(fieldInfo.Name) : 0;
                    fieldState.Boost  *= field.Boost;
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            consumer.Finish();
            endConsumer.Finish();
        }
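The position/offset bookkeeping above is what stitches multiple values of one field into a single logical token stream. Below is a hedged sketch of the same accounting done by hand, assuming StandardAnalyzer; GetPositionIncrementGap returns 0 by default and is often overridden to keep phrases from matching across values.

using System;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
int position = 0;
foreach (string value in new[] { "first value", "second value" })
{
    if (position > 0)
    {
        position += analyzer.GetPositionIncrementGap("body"); // mirrors the i > 0 branch above
    }
    TokenStream stream = analyzer.GetTokenStream("body", value);
    var posIncr = stream.AddAttribute<IPositionIncrementAttribute>();
    stream.Reset();
    while (stream.IncrementToken())
    {
        position += posIncr.PositionIncrement; // must be >= 0; the first must be > 0
    }
    stream.End();
    stream.Dispose();
}
Console.WriteLine(position); // accumulated across both values, like fieldState.Position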
 internal abstract void Start(IIndexableField field);
Example #9
 public abstract void AddField(int docID, IIndexableField field, FieldInfo fieldInfo);