예제 #1
0
 /// <summary>
 /// Forwards a field's doc value to the writer that matches the doc-values type
 /// declared on the field's type. Fields without a doc-values type are ignored.
 /// </summary>
 /// <param name="docID">Document the value belongs to; passed through to the per-type writer.</param>
 /// <param name="field">Field whose binary or numeric value is recorded.</param>
 /// <param name="fieldInfo">Field metadata; its <c>DocValuesType</c> is set to the field's doc-values type.</param>
 /// <exception cref="System.ArgumentException">
 /// The field is NUMERIC but its numeric value is missing or is not a <c>long</c>.
 /// </exception>
 public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
 {
     DocValuesType_e? dvType = field.FieldType().DocValueType;
     if (dvType == null)
     {
         // no doc values requested for this field
         return;
     }

     fieldInfo.DocValuesType = dvType;
     if (dvType == DocValuesType_e.BINARY)
     {
         AddBinaryField(fieldInfo, docID, field.BinaryValue());
     }
     else if (dvType == DocValuesType_e.SORTED)
     {
         AddSortedField(fieldInfo, docID, field.BinaryValue());
     }
     else if (dvType == DocValuesType_e.SORTED_SET)
     {
         AddSortedSetField(fieldInfo, docID, field.BinaryValue());
     }
     else if (dvType == DocValuesType_e.NUMERIC)
     {
         // Read the value once so the type check, the error message and the cast
         // all look at the same object.
         var numericValue = field.NumericValue;
         if (!(numericValue is long))
         {
             // A null value must not be dereferenced while building the message,
             // otherwise callers get a NullReferenceException instead of this one.
             string actualType = numericValue == null ? "null" : numericValue.GetType().ToString();
             throw new System.ArgumentException("illegal type " + actualType + ": DocValues types must be Long");
         }
         AddNumericField(fieldInfo, docID, (long)numericValue);
     }
     else
     {
         Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
     }
 }
예제 #2
0
        /// <summary>
        /// Records the doc value of <paramref name="field"/> using the writer selected by the
        /// doc-values type of the field's type; does nothing when that type is null.
        /// </summary>
        /// <param name="docID">Document the value belongs to; handed to the per-type writer.</param>
        /// <param name="field">Field supplying the binary or numeric value.</param>
        /// <param name="fieldInfo">Field metadata; receives the field's doc-values type.</param>
        /// <exception cref="System.ArgumentException">
        /// The doc-values type is NUMERIC and the numeric value is null or not a <c>long</c>.
        /// </exception>
        public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
        {
            DocValuesType_e? dvType = field.FieldType().DocValueType;

            if (dvType != null)
            {
                fieldInfo.DocValuesType = dvType;
                if (dvType == DocValuesType_e.BINARY)
                {
                    AddBinaryField(fieldInfo, docID, field.BinaryValue());
                }
                else if (dvType == DocValuesType_e.SORTED)
                {
                    AddSortedField(fieldInfo, docID, field.BinaryValue());
                }
                else if (dvType == DocValuesType_e.SORTED_SET)
                {
                    AddSortedSetField(fieldInfo, docID, field.BinaryValue());
                }
                else if (dvType == DocValuesType_e.NUMERIC)
                {
                    var value = field.NumericValue;
                    if (value == null)
                    {
                        // Report a missing value with the same exception type as a wrongly
                        // typed one, rather than failing on value.GetType() below.
                        throw new System.ArgumentException("illegal type null: DocValues types must be Long");
                    }
                    if (!(value is long))
                    {
                        throw new System.ArgumentException("illegal type " + value.GetType() + ": DocValues types must be Long");
                    }
                    AddNumericField(fieldInfo, docID, (long)value);
                }
                else
                {
                    Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
                }
            }
        }
예제 #3
0
 /// <summary>
 /// For every indexed field in <c>DocHelper.Fields</c>, asserts that the reader exposes
 /// norm values exactly when the field type does not omit norms and the field is not
 /// listed in <c>DocHelper.NoNorms</c>. When the reader has no norms for a field, it also
 /// asserts that <c>MultiDocValues.GetNormValues</c> returns null for it.
 /// </summary>
 /// <param name="reader">Reader whose norm values are checked.</param>
 public static void CheckNorms(AtomicReader reader)
 {
     // test omit norms
     foreach (IndexableField docField in DocHelper.Fields)
     {
         if (!docField.FieldType().Indexed)
         {
             // only indexed fields are checked
             continue;
         }

         var fieldName = docField.Name();
         bool hasNorms = reader.GetNormValues(fieldName) != null;

         Assert.AreEqual(hasNorms, !docField.FieldType().OmitNorms);
         Assert.AreEqual(hasNorms, !DocHelper.NoNorms.ContainsKey(fieldName));

         if (!hasNorms)
         {
             // test for norms of null
             NumericDocValues multiNorms = MultiDocValues.GetNormValues(reader, fieldName);
             Assert.IsNull(multiNorms);
         }
     }
 }
예제 #4
0
        /// <summary>
        /// Buffers a stored field together with its <see cref="FieldInfo"/>; fields whose
        /// type is not stored are skipped. Both backing arrays are grown in lock-step when
        /// the field array is full.
        /// </summary>
        /// <param name="docID">Not read by this method.</param>
        /// <param name="field">Field to buffer when its type is stored.</param>
        /// <param name="fieldInfo">Metadata kept at the same index as <paramref name="field"/>.</param>
        public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
        {
            if (!field.FieldType().Stored)
            {
                return;
            }

            if (NumStoredFields == StoredFields.Length)
            {
                // Out of room: allocate larger arrays and carry over the buffered entries.
                int grownLength = ArrayUtil.Oversize(NumStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);

                IndexableField[] grownFields = new IndexableField[grownLength];
                Array.Copy(StoredFields, 0, grownFields, 0, NumStoredFields);
                StoredFields = grownFields;

                FieldInfo[] grownInfos = new FieldInfo[grownLength];
                Array.Copy(FieldInfos, 0, grownInfos, 0, NumStoredFields);
                FieldInfos = grownInfos;
            }

            int slot = NumStoredFields;
            StoredFields[slot] = field;
            FieldInfos[slot] = fieldInfo;
            NumStoredFields = slot + 1;

            Debug.Assert(DocState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
        }
예제 #5
0
        /// <summary>
        /// Builds the shared test fields and their field types, collects them in
        /// <c>Fields</c>, sorts every field into the lookup maps by the properties of its
        /// field type (indexed, stored, term vectors, norms, index options), and fills
        /// <c>NameValues</c> with the expected value for each field key.
        /// </summary>
        static DocHelper()
        {
            CustomType  = new FieldType(TextField.TYPE_STORED);
            TextField1  = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, CustomType);
            CustomType2 = new FieldType(TextField.TYPE_STORED);
            CustomType2.StoreTermVectors         = true;
            CustomType2.StoreTermVectorPositions = true;
            CustomType2.StoreTermVectorOffsets   = true;
            TextField2            = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, CustomType2);
            CustomType3           = new FieldType(TextField.TYPE_STORED);
            CustomType3.OmitNorms = true;
            TextField3            = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, CustomType3);
            KeyField                     = new StringField(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES);
            CustomType5                  = new FieldType(TextField.TYPE_STORED);
            CustomType5.OmitNorms        = true;
            CustomType5.Tokenized        = false;
            NoNormsField                 = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, CustomType5);
            CustomType6                  = new FieldType(TextField.TYPE_STORED);
            CustomType6.IndexOptions     = FieldInfo.IndexOptions.DOCS_ONLY;
            NoTFField                    = new Field(NO_TF_KEY, NO_TF_TEXT, CustomType6);
            CustomType7                  = new FieldType();
            CustomType7.Stored           = true;
            UnIndField                   = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, CustomType7);
            CustomType8                  = new FieldType(TextField.TYPE_NOT_STORED);
            CustomType8.StoreTermVectors = true;
            UnStoredField2               = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, CustomType8);

            UnStoredField1 = new TextField(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO);
            LazyField      = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, CustomType);
            TextUtfField1  = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, CustomType);
            TextUtfField2  = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, CustomType2);

            // Build the binary lazy field and the large lazy field BEFORE the Fields array
            // so the array is complete when it is created and no slot has to be patched
            // afterwards through a hard-coded index.
            try
            {
                LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".GetBytes(IOUtils.CHARSET_UTF_8);
            }
            catch (EncoderFallbackException)
            {
                // Deliberately ignored, as before: LAZY_FIELD_BINARY_BYTES keeps whatever
                // value it had. NOTE(review): confirm an encoding failure is really
                // impossible here; otherwise the StoredField below receives that value.
            }
            LazyFieldBinary = new StoredField(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);

            //Initialize the large Lazy Field
            StringBuilder buffer = new StringBuilder();

            for (int i = 0; i < 10000; i++)
            {
                buffer.Append("Lazily loading lengths of language in lieu of laughing ");
            }

            LARGE_LAZY_FIELD_TEXT = buffer.ToString();
            LargeLazyField        = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, CustomType);

            Fields = new Field[] { TextField1, TextField2, TextField3, KeyField, NoNormsField, NoTFField, UnIndField, UnStoredField1, UnStoredField2, TextUtfField1, TextUtfField2, LazyField, LazyFieldBinary, LargeLazyField };

            // Classify every field into the lookup maps according to its field type.
            for (int i = 0; i < Fields.Length; i++)
            {
                IndexableField f = Fields[i];
                var fieldType = f.FieldType();

                Add(All, f);
                if (fieldType.Indexed)
                {
                    Add(Indexed, f);
                }
                else
                {
                    Add(Unindexed, f);
                }
                if (fieldType.StoreTermVectors)
                {
                    Add(Termvector, f);
                }
                if (fieldType.Indexed && !fieldType.StoreTermVectors)
                {
                    Add(Notermvector, f);
                }
                if (fieldType.Stored)
                {
                    Add(Stored, f);
                }
                else
                {
                    Add(Unstored, f);
                }
                // The DOCS_ONLY check used to appear twice in this loop; once is enough.
                if (fieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                {
                    Add(NoTf, f);
                }
                if (fieldType.OmitNorms)
                {
                    Add(NoNorms, f);
                }
            }

            // Expected value for every field key.
            NameValues = new Dictionary <string, object>();
            NameValues[TEXT_FIELD_1_KEY]      = FIELD_1_TEXT;
            NameValues[TEXT_FIELD_2_KEY]      = FIELD_2_TEXT;
            NameValues[TEXT_FIELD_3_KEY]      = FIELD_3_TEXT;
            NameValues[KEYWORD_FIELD_KEY]     = KEYWORD_TEXT;
            NameValues[NO_NORMS_KEY]          = NO_NORMS_TEXT;
            NameValues[NO_TF_KEY]             = NO_TF_TEXT;
            NameValues[UNINDEXED_FIELD_KEY]   = UNINDEXED_FIELD_TEXT;
            NameValues[UNSTORED_FIELD_1_KEY]  = UNSTORED_1_FIELD_TEXT;
            NameValues[UNSTORED_FIELD_2_KEY]  = UNSTORED_2_FIELD_TEXT;
            NameValues[LAZY_FIELD_KEY]        = LAZY_FIELD_TEXT;
            NameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
            NameValues[LARGE_LAZY_FIELD_KEY]  = LARGE_LAZY_FIELD_TEXT;
            NameValues[TEXT_FIELD_UTF1_KEY]   = FIELD_UTF1_TEXT;
            NameValues[TEXT_FIELD_UTF2_KEY]   = FIELD_UTF2_TEXT;
        }
예제 #6
0
        /// <summary>
        /// Runs the token stream of each indexed field in <paramref name="fields"/> through the
        /// consumer, validating position increments and (when offsets are indexed) offsets on
        /// the way, and accumulating position, offset, length, overlap and boost in
        /// <c>FieldState</c>. Every processed slot of <paramref name="fields"/> is set to null.
        /// </summary>
        /// <param name="fields">Field instances to process; entries are nulled out as they are handled.</param>
        /// <param name="count">Number of leading entries of <paramref name="fields"/> to process.</param>
        /// <exception cref="System.NotSupportedException">
        /// A field omits norms but carries a boost other than 1.0.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// A token has an illegal position increment or offsets, the position overflows, or an
        /// immense term was recorded in <c>DocState.MaxTermPrefix</c>.
        /// </exception>
        public override void ProcessFields(IndexableField[] fields, int count)
        {
            FieldState.Reset();

            bool doInvert = Consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                IndexableField     field     = fields[i];
                IndexableFieldType fieldType = field.FieldType();

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (fieldType.Indexed && doInvert)
                {
                    bool analyzed = fieldType.Tokenized && DocState.Analyzer != null;

                    // if the field omits norms, the boost cannot be indexed.
                    if (fieldType.OmitNorms && field.GetBoost() != 1.0f)
                    {
                        throw new System.NotSupportedException("You cannot set an index-time boost: norms are omitted for field '" + field.Name() + "'");
                    }

                    // only bother checking offsets if something will consume them.
                    // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
                    bool checkOffsets    = fieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    int  lastStartOffset = 0;

                    if (i > 0)
                    {
                        FieldState.Position_Renamed += analyzed ? DocState.Analyzer.GetPositionIncrementGap(fieldInfo.Name) : 0;
                    }

                    /*
                     * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
                     * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
                     * but rather a finally that takes note of the problem.
                     */

                    bool succeededInProcessingField = false;

                    TokenStream stream = field.GetTokenStream(DocState.Analyzer);

                    try
                    {
                        // reset the TokenStream to the first token; this happens inside the
                        // try so that a failing Reset() still releases the stream (and is
                        // reported) in the finally block below
                        stream.Reset();

                        bool hasMoreTokens = stream.IncrementToken();

                        FieldState.AttributeSource_Renamed = stream;

                        IOffsetAttribute            offsetAttribute  = FieldState.AttributeSource_Renamed.AddAttribute <IOffsetAttribute>();
                        IPositionIncrementAttribute posIncrAttribute = FieldState.AttributeSource_Renamed.AddAttribute <IPositionIncrementAttribute>();

                        if (hasMoreTokens)
                        {
                            Consumer.Start(field);

                            do
                            {
                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                int posIncr = posIncrAttribute.PositionIncrement;
                                if (posIncr < 0)
                                {
                                    throw new System.ArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.Name() + "'");
                                }
                                if (FieldState.Position_Renamed == 0 && posIncr == 0)
                                {
                                    throw new System.ArgumentException("first position increment must be > 0 (got 0) for field '" + field.Name() + "'");
                                }
                                int position = FieldState.Position_Renamed + posIncr;
                                if (position > 0)
                                {
                                    // NOTE: confusing: this "mirrors" the
                                    // position++ we do below
                                    position--;
                                }
                                else if (position < 0)
                                {
                                    throw new System.ArgumentException("position overflow for field '" + field.Name() + "'");
                                }

                                // position is legal, we can safely place it in fieldState now.
                                // not sure if anything will use fieldState after non-aborting exc...
                                FieldState.Position_Renamed = position;

                                if (posIncr == 0)
                                {
                                    FieldState.NumOverlap_Renamed++;
                                }

                                if (checkOffsets)
                                {
                                    int startOffset = FieldState.Offset_Renamed + offsetAttribute.StartOffset();
                                    int endOffset   = FieldState.Offset_Renamed + offsetAttribute.EndOffset();
                                    if (startOffset < 0 || endOffset < startOffset)
                                    {
                                        throw new System.ArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.Name() + "'");
                                    }
                                    if (startOffset < lastStartOffset)
                                    {
                                        throw new System.ArgumentException("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.Name() + "'");
                                    }
                                    lastStartOffset = startOffset;
                                }

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    Consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        DocState.DocWriter.SetAborting();
                                    }
                                }
                                FieldState.Length_Renamed++;
                                FieldState.Position_Renamed++;
                            } while (stream.IncrementToken());
                        }
                        // trigger streams to perform end-of-stream operations
                        stream.End();
                        // TODO: maybe add some safety? then again, its already checked
                        // when we come back around to the field...
                        FieldState.Position_Renamed += posIncrAttribute.PositionIncrement;
                        FieldState.Offset_Renamed   += offsetAttribute.EndOffset();

                        if (DocState.MaxTermPrefix != null)
                        {
                            string msg = "Document contains at least one immense term in field=\"" + fieldInfo.Name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + DocState.MaxTermPrefix + "...'";
                            if (DocState.InfoStream.IsEnabled("IW"))
                            {
                                DocState.InfoStream.Message("IW", "ERROR: " + msg);
                            }
                            DocState.MaxTermPrefix = null;
                            throw new System.ArgumentException(msg);
                        }

                        /* if success was false above there is an exception coming through and we won't get here.*/
                        succeededInProcessingField = true;
                    }
                    finally
                    {
                        // On failure, close without letting a secondary close error mask the
                        // exception that is already propagating; on success, dispose normally.
                        if (!succeededInProcessingField)
                        {
                            IOUtils.CloseWhileHandlingException(stream);
                        }
                        else
                        {
                            stream.Dispose();
                        }
                        if (!succeededInProcessingField && DocState.InfoStream.IsEnabled("DW"))
                        {
                            DocState.InfoStream.Message("DW", "An exception was thrown while processing field " + fieldInfo.Name);
                        }
                    }

                    FieldState.Offset_Renamed += analyzed ? DocState.Analyzer.GetOffsetGap(fieldInfo.Name) : 0;
                    FieldState.Boost_Renamed  *= field.GetBoost();
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            Consumer.Finish();
            EndConsumer.Finish();
        }
예제 #7
0
        /// <summary>
        /// Appends <paramref name="field"/> and <paramref name="fieldInfo"/> to the buffered
        /// stored fields when the field's type is stored, enlarging both parallel arrays
        /// first if the field array has no free slot. Non-stored fields are ignored.
        /// </summary>
        /// <param name="docID">Unused here.</param>
        /// <param name="field">Candidate stored field.</param>
        /// <param name="fieldInfo">Metadata recorded alongside the field.</param>
        public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
        {
            bool isStored = field.FieldType().Stored;
            if (isStored)
            {
                bool arraysFull = NumStoredFields == StoredFields.Length;
                if (arraysFull)
                {
                    int capacity = ArrayUtil.Oversize(NumStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);

                    // Copy the buffered entries into larger arrays of identical length.
                    IndexableField[] largerFields = new IndexableField[capacity];
                    Array.Copy(StoredFields, largerFields, NumStoredFields);
                    StoredFields = largerFields;

                    FieldInfo[] largerInfos = new FieldInfo[capacity];
                    Array.Copy(FieldInfos, largerInfos, NumStoredFields);
                    FieldInfos = largerInfos;
                }

                FieldInfos[NumStoredFields] = fieldInfo;
                StoredFields[NumStoredFields] = field;
                NumStoredFields += 1;

                Debug.Assert(DocState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
            }
        }
        /// <summary>
        /// Clears the term-vector flags, then inspects the first <paramref name="count"/> fields
        /// to decide which term-vector features are requested, rejecting inconsistent
        /// field-type settings. When any field stores term vectors, marks the terms writer as
        /// having vectors and resets the per-field terms hash if it still holds entries.
        /// </summary>
        /// <param name="fields">Field instances to inspect.</param>
        /// <param name="count">Number of leading entries of <paramref name="fields"/> to inspect.</param>
        /// <returns>The value of <c>DoVectors</c> after the scan.</returns>
        /// <exception cref="System.ArgumentException">
        /// A field requests term-vector offsets, positions or payloads without term vectors,
        /// requests any term-vector option while not indexed, or requests payloads without
        /// positions.
        /// </exception>
        internal override bool Start(IndexableField[] fields, int count)
        {
            DoVectors = false;
            DoVectorPositions = false;
            DoVectorOffsets = false;
            DoVectorPayloads = false;
            HasPayloads = false;

            for (int idx = 0; idx < count; idx++)
            {
                IndexableField current = fields[idx];
                var type = current.FieldType();

                if (!type.Indexed)
                {
                    // A field that is not indexed may not ask for any term-vector option.
                    if (type.StoreTermVectors)
                    {
                        throw new System.ArgumentException("cannot index term vectors when field is not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorOffsets)
                    {
                        throw new System.ArgumentException("cannot index term vector offsets when field is not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorPositions)
                    {
                        throw new System.ArgumentException("cannot index term vector positions when field is not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorPayloads)
                    {
                        throw new System.ArgumentException("cannot index term vector payloads when field is not indexed (field=\"" + current.Name() + "\")");
                    }
                    continue;
                }

                if (!type.StoreTermVectors)
                {
                    // Indexed, but without term vectors: the dependent options must be off.
                    if (type.StoreTermVectorOffsets)
                    {
                        throw new System.ArgumentException("cannot index term vector offsets when term vectors are not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorPositions)
                    {
                        throw new System.ArgumentException("cannot index term vector positions when term vectors are not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorPayloads)
                    {
                        throw new System.ArgumentException("cannot index term vector payloads when term vectors are not indexed (field=\"" + current.Name() + "\")");
                    }
                    continue;
                }

                // Indexed with term vectors: accumulate the requested features.
                DoVectors = true;
                DoVectorPositions |= type.StoreTermVectorPositions;
                DoVectorOffsets |= type.StoreTermVectorOffsets;
                if (DoVectorPositions)
                {
                    DoVectorPayloads |= type.StoreTermVectorPayloads;
                }
                else if (type.StoreTermVectorPayloads)
                {
                    // TODO: move this check somewhere else, and impl the other missing ones
                    throw new System.ArgumentException("cannot index term vector payloads without term vector positions (field=\"" + current.Name() + "\")");
                }
            }

            if (DoVectors)
            {
                TermsWriter.HasVectors = true;

                // Only necessary if previous doc hit a
                // non-aborting exception while writing vectors in
                // this field:
                if (TermsHashPerField.BytesHash.Size() != 0)
                {
                    TermsHashPerField.Reset();
                }
            }

            // TODO: only if needed for performance
            //perThread.postingsCount = 0;

            return DoVectors;
        }