Example #1
0
 /// <summary>
 /// For every indexed field in <c>DocHelper.Fields</c>, asserts that
 /// <paramref name="reader"/> exposes norm values exactly when the field type does not
 /// omit norms and the field name is not a key of <c>DocHelper.NoNorms</c>. When the
 /// reader has no norms for a field, also asserts that
 /// <c>MultiDocValues.GetNormValues</c> returns <c>null</c> for it.
 /// </summary>
 /// <param name="reader">Reader whose norm values are checked.</param>
 public static void CheckNorms(AtomicReader reader)
 {
     foreach (IndexableField docField in DocHelper.Fields)
     {
         // Norms are only checked for indexed fields.
         if (!docField.FieldType().Indexed)
         {
             continue;
         }

         // Presence of norms must agree with the field type's OmitNorms flag...
         bool normsPresent = reader.GetNormValues(docField.Name()) != null;
         bool normsKeptByType = !docField.FieldType().OmitNorms;
         Assert.AreEqual(normsPresent, normsKeptByType);

         // ...and with the NoNorms lookup kept by DocHelper.
         normsPresent = reader.GetNormValues(docField.Name()) != null;
         bool absentFromNoNorms = !DocHelper.NoNorms.ContainsKey(docField.Name());
         Assert.AreEqual(normsPresent, absentFromNoNorms);

         if (reader.GetNormValues(docField.Name()) != null)
         {
             continue;
         }

         // No norms on the reader: the MultiDocValues view must report null as well.
         NumericDocValues multiNorms = MultiDocValues.GetNormValues(reader, docField.Name());
         Assert.IsNull(multiNorms);
     }
 }
Example #2
0
 /// <summary>
 /// Stores <paramref name="field"/> in <paramref name="map"/> under the field's name,
 /// using the dictionary indexer (an existing entry with that name is overwritten).
 /// </summary>
 /// <param name="map">Dictionary keyed by field name.</param>
 /// <param name="field">Field to register.</param>
 private static void Add(IDictionary<string, IndexableField> map, IndexableField field)
 {
     string key = field.Name();
     map[key] = field;
 }
        /// <summary>
        /// Writes one stored field to <c>BufferedDocs</c>: a VLong header holding
        /// <c>(info.Number &lt;&lt; TYPE_BITS) | typeCode</c>, followed by the field value.
        /// </summary>
        /// <remarks>
        /// The value is taken from the first non-null source, checked in this order: numeric
        /// value, binary value, string value. A numeric value supplied as a string is parsed
        /// (int, then long, then float, then double) and written as the parsed number, so the
        /// payload always matches the type code announced in the header.
        /// </remarks>
        /// <param name="info">Field metadata; its <c>Number</c> is encoded in the header.</param>
        /// <param name="field">Field whose value is written.</param>
        /// <exception cref="System.ArgumentException">
        /// The numeric value has an unsupported type or is an unparsable string, or the field
        /// has neither a numeric, binary nor string value.
        /// </exception>
        public override void WriteField(FieldInfo info, IndexableField field)
        {
            int bits = 0;
            BytesRef bytes = null;
            string @string = null;

            object number = (object)field.NumericValue;

            // Normalise a string-typed numeric value into a boxed number up front, so the type
            // code and the payload below are derived from the same value.
            string numStr = number as string;
            if (numStr != null)
            {
                int parsedInt;
                long parsedLong;
                float parsedFloat;
                double parsedDouble;

                // int.TryParse accepts every string that sbyte/short parsing would accept.
                if (int.TryParse(numStr, out parsedInt))
                {
                    number = parsedInt;
                }
                else if (long.TryParse(numStr, out parsedLong))
                {
                    number = parsedLong;
                }
                else if (float.TryParse(numStr, out parsedFloat))
                {
                    number = parsedFloat;
                }
                else if (double.TryParse(numStr, out parsedDouble))
                {
                    number = parsedDouble;
                }
                else
                {
                    throw new System.ArgumentException("cannot store numeric type " + number.GetType());
                }
            }

            if (number != null)
            {
                if (number is sbyte || number is short || number is int)
                {
                    bits = NUMERIC_INT;
                }
                else if (number is long)
                {
                    bits = NUMERIC_LONG;
                }
                else if (number is float)
                {
                    bits = NUMERIC_FLOAT;
                }
                else if (number is double)
                {
                    bits = NUMERIC_DOUBLE;
                }
                else
                {
                    throw new System.ArgumentException("cannot store numeric type " + number.GetType());
                }
            }
            else
            {
                bytes = field.BinaryValue();
                if (bytes != null)
                {
                    bits = BYTE_ARR;
                }
                else
                {
                    bits = STRING;
                    @string = field.StringValue;
                    if (@string == null)
                    {
                        throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            // Header: field number in the high bits, type code in the low TYPE_BITS bits.
            long infoAndBits = (((long)info.Number) << TYPE_BITS) | bits;
            BufferedDocs.WriteVLong(infoAndBits);

            if (bytes != null)
            {
                BufferedDocs.WriteVInt(bytes.Length);
                BufferedDocs.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                BufferedDocs.WriteString(@string);
            }
            else if (number is sbyte || number is short || number is int)
            {
                // Convert rather than unbox: (int) on a boxed sbyte/short throws InvalidCastException.
                BufferedDocs.WriteInt(Convert.ToInt32(number));
            }
            else if (number is long)
            {
                BufferedDocs.WriteLong((long)number);
            }
            else if (number is float)
            {
                BufferedDocs.WriteInt(Number.FloatToIntBits((float)number));
            }
            else if (number is double)
            {
                BufferedDocs.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
            }
            else
            {
                // Unsupported numeric types were rejected before the header was written.
                throw new InvalidOperationException("Cannot get here");
            }
        }
Example #4
0
 /// <summary>
 /// Asserts that two stored fields are equivalent by comparing, in order, their names,
 /// binary values, string values and numeric values.
 /// </summary>
 /// <param name="info">Message passed to every assertion.</param>
 /// <param name="leftField">Field supplying the first argument of each comparison.</param>
 /// <param name="rightField">Field supplying the second argument of each comparison.</param>
 public void AssertStoredFieldEquals(string info, IndexableField leftField, IndexableField rightField)
 {
     var leftName = leftField.Name();
     var rightName = rightField.Name();
     Assert.AreEqual(leftName, rightName, info);

     var leftBinary = leftField.BinaryValue();
     var rightBinary = rightField.BinaryValue();
     Assert.AreEqual(leftBinary, rightBinary, info);

     var leftString = leftField.StringValue;
     var rightString = rightField.StringValue;
     Assert.AreEqual(leftString, rightString, info);

     var leftNumber = leftField.NumericValue;
     var rightNumber = rightField.NumericValue;
     Assert.AreEqual(leftNumber, rightNumber, info);
     // TODO: should we check the FT at all?
 }
Example #5
0
        /// <summary>
        /// Writes one stored field to <c>FieldsStream</c>: the field number as a VInt, one
        /// byte of type flags, then the field value.
        /// </summary>
        /// <remarks>
        /// The value is taken from the first non-null source, checked in this order: numeric
        /// value, binary value, string value. A string value is written with no type flag set.
        /// </remarks>
        /// <param name="info">Field metadata; its <c>Number</c> is written first.</param>
        /// <param name="field">Field whose value is written.</param>
        /// <exception cref="System.ArgumentException">
        /// The numeric value has an unsupported type, or the field has neither a numeric,
        /// binary nor string value.
        /// </exception>
        public override void WriteField(FieldInfo info, IndexableField field)
        {
            FieldsStream.WriteVInt(info.Number);
            int      bits    = 0;
            BytesRef bytes   = null;
            string   @string = null;
            // TODO: maybe a field should serialize itself?
            // this way we don't bake into indexer all these
            // specific encodings for different fields?  and apps
            // can customize...

            object number = (object)field.NumericValue;

            if (number != null)
            {
                if (number is sbyte || number is short || number is int)
                {
                    bits |= FIELD_IS_NUMERIC_INT;
                }
                else if (number is long)
                {
                    bits |= FIELD_IS_NUMERIC_LONG;
                }
                else if (number is float)
                {
                    bits |= FIELD_IS_NUMERIC_FLOAT;
                }
                else if (number is double)
                {
                    bits |= FIELD_IS_NUMERIC_DOUBLE;
                }
                else
                {
                    throw new System.ArgumentException("cannot store numeric type " + number.GetType());
                }
            }
            else
            {
                bytes = field.BinaryValue();
                if (bytes != null)
                {
                    bits |= FIELD_IS_BINARY;
                }
                else
                {
                    @string = field.StringValue;
                    if (@string == null)
                    {
                        throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            FieldsStream.WriteByte((byte)(sbyte)bits);

            if (bytes != null)
            {
                FieldsStream.WriteVInt(bytes.Length);
                FieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                FieldsStream.WriteString(@string);
            }
            else if (number is sbyte || number is short || number is int)
            {
                // Convert rather than unbox: (int) on a boxed sbyte/short throws InvalidCastException.
                FieldsStream.WriteInt(Convert.ToInt32(number));
            }
            else if (number is long)
            {
                FieldsStream.WriteLong((long)number);
            }
            else if (number is float)
            {
                FieldsStream.WriteInt(Number.FloatToIntBits((float)number));
            }
            else if (number is double)
            {
                FieldsStream.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
            }
            else
            {
                // Unsupported numeric types were rejected before the flag byte was written.
                throw new InvalidOperationException("Cannot get here");
            }
        }
        /// <summary>
        /// Writes one stored field to <c>BufferedDocs</c>: a VLong header holding
        /// <c>(info.Number &lt;&lt; TYPE_BITS) | typeCode</c>, followed by the field value.
        /// </summary>
        /// <remarks>
        /// The value is taken from the first non-null source, checked in this order: numeric
        /// value, binary value, string value. A numeric value supplied as a string is parsed
        /// (int, then long, then float, then double) and written as the parsed number, so the
        /// payload always matches the type code announced in the header.
        /// </remarks>
        /// <param name="info">Field metadata; its <c>Number</c> is encoded in the header.</param>
        /// <param name="field">Field whose value is written.</param>
        /// <exception cref="System.ArgumentException">
        /// The numeric value has an unsupported type or is an unparsable string, or the field
        /// has neither a numeric, binary nor string value.
        /// </exception>
        public override void WriteField(FieldInfo info, IndexableField field)
        {
            int      bits    = 0;
            BytesRef bytes   = null;
            string   @string = null;

            object number = (object)field.NumericValue;

            // Normalise a string-typed numeric value into a boxed number up front, so the type
            // code and the payload below are derived from the same value.
            string numStr = number as string;
            if (numStr != null)
            {
                int    parsedInt;
                long   parsedLong;
                float  parsedFloat;
                double parsedDouble;

                // int.TryParse accepts every string that sbyte/short parsing would accept.
                if (int.TryParse(numStr, out parsedInt))
                {
                    number = parsedInt;
                }
                else if (long.TryParse(numStr, out parsedLong))
                {
                    number = parsedLong;
                }
                else if (float.TryParse(numStr, out parsedFloat))
                {
                    number = parsedFloat;
                }
                else if (double.TryParse(numStr, out parsedDouble))
                {
                    number = parsedDouble;
                }
                else
                {
                    throw new System.ArgumentException("cannot store numeric type " + number.GetType());
                }
            }

            if (number != null)
            {
                if (number is sbyte || number is short || number is int)
                {
                    bits = NUMERIC_INT;
                }
                else if (number is long)
                {
                    bits = NUMERIC_LONG;
                }
                else if (number is float)
                {
                    bits = NUMERIC_FLOAT;
                }
                else if (number is double)
                {
                    bits = NUMERIC_DOUBLE;
                }
                else
                {
                    throw new System.ArgumentException("cannot store numeric type " + number.GetType());
                }
            }
            else
            {
                bytes = field.BinaryValue();
                if (bytes != null)
                {
                    bits = BYTE_ARR;
                }
                else
                {
                    bits    = STRING;
                    @string = field.StringValue;
                    if (@string == null)
                    {
                        throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            // Header: field number in the high bits, type code in the low TYPE_BITS bits.
            long infoAndBits = (((long)info.Number) << TYPE_BITS) | bits;

            BufferedDocs.WriteVLong(infoAndBits);

            if (bytes != null)
            {
                BufferedDocs.WriteVInt(bytes.Length);
                BufferedDocs.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                BufferedDocs.WriteString(@string);
            }
            else if (number is sbyte || number is short || number is int)
            {
                // Convert rather than unbox: (int) on a boxed sbyte/short throws InvalidCastException.
                BufferedDocs.WriteInt(Convert.ToInt32(number));
            }
            else if (number is long)
            {
                BufferedDocs.WriteLong((long)number);
            }
            else if (number is float)
            {
                BufferedDocs.WriteInt(Number.FloatToIntBits((float)number));
            }
            else if (number is double)
            {
                BufferedDocs.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
            }
            else
            {
                // Unsupported numeric types were rejected before the header was written.
                throw new InvalidOperationException("Cannot get here");
            }
        }
Example #7
0
 /// <summary>
 /// Stores <paramref name="field"/> in <paramref name="map"/> under the field's name,
 /// using the dictionary indexer (an existing entry with that name is overwritten).
 /// </summary>
 /// <param name="map">Dictionary keyed by field name.</param>
 /// <param name="field">Field to register.</param>
 private static void Add(IDictionary<string, IndexableField> map, IndexableField field)
 {
     string key = field.Name();
     map[key] = field;
 }
Example #8
0
        /// <summary>
        /// Inverts the first <paramref name="count"/> entries of <paramref name="fields"/>:
        /// for each indexed field, pulls tokens from the field's <c>TokenStream</c>, validates
        /// position increments (and offsets when they are indexed), updates
        /// <c>FieldState</c> and passes every token to <c>Consumer</c>.
        /// </summary>
        /// <remarks>
        /// Each processed slot of <paramref name="fields"/> is set to <c>null</c> afterwards.
        /// If <c>Consumer.Add()</c> throws, the document writer is marked as aborting.
        /// The token stream is always closed, including when <c>Reset()</c> fails.
        /// </remarks>
        /// <param name="fields">Field instances to process; entries are nulled as they are consumed.</param>
        /// <param name="count">Number of leading entries of <paramref name="fields"/> to process.</param>
        /// <exception cref="System.NotSupportedException">
        /// A field omits norms but carries a boost other than 1.0.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// A token has an illegal position increment or offsets, the position overflows, or
        /// <c>DocState.MaxTermPrefix</c> was set while processing the field.
        /// </exception>
        public override void ProcessFields(IndexableField[] fields, int count)
        {
            FieldState.Reset();

            bool doInvert = Consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                IndexableField     field     = fields[i];
                IndexableFieldType fieldType = field.FieldType();

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (fieldType.Indexed && doInvert)
                {
                    bool analyzed = fieldType.Tokenized && DocState.Analyzer != null;

                    // if the field omits norms, the boost cannot be indexed.
                    if (fieldType.OmitNorms && field.GetBoost() != 1.0f)
                    {
                        throw new System.NotSupportedException("You cannot set an index-time boost: norms are omitted for field '" + field.Name() + "'");
                    }

                    // only bother checking offsets if something will consume them.
                    // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
                    bool checkOffsets    = fieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    int  lastStartOffset = 0;

                    // Every instance after the first is separated from the previous one by the
                    // analyzer's position increment gap (only when the field is analyzed).
                    if (i > 0)
                    {
                        FieldState.Position_Renamed += analyzed ? DocState.Analyzer.GetPositionIncrementGap(fieldInfo.Name) : 0;
                    }

                    /*
                     * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
                     * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
                     * but rather a finally that takes note of the problem.
                     */

                    bool succeededInProcessingField = false;

                    TokenStream stream = field.GetTokenStream(DocState.Analyzer);

                    try
                    {
                        // reset the TokenStream to the first token.
                        // Done inside the try so that a failing Reset() still closes the stream
                        // and is reported by the finally block below.
                        stream.Reset();

                        bool hasMoreTokens = stream.IncrementToken();

                        FieldState.AttributeSource_Renamed = stream;

                        IOffsetAttribute            offsetAttribute  = FieldState.AttributeSource_Renamed.AddAttribute <IOffsetAttribute>();
                        IPositionIncrementAttribute posIncrAttribute = FieldState.AttributeSource_Renamed.AddAttribute <IPositionIncrementAttribute>();

                        if (hasMoreTokens)
                        {
                            Consumer.Start(field);

                            do
                            {
                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                int posIncr = posIncrAttribute.PositionIncrement;
                                if (posIncr < 0)
                                {
                                    throw new System.ArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.Name() + "'");
                                }
                                if (FieldState.Position_Renamed == 0 && posIncr == 0)
                                {
                                    throw new System.ArgumentException("first position increment must be > 0 (got 0) for field '" + field.Name() + "'");
                                }
                                int position = FieldState.Position_Renamed + posIncr;
                                if (position > 0)
                                {
                                    // NOTE: confusing: this "mirrors" the
                                    // position++ we do below
                                    position--;
                                }
                                else if (position < 0)
                                {
                                    // The int addition wrapped around.
                                    throw new System.ArgumentException("position overflow for field '" + field.Name() + "'");
                                }

                                // position is legal, we can safely place it in fieldState now.
                                // not sure if anything will use fieldState after non-aborting exc...
                                FieldState.Position_Renamed = position;

                                // A zero increment places this token at the same position as the previous one.
                                if (posIncr == 0)
                                {
                                    FieldState.NumOverlap_Renamed++;
                                }

                                if (checkOffsets)
                                {
                                    int startOffset = FieldState.Offset_Renamed + offsetAttribute.StartOffset();
                                    int endOffset   = FieldState.Offset_Renamed + offsetAttribute.EndOffset();
                                    if (startOffset < 0 || endOffset < startOffset)
                                    {
                                        throw new System.ArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.Name() + "'");
                                    }
                                    if (startOffset < lastStartOffset)
                                    {
                                        throw new System.ArgumentException("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.Name() + "'");
                                    }
                                    lastStartOffset = startOffset;
                                }

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    Consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        DocState.DocWriter.SetAborting();
                                    }
                                }
                                FieldState.Length_Renamed++;
                                FieldState.Position_Renamed++;
                            } while (stream.IncrementToken());
                        }
                        // trigger streams to perform end-of-stream operations
                        stream.End();
                        // TODO: maybe add some safety? then again, its already checked
                        // when we come back around to the field...
                        FieldState.Position_Renamed += posIncrAttribute.PositionIncrement;
                        FieldState.Offset_Renamed   += offsetAttribute.EndOffset();

                        if (DocState.MaxTermPrefix != null)
                        {
                            string msg = "Document contains at least one immense term in field=\"" + fieldInfo.Name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + DocState.MaxTermPrefix + "...'";
                            if (DocState.InfoStream.IsEnabled("IW"))
                            {
                                DocState.InfoStream.Message("IW", "ERROR: " + msg);
                            }
                            DocState.MaxTermPrefix = null;
                            throw new System.ArgumentException(msg);
                        }

                        /* if success was false above there is an exception coming through and we won't get here.*/
                        succeededInProcessingField = true;
                    }
                    finally
                    {
                        // On failure, close via the IOUtils helper; on success, a failing
                        // Dispose() is allowed to propagate.
                        if (!succeededInProcessingField)
                        {
                            IOUtils.CloseWhileHandlingException(stream);
                        }
                        else
                        {
                            stream.Dispose();
                        }
                        if (!succeededInProcessingField && DocState.InfoStream.IsEnabled("DW"))
                        {
                            DocState.InfoStream.Message("DW", "An exception was thrown while processing field " + fieldInfo.Name);
                        }
                    }

                    FieldState.Offset_Renamed += analyzed ? DocState.Analyzer.GetOffsetGap(fieldInfo.Name) : 0;
                    FieldState.Boost_Renamed  *= field.GetBoost();
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            Consumer.Finish();
            EndConsumer.Finish();
        }
        /// <summary>
        /// Clears the term-vector flags, then validates the term-vector settings of the first
        /// <paramref name="count"/> entries of <paramref name="fields"/> and accumulates which
        /// term-vector features (positions, offsets, payloads) are requested.
        /// </summary>
        /// <param name="fields">Field instances to inspect.</param>
        /// <param name="count">Number of leading entries of <paramref name="fields"/> to inspect.</param>
        /// <returns><c>true</c> when at least one indexed field stores term vectors.</returns>
        /// <exception cref="System.ArgumentException">
        /// A field requests a term-vector option that its other settings do not allow.
        /// </exception>
        internal override bool Start(IndexableField[] fields, int count)
        {
            DoVectors         = false;
            DoVectorPositions = false;
            DoVectorOffsets   = false;
            DoVectorPayloads  = false;
            HasPayloads       = false;

            for (int idx = 0; idx < count; idx++)
            {
                IndexableField current = fields[idx];
                var type = current.FieldType();

                // Not indexed: no term-vector option may be set at all.
                if (!type.Indexed)
                {
                    if (type.StoreTermVectors)
                    {
                        throw new System.ArgumentException("cannot index term vectors when field is not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorOffsets)
                    {
                        throw new System.ArgumentException("cannot index term vector offsets when field is not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorPositions)
                    {
                        throw new System.ArgumentException("cannot index term vector positions when field is not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorPayloads)
                    {
                        throw new System.ArgumentException("cannot index term vector payloads when field is not indexed (field=\"" + current.Name() + "\")");
                    }
                    continue;
                }

                // Indexed without term vectors: the dependent options must be off.
                if (!type.StoreTermVectors)
                {
                    if (type.StoreTermVectorOffsets)
                    {
                        throw new System.ArgumentException("cannot index term vector offsets when term vectors are not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorPositions)
                    {
                        throw new System.ArgumentException("cannot index term vector positions when term vectors are not indexed (field=\"" + current.Name() + "\")");
                    }
                    if (type.StoreTermVectorPayloads)
                    {
                        throw new System.ArgumentException("cannot index term vector payloads when term vectors are not indexed (field=\"" + current.Name() + "\")");
                    }
                    continue;
                }

                // Indexed with term vectors: accumulate the requested features.
                DoVectors          = true;
                DoVectorPositions |= type.StoreTermVectorPositions;
                DoVectorOffsets   |= type.StoreTermVectorOffsets;
                if (DoVectorPositions)
                {
                    DoVectorPayloads |= type.StoreTermVectorPayloads;
                }
                else if (type.StoreTermVectorPayloads)
                {
                    // TODO: move this check somewhere else, and impl the other missing ones
                    throw new System.ArgumentException("cannot index term vector payloads without term vector positions (field=\"" + current.Name() + "\")");
                }
            }

            if (DoVectors)
            {
                TermsWriter.HasVectors = true;

                // Only necessary if previous doc hit a
                // non-aborting exception while writing vectors in
                // this field:
                if (TermsHashPerField.BytesHash.Size() != 0)
                {
                    TermsHashPerField.Reset();
                }
            }

            // TODO: only if needed for performance
            //perThread.postingsCount = 0;

            return DoVectors;
        }
        /// <summary>
        /// Writes one stored field to <c>FieldsStream</c>: the field number as a VInt, one
        /// byte of type flags, then the field value.
        /// </summary>
        /// <remarks>
        /// The value is taken from the first non-null source, checked in this order: numeric
        /// value, binary value, string value. A string value is written with no type flag set.
        /// </remarks>
        /// <param name="info">Field metadata; its <c>Number</c> is written first.</param>
        /// <param name="field">Field whose value is written.</param>
        /// <exception cref="System.ArgumentException">
        /// The numeric value has an unsupported type, or the field has neither a numeric,
        /// binary nor string value.
        /// </exception>
        public override void WriteField(FieldInfo info, IndexableField field)
        {
            FieldsStream.WriteVInt(info.Number);
            int bits = 0;
            BytesRef bytes = null;
            string @string = null;
            // TODO: maybe a field should serialize itself?
            // this way we don't bake into indexer all these
            // specific encodings for different fields?  and apps
            // can customize...

            object number = (object)field.NumericValue;
            if (number != null)
            {
                if (number is sbyte || number is short || number is int)
                {
                    bits |= FIELD_IS_NUMERIC_INT;
                }
                else if (number is long)
                {
                    bits |= FIELD_IS_NUMERIC_LONG;
                }
                else if (number is float)
                {
                    bits |= FIELD_IS_NUMERIC_FLOAT;
                }
                else if (number is double)
                {
                    bits |= FIELD_IS_NUMERIC_DOUBLE;
                }
                else
                {
                    throw new System.ArgumentException("cannot store numeric type " + number.GetType());
                }
            }
            else
            {
                bytes = field.BinaryValue();
                if (bytes != null)
                {
                    bits |= FIELD_IS_BINARY;
                }
                else
                {
                    @string = field.StringValue;
                    if (@string == null)
                    {
                        throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            FieldsStream.WriteByte((sbyte)bits);

            if (bytes != null)
            {
                FieldsStream.WriteVInt(bytes.Length);
                FieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                FieldsStream.WriteString(@string);
            }
            else if (number is sbyte || number is short || number is int)
            {
                // Convert rather than unbox: (int) on a boxed sbyte/short throws InvalidCastException.
                FieldsStream.WriteInt(Convert.ToInt32(number));
            }
            else if (number is long)
            {
                FieldsStream.WriteLong((long)number);
            }
            else if (number is float)
            {
                FieldsStream.WriteInt(Number.FloatToIntBits((float)number));
            }
            else if (number is double)
            {
                FieldsStream.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
            }
            else
            {
                // Unsupported numeric types were rejected before the flag byte was written.
                throw new InvalidOperationException("Cannot get here");
            }
        }