public static void CheckNorms(AtomicReader reader)
{
    // Verify omit-norms behavior: norms must exist for exactly those test
    // fields that are indexed and do not omit norms.
    foreach (IndexableField f in DocHelper.Fields)
    {
        if (!f.FieldType().Indexed)
        {
            continue;
        }
        bool hasNorms = reader.GetNormValues(f.Name()) != null;
        Assert.AreEqual(hasNorms, !f.FieldType().OmitNorms);
        Assert.AreEqual(hasNorms, !DocHelper.NoNorms.ContainsKey(f.Name()));
        if (!hasNorms)
        {
            // The multi-reader view must agree that this field has no norms.
            NumericDocValues norms = MultiDocValues.GetNormValues(reader, f.Name());
            Assert.IsNull(norms);
        }
    }
}
private static void Add(IDictionary<string, IndexableField> map, IndexableField field)
{
    // Register the field under its own name, replacing any earlier entry.
    string key = field.Name();
    map[key] = field;
}
/// <summary>
/// Encodes one stored field into the buffered document stream: a vlong header
/// packing the field number and the value-type bits, followed by the value
/// payload (raw bytes, a string, or the numeric value's bits).
/// </summary>
/// <param name="info">Field metadata; only <c>info.Number</c> is used here.</param>
/// <param name="field">The field whose stored value is written.</param>
/// <exception cref="System.ArgumentException">
/// If the numeric value has an unsupported type, a string-typed numeric value
/// cannot be parsed, or the field has no binary, string, nor numeric value.
/// </exception>
public override void WriteField(FieldInfo info, IndexableField field)
{
    int bits = 0;
    BytesRef bytes;
    string @string;

    object number = (object)field.NumericValue;
    if (number != null)
    {
        if (number is string)
        {
            // Numeric value supplied as a string: classify by the narrowest
            // type that can parse it. int.TryParse alone covers the sbyte and
            // short ranges, so the separate sbyte/short probes are redundant.
            string numStr = number.ToString();
            int dummyInt;
            long dummyLong;
            float dummyFloat;
            double dummyDouble;
            if (int.TryParse(numStr, out dummyInt))
            {
                bits = NUMERIC_INT;
            }
            else if (long.TryParse(numStr, out dummyLong))
            {
                bits = NUMERIC_LONG;
            }
            else if (float.TryParse(numStr, out dummyFloat))
            {
                bits = NUMERIC_FLOAT;
            }
            else if (double.TryParse(numStr, out dummyDouble))
            {
                bits = NUMERIC_DOUBLE;
            }
            else
            {
                throw new System.ArgumentException("cannot store numeric type " + number.GetType());
            }
        }
        else
        {
            if (number is sbyte || number is short || number is int)
            {
                bits = NUMERIC_INT;
            }
            else if (number is long)
            {
                bits = NUMERIC_LONG;
            }
            else if (number is float)
            {
                bits = NUMERIC_FLOAT;
            }
            else if (number is double)
            {
                bits = NUMERIC_DOUBLE;
            }
            else
            {
                throw new System.ArgumentException("cannot store numeric type " + number.GetType());
            }
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.BinaryValue();
        if (bytes != null)
        {
            bits = BYTE_ARR;
            @string = null;
        }
        else
        {
            bits = STRING;
            @string = field.StringValue;
            if (@string == null)
            {
                throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }

    // Header: field number and type bits packed into a single vlong.
    long infoAndBits = (((long)info.Number) << TYPE_BITS) | bits;
    BufferedDocs.WriteVLong(infoAndBits);

    if (bytes != null)
    {
        BufferedDocs.WriteVInt(bytes.Length);
        BufferedDocs.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        BufferedDocs.WriteString(field.StringValue);
    }
    else
    {
        if (number is string)
        {
            // BUGFIX: this branch previously only re-classified the value into
            // `bits` and wrote nothing, so the numeric payload was silently
            // missing from the stream. Parse the string and write the value in
            // the same representation the classification above selected.
            string numStr = number.ToString();
            int intVal;
            long longVal;
            float floatVal;
            double doubleVal;
            if (int.TryParse(numStr, out intVal))
            {
                BufferedDocs.WriteInt(intVal);
            }
            else if (long.TryParse(numStr, out longVal))
            {
                BufferedDocs.WriteLong(longVal);
            }
            else if (float.TryParse(numStr, out floatVal))
            {
                BufferedDocs.WriteInt(Number.FloatToIntBits(floatVal));
            }
            else if (double.TryParse(numStr, out doubleVal))
            {
                BufferedDocs.WriteLong(BitConverter.DoubleToInt64Bits(doubleVal));
            }
            else
            {
                throw new System.ArgumentException("cannot store numeric type " + number.GetType());
            }
        }
        else
        {
            if (number is sbyte || number is short || number is int)
            {
                BufferedDocs.WriteInt((int)number);
            }
            else if (number is long)
            {
                BufferedDocs.WriteLong((long)number);
            }
            else if (number is float)
            {
                BufferedDocs.WriteInt(Number.FloatToIntBits((float)number));
            }
            else if (number is double)
            {
                BufferedDocs.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
            }
            else
            {
                // Consistent with the sibling writers: an unreachable state is an
                // InvalidOperationException, not a bare Exception.
                throw new InvalidOperationException("Cannot get here");
            }
        }
    }
}
/// <summary>
/// Checks that two stored fields are equivalent: same name and same value in
/// each of the three possible representations (binary, string, numeric).
/// </summary>
public void AssertStoredFieldEquals(string info, IndexableField leftField, IndexableField rightField)
{
    Assert.AreEqual(leftField.Name(), rightField.Name(), info);

    var leftBinary = leftField.BinaryValue();
    var rightBinary = rightField.BinaryValue();
    Assert.AreEqual(leftBinary, rightBinary, info);

    var leftString = leftField.StringValue;
    var rightString = rightField.StringValue;
    Assert.AreEqual(leftString, rightString, info);

    Assert.AreEqual(leftField.NumericValue, rightField.NumericValue, info);
    // TODO: should we check the FT at all?
}
public override void WriteField(FieldInfo info, IndexableField field)
{
    // Header: the field number identifies which field follows.
    FieldsStream.WriteVInt(info.Number);

    // TODO: maybe a field should serialize itself? this way we don't bake
    // into indexer all these specific encodings for different fields? and
    // apps can customize...

    // Work out which kind of value the field carries and set the type flags.
    int flags = 0;
    BytesRef binary;
    string text;
    object numeric = (object)field.NumericValue;
    if (numeric != null)
    {
        if (numeric is sbyte || numeric is short || numeric is int)
        {
            flags |= FIELD_IS_NUMERIC_INT;
        }
        else if (numeric is long)
        {
            flags |= FIELD_IS_NUMERIC_LONG;
        }
        else if (numeric is float)
        {
            flags |= FIELD_IS_NUMERIC_FLOAT;
        }
        else if (numeric is double)
        {
            flags |= FIELD_IS_NUMERIC_DOUBLE;
        }
        else
        {
            throw new System.ArgumentException("cannot store numeric type " + numeric.GetType());
        }
        text = null;
        binary = null;
    }
    else
    {
        binary = field.BinaryValue();
        if (binary != null)
        {
            flags |= FIELD_IS_BINARY;
            text = null;
        }
        else
        {
            text = field.StringValue;
            if (text == null)
            {
                throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }
    FieldsStream.WriteByte((byte)(sbyte)flags);

    // Payload: raw bytes, a string, or the numeric value's bits.
    if (binary != null)
    {
        FieldsStream.WriteVInt(binary.Length);
        FieldsStream.WriteBytes(binary.Bytes, binary.Offset, binary.Length);
    }
    else if (text != null)
    {
        FieldsStream.WriteString(field.StringValue);
    }
    else if (numeric is sbyte || numeric is short || numeric is int)
    {
        FieldsStream.WriteInt((int)numeric);
    }
    else if (numeric is long)
    {
        FieldsStream.WriteLong((long)numeric);
    }
    else if (numeric is float)
    {
        FieldsStream.WriteInt(Number.FloatToIntBits((float)numeric));
    }
    else if (numeric is double)
    {
        FieldsStream.WriteLong(BitConverter.DoubleToInt64Bits((double)numeric));
    }
    else
    {
        throw new InvalidOperationException("Cannot get here");
    }
}
private static void Add(IDictionary<string, IndexableField> map, IndexableField field)
{
    // Store the field keyed by its name; a later field with the same name wins.
    map[field.Name()] = field;
}
/// <summary>
/// Inverts every indexed field of the current document: runs each field's
/// token stream, validates positions/offsets, feeds tokens to the downstream
/// consumer, and accumulates per-document field state (position, offset,
/// length, overlap count, boost). Token-level failures are treated as
/// non-aborting; failures inside Consumer.Add() abort all buffered docs.
/// </summary>
/// <param name="fields">The document's fields; entries are nulled after use so GC can reclaim them.</param>
/// <param name="count">Number of valid entries in <paramref name="fields"/>.</param>
public override void ProcessFields(IndexableField[] fields, int count)
{
    // Fresh per-document state (position/offset/length/overlap/boost).
    FieldState.Reset();
    bool doInvert = Consumer.Start(fields, count);
    for (int i = 0; i < count; i++)
    {
        IndexableField field = fields[i];
        IndexableFieldType fieldType = field.FieldType();
        // TODO FI: this should be "genericized" to querying
        // consumer if it wants to see this particular field
        // tokenized.
        if (fieldType.Indexed && doInvert)
        {
            bool analyzed = fieldType.Tokenized && DocState.Analyzer != null;
            // if the field omits norms, the boost cannot be indexed.
            if (fieldType.OmitNorms && field.GetBoost() != 1.0f)
            {
                throw new System.NotSupportedException("You cannot set an index-time boost: norms are omitted for field '" + field.Name() + "'");
            }
            // only bother checking offsets if something will consume them.
            // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
            bool checkOffsets = fieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            int lastStartOffset = 0;
            // Multi-valued field: insert the analyzer's position gap between values.
            if (i > 0)
            {
                FieldState.Position_Renamed += analyzed ? DocState.Analyzer.GetPositionIncrementGap(fieldInfo.Name) : 0;
            }
            /*
             * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
             * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
             * but rather a finally that takes note of the problem.
             */
            bool succeededInProcessingField = false;
            TokenStream stream = field.GetTokenStream(DocState.Analyzer);
            // reset the TokenStream to the first token
            stream.Reset();
            try
            {
                bool hasMoreTokens = stream.IncrementToken();
                FieldState.AttributeSource_Renamed = stream;
                IOffsetAttribute offsetAttribute = FieldState.AttributeSource_Renamed.AddAttribute<IOffsetAttribute>();
                IPositionIncrementAttribute posIncrAttribute = FieldState.AttributeSource_Renamed.AddAttribute<IPositionIncrementAttribute>();
                if (hasMoreTokens)
                {
                    Consumer.Start(field);
                    do
                    {
                        // If we hit an exception in stream.next below
                        // (which is fairly common, eg if analyzer
                        // chokes on a given document), then it's
                        // non-aborting and (above) this one document
                        // will be marked as deleted, but still
                        // consume a docID
                        int posIncr = posIncrAttribute.PositionIncrement;
                        if (posIncr < 0)
                        {
                            throw new System.ArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.Name() + "'");
                        }
                        if (FieldState.Position_Renamed == 0 && posIncr == 0)
                        {
                            throw new System.ArgumentException("first position increment must be > 0 (got 0) for field '" + field.Name() + "'");
                        }
                        int position = FieldState.Position_Renamed + posIncr;
                        if (position > 0)
                        {
                            // NOTE: confusing: this "mirrors" the
                            // position++ we do below
                            position--;
                        }
                        else if (position < 0)
                        {
                            throw new System.ArgumentException("position overflow for field '" + field.Name() + "'");
                        }
                        // position is legal, we can safely place it in fieldState now.
                        // not sure if anything will use fieldState after non-aborting exc...
                        FieldState.Position_Renamed = position;
                        if (posIncr == 0)
                        {
                            FieldState.NumOverlap_Renamed++;
                        }
                        if (checkOffsets)
                        {
                            // Offsets are relative to the running per-document offset,
                            // must be non-negative, ordered, and non-decreasing in start.
                            int startOffset = FieldState.Offset_Renamed + offsetAttribute.StartOffset();
                            int endOffset = FieldState.Offset_Renamed + offsetAttribute.EndOffset();
                            if (startOffset < 0 || endOffset < startOffset)
                            {
                                throw new System.ArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.Name() + "'");
                            }
                            if (startOffset < lastStartOffset)
                            {
                                throw new System.ArgumentException("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.Name() + "'");
                            }
                            lastStartOffset = startOffset;
                        }
                        bool success = false;
                        try
                        {
                            // If we hit an exception in here, we abort
                            // all buffered documents since the last
                            // flush, on the likelihood that the
                            // internal state of the consumer is now
                            // corrupt and should not be flushed to a
                            // new segment:
                            Consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                DocState.DocWriter.SetAborting();
                            }
                        }
                        FieldState.Length_Renamed++;
                        FieldState.Position_Renamed++;
                    } while (stream.IncrementToken());
                }
                // trigger streams to perform end-of-stream operations
                stream.End();
                // TODO: maybe add some safety? then again, its already checked
                // when we come back around to the field...
                FieldState.Position_Renamed += posIncrAttribute.PositionIncrement;
                FieldState.Offset_Renamed += offsetAttribute.EndOffset();
                if (DocState.MaxTermPrefix != null)
                {
                    // A term exceeded the hard UTF8 length limit: log and reject the document.
                    string msg = "Document contains at least one immense term in field=\"" + fieldInfo.Name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + DocState.MaxTermPrefix + "...'";
                    if (DocState.InfoStream.IsEnabled("IW"))
                    {
                        DocState.InfoStream.Message("IW", "ERROR: " + msg);
                    }
                    DocState.MaxTermPrefix = null;
                    throw new System.ArgumentException(msg);
                }
                /* if success was false above there is an exception coming through and we won't get here.*/
                succeededInProcessingField = true;
            }
            finally
            {
                // On failure, close the stream without letting a secondary exception
                // mask the original; on success, dispose normally.
                if (!succeededInProcessingField)
                {
                    IOUtils.CloseWhileHandlingException(stream);
                }
                else
                {
                    stream.Dispose();
                }
                if (!succeededInProcessingField && DocState.InfoStream.IsEnabled("DW"))
                {
                    DocState.InfoStream.Message("DW", "An exception was thrown while processing field " + fieldInfo.Name);
                }
            }
            FieldState.Offset_Renamed += analyzed ? DocState.Analyzer.GetOffsetGap(fieldInfo.Name) : 0;
            FieldState.Boost_Renamed *= field.GetBoost();
        }
        // LUCENE-2387: don't hang onto the field, so GC can
        // reclaim
        fields[i] = null;
    }
    Consumer.Finish();
    EndConsumer.Finish();
}
internal override bool Start(IndexableField[] fields, int count)
{
    // Clear the per-document term-vector flags before scanning the fields.
    DoVectors = false;
    DoVectorPositions = false;
    DoVectorOffsets = false;
    DoVectorPayloads = false;
    HasPayloads = false;

    for (int idx = 0; idx < count; idx++)
    {
        IndexableField field = fields[idx];
        IndexableFieldType fieldType = field.FieldType();

        if (!fieldType.Indexed)
        {
            // Term vectors and all of their sub-options require an indexed field.
            if (fieldType.StoreTermVectors)
            {
                throw new System.ArgumentException("cannot index term vectors when field is not indexed (field=\"" + field.Name() + "\")");
            }
            if (fieldType.StoreTermVectorOffsets)
            {
                throw new System.ArgumentException("cannot index term vector offsets when field is not indexed (field=\"" + field.Name() + "\")");
            }
            if (fieldType.StoreTermVectorPositions)
            {
                throw new System.ArgumentException("cannot index term vector positions when field is not indexed (field=\"" + field.Name() + "\")");
            }
            if (fieldType.StoreTermVectorPayloads)
            {
                throw new System.ArgumentException("cannot index term vector payloads when field is not indexed (field=\"" + field.Name() + "\")");
            }
            continue;
        }

        if (fieldType.StoreTermVectors)
        {
            DoVectors = true;
            DoVectorPositions |= fieldType.StoreTermVectorPositions;
            DoVectorOffsets |= fieldType.StoreTermVectorOffsets;
            if (DoVectorPositions)
            {
                DoVectorPayloads |= fieldType.StoreTermVectorPayloads;
            }
            else if (fieldType.StoreTermVectorPayloads)
            {
                // TODO: move this check somewhere else, and impl the other missing ones
                throw new System.ArgumentException("cannot index term vector payloads without term vector positions (field=\"" + field.Name() + "\")");
            }
        }
        else
        {
            // Sub-options are meaningless without term vectors themselves.
            if (fieldType.StoreTermVectorOffsets)
            {
                throw new System.ArgumentException("cannot index term vector offsets when term vectors are not indexed (field=\"" + field.Name() + "\")");
            }
            if (fieldType.StoreTermVectorPositions)
            {
                throw new System.ArgumentException("cannot index term vector positions when term vectors are not indexed (field=\"" + field.Name() + "\")");
            }
            if (fieldType.StoreTermVectorPayloads)
            {
                throw new System.ArgumentException("cannot index term vector payloads when term vectors are not indexed (field=\"" + field.Name() + "\")");
            }
        }
    }

    if (DoVectors)
    {
        TermsWriter.HasVectors = true;
        if (TermsHashPerField.BytesHash.Size() != 0)
        {
            // Only necessary if previous doc hit a
            // non-aborting exception while writing vectors in
            // this field:
            TermsHashPerField.Reset();
        }
    }

    // TODO: only if needed for performance
    //perThread.postingsCount = 0;
    return DoVectors;
}
/// <summary>
/// Encodes one stored field: a vint field number, one flags byte describing
/// the value type, then the payload (raw bytes, a string, or the numeric
/// value's bits).
/// </summary>
/// <param name="info">Field metadata; only <c>info.Number</c> is used here.</param>
/// <param name="field">The field whose stored value is written.</param>
/// <exception cref="System.ArgumentException">
/// If the numeric value has an unsupported type, or the field has no binary,
/// string, nor numeric value.
/// </exception>
public override void WriteField(FieldInfo info, IndexableField field)
{
    FieldsStream.WriteVInt(info.Number);
    int bits = 0;
    BytesRef bytes;
    string @string;
    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields? and apps
    // can customize...
    object number = (object)field.NumericValue;
    if (number != null)
    {
        if (number is sbyte || number is short || number is int)
        {
            bits |= FIELD_IS_NUMERIC_INT;
        }
        else if (number is long)
        {
            bits |= FIELD_IS_NUMERIC_LONG;
        }
        else if (number is float)
        {
            bits |= FIELD_IS_NUMERIC_FLOAT;
        }
        else if (number is double)
        {
            bits |= FIELD_IS_NUMERIC_DOUBLE;
        }
        else
        {
            throw new System.ArgumentException("cannot store numeric type " + number.GetType());
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.BinaryValue();
        if (bytes != null)
        {
            bits |= FIELD_IS_BINARY;
            @string = null;
        }
        else
        {
            @string = field.StringValue;
            if (@string == null)
            {
                throw new System.ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }
    // CONSISTENCY FIX: the sibling WriteField in this file writes the flags
    // byte as (byte)(sbyte)bits to match WriteByte(byte); this variant used a
    // bare (sbyte) cast. Use the same double cast here.
    FieldsStream.WriteByte((byte)(sbyte)bits);
    if (bytes != null)
    {
        FieldsStream.WriteVInt(bytes.Length);
        FieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        FieldsStream.WriteString(field.StringValue);
    }
    else
    {
        if (number is sbyte || number is short || number is int)
        {
            FieldsStream.WriteInt((int)number);
        }
        else if (number is long)
        {
            FieldsStream.WriteLong((long)number);
        }
        else if (number is float)
        {
            FieldsStream.WriteInt(Number.FloatToIntBits((float)number));
        }
        else if (number is double)
        {
            FieldsStream.WriteLong(BitConverter.DoubleToInt64Bits((double)number));
        }
        else
        {
            throw new InvalidOperationException("Cannot get here");
        }
    }
}