public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
{
    DocValuesType_e? dvType = field.FieldType().DocValueType;
    if (dvType != null)
    {
        fieldInfo.DocValuesType = dvType;
        if (dvType == DocValuesType_e.BINARY)
        {
            AddBinaryField(fieldInfo, docID, field.BinaryValue());
        }
        else if (dvType == DocValuesType_e.SORTED)
        {
            AddSortedField(fieldInfo, docID, field.BinaryValue());
        }
        else if (dvType == DocValuesType_e.SORTED_SET)
        {
            AddSortedSetField(fieldInfo, docID, field.BinaryValue());
        }
        else if (dvType == DocValuesType_e.NUMERIC)
        {
            if (!(field.NumericValue is long?))
            {
                throw new System.ArgumentException("illegal type " + field.NumericValue.GetType() + ": DocValues types must be Long");
            }
            AddNumericField(fieldInfo, docID, (long)field.NumericValue);
        }
        else
        {
            Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
        }
    }
}
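// Illustrative only (not from the source above): a minimal sketch of the document-side
// API that produces the four DocValues cases AddField dispatches on. Field class names
// follow the standard Lucene 4.x API; exact namespaces and casing may differ in this port.
Document doc = new Document();
doc.Add(new NumericDocValuesField("popularity", 42L));                 // NUMERIC: value must be a long
doc.Add(new BinaryDocValuesField("payload", new BytesRef("raw")));     // BINARY
doc.Add(new SortedDocValuesField("category", new BytesRef("books")));  // SORTED
doc.Add(new SortedSetDocValuesField("tags", new BytesRef("fiction"))); // SORTED_SET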
public static void CheckNorms(AtomicReader reader)
{
    // test omit norms
    for (int i = 0; i < DocHelper.Fields.Length; i++)
    {
        IndexableField f = DocHelper.Fields[i];
        if (f.FieldType().Indexed)
        {
            Assert.AreEqual(reader.GetNormValues(f.Name()) != null, !f.FieldType().OmitNorms);
            Assert.AreEqual(reader.GetNormValues(f.Name()) != null, !DocHelper.NoNorms.ContainsKey(f.Name()));
            if (reader.GetNormValues(f.Name()) == null)
            {
                // test for norms of null
                NumericDocValues norms = MultiDocValues.GetNormValues(reader, f.Name());
                Assert.IsNull(norms);
            }
        }
    }
}
public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
{
    if (field.FieldType().Stored)
    {
        if (NumStoredFields == StoredFields.Length)
        {
            int newSize = ArrayUtil.Oversize(NumStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);

            IndexableField[] newArray = new IndexableField[newSize];
            Array.Copy(StoredFields, 0, newArray, 0, NumStoredFields);
            StoredFields = newArray;

            FieldInfo[] newInfoArray = new FieldInfo[newSize];
            Array.Copy(FieldInfos, 0, newInfoArray, 0, NumStoredFields);
            FieldInfos = newInfoArray;
        }

        StoredFields[NumStoredFields] = field;
        FieldInfos[NumStoredFields] = fieldInfo;
        NumStoredFields++;

        Debug.Assert(DocState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
    }
}
static DocHelper()
{
    CustomType = new FieldType(TextField.TYPE_STORED);
    TextField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, CustomType);

    CustomType2 = new FieldType(TextField.TYPE_STORED);
    CustomType2.StoreTermVectors = true;
    CustomType2.StoreTermVectorPositions = true;
    CustomType2.StoreTermVectorOffsets = true;
    TextField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, CustomType2);

    CustomType3 = new FieldType(TextField.TYPE_STORED);
    CustomType3.OmitNorms = true;
    TextField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, CustomType3);

    KeyField = new StringField(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES);

    CustomType5 = new FieldType(TextField.TYPE_STORED);
    CustomType5.OmitNorms = true;
    CustomType5.Tokenized = false;
    NoNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, CustomType5);

    CustomType6 = new FieldType(TextField.TYPE_STORED);
    CustomType6.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
    NoTFField = new Field(NO_TF_KEY, NO_TF_TEXT, CustomType6);

    CustomType7 = new FieldType();
    CustomType7.Stored = true;
    UnIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, CustomType7);

    CustomType8 = new FieldType(TextField.TYPE_NOT_STORED);
    CustomType8.StoreTermVectors = true;
    UnStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, CustomType8);

    UnStoredField1 = new TextField(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO);
    LazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, CustomType);
    TextUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, CustomType);
    TextUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, CustomType2);

    Fields = new Field[] { TextField1, TextField2, TextField3, KeyField, NoNormsField, NoTFField, UnIndField, UnStoredField1, UnStoredField2, TextUtfField1, TextUtfField2, LazyField, LazyFieldBinary, LargeLazyField };

    // Initialize the large lazy field
    StringBuilder buffer = new StringBuilder();
    for (int i = 0; i < 10000; i++)
    {
        buffer.Append("Lazily loading lengths of language in lieu of laughing ");
    }

    try
    {
        LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".GetBytes(IOUtils.CHARSET_UTF_8);
    }
    catch (EncoderFallbackException e)
    {
    }
    LazyFieldBinary = new StoredField(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
    Fields[Fields.Length - 2] = LazyFieldBinary;

    LARGE_LAZY_FIELD_TEXT = buffer.ToString();
    LargeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, CustomType);
    Fields[Fields.Length - 1] = LargeLazyField;

    for (int i = 0; i < Fields.Length; i++)
    {
        IndexableField f = Fields[i];
        Add(All, f);
        if (f.FieldType().Indexed)
        {
            Add(Indexed, f);
        }
        else
        {
            Add(Unindexed, f);
        }
        if (f.FieldType().StoreTermVectors)
        {
            Add(Termvector, f);
        }
        if (f.FieldType().Indexed && !f.FieldType().StoreTermVectors)
        {
            Add(Notermvector, f);
        }
        if (f.FieldType().Stored)
        {
            Add(Stored, f);
        }
        else
        {
            Add(Unstored, f);
        }
        if (f.FieldType().IndexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
        {
            Add(NoTf, f);
        }
        if (f.FieldType().OmitNorms)
        {
            Add(NoNorms, f);
        }
        if (f.FieldType().IndexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
        {
            Add(NoTf, f);
        }
        //if (f.isLazy()) add(lazy, f);
    }

    NameValues = new Dictionary<string, object>();
    NameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
    NameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
    NameValues[TEXT_FIELD_3_KEY] = FIELD_3_TEXT;
    NameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
    NameValues[NO_NORMS_KEY] = NO_NORMS_TEXT;
    NameValues[NO_TF_KEY] = NO_TF_TEXT;
    NameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
    NameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
    NameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
    NameValues[LAZY_FIELD_KEY] = LAZY_FIELD_TEXT;
    NameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
    NameValues[LARGE_LAZY_FIELD_KEY] = LARGE_LAZY_FIELD_TEXT;
    NameValues[TEXT_FIELD_UTF1_KEY] = FIELD_UTF1_TEXT;
    NameValues[TEXT_FIELD_UTF2_KEY] = FIELD_UTF2_TEXT;
}
public override void ProcessFields(IndexableField[] fields, int count)
{
    FieldState.Reset();

    bool doInvert = Consumer.Start(fields, count);

    for (int i = 0; i < count; i++)
    {
        IndexableField field = fields[i];
        IndexableFieldType fieldType = field.FieldType();

        // TODO FI: this should be "genericized" to querying
        // consumer if it wants to see this particular field
        // tokenized.
        if (fieldType.Indexed && doInvert)
        {
            bool analyzed = fieldType.Tokenized && DocState.Analyzer != null;

            // if the field omits norms, the boost cannot be indexed.
            if (fieldType.OmitNorms && field.GetBoost() != 1.0f)
            {
                throw new System.NotSupportedException("You cannot set an index-time boost: norms are omitted for field '" + field.Name() + "'");
            }

            // only bother checking offsets if something will consume them.
            // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
            bool checkOffsets = fieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            int lastStartOffset = 0;

            if (i > 0)
            {
                FieldState.Position_Renamed += analyzed ? DocState.Analyzer.GetPositionIncrementGap(fieldInfo.Name) : 0;
            }

            /*
             * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
             * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
             * but rather a finally that takes note of the problem.
             */
            bool succeededInProcessingField = false;

            TokenStream stream = field.GetTokenStream(DocState.Analyzer);
            // reset the TokenStream to the first token
            stream.Reset();

            try
            {
                bool hasMoreTokens = stream.IncrementToken();

                FieldState.AttributeSource_Renamed = stream;

                IOffsetAttribute offsetAttribute = FieldState.AttributeSource_Renamed.AddAttribute<IOffsetAttribute>();
                IPositionIncrementAttribute posIncrAttribute = FieldState.AttributeSource_Renamed.AddAttribute<IPositionIncrementAttribute>();

                if (hasMoreTokens)
                {
                    Consumer.Start(field);

                    do
                    {
                        // If we hit an exception in stream.next below
                        // (which is fairly common, eg if analyzer
                        // chokes on a given document), then it's
                        // non-aborting and (above) this one document
                        // will be marked as deleted, but still
                        // consume a docID

                        int posIncr = posIncrAttribute.PositionIncrement;
                        if (posIncr < 0)
                        {
                            throw new System.ArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.Name() + "'");
                        }
                        if (FieldState.Position_Renamed == 0 && posIncr == 0)
                        {
                            throw new System.ArgumentException("first position increment must be > 0 (got 0) for field '" + field.Name() + "'");
                        }
                        int position = FieldState.Position_Renamed + posIncr;
                        if (position > 0)
                        {
                            // NOTE: confusing: this "mirrors" the
                            // position++ we do below
                            position--;
                        }
                        else if (position < 0)
                        {
                            throw new System.ArgumentException("position overflow for field '" + field.Name() + "'");
                        }

                        // position is legal, we can safely place it in fieldState now.
                        // not sure if anything will use fieldState after non-aborting exc...
                        FieldState.Position_Renamed = position;

                        if (posIncr == 0)
                        {
                            FieldState.NumOverlap_Renamed++;
                        }

                        if (checkOffsets)
                        {
                            int startOffset = FieldState.Offset_Renamed + offsetAttribute.StartOffset();
                            int endOffset = FieldState.Offset_Renamed + offsetAttribute.EndOffset();
                            if (startOffset < 0 || endOffset < startOffset)
                            {
                                throw new System.ArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.Name() + "'");
                            }
                            if (startOffset < lastStartOffset)
                            {
                                throw new System.ArgumentException("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.Name() + "'");
                            }
                            lastStartOffset = startOffset;
                        }

                        bool success = false;
                        try
                        {
                            // If we hit an exception in here, we abort
                            // all buffered documents since the last
                            // flush, on the likelihood that the
                            // internal state of the consumer is now
                            // corrupt and should not be flushed to a
                            // new segment:
                            Consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                DocState.DocWriter.SetAborting();
                            }
                        }
                        FieldState.Length_Renamed++;
                        FieldState.Position_Renamed++;
                    } while (stream.IncrementToken());
                }

                // trigger streams to perform end-of-stream operations
                stream.End();

                // TODO: maybe add some safety? then again, its already checked
                // when we come back around to the field...
                FieldState.Position_Renamed += posIncrAttribute.PositionIncrement;
                FieldState.Offset_Renamed += offsetAttribute.EndOffset();

                if (DocState.MaxTermPrefix != null)
                {
                    string msg = "Document contains at least one immense term in field=\"" + fieldInfo.Name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + DocState.MaxTermPrefix + "...'";
                    if (DocState.InfoStream.IsEnabled("IW"))
                    {
                        DocState.InfoStream.Message("IW", "ERROR: " + msg);
                    }
                    DocState.MaxTermPrefix = null;
                    throw new System.ArgumentException(msg);
                }

                /* if success was false above there is an exception coming through and we won't get here.*/
                succeededInProcessingField = true;
            }
            finally
            {
                if (!succeededInProcessingField)
                {
                    IOUtils.CloseWhileHandlingException(stream);
                }
                else
                {
                    stream.Dispose();
                }
                if (!succeededInProcessingField && DocState.InfoStream.IsEnabled("DW"))
                {
                    DocState.InfoStream.Message("DW", "An exception was thrown while processing field " + fieldInfo.Name);
                }
            }

            FieldState.Offset_Renamed += analyzed ? DocState.Analyzer.GetOffsetGap(fieldInfo.Name) : 0;
            FieldState.Boost_Renamed *= field.GetBoost();
        }

        // LUCENE-2387: don't hang onto the field, so GC can
        // reclaim
        fields[i] = null;
    }

    Consumer.Finish();
    EndConsumer.Finish();
}
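// Illustrative only (not from the source above): the same attribute-driven consumption
// pattern that ProcessFields' inner loop follows, shown against a standalone field.
// `analyzer` is assumed to be an already-constructed Analyzer; exact Analyzer
// construction and API casing vary across versions of this port.
Field body = new TextField("body", "some text to analyze", Field.Store.NO);
TokenStream ts = body.GetTokenStream(analyzer); // same call ProcessFields uses
IOffsetAttribute offsetAtt = ts.AddAttribute<IOffsetAttribute>();
IPositionIncrementAttribute posIncAtt = ts.AddAttribute<IPositionIncrementAttribute>();
ts.Reset();
int position = 0;
while (ts.IncrementToken())
{
    position += posIncAtt.PositionIncrement; // accumulate positions, as FieldState.Position does
    int start = offsetAtt.StartOffset();     // offsets are validated and accumulated similarly
    int end = offsetAtt.EndOffset();
}
ts.End();
ts.Dispose();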
internal override bool Start(IndexableField[] fields, int count)
{
    DoVectors = false;
    DoVectorPositions = false;
    DoVectorOffsets = false;
    DoVectorPayloads = false;
    HasPayloads = false;

    for (int i = 0; i < count; i++)
    {
        IndexableField field = fields[i];
        if (field.FieldType().Indexed)
        {
            if (field.FieldType().StoreTermVectors)
            {
                DoVectors = true;
                DoVectorPositions |= field.FieldType().StoreTermVectorPositions;
                DoVectorOffsets |= field.FieldType().StoreTermVectorOffsets;
                if (DoVectorPositions)
                {
                    DoVectorPayloads |= field.FieldType().StoreTermVectorPayloads;
                }
                else if (field.FieldType().StoreTermVectorPayloads)
                {
                    // TODO: move this check somewhere else, and impl the other missing ones
                    throw new System.ArgumentException("cannot index term vector payloads without term vector positions (field=\"" + field.Name() + "\")");
                }
            }
            else
            {
                if (field.FieldType().StoreTermVectorOffsets)
                {
                    throw new System.ArgumentException("cannot index term vector offsets when term vectors are not indexed (field=\"" + field.Name() + "\")");
                }
                if (field.FieldType().StoreTermVectorPositions)
                {
                    throw new System.ArgumentException("cannot index term vector positions when term vectors are not indexed (field=\"" + field.Name() + "\")");
                }
                if (field.FieldType().StoreTermVectorPayloads)
                {
                    throw new System.ArgumentException("cannot index term vector payloads when term vectors are not indexed (field=\"" + field.Name() + "\")");
                }
            }
        }
        else
        {
            if (field.FieldType().StoreTermVectors)
            {
                throw new System.ArgumentException("cannot index term vectors when field is not indexed (field=\"" + field.Name() + "\")");
            }
            if (field.FieldType().StoreTermVectorOffsets)
            {
                throw new System.ArgumentException("cannot index term vector offsets when field is not indexed (field=\"" + field.Name() + "\")");
            }
            if (field.FieldType().StoreTermVectorPositions)
            {
                throw new System.ArgumentException("cannot index term vector positions when field is not indexed (field=\"" + field.Name() + "\")");
            }
            if (field.FieldType().StoreTermVectorPayloads)
            {
                throw new System.ArgumentException("cannot index term vector payloads when field is not indexed (field=\"" + field.Name() + "\")");
            }
        }
    }

    if (DoVectors)
    {
        TermsWriter.HasVectors = true;
        if (TermsHashPerField.BytesHash.Size() != 0)
        {
            // Only necessary if previous doc hit a
            // non-aborting exception while writing vectors in
            // this field:
            TermsHashPerField.Reset();
        }
    }

    // TODO: only if needed for performance
    //perThread.postingsCount = 0;

    return DoVectors;
}
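// Illustrative only (not from the source above): a sketch of FieldType configurations
// that Start() accepts or rejects. Property names mirror those used in DocHelper above;
// field names are hypothetical.
FieldType withVectors = new FieldType(TextField.TYPE_NOT_STORED);
withVectors.StoreTermVectors = true;
withVectors.StoreTermVectorPositions = true; // positions must be on before payloads can be stored
withVectors.StoreTermVectorOffsets = true;
withVectors.StoreTermVectorPayloads = true;  // OK: positions are enabled

FieldType invalid = new FieldType(TextField.TYPE_NOT_STORED);
invalid.StoreTermVectorOffsets = true;       // offsets requested without StoreTermVectors
// Indexing a field with 'invalid' makes Start() throw:
// "cannot index term vector offsets when term vectors are not indexed (field=\"...\")"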