/// <summary>
/// Adds the named field, or updates it if already known, by forwarding the
/// settings read from <paramref name="fieldType"/> to <c>AddOrUpdateInternal</c>.
/// <para>
/// NOTE: term vector booleans are not carried over by this method; the indexer
/// chain (TermVectorsConsumerPerField, DocFieldProcessor) must set them once it
/// has successfully consumed the document. NOTE(review): the doc values type IS
/// forwarded here via <c>fieldType.DocValueType</c>, although earlier
/// documentation of this method said otherwise - confirm which is intended.
/// </para>
/// </summary>
/// <param name="name">Name of the field to add or update.</param>
/// <param name="fieldType">Source of the Indexed, OmitNorms, IndexOptions and DocValueType values.</param>
/// <returns>Whatever <c>AddOrUpdateInternal</c> returns for this field.</returns>
public FieldInfo AddOrUpdate(string name, IndexableFieldType fieldType)
{
    // TODO: really, indexer shouldn't even call this
    // method (it's only called from DocFieldProcessor);
    // rather, each component in the chain should update
    // what it "owns".  EG fieldType.indexOptions() should
    // be updated by maybe FreqProxTermsWriterPerField:

    // Read the settings in the same left-to-right order as they are passed below.
    bool isIndexed = fieldType.Indexed;
    bool omitsNorms = fieldType.OmitNorms;
    var indexOptions = fieldType.IndexOptions;
    var docValueType = fieldType.DocValueType;

    // The -1, the two 'false' flags and the trailing null are fixed values here;
    // presumably "no preferred field number", term-vector/payload flags and a
    // norms type - verify against AddOrUpdateInternal's signature.
    FieldInfo info = AddOrUpdateInternal(
        name,
        -1,
        isIndexed,
        false,
        omitsNorms,
        false,
        indexOptions,
        docValueType,
        null);

    return info;
}
/// <summary>
/// Creates the field and installs its field type: a new
/// <c>IndexableFieldTypeAnonymousInnerClassHelper</c> constructed around this instance.
/// </summary>
public MyField()
{
    // The helper receives a reference to this field at construction time.
    var typeHelper = new IndexableFieldTypeAnonymousInnerClassHelper(this);
    fieldType = typeHelper;
}
/// <summary>
/// Translates any added <seealso cref="FacetField"/>s into normal fields for indexing.
///
/// <para>
/// <b>NOTE:</b> you should add the returned document to IndexWriter, not the
/// input one!
/// </para>
/// </summary>
/// <param name="taxoWriter">Taxonomy writer handed to the facet and association processing steps.</param>
/// <param name="doc">Document whose fields are inspected; this method only reads its field list.</param>
/// <returns>
/// A new document holding whatever the facet processing steps added, followed by every
/// field of <paramref name="doc"/> whose type is not one of the three facet field types.
/// </returns>
/// <exception cref="System.ArgumentException">
/// If an <see cref="AssociationFacetField"/> dimension is configured as hierarchical or as
/// requiring a dim count, or if association fields of different kinds (int / float / bytes)
/// target the same indexed field. NOTE(review): <c>CheckSeen</c> presumably also throws for a
/// repeated single-valued dimension - confirm against its implementation.
/// </exception>
public virtual Document Build(TaxonomyWriter taxoWriter, Document doc)
{
    // Find all FacetFields, collated by the actual field:
    IDictionary<string, IList<FacetField>> byField = new Dictionary<string, IList<FacetField>>();

    // ... and also all SortedSetDocValuesFacetFields:
    IDictionary<string, IList<SortedSetDocValuesFacetField>> dvByField = new Dictionary<string, IList<SortedSetDocValuesFacetField>>();

    // ... and also all AssociationFacetFields
    IDictionary<string, IList<AssociationFacetField>> assocByField = new Dictionary<string, IList<AssociationFacetField>>();

    // Dimensions already encountered; shared across all three facet kinds.
    var seenDims = new HashSet<string>();

    foreach (IndexableField field in doc.Fields)
    {
        if (field.FieldType == FacetField.TYPE)
        {
            FacetField facetField = (FacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
            if (dimConfig.MultiValued == false)
            {
                CheckSeen(seenDims, facetField.dim);
            }
            AddToFieldGroup(byField, dimConfig.IndexFieldName, facetField);
        }

        if (field.FieldType == SortedSetDocValuesFacetField.TYPE)
        {
            var facetField = (SortedSetDocValuesFacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.Dim);
            if (dimConfig.MultiValued == false)
            {
                CheckSeen(seenDims, facetField.Dim);
            }
            AddToFieldGroup(dvByField, dimConfig.IndexFieldName, facetField);
        }

        if (field.FieldType == AssociationFacetField.TYPE)
        {
            AssociationFacetField facetField = (AssociationFacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
            if (dimConfig.MultiValued == false)
            {
                CheckSeen(seenDims, facetField.dim);
            }
            if (dimConfig.Hierarchical)
            {
                throw new System.ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + facetField.dim + "\")");
            }
            if (dimConfig.RequireDimCount)
            {
                throw new System.ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + facetField.dim + "\")");
            }

            string indexFieldName = dimConfig.IndexFieldName;
            AddToFieldGroup(assocByField, indexFieldName, facetField);

            // Best effort: detect mis-matched types in same
            // indexed field:
            string type;
            if (facetField is IntAssociationFacetField)
            {
                type = "int";
            }
            else if (facetField is FloatAssociationFacetField)
            {
                type = "float";
            }
            else
            {
                type = "bytes";
            }

            // NOTE: not thread safe, but this is just best effort:
            string curType;
            if (!assocDimTypes.TryGetValue(indexFieldName, out curType))
            {
                // First association field seen for this indexed field: remember its kind.
                assocDimTypes[indexFieldName] = type;
            }
            else if (!curType.Equals(type))
            {
                throw new System.ArgumentException("mixing incompatible types of AssociationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
            }
        }
    }

    Document result = new Document();

    ProcessFacetFields(taxoWriter, byField, result);
    processSSDVFacetFields(dvByField, result);
    ProcessAssocFacetFields(taxoWriter, assocByField, result);

    // Carry over every field that is not one of the facet field types, in original order.
    foreach (IndexableField field in doc.Fields)
    {
        IndexableFieldType ft = field.FieldType;
        if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE)
        {
            result.Add(field);
        }
    }

    return result;
}

/// <summary>
/// Appends <paramref name="item"/> to the list stored under <paramref name="key"/>,
/// creating and registering an empty list first when the key is not yet present.
/// </summary>
private static void AddToFieldGroup<T>(IDictionary<string, IList<T>> groups, string key, T item)
{
    IList<T> group;
    if (!groups.TryGetValue(key, out group))
    {
        group = new List<T>();
        groups[key] = group;
    }
    group.Add(item);
}
/// <summary>
/// Feeds the tokens of the first <paramref name="count"/> entries of
/// <paramref name="fields"/> to the consumer, one field at a time.
/// <para>
/// The field state is reset first, then <c>Consumer.Start(fields, count)</c> decides whether
/// inversion happens at all. For every entry whose field type is indexed (and when inversion
/// is enabled) the field's token stream is reset, iterated, ended and closed; position,
/// offset, length, overlap count and boost are accumulated in <c>FieldState</c>. Each
/// processed slot <c>fields[i]</c> is set to <c>null</c> afterwards, so the caller's array is
/// modified. Finally <c>Consumer.Finish()</c> and <c>EndConsumer.Finish()</c> are called.
/// </para>
/// </summary>
/// <param name="fields">Fields to process; entries <c>[0, count)</c> are nulled out by this method.</param>
/// <param name="count">Number of leading entries of <paramref name="fields"/> to process.</param>
/// <exception cref="System.NotSupportedException">
/// If a field omits norms but carries a boost other than 1.0.
/// </exception>
/// <exception cref="System.ArgumentException">
/// If a token reports a negative position increment, a zero increment at position 0, a
/// position overflow, invalid or backwards offsets (only checked when offsets are indexed),
/// or if an immense term prefix was recorded in <c>DocState.MaxTermPrefix</c>.
/// </exception>
public override void ProcessFields(IndexableField[] fields, int count)
{
    FieldState.Reset();

    bool doInvert = Consumer.Start(fields, count);

    for (int i = 0; i < count; i++)
    {
        IndexableField field = fields[i];
        IndexableFieldType fieldType = field.FieldType();

        // TODO FI: this should be "genericized" to querying
        // consumer if it wants to see this particular field
        // tokenized.
        if (fieldType.Indexed && doInvert)
        {
            bool analyzed = fieldType.Tokenized && DocState.Analyzer != null;

            // if the field omits norms, the boost cannot be indexed.
            if (fieldType.OmitNorms && field.GetBoost() != 1.0f)
            {
                throw new System.NotSupportedException("You cannot set an index-time boost: norms are omitted for field '" + field.Name() + "'");
            }

            // only bother checking offsets if something will consume them.
            // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
            bool checkOffsets = fieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            int lastStartOffset = 0;

            if (i > 0)
            {
                // Subsequent values of the field are separated by the analyzer's position gap.
                FieldState.Position_Renamed += analyzed ? DocState.Analyzer.GetPositionIncrementGap(fieldInfo.Name) : 0;
            }

            /*
             * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
             * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
             * but rather a finally that takes note of the problem.
             */
            bool succeededInProcessingField = false;

            TokenStream stream = field.GetTokenStream(DocState.Analyzer);
            try
            {
                // reset the TokenStream to the first token. This is done inside the try
                // so that a failing Reset() still closes the stream and is reported to
                // the infostream by the finally block below.
                stream.Reset();

                bool hasMoreTokens = stream.IncrementToken();

                FieldState.AttributeSource_Renamed = stream;

                IOffsetAttribute offsetAttribute = FieldState.AttributeSource_Renamed.AddAttribute<IOffsetAttribute>();
                IPositionIncrementAttribute posIncrAttribute = FieldState.AttributeSource_Renamed.AddAttribute<IPositionIncrementAttribute>();

                if (hasMoreTokens)
                {
                    Consumer.Start(field);

                    do
                    {
                        // If we hit an exception in stream.next below
                        // (which is fairly common, eg if analyzer
                        // chokes on a given document), then it's
                        // non-aborting and (above) this one document
                        // will be marked as deleted, but still
                        // consume a docID

                        int posIncr = posIncrAttribute.PositionIncrement;
                        if (posIncr < 0)
                        {
                            throw new System.ArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.Name() + "'");
                        }
                        if (FieldState.Position_Renamed == 0 && posIncr == 0)
                        {
                            throw new System.ArgumentException("first position increment must be > 0 (got 0) for field '" + field.Name() + "'");
                        }
                        int position = FieldState.Position_Renamed + posIncr;
                        if (position > 0)
                        {
                            // NOTE: confusing: this "mirrors" the
                            // position++ we do below
                            position--;
                        }
                        else if (position < 0)
                        {
                            throw new System.ArgumentException("position overflow for field '" + field.Name() + "'");
                        }

                        // position is legal, we can safely place it in fieldState now.
                        // not sure if anything will use fieldState after non-aborting exc...
                        FieldState.Position_Renamed = position;

                        if (posIncr == 0)
                        {
                            FieldState.NumOverlap_Renamed++;
                        }

                        if (checkOffsets)
                        {
                            int startOffset = FieldState.Offset_Renamed + offsetAttribute.StartOffset();
                            int endOffset = FieldState.Offset_Renamed + offsetAttribute.EndOffset();
                            if (startOffset < 0 || endOffset < startOffset)
                            {
                                throw new System.ArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.Name() + "'");
                            }
                            if (startOffset < lastStartOffset)
                            {
                                throw new System.ArgumentException("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.Name() + "'");
                            }
                            lastStartOffset = startOffset;
                        }

                        bool success = false;
                        try
                        {
                            // If we hit an exception in here, we abort
                            // all buffered documents since the last
                            // flush, on the likelihood that the
                            // internal state of the consumer is now
                            // corrupt and should not be flushed to a
                            // new segment:
                            Consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                DocState.DocWriter.SetAborting();
                            }
                        }
                        FieldState.Length_Renamed++;
                        FieldState.Position_Renamed++;
                    } while (stream.IncrementToken());
                }

                // trigger streams to perform end-of-stream operations
                stream.End();

                // TODO: maybe add some safety? then again, its already checked
                // when we come back around to the field...
                FieldState.Position_Renamed += posIncrAttribute.PositionIncrement;
                FieldState.Offset_Renamed += offsetAttribute.EndOffset();

                if (DocState.MaxTermPrefix != null)
                {
                    string msg = "Document contains at least one immense term in field=\"" + fieldInfo.Name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + DocState.MaxTermPrefix + "...'";
                    if (DocState.InfoStream.IsEnabled("IW"))
                    {
                        DocState.InfoStream.Message("IW", "ERROR: " + msg);
                    }
                    // Clear the recorded prefix before reporting it to the caller.
                    DocState.MaxTermPrefix = null;
                    throw new System.ArgumentException(msg);
                }

                /* if success was false above there is an exception coming through and we won't get here.*/
                succeededInProcessingField = true;
            }
            finally
            {
                if (!succeededInProcessingField)
                {
                    // An exception is already propagating: close without masking it.
                    IOUtils.CloseWhileHandlingException(stream);
                }
                else
                {
                    stream.Dispose();
                }
                if (!succeededInProcessingField && DocState.InfoStream.IsEnabled("DW"))
                {
                    DocState.InfoStream.Message("DW", "An exception was thrown while processing field " + fieldInfo.Name);
                }
            }

            FieldState.Offset_Renamed += analyzed ? DocState.Analyzer.GetOffsetGap(fieldInfo.Name) : 0;
            FieldState.Boost_Renamed *= field.GetBoost();
        }

        // LUCENE-2387: don't hang onto the field, so GC can
        // reclaim
        fields[i] = null;
    }

    Consumer.Finish();
    EndConsumer.Finish();
}
/// <summary>
/// Applies the settings of <paramref name="ft"/> by delegating to the five-argument
/// <c>Update</c> overload with its Indexed, OmitNorms and IndexOptions values.
/// </summary>
/// <param name="ft">Field type whose settings are forwarded.</param>
internal void Update(IndexableFieldType ft)
{
    // Read in the same order in which the values are passed on.
    bool indexed = ft.Indexed;
    bool omitNorms = ft.OmitNorms;
    var indexOptions = ft.IndexOptions;

    // The second and fourth arguments are always false here; presumably the
    // term-vector and payload flags - verify against the overload's signature.
    Update(indexed, false, omitNorms, false, indexOptions);
}