/// <summary>
/// NOTE: this method does not carry over termVector
/// booleans nor docValuesType; the indexer chain
/// (TermVectorsConsumerPerField, DocFieldProcessor) must
/// set these fields when they succeed in consuming
/// the document
/// </summary>
public FieldInfo AddOrUpdate(string name, IIndexableFieldType fieldType)
{
    // TODO: really, indexer shouldn't even call this
    // method (it's only called from DocFieldProcessor);
    // rather, each component in the chain should update
    // what it "owns". EG fieldType.indexOptions() should
    // be updated by maybe FreqProxTermsWriterPerField:

    // -1 = no preferred field number; the two literal 'false' flags are the
    // per-field booleans deliberately NOT carried over (see NOTE above).
    const int preferredFieldNumber = -1;
    return AddOrUpdateInternal(
        name,
        preferredFieldNumber,
        fieldType.IsIndexed,
        false,
        fieldType.OmitNorms,
        false,
        fieldType.IndexOptions,
        fieldType.DocValueType,
        DocValuesType.NONE);
}
public MyField()
{
    // Wire up the anonymous field-type implementation bound to this instance.
    this.fieldType = new IndexableFieldTypeAnonymousInnerClassHelper(this);
}
internal void Update(IIndexableFieldType ft)
{
    // Delegate to the boolean overload; the two 'false' arguments are
    // deliberately not taken from the field type (presumably the
    // term-vector / payload flags — confirm against the overload's signature).
    Update(
        ft.IsIndexed,
        false,
        ft.OmitNorms,
        false,
        ft.IndexOptions);
}
/// <summary>
/// Inverts one document's instances of this field: for each indexed field
/// value, runs the analysis chain and feeds every token to the downstream
/// <c>consumer</c>, while validating position increments and (when offsets
/// are indexed) offset monotonicity. Accumulates position/offset/boost
/// state in <c>fieldState</c> across the multiple values of the field.
/// </summary>
/// <param name="fields">All instances of this field in the current document; entries are nulled out after processing (LUCENE-2387).</param>
/// <param name="count">Number of valid entries in <paramref name="fields"/>.</param>
public override void ProcessFields(IIndexableField[] fields, int count)
{
    fieldState.Reset();

    // Ask the consumer whether this field should be inverted at all.
    bool doInvert = consumer.Start(fields, count);

    for (int i = 0; i < count; i++)
    {
        IIndexableField field = fields[i];
        IIndexableFieldType fieldType = field.IndexableFieldType;

        // TODO FI: this should be "genericized" to querying
        // consumer if it wants to see this particular field
        // tokenized.
        if (fieldType.IsIndexed && doInvert)
        {
            bool analyzed = fieldType.IsTokenized && docState.analyzer != null;

            // if the field omits norms, the boost cannot be indexed.
            if (fieldType.OmitNorms && field.Boost != 1.0f)
            {
                throw new System.NotSupportedException("You cannot set an index-time boost: norms are omitted for field '" + field.Name + "'");
            }

            // only bother checking offsets if something will consume them.
            // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
            bool checkOffsets = fieldType.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            int lastStartOffset = 0;

            // Between multiple values of the same field, insert the analyzer's
            // position increment gap (only for analyzed fields).
            if (i > 0)
            {
                fieldState.Position += analyzed ? docState.analyzer.GetPositionIncrementGap(fieldInfo.Name) : 0;
            }

            /*
             * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
             * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
             * but rather a finally that takes note of the problem.
             */
            bool succeededInProcessingField = false;

            TokenStream stream = field.GetTokenStream(docState.analyzer);
            // reset the TokenStream to the first token
            stream.Reset();

            try
            {
                bool hasMoreTokens = stream.IncrementToken();

                fieldState.AttributeSource = stream;

                IOffsetAttribute offsetAttribute = fieldState.AttributeSource.AddAttribute<IOffsetAttribute>();
                IPositionIncrementAttribute posIncrAttribute = fieldState.AttributeSource.AddAttribute<IPositionIncrementAttribute>();

                if (hasMoreTokens)
                {
                    consumer.Start(field);

                    do
                    {
                        // If we hit an exception in stream.next below
                        // (which is fairly common, eg if analyzer
                        // chokes on a given document), then it's
                        // non-aborting and (above) this one document
                        // will be marked as deleted, but still
                        // consume a docID

                        int posIncr = posIncrAttribute.PositionIncrement;
                        if (posIncr < 0)
                        {
                            throw new System.ArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.Name + "'");
                        }
                        if (fieldState.Position == 0 && posIncr == 0)
                        {
                            throw new System.ArgumentException("first position increment must be > 0 (got 0) for field '" + field.Name + "'");
                        }
                        int position = fieldState.Position + posIncr;
                        if (position > 0)
                        {
                            // NOTE: confusing: this "mirrors" the
                            // position++ we do below
                            position--;
                        }
                        else if (position < 0)
                        {
                            throw new System.ArgumentException("position overflow for field '" + field.Name + "'");
                        }

                        // position is legal, we can safely place it in fieldState now.
                        // not sure if anything will use fieldState after non-aborting exc...
                        fieldState.Position = position;

                        // A zero increment means this token overlaps the previous one.
                        if (posIncr == 0)
                        {
                            fieldState.NumOverlap++;
                        }

                        if (checkOffsets)
                        {
                            // Offsets are made absolute by adding the running
                            // per-document offset accumulated over prior values.
                            int startOffset = fieldState.Offset + offsetAttribute.StartOffset;
                            int endOffset = fieldState.Offset + offsetAttribute.EndOffset;
                            if (startOffset < 0 || endOffset < startOffset)
                            {
                                throw new System.ArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.Name + "'");
                            }
                            if (startOffset < lastStartOffset)
                            {
                                throw new System.ArgumentException("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.Name + "'");
                            }
                            lastStartOffset = startOffset;
                        }

                        bool success = false;
                        try
                        {
                            // If we hit an exception in here, we abort
                            // all buffered documents since the last
                            // flush, on the likelihood that the
                            // internal state of the consumer is now
                            // corrupt and should not be flushed to a
                            // new segment:
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                docState.docWriter.SetAborting();
                            }
                        }
                        fieldState.Length++;
                        fieldState.Position++;
                    } while (stream.IncrementToken());
                }
                // trigger streams to perform end-of-stream operations
                stream.End();
                // TODO: maybe add some safety? then again, its already checked
                // when we come back around to the field...
                fieldState.Position += posIncrAttribute.PositionIncrement;
                fieldState.Offset += offsetAttribute.EndOffset;

                // A non-null maxTermPrefix means the term-hashing stage skipped
                // at least one over-length term; surface that to the caller.
                if (docState.maxTermPrefix != null)
                {
                    string msg = "Document contains at least one immense term in field=\"" + fieldInfo.Name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'";
                    if (docState.infoStream.IsEnabled("IW"))
                    {
                        docState.infoStream.Message("IW", "ERROR: " + msg);
                    }
                    docState.maxTermPrefix = null;
                    throw new System.ArgumentException(msg);
                }

                /* if success was false above there is an exception coming through and we won't get here.*/
                succeededInProcessingField = true;
            }
            finally
            {
                // Dispose the stream either way; on failure, suppress any
                // secondary exception from Dispose so the original propagates.
                if (!succeededInProcessingField)
                {
                    IOUtils.DisposeWhileHandlingException(stream);
                }
                else
                {
                    stream.Dispose();
                }
                if (!succeededInProcessingField && docState.infoStream.IsEnabled("DW"))
                {
                    docState.infoStream.Message("DW", "An exception was thrown while processing field " + fieldInfo.Name);
                }
            }

            fieldState.Offset += analyzed ? docState.analyzer.GetOffsetGap(fieldInfo.Name) : 0;
            fieldState.Boost *= field.Boost;
        }

        // LUCENE-2387: don't hang onto the field, so GC can
        // reclaim
        fields[i] = null;
    }

    consumer.Finish();
    endConsumer.Finish();
}
/// <summary>
/// Translates any added <see cref="FacetField"/>s into normal fields for indexing.
///
/// <para>
/// <b>NOTE:</b> you should add the returned document to <see cref="Index.IndexWriter"/>, not the
/// input one!
/// </para>
/// </summary>
/// <param name="taxoWriter">Taxonomy writer, forwarded to <c>ProcessFacetFields</c>/<c>ProcessAssocFacetFields</c>.</param>
/// <param name="doc">The caller's document; it is only read, never modified.</param>
/// <returns>A new <see cref="Document"/> containing the translated facet fields plus all non-facet fields of <paramref name="doc"/>.</returns>
/// <exception cref="ArgumentException">If a hierarchical or requireDimCount dimension carries an <see cref="AssociationFacetField"/>, or incompatible association types share an indexed field.</exception>
public virtual Document Build(ITaxonomyWriter taxoWriter, Document doc)
{
    // Find all FacetFields, collated by the actual field:
    IDictionary<string, IList<FacetField>> byField = new Dictionary<string, IList<FacetField>>();

    // ... and also all SortedSetDocValuesFacetFields:
    IDictionary<string, IList<SortedSetDocValuesFacetField>> dvByField = new Dictionary<string, IList<SortedSetDocValuesFacetField>>();

    // ... and also all AssociationFacetFields
    IDictionary<string, IList<AssociationFacetField>> assocByField = new Dictionary<string, IList<AssociationFacetField>>();

    // Tracks dims already seen, so CheckSeen can reject duplicates
    // for dims not configured as multi-valued.
    var seenDims = new JCG.HashSet<string>();

    foreach (IIndexableField field in doc.Fields)
    {
        if (field.IndexableFieldType == FacetField.TYPE)
        {
            FacetField facetField = (FacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.Dim);
            if (dimConfig.IsMultiValued == false)
            {
                CheckSeen(seenDims, facetField.Dim);
            }
            // Group by the (possibly remapped) underlying index field name.
            string indexFieldName = dimConfig.IndexFieldName;
            if (!byField.TryGetValue(indexFieldName, out IList<FacetField> fields))
            {
                fields = new List<FacetField>();
                byField[indexFieldName] = fields;
            }
            fields.Add(facetField);
        }

        if (field.IndexableFieldType == SortedSetDocValuesFacetField.TYPE)
        {
            var facetField = (SortedSetDocValuesFacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.Dim);
            if (dimConfig.IsMultiValued == false)
            {
                CheckSeen(seenDims, facetField.Dim);
            }
            string indexFieldName = dimConfig.IndexFieldName;
            if (!dvByField.TryGetValue(indexFieldName, out IList<SortedSetDocValuesFacetField> fields))
            {
                fields = new List<SortedSetDocValuesFacetField>();
                dvByField[indexFieldName] = fields;
            }
            fields.Add(facetField);
        }

        if (field.IndexableFieldType == AssociationFacetField.TYPE)
        {
            AssociationFacetField facetField = (AssociationFacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.Dim);
            if (dimConfig.IsMultiValued == false)
            {
                CheckSeen(seenDims, facetField.Dim);
            }
            // Associations are incompatible with hierarchical / dim-count dims.
            if (dimConfig.IsHierarchical)
            {
                throw new ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + facetField.Dim + "\")");
            }
            if (dimConfig.RequireDimCount)
            {
                throw new ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + facetField.Dim + "\")");
            }

            string indexFieldName = dimConfig.IndexFieldName;
            if (!assocByField.TryGetValue(indexFieldName, out IList<AssociationFacetField> fields))
            {
                fields = new List<AssociationFacetField>();
                assocByField[indexFieldName] = fields;
            }
            fields.Add(facetField);

            // Best effort: detect mis-matched types in same
            // indexed field:
            string type;
            if (facetField is Int32AssociationFacetField)
            {
                type = "int";
            }
            else if (facetField is SingleAssociationFacetField)
            {
                type = "float";
            }
            else
            {
                type = "bytes";
            }
            // NOTE: not thread safe, but this is just best effort:
            if (!assocDimTypes.TryGetValue(indexFieldName, out string curType))
            {
                assocDimTypes[indexFieldName] = type;
            }
            else if (!curType.Equals(type, StringComparison.Ordinal))
            {
                throw new ArgumentException("mixing incompatible types of AssocationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
            }
        }
    }

    Document result = new Document();

    // Emit the collated facet data into the result document.
    ProcessFacetFields(taxoWriter, byField, result);
    ProcessSSDVFacetFields(dvByField, result);
    ProcessAssocFacetFields(taxoWriter, assocByField, result);

    //System.out.println("add stored: " + addedStoredFields);

    // Carry over every non-facet field from the input document unchanged.
    foreach (IIndexableField field in doc.Fields)
    {
        IIndexableFieldType ft = field.IndexableFieldType;
        if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE)
        {
            result.Add(field);
        }
    }

    return (result);
}
public MyField()
{
    // Bind the anonymous field-type implementation to this field instance.
    this.fieldType = new IndexableFieldTypeAnonymousClass(this);
}