Пример #1
0
 /// <summary>
 /// Delegates to <c>AddOrUpdateInternal</c> for the field called <paramref name="name"/>,
 /// forwarding the <c>Indexed</c>, <c>OmitNorms</c>, <c>IndexOptions</c> and
 /// <c>DocValueType</c> settings read from <paramref name="fieldType"/>.
 /// <para>
 /// NOTE: the termVector booleans are not carried over by this method; the indexer
 /// chain (TermVectorsConsumerPerField, DocFieldProcessor) must set those fields once
 /// it has succeeded in consuming the document.
 /// </para>
 /// <para>
 /// NOTE(review): earlier documentation also listed docValuesType as "not carried
 /// over", yet <c>fieldType.DocValueType</c> is forwarded below - confirm which is intended.
 /// </para>
 /// </summary>
 /// <param name="name">Field name handed to <c>AddOrUpdateInternal</c>.</param>
 /// <param name="fieldType">Source of the per-field settings that are forwarded.</param>
 /// <returns>Whatever <c>AddOrUpdateInternal</c> returns for this field.</returns>
 public FieldInfo AddOrUpdate(string name, IndexableFieldType fieldType)
 {
     // TODO: really, indexer shouldn't even call this
     // method (it's only called from DocFieldProcessor);
     // rather, each component in the chain should update
     // what it "owns".  EG fieldType.indexOptions() should
     // be updated by maybe FreqProxTermsWriterPerField:

     // Read the settings in the same order the arguments are passed.
     var indexedFlag   = fieldType.Indexed;
     var omitNormsFlag = fieldType.OmitNorms;
     var options       = fieldType.IndexOptions;
     var docValues     = fieldType.DocValueType;

     // -1, the two literal 'false' values and the trailing null are fixed here;
     // presumably they mean "no preferred field number", the term-vector/payload
     // flags and "no norm type" - TODO confirm against AddOrUpdateInternal.
     FieldInfo info = AddOrUpdateInternal(name, -1, indexedFlag, false, omitNormsFlag, false, options, docValues, null);
     return info;
 }
Пример #2
0
 /// <summary>
 /// Creates the field and installs its field-type descriptor: a new
 /// <c>IndexableFieldTypeAnonymousInnerClassHelper</c> constructed with this instance.
 /// </summary>
 public MyField()
 {
     // The helper is handed a reference to the field being constructed.
     this.fieldType = new IndexableFieldTypeAnonymousInnerClassHelper(this);
 }
Пример #3
0
        /// <summary>
        /// Translates any added <see cref="FacetField"/>s into normal fields for indexing.
        ///
        /// <para>
        /// <b>NOTE:</b> you should add the returned document to IndexWriter, not the
        /// input one!
        /// </para>
        /// </summary>
        /// <param name="taxoWriter">Taxonomy writer forwarded to <c>ProcessFacetFields</c>
        /// and <c>ProcessAssocFacetFields</c>.</param>
        /// <param name="doc">Input document; its fields are only enumerated here.</param>
        /// <returns>A new <see cref="Document"/> containing whatever the three processing
        /// helpers add, followed by every field of <paramref name="doc"/> whose type is not
        /// one of the three facet field types.</returns>
        /// <exception cref="System.ArgumentException">An <see cref="AssociationFacetField"/>
        /// uses a dimension configured as hierarchical or as requiring a dim count, or two
        /// association fields of different value types share one indexed field.</exception>
        public virtual Document Build(TaxonomyWriter taxoWriter, Document doc)
        {
            // Find all FacetFields, collated by the actual field:
            IDictionary<string, IList<FacetField>> byField = new Dictionary<string, IList<FacetField>>();

            // ... and also all SortedSetDocValuesFacetFields:
            IDictionary<string, IList<SortedSetDocValuesFacetField>> dvByField = new Dictionary<string, IList<SortedSetDocValuesFacetField>>();

            // ... and also all AssociationFacetFields
            IDictionary<string, IList<AssociationFacetField>> assocByField = new Dictionary<string, IList<AssociationFacetField>>();

            // Dimensions already encountered; handed to CheckSeen for single-valued dims.
            var seenDims = new HashSet<string>();

            foreach (IndexableField field in doc.Fields)
            {
                // The three checks are deliberately independent 'if's (not else-if),
                // exactly as before: classification is by reference to the shared TYPE.
                if (field.FieldType == FacetField.TYPE)
                {
                    FacetField facetField = (FacetField)field;
                    FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
                    if (!dimConfig.MultiValued)
                    {
                        CheckSeen(seenDims, facetField.dim);
                    }
                    string indexFieldName = dimConfig.IndexFieldName;
                    IList<FacetField> fields;
                    if (!byField.TryGetValue(indexFieldName, out fields))
                    {
                        fields = new List<FacetField>();
                        byField[indexFieldName] = fields;
                    }
                    fields.Add(facetField);
                }

                if (field.FieldType == SortedSetDocValuesFacetField.TYPE)
                {
                    var facetField = (SortedSetDocValuesFacetField)field;
                    FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.Dim);
                    if (!dimConfig.MultiValued)
                    {
                        CheckSeen(seenDims, facetField.Dim);
                    }
                    string indexFieldName = dimConfig.IndexFieldName;
                    IList<SortedSetDocValuesFacetField> fields;
                    if (!dvByField.TryGetValue(indexFieldName, out fields))
                    {
                        fields = new List<SortedSetDocValuesFacetField>();
                        dvByField[indexFieldName] = fields;
                    }
                    fields.Add(facetField);
                }

                if (field.FieldType == AssociationFacetField.TYPE)
                {
                    AssociationFacetField facetField = (AssociationFacetField)field;
                    FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
                    if (!dimConfig.MultiValued)
                    {
                        CheckSeen(seenDims, facetField.dim);
                    }
                    if (dimConfig.Hierarchical)
                    {
                        throw new System.ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + facetField.dim + "\")");
                    }
                    if (dimConfig.RequireDimCount)
                    {
                        throw new System.ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + facetField.dim + "\")");
                    }

                    string indexFieldName = dimConfig.IndexFieldName;
                    IList<AssociationFacetField> fields;
                    if (!assocByField.TryGetValue(indexFieldName, out fields))
                    {
                        fields = new List<AssociationFacetField>();
                        assocByField[indexFieldName] = fields;
                    }
                    fields.Add(facetField);

                    // Best effort: detect mis-matched types in same
                    // indexed field:
                    string type;
                    if (facetField is IntAssociationFacetField)
                    {
                        type = "int";
                    }
                    else if (facetField is FloatAssociationFacetField)
                    {
                        type = "float";
                    }
                    else
                    {
                        type = "bytes";
                    }
                    // NOTE: not thread safe, but this is just best effort:
                    string curType;
                    if (!assocDimTypes.TryGetValue(indexFieldName, out curType))
                    {
                        // First association type seen for this indexed field: remember it.
                        assocDimTypes[indexFieldName] = type;
                    }
                    else if (!curType.Equals(type))
                    {
                        // Message previously misspelled the type name as "AssocationFacetField".
                        throw new System.ArgumentException("mixing incompatible types of AssociationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
                    }
                }
            }

            Document result = new Document();

            // Let each helper add the fields derived from its group of facet fields.
            ProcessFacetFields(taxoWriter, byField, result);
            processSSDVFacetFields(dvByField, result);
            ProcessAssocFacetFields(taxoWriter, assocByField, result);

            // Copy over every field that is not one of the three facet field types.
            foreach (IndexableField field in doc.Fields)
            {
                IndexableFieldType ft = field.FieldType;
                if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE)
                {
                    result.Add(field);
                }
            }

            return result;
        }
Пример #4
0
        /// <summary>
        /// Processes the first <paramref name="count"/> entries of <paramref name="fields"/>:
        /// for every indexed field (when the consumer asked for inversion) the field's
        /// <c>TokenStream</c> is consumed token by token, positions and offsets are validated
        /// and accumulated in <c>FieldState</c>, and each token is passed to <c>Consumer.Add()</c>.
        /// Each processed slot of <paramref name="fields"/> is set to <c>null</c> afterwards,
        /// and both <c>Consumer.Finish()</c> and <c>EndConsumer.Finish()</c> are called at the end.
        /// </summary>
        /// <param name="fields">Fields to process; entries <c>[0, count)</c> are nulled out.</param>
        /// <param name="count">Number of leading entries of <paramref name="fields"/> to process.</param>
        /// <exception cref="System.NotSupportedException">A field omits norms but carries a
        /// boost other than 1.0.</exception>
        /// <exception cref="System.ArgumentException">A token has an illegal position increment
        /// or offsets, the position overflows, or an immense term was recorded in
        /// <c>DocState.MaxTermPrefix</c>.</exception>
        public override void ProcessFields(IndexableField[] fields, int count)
        {
            FieldState.Reset();

            bool doInvert = Consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                IndexableField     field     = fields[i];
                IndexableFieldType fieldType = field.FieldType();

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (fieldType.Indexed && doInvert)
                {
                    bool analyzed = fieldType.Tokenized && DocState.Analyzer != null;

                    // if the field omits norms, the boost cannot be indexed.
                    if (fieldType.OmitNorms && field.GetBoost() != 1.0f)
                    {
                        throw new System.NotSupportedException("You cannot set an index-time boost: norms are omitted for field '" + field.Name() + "'");
                    }

                    // only bother checking offsets if something will consume them.
                    // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
                    bool checkOffsets    = fieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    int  lastStartOffset = 0;

                    // Every field instance after the first gets the analyzer's position gap.
                    if (i > 0)
                    {
                        FieldState.Position_Renamed += analyzed ? DocState.Analyzer.GetPositionIncrementGap(fieldInfo.Name) : 0;
                    }

                    /*
                     * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
                     * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
                     * but rather a finally that takes note of the problem.
                     */

                    bool succeededInProcessingField = false;

                    TokenStream stream = field.GetTokenStream(DocState.Analyzer);

                    try
                    {
                        // reset the TokenStream to the first token.
                        // This is done inside the try so that a failing Reset() still
                        // closes the stream and is reported by the finally block below.
                        stream.Reset();

                        bool hasMoreTokens = stream.IncrementToken();

                        FieldState.AttributeSource_Renamed = stream;

                        IOffsetAttribute            offsetAttribute  = FieldState.AttributeSource_Renamed.AddAttribute <IOffsetAttribute>();
                        IPositionIncrementAttribute posIncrAttribute = FieldState.AttributeSource_Renamed.AddAttribute <IPositionIncrementAttribute>();

                        if (hasMoreTokens)
                        {
                            Consumer.Start(field);

                            do
                            {
                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                int posIncr = posIncrAttribute.PositionIncrement;
                                if (posIncr < 0)
                                {
                                    throw new System.ArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.Name() + "'");
                                }
                                if (FieldState.Position_Renamed == 0 && posIncr == 0)
                                {
                                    throw new System.ArgumentException("first position increment must be > 0 (got 0) for field '" + field.Name() + "'");
                                }
                                int position = FieldState.Position_Renamed + posIncr;
                                if (position > 0)
                                {
                                    // NOTE: confusing: this "mirrors" the
                                    // position++ we do below
                                    position--;
                                }
                                else if (position < 0)
                                {
                                    // The int addition above wrapped around.
                                    throw new System.ArgumentException("position overflow for field '" + field.Name() + "'");
                                }

                                // position is legal, we can safely place it in fieldState now.
                                // not sure if anything will use fieldState after non-aborting exc...
                                FieldState.Position_Renamed = position;

                                if (posIncr == 0)
                                {
                                    FieldState.NumOverlap_Renamed++;
                                }

                                if (checkOffsets)
                                {
                                    int startOffset = FieldState.Offset_Renamed + offsetAttribute.StartOffset();
                                    int endOffset   = FieldState.Offset_Renamed + offsetAttribute.EndOffset();
                                    if (startOffset < 0 || endOffset < startOffset)
                                    {
                                        throw new System.ArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.Name() + "'");
                                    }
                                    if (startOffset < lastStartOffset)
                                    {
                                        throw new System.ArgumentException("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.Name() + "'");
                                    }
                                    lastStartOffset = startOffset;
                                }

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    Consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        DocState.DocWriter.SetAborting();
                                    }
                                }
                                FieldState.Length_Renamed++;
                                FieldState.Position_Renamed++;
                            } while (stream.IncrementToken());
                        }
                        // trigger streams to perform end-of-stream operations
                        stream.End();
                        // TODO: maybe add some safety? then again, its already checked
                        // when we come back around to the field...
                        FieldState.Position_Renamed += posIncrAttribute.PositionIncrement;
                        FieldState.Offset_Renamed   += offsetAttribute.EndOffset();

                        if (DocState.MaxTermPrefix != null)
                        {
                            string msg = "Document contains at least one immense term in field=\"" + fieldInfo.Name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + DocState.MaxTermPrefix + "...'";
                            if (DocState.InfoStream.IsEnabled("IW"))
                            {
                                DocState.InfoStream.Message("IW", "ERROR: " + msg);
                            }
                            // Clear the marker before throwing so it is not reported again.
                            DocState.MaxTermPrefix = null;
                            throw new System.ArgumentException(msg);
                        }

                        /* if success was false above there is an exception coming through and we won't get here.*/
                        succeededInProcessingField = true;
                    }
                    finally
                    {
                        if (!succeededInProcessingField)
                        {
                            // An exception is already propagating: close via the
                            // "while handling exception" helper instead of Dispose().
                            IOUtils.CloseWhileHandlingException(stream);
                        }
                        else
                        {
                            stream.Dispose();
                        }
                        if (!succeededInProcessingField && DocState.InfoStream.IsEnabled("DW"))
                        {
                            DocState.InfoStream.Message("DW", "An exception was thrown while processing field " + fieldInfo.Name);
                        }
                    }

                    FieldState.Offset_Renamed += analyzed ? DocState.Analyzer.GetOffsetGap(fieldInfo.Name) : 0;
                    FieldState.Boost_Renamed  *= field.GetBoost();
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            Consumer.Finish();
            EndConsumer.Finish();
        }
Пример #5
0
 /// <summary>
 /// Forwards the <c>Indexed</c>, <c>OmitNorms</c> and <c>IndexOptions</c> settings of
 /// <paramref name="ft"/> to the five-argument <c>Update</c> overload, passing
 /// <c>false</c> for its second and fourth arguments.
 /// </summary>
 /// <param name="ft">Field type whose settings are forwarded.</param>
 internal void Update(IndexableFieldType ft)
 {
     // Read the settings in the same order the arguments are passed.
     var ftIndexed      = ft.Indexed;
     var ftOmitNorms    = ft.OmitNorms;
     var ftIndexOptions = ft.IndexOptions;

     // The two literal 'false' slots are presumably the term-vector and
     // payload flags - TODO confirm against the overload's signature.
     Update(ftIndexed, false, ftOmitNorms, false, ftIndexOptions);
 }
Пример #6
0
 /// <summary>
 /// Applies the settings of <paramref name="ft"/> by calling the five-argument
 /// <c>Update</c> overload with its <c>Indexed</c>, <c>OmitNorms</c> and
 /// <c>IndexOptions</c> values; the second and fourth arguments are always <c>false</c>.
 /// </summary>
 /// <param name="ft">Field type to take the settings from.</param>
 internal void Update(IndexableFieldType ft)
 {
     // Property reads happen in argument order, as in a direct call.
     var isIndexedValue    = ft.Indexed;
     var omitNormsValue    = ft.OmitNorms;
     var indexOptionsValue = ft.IndexOptions;

     // NOTE(review): the fixed 'false' arguments look like flags this overload
     // intentionally does not take from the field type - verify against the callee.
     Update(isIndexedValue, false, omitNormsValue, false, indexOptionsValue);
 }