internal override bool Start(Fieldable[] fields, int count)
		{
			for (int i = 0; i < count; i++)
				if (fields[i].IsIndexed())
					return true;
			return false;
		}
Example #2
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    WriteField(fieldInfos.FieldInfo(field.Name()), field);
                }
            }
        }
Example #3
 internal override void  Start(Fieldable f)
 {
     termAtt = (TermAttribute)fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
     consumer.Start(f);
     if (nextPerField != null)
     {
         nextPerField.Start(f);
     }
 }
Example #4
        /// <summary> Return the offsetGap from the analyzer assigned to field </summary>
        public override int GetOffsetGap(Mono.Lucene.Net.Documents.Fieldable field)
        {
            Analyzer analyzer = (Analyzer)analyzerMap[field.Name()];

            if (analyzer == null)
            {
                analyzer = defaultAnalyzer;
            }
            return(analyzer.GetOffsetGap(field));
        }
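The override above looks up a per-field analyzer and falls back to a default, as Mono.Lucene.Net's PerFieldAnalyzerWrapper does. A minimal usage sketch, assuming that wrapper type and its AddAnalyzer method; the field definitions here are illustrative only, not taken from the snippet:

 // Minimal usage sketch, assuming the override above is (or behaves like)
 // Mono.Lucene.Net.Analysis.PerFieldAnalyzerWrapper; names outside the
 // snippet are assumptions.
 using Mono.Lucene.Net.Analysis;
 using Mono.Lucene.Net.Documents;

 class OffsetGapDemo
 {
     static void Main()
     {
         PerFieldAnalyzerWrapper wrapper =
             new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
         wrapper.AddAnalyzer("id", new KeywordAnalyzer());

         Fieldable body = new Field("body", "some text",
                                    Field.Store.NO, Field.Index.ANALYZED);
         Fieldable id = new Field("id", "42",
                                  Field.Store.YES, Field.Index.NOT_ANALYZED);

         // The wrapper only picks which analyzer answers; the default
         // Analyzer.GetOffsetGap then returns 1 for tokenized fields
         // and 0 for un-tokenized ones.
         int bodyGap = wrapper.GetOffsetGap(body); // 1
         int idGap   = wrapper.GetOffsetGap(id);   // 0
     }
 }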
Example #5
 internal override void  Start(Fieldable f)
 {
     if (doVectorOffsets)
     {
         offsetAttribute = (OffsetAttribute)fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
     }
     else
     {
         offsetAttribute = null;
     }
 }
 internal override void  Start(Fieldable f)
 {
     if (fieldState.attributeSource.HasAttribute(typeof(PayloadAttribute)))
     {
         payloadAttribute = (PayloadAttribute)fieldState.attributeSource.GetAttribute(typeof(PayloadAttribute));
     }
     else
     {
         payloadAttribute = null;
     }
 }
Example #7
 /// <summary> Just like {@link #getPositionIncrementGap}, except for
 /// Token offsets instead.  By default this returns 1 for
 /// tokenized fields, as if the fields were joined
 /// with an extra space character, and 0 for un-tokenized
 /// fields.  This method is only called if the field
 /// produced at least one token for indexing.
 ///
 /// </summary>
 /// <param name="field">the field just indexed
 /// </param>
 /// <returns> offset gap, added to the next token emitted from {@link #TokenStream(String,Reader)}
 /// </returns>
 public virtual int GetOffsetGap(Fieldable field)
 {
     if (field.IsTokenized())
     {
         return(1);
     }
     else
     {
         return(0);
     }
 }
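Because GetOffsetGap is virtual, an analyzer can widen this gap so that offsets from repeated values of a tokenized field never run into each other. A minimal sketch; the subclass name and the gap of 100 are assumptions, not from the snippet:

 // Minimal sketch: widen the offset gap between successive values of a
 // tokenized field. WideGapAnalyzer and the value 100 are illustrative.
 using Mono.Lucene.Net.Analysis;
 using Mono.Lucene.Net.Documents;

 public class WideGapAnalyzer : WhitespaceAnalyzer
 {
     public override int GetOffsetGap(Fieldable field)
     {
         // keep the default 0 for un-tokenized fields,
         // but separate tokenized values by 100 offset units
         return field.IsTokenized() ? 100 : 0;
     }
 }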
Example #8
 /// <summary>Adds field info for a Document. </summary>
 public void  Add(Document doc)
 {
     lock (this)
     {
         System.Collections.IList       fields        = doc.GetFields();
         System.Collections.IEnumerator fieldIterator = fields.GetEnumerator();
         while (fieldIterator.MoveNext())
         {
             Fieldable field = (Fieldable)fieldIterator.Current;
             Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
         }
     }
 }
		public void  AddField(Fieldable field, FieldInfo fieldInfo)
		{
			if (doc == null)
			{
				doc = storedFieldsWriter.GetPerDoc();
				doc.docID = docState.docID;
				localFieldsWriter.SetFieldsStream(doc.fdt);
				System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
				System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
				System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
			}
			
			localFieldsWriter.WriteField(fieldInfo, field);
			System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
			doc.numStoredFields++;
		}
Example #10
        public void  AddField(Fieldable field, FieldInfo fieldInfo)
        {
            if (doc == null)
            {
                doc       = storedFieldsWriter.GetPerDoc();
                doc.docID = docState.docID;
                localFieldsWriter.SetFieldsStream(doc.fdt);
                System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
                System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
                System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
            }

            localFieldsWriter.WriteField(fieldInfo, field);
            System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
            doc.numStoredFields++;
        }
		internal override bool Start(Fieldable[] fields, int count)
		{
			doVectors = false;
			doVectorPositions = false;
			doVectorOffsets = false;
			
			for (int i = 0; i < count; i++)
			{
				Fieldable field = fields[i];
				if (field.IsIndexed() && field.IsTermVectorStored())
				{
					doVectors = true;
					doVectorPositions |= field.IsStorePositionWithTermVector();
					doVectorOffsets |= field.IsStoreOffsetWithTermVector();
				}
			}
			
			if (doVectors)
			{
				if (perThread.doc == null)
				{
					perThread.doc = termsWriter.GetPerDoc();
					perThread.doc.docID = docState.docID;
					System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0);
					System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.Length());
					System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.GetFilePointer());
				}

                System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID);
                if (termsHashPerField.numPostings != 0)
                {
                    // Only necessary if previous doc hit a
                    // non-aborting exception while writing vectors in
                    // this field:
                    termsHashPerField.Reset();
                    perThread.termsHashPerThread.Reset(false);
                }
			}
			
			// TODO: only if needed for performance
			//perThread.postingsCount = 0;
			
			return doVectors;
		}
Example #12
        internal override bool Start(Fieldable[] fields, int count)
        {
            doVectors         = false;
            doVectorPositions = false;
            doVectorOffsets   = false;

            for (int i = 0; i < count; i++)
            {
                Fieldable field = fields[i];
                if (field.IsIndexed() && field.IsTermVectorStored())
                {
                    doVectors          = true;
                    doVectorPositions |= field.IsStorePositionWithTermVector();
                    doVectorOffsets   |= field.IsStoreOffsetWithTermVector();
                }
            }

            if (doVectors)
            {
                if (perThread.doc == null)
                {
                    perThread.doc       = termsWriter.GetPerDoc();
                    perThread.doc.docID = docState.docID;
                    System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0);
                    System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.Length());
                    System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.GetFilePointer());
                }

                System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID);
                if (termsHashPerField.numPostings != 0)
                {
                    // Only necessary if previous doc hit a
                    // non-aborting exception while writing vectors in
                    // this field:
                    termsHashPerField.Reset();
                    perThread.termsHashPerThread.Reset(false);
                }
            }

            // TODO: only if needed for performance
            //perThread.postingsCount = 0;

            return(doVectors);
        }
Example #13
		/// <summary>Processes all occurrences of a single field </summary>
		public abstract void  ProcessFields(Fieldable[] fields, int count);
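ProcessFields receives every Fieldable occurrence sharing one field name in the document, with count bounding the valid entries of the array. A minimal sketch of an implementation, using a hypothetical consumer that simply counts stored occurrences (it is not one of the real consumers):

 // Hypothetical consumer-per-field: counts the stored occurrences of this
 // field in the current document, only to illustrate the contract of
 // ProcessFields(Fieldable[], int).
 internal class CountingFieldConsumer
 {
     internal int storedOccurrences;

     public void ProcessFields(Fieldable[] fields, int count)
     {
         // "count" bounds the valid entries; the array may be larger.
         for (int i = 0; i < count; i++)
         {
             if (fields[i].IsStored())
                 storedOccurrences++;
         }
     }
 }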
Example #14
        internal void  WriteField(FieldInfo fi, Fieldable field)
        {
            // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
            // and field.binaryValue() already returns the compressed value for a field
            // with isCompressed()==true, so we disable compression in that case
            bool disableCompression = (field is FieldsReader.FieldForMerge);

            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;

            if (field.IsTokenized())
            {
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (field.IsBinary())
            {
                bits |= FieldsWriter.FIELD_IS_BINARY;
            }
            if (field.IsCompressed())
            {
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;
            }

            fieldsStream.WriteByte(bits);

            if (field.IsCompressed())
            {
                // compression is enabled for the current field
                byte[] data;
                int    len;
                int    offset;
                if (disableCompression)
                {
                    // optimized case for merging, the data
                    // is already compressed
                    data = field.GetBinaryValue();
                    System.Diagnostics.Debug.Assert(data != null);
                    len    = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();
                }
                else
                {
                    // check if it is a binary field
                    if (field.IsBinary())
                    {
                        data = CompressionTools.Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
                    }
                    else
                    {
                        byte[] x = System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue());
                        data = CompressionTools.Compress(x, 0, x.Length);
                    }
                    len    = data.Length;
                    offset = 0;
                }

                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, offset, len);
            }
            else
            {
                // compression is disabled for the current field
                if (field.IsBinary())
                {
                    byte[] data;
                    int    len;
                    int    offset;
                    data   = field.GetBinaryValue();
                    len    = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();

                    fieldsStream.WriteVInt(len);
                    fieldsStream.WriteBytes(data, offset, len);
                }
                else
                {
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
Example #15
		internal override bool Start(Fieldable[] fields, int count)
		{
			doCall = consumer.Start(fields, count);
			if (nextPerField != null)
				doNextCall = nextPerField.Start(fields, count);
			return doCall || doNextCall;
		}
		internal override void  Start(Fieldable f)
		{
			if (fieldState.attributeSource.HasAttribute(typeof(PayloadAttribute)))
			{
				payloadAttribute = (PayloadAttribute) fieldState.attributeSource.GetAttribute(typeof(PayloadAttribute));
			}
			else
			{
				payloadAttribute = null;
			}
		}
Example #17
		public override void  ProcessFields(Fieldable[] fields, int count)
		{
			one.ProcessFields(fields, count);
			two.ProcessFields(fields, count);
		}
Example #18
		/// <summary> Just like {@link #getPositionIncrementGap}, except for
		/// Token offsets instead.  By default this returns 1 for
		/// tokenized fields, as if the fields were joined
		/// with an extra space character, and 0 for un-tokenized
		/// fields.  This method is only called if the field
		/// produced at least one token for indexing.
		/// 
		/// </summary>
		/// <param name="field">the field just indexed
		/// </param>
		/// <returns> offset gap, added to the next token emitted from {@link #TokenStream(String,Reader)}
		/// </returns>
		public virtual int GetOffsetGap(Fieldable field)
		{
			if (field.IsTokenized())
				return 1;
			else
				return 0;
		}
Example #19
        public override void  ProcessFields(Fieldable[] fields, int count)
        {
            fieldState.Reset(docState.doc.GetBoost());

            int maxFieldLength = docState.maxFieldLength;

            bool doInvert = consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                Fieldable field = fields[i];

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (field.IsIndexed() && doInvert)
                {
                    bool anyToken;

                    if (fieldState.length > 0)
                    {
                        fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
                    }

                    if (!field.IsTokenized())
                    {
                        // un-tokenized field
                        System.String stringValue = field.StringValue();
                        int           valueLength = stringValue.Length;
                        perThread.singleTokenTokenStream.Reinit(stringValue, 0, valueLength);
                        fieldState.attributeSource = perThread.singleTokenTokenStream;
                        consumer.Start(field);

                        bool success = false;
                        try
                        {
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                docState.docWriter.SetAborting();
                            }
                        }
                        fieldState.offset += valueLength;
                        fieldState.length++;
                        fieldState.position++;
                        anyToken = valueLength > 0;
                    }
                    else
                    {
                        // tokenized field
                        TokenStream stream;
                        TokenStream streamValue = field.TokenStreamValue();

                        if (streamValue != null)
                        {
                            stream = streamValue;
                        }
                        else
                        {
                            // the field does not have a TokenStream,
                            // so we have to obtain one from the analyzer
                            System.IO.TextReader reader;                             // find or make Reader
                            System.IO.TextReader readerValue = field.ReaderValue();

                            if (readerValue != null)
                            {
                                reader = readerValue;
                            }
                            else
                            {
                                System.String stringValue = field.StringValue();
                                if (stringValue == null)
                                {
                                    throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                                }
                                perThread.stringReader.Init(stringValue);
                                reader = perThread.stringReader;
                            }

                            // Tokenize field and add to postingTable
                            stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                        }

                        // reset the TokenStream to the first token
                        stream.Reset();

                        int startLength = fieldState.length;

                        // deprecated
                        bool allowMinus1Position = docState.allowMinus1Position;

                        try
                        {
                            int offsetEnd = fieldState.offset - 1;

                            bool hasMoreTokens = stream.IncrementToken();

                            fieldState.attributeSource = stream;

                            OffsetAttribute            offsetAttribute  = (OffsetAttribute)fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
                            PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute)fieldState.attributeSource.AddAttribute(typeof(PositionIncrementAttribute));

                            consumer.Start(field);

                            for (; ;)
                            {
                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                if (!hasMoreTokens)
                                {
                                    break;
                                }

                                int posIncr = posIncrAttribute.GetPositionIncrement();
                                fieldState.position += posIncr;
                                if (allowMinus1Position || fieldState.position > 0)
                                {
                                    fieldState.position--;
                                }

                                if (posIncr == 0)
                                {
                                    fieldState.numOverlap++;
                                }

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        docState.docWriter.SetAborting();
                                    }
                                }
                                fieldState.position++;
                                offsetEnd = fieldState.offset + offsetAttribute.EndOffset();
                                if (++fieldState.length >= maxFieldLength)
                                {
                                    if (docState.infoStream != null)
                                    {
                                        docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                                    }
                                    break;
                                }

                                hasMoreTokens = stream.IncrementToken();
                            }
                            // trigger streams to perform end-of-stream operations
                            stream.End();

                            fieldState.offset += offsetAttribute.EndOffset();
                            anyToken           = fieldState.length > startLength;
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }

                    if (anyToken)
                    {
                        fieldState.offset += docState.analyzer.GetOffsetGap(field);
                    }
                    fieldState.boost *= field.GetBoost();
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            consumer.Finish();
            endConsumer.Finish();
        }
		public override DocumentsWriter.DocWriter ProcessDocument()
		{
			
			consumer.StartDocument();
			fieldsWriter.StartDocument();
			
			Document doc = docState.doc;
			
			System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
			
			fieldCount = 0;
			
			int thisFieldGen = fieldGen++;
			
			System.Collections.IList docFields = doc.GetFields();
			int numDocFields = docFields.Count;
			
			// Absorb any new fields first seen in this document.
			// Also absorb any changes to fields we had already
			// seen before (eg suddenly turning on norms or
			// vectors, etc.):
			
			for (int i = 0; i < numDocFields; i++)
			{
				Fieldable field = (Fieldable) docFields[i];
				System.String fieldName = field.Name();
				
				// Make sure we have a PerField allocated
				int hashPos = fieldName.GetHashCode() & hashMask;
				DocFieldProcessorPerField fp = fieldHash[hashPos];
				while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
					fp = fp.next;
				
				if (fp == null)
				{
					
					// TODO FI: we need to genericize the "flags" that a
					// field holds, and, how these flags are merged; it
					// needs to be more "pluggable" such that if I want
					// to have a new "thing" my Fields can do, I can
					// easily add it
					FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
					
					fp = new DocFieldProcessorPerField(this, fi);
					fp.next = fieldHash[hashPos];
					fieldHash[hashPos] = fp;
					totalFieldCount++;
					
					if (totalFieldCount >= fieldHash.Length / 2)
						Rehash();
				}
				else
					fp.fieldInfo.Update(field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
				
				if (thisFieldGen != fp.lastGen)
				{
					
					// First time we're seeing this field for this doc
					fp.fieldCount = 0;
					
					if (fieldCount == fields.Length)
					{
						int newSize = fields.Length * 2;
						DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
						Array.Copy(fields, 0, newArray, 0, fieldCount);
						fields = newArray;
					}
					
					fields[fieldCount++] = fp;
					fp.lastGen = thisFieldGen;
				}
				
				if (fp.fieldCount == fp.fields.Length)
				{
					Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
					Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
					fp.fields = newArray;
				}
				
				fp.fields[fp.fieldCount++] = field;
				if (field.IsStored())
				{
					fieldsWriter.AddField(field, fp.fieldInfo);
				}
			}
			
			// If we are writing vectors then we must visit
			// fields in sorted order so they are written in
			// sorted order.  TODO: we actually only need to
			// sort the subset of fields that have vectors
			// enabled; we could save [small amount of] CPU
			// here.
			QuickSort(fields, 0, fieldCount - 1);
			
			for (int i = 0; i < fieldCount; i++)
				fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }
			
			DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
			DocumentsWriter.DocWriter two = consumer.FinishDocument();
			if (one == null)
			{
				return two;
			}
			else if (two == null)
			{
				return one;
			}
			else
			{
				PerDoc both = GetPerDoc();
				both.docID = docState.docID;
				System.Diagnostics.Debug.Assert(one.docID == docState.docID);
				System.Diagnostics.Debug.Assert(two.docID == docState.docID);
				both.one = one;
				both.two = two;
				return both;
			}
		}
Example #21
		internal void  WriteField(FieldInfo fi, Fieldable field)
		{
			// if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
			// and field.binaryValue() already returns the compressed value for a field
			// with isCompressed()==true, so we disable compression in that case
			bool disableCompression = (field is FieldsReader.FieldForMerge);
			fieldsStream.WriteVInt(fi.number);
			byte bits = 0;
			if (field.IsTokenized())
				bits |= FieldsWriter.FIELD_IS_TOKENIZED;
			if (field.IsBinary())
				bits |= FieldsWriter.FIELD_IS_BINARY;
			if (field.IsCompressed())
				bits |= FieldsWriter.FIELD_IS_COMPRESSED;
			
			fieldsStream.WriteByte(bits);
			
			if (field.IsCompressed())
			{
				// compression is enabled for the current field
				byte[] data;
				int len;
				int offset;
				if (disableCompression)
				{
					// optimized case for merging, the data
					// is already compressed
					data = field.GetBinaryValue();
					System.Diagnostics.Debug.Assert(data != null);
					len = field.GetBinaryLength();
					offset = field.GetBinaryOffset();
				}
				else
				{
					// check if it is a binary field
					if (field.IsBinary())
					{
						data = CompressionTools.Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
					}
					else
					{
						byte[] x = System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue());
						data = CompressionTools.Compress(x, 0, x.Length);
					}
					len = data.Length;
					offset = 0;
				}
				
				fieldsStream.WriteVInt(len);
				fieldsStream.WriteBytes(data, offset, len);
			}
			else
			{
				// compression is disabled for the current field
				if (field.IsBinary())
				{
					byte[] data;
					int len;
					int offset;
					data = field.GetBinaryValue();
					len = field.GetBinaryLength();
					offset = field.GetBinaryOffset();
					
					fieldsStream.WriteVInt(len);
					fieldsStream.WriteBytes(data, offset, len);
				}
				else
				{
					fieldsStream.WriteString(field.StringValue());
				}
			}
		}
Example #22
 internal abstract void  Start(Fieldable field);
		// Called once per field, and is given all Fieldable
		// occurrences for this field in the document.  Return
		// true if you wish to see inverted tokens for these
		// fields:
		internal abstract bool Start(Fieldable[] fields, int count);
		// Called before a field instance is being processed
		internal abstract void  Start(Fieldable field);
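Taken together, the two Start overloads split the work: the array form decides once per field name whether inversion should happen at all, and the per-Fieldable form runs before each occurrence's tokens are consumed. A minimal sketch of a consumer respecting that contract; the class is hypothetical and stands in for the real consumers:

 // Hypothetical consumer illustrating how the two Start overloads are
 // intended to cooperate: the array form gates inversion for the whole
 // field, the per-instance form does per-occurrence setup.
 internal class SketchConsumerPerField
 {
     internal bool Start(Fieldable[] fields, int count)
     {
         for (int i = 0; i < count; i++)
         {
             if (fields[i].IsIndexed())
                 return true;    // yes, send us the inverted tokens
         }
         return false;           // nothing indexed: skip inversion
     }

     internal void Start(Fieldable field)
     {
         // per-occurrence setup would go here, e.g. fetching the
         // attributes this consumer needs from the field state
     }
 }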
Example #25
        public override DocumentsWriter.DocWriter ProcessDocument()
        {
            consumer.StartDocument();
            fieldsWriter.StartDocument();

            Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            fieldCount = 0;

            int thisFieldGen = fieldGen++;

            System.Collections.IList docFields = doc.GetFields();
            int numDocFields = docFields.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            for (int i = 0; i < numDocFields; i++)
            {
                Fieldable     field     = (Fieldable)docFields[i];
                System.String fieldName = field.Name();

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());

                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    fp.fieldInfo.Update(field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                if (fp.fieldCount == fp.fields.Length)
                {
                    Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
                    Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
                    fp.fields = newArray;
                }

                fp.fields[fp.fieldCount++] = field;
                if (field.IsStored())
                {
                    fieldsWriter.AddField(field, fp.fieldInfo);
                }
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            QuickSort(fields, 0, fieldCount - 1);

            for (int i = 0; i < fieldCount; i++)
            {
                fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
            }

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }

            DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
            DocumentsWriter.DocWriter two = consumer.FinishDocument();
            if (one == null)
            {
                return(two);
            }
            else if (two == null)
            {
                return(one);
            }
            else
            {
                PerDoc both = GetPerDoc();
                both.docID = docState.docID;
                System.Diagnostics.Debug.Assert(one.docID == docState.docID);
                System.Diagnostics.Debug.Assert(two.docID == docState.docID);
                both.one = one;
                both.two = two;
                return(both);
            }
        }
Example #26
		internal override void  Start(Fieldable f)
		{
			termAtt = (TermAttribute) fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
			consumer.Start(f);
			if (nextPerField != null)
			{
				nextPerField.Start(f);
			}
		}
		internal override void  Start(Fieldable f)
		{
			if (doVectorOffsets)
			{
				offsetAttribute = (OffsetAttribute) fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
			}
			else
			{
				offsetAttribute = null;
			}
		}
Example #28
		public override void  ProcessFields(Fieldable[] fields, int count)
		{
			
			fieldState.Reset(docState.doc.GetBoost());
			
			int maxFieldLength = docState.maxFieldLength;
			
			bool doInvert = consumer.Start(fields, count);
			
			for (int i = 0; i < count; i++)
			{
				
				Fieldable field = fields[i];
				
				// TODO FI: this should be "genericized" to querying
				// consumer if it wants to see this particular field
				// tokenized.
				if (field.IsIndexed() && doInvert)
				{
					
					bool anyToken;
					
					if (fieldState.length > 0)
						fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
					
					if (!field.IsTokenized())
					{
						// un-tokenized field
						System.String stringValue = field.StringValue();
						int valueLength = stringValue.Length;
						perThread.singleTokenTokenStream.Reinit(stringValue, 0, valueLength);
						fieldState.attributeSource = perThread.singleTokenTokenStream;
						consumer.Start(field);
						
						bool success = false;
						try
						{
							consumer.Add();
							success = true;
						}
						finally
						{
							if (!success)
								docState.docWriter.SetAborting();
						}
						fieldState.offset += valueLength;
						fieldState.length++;
						fieldState.position++;
						anyToken = valueLength > 0;
					}
					else
					{
						// tokenized field
						TokenStream stream;
						TokenStream streamValue = field.TokenStreamValue();
						
						if (streamValue != null)
							stream = streamValue;
						else
						{
							// the field does not have a TokenStream,
							// so we have to obtain one from the analyzer
							System.IO.TextReader reader; // find or make Reader
							System.IO.TextReader readerValue = field.ReaderValue();
							
							if (readerValue != null)
								reader = readerValue;
							else
							{
								System.String stringValue = field.StringValue();
								if (stringValue == null)
									throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
								perThread.stringReader.Init(stringValue);
								reader = perThread.stringReader;
							}
							
							// Tokenize field and add to postingTable
							stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
						}
						
						// reset the TokenStream to the first token
						stream.Reset();
						
						int startLength = fieldState.length;
						
						// deprecated
						bool allowMinus1Position = docState.allowMinus1Position;
						
						try
						{
							int offsetEnd = fieldState.offset - 1;
							
							bool hasMoreTokens = stream.IncrementToken();
							
							fieldState.attributeSource = stream;
							
							OffsetAttribute offsetAttribute = (OffsetAttribute) fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
							PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute) fieldState.attributeSource.AddAttribute(typeof(PositionIncrementAttribute));
							
							consumer.Start(field);
							
							for (; ; )
							{
								
								// If we hit an exception in stream.next below
								// (which is fairly common, eg if analyzer
								// chokes on a given document), then it's
								// non-aborting and (above) this one document
								// will be marked as deleted, but still
								// consume a docID
								
								if (!hasMoreTokens)
									break;
								
								int posIncr = posIncrAttribute.GetPositionIncrement();
								fieldState.position += posIncr;
								if (allowMinus1Position || fieldState.position > 0)
								{
									fieldState.position--;
								}
								
								if (posIncr == 0)
									fieldState.numOverlap++;
								
								bool success = false;
								try
								{
									// If we hit an exception in here, we abort
									// all buffered documents since the last
									// flush, on the likelihood that the
									// internal state of the consumer is now
									// corrupt and should not be flushed to a
									// new segment:
									consumer.Add();
									success = true;
								}
								finally
								{
									if (!success)
										docState.docWriter.SetAborting();
								}
								fieldState.position++;
								offsetEnd = fieldState.offset + offsetAttribute.EndOffset();
								if (++fieldState.length >= maxFieldLength)
								{
									if (docState.infoStream != null)
										docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
									break;
								}
								
								hasMoreTokens = stream.IncrementToken();
							}
							// trigger streams to perform end-of-stream operations
							stream.End();
							
							fieldState.offset += offsetAttribute.EndOffset();
							anyToken = fieldState.length > startLength;
						}
						finally
						{
							stream.Close();
						}
					}
					
					if (anyToken)
						fieldState.offset += docState.analyzer.GetOffsetGap(field);
					fieldState.boost *= field.GetBoost();
				}
                
                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
			}
			
			consumer.Finish();
			endConsumer.Finish();
		}