コード例 #1
0
        /// <summary>
        /// Replaces the field hash table with one twice as long and re-links
        /// every per-field entry into the bucket selected by the new mask.
        /// </summary>
        private void  Rehash()
        {
            DocFieldProcessorPerField[] oldTable = fieldHash;
            int grownSize = oldTable.Length * 2;

            // Guards against the doubled length wrapping around
            System.Diagnostics.Debug.Assert(grownSize > oldTable.Length);

            DocFieldProcessorPerField[] grownTable = new DocFieldProcessorPerField[grownSize];

            // The mask is (length - 1); presumably the table length is
            // always a power of two -- confirm where fieldHash is created.
            int grownMask = grownSize - 1;

            foreach (DocFieldProcessorPerField chainHead in oldTable)
            {
                DocFieldProcessorPerField entry = chainHead;
                while (entry != null)
                {
                    // Save the successor now; entry.next is overwritten below
                    DocFieldProcessorPerField following = entry.next;

                    // Push the entry onto the front of its new bucket's chain
                    int slot = entry.fieldInfo.name.GetHashCode() & grownMask;
                    entry.next = grownTable[slot];
                    grownTable[slot] = entry;

                    entry = following;
                }
            }

            fieldHash = grownTable;
            hashMask  = grownMask;
        }
コード例 #2
0
 /// <summary>
 /// Calls <c>Abort()</c> on every per-field entry reachable from the
 /// hash table, then on the stored-fields writer and on the consumer.
 /// </summary>
 public override void  Abort()
 {
     for (int bucket = 0; bucket < fieldHash.Length; bucket++)
     {
         DocFieldProcessorPerField current = fieldHash[bucket];
         while (current != null)
         {
             // Read the link before invoking Abort() on this entry
             DocFieldProcessorPerField following = current.next;
             current.Abort();
             current = following;
         }
     }

     fieldsWriter.Abort();
     consumer.Abort();
 }
コード例 #3
0
 /// <summary>
 /// Collects the <c>consumer</c> of every per-field entry in the hash
 /// table into a set.
 /// </summary>
 /// <returns>A new collection holding each hashed entry's consumer.</returns>
 public System.Collections.Generic.ICollection <DocFieldConsumerPerField> Fields()
 {
     System.Collections.Generic.ICollection <DocFieldConsumerPerField> result =
         new System.Collections.Generic.HashSet <DocFieldConsumerPerField>();

     foreach (DocFieldProcessorPerField chainHead in fieldHash)
     {
         for (DocFieldProcessorPerField entry = chainHead; entry != null; entry = entry.next)
         {
             result.Add(entry.consumer);
         }
     }

     // Debug-only check: the set size must match the tracked field count
     System.Diagnostics.Debug.Assert(result.Count == totalFieldCount);
     return result;
 }
コード例 #4
0
        /// <summary>
        /// Frees fields that were seen earlier but not seen again in the
        /// last run: every hashed entry whose <c>lastGen</c> is still -1 is
        /// unlinked from its chain and <c>totalFieldCount</c> is decremented.
        /// Entries that are kept get their <c>lastGen</c> reset to -1.
        /// </summary>
        /// <param name="state">Write state; when its
        /// <c>docWriter.infoStream</c> is non-null, one "purge" line is
        /// written to it per removed field.</param>
        internal void  TrimFields(SegmentWriteState state)
        {
            for (int bucket = 0; bucket < fieldHash.Length; bucket++)
            {
                // Most recent entry of this chain that was kept (null while
                // nothing has been kept yet)
                DocFieldProcessorPerField kept = null;

                for (DocFieldProcessorPerField entry = fieldHash[bucket]; entry != null; entry = entry.next)
                {
                    if (entry.lastGen != -1)
                    {
                        // Keep the entry and reset its generation marker
                        entry.lastGen = -1;
                        kept = entry;
                        continue;
                    }

                    // Not seen since the previous flush: unlink it, either
                    // from the preceding kept entry or from the bucket head
                    if (kept != null)
                    {
                        kept.next = entry.next;
                    }
                    else
                    {
                        fieldHash[bucket] = entry.next;
                    }

                    if (state.docWriter.infoStream != null)
                    {
                        state.docWriter.infoStream.WriteLine("  purge field=" + entry.fieldInfo.name);
                    }

                    totalFieldCount--;
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Recursively sorts <c>array[lo..hi]</c> (both bounds inclusive) in
        /// place, ordering entries by ordinal comparison of
        /// <c>fieldInfo.name</c>.
        /// </summary>
        /// <param name="array">Entries to reorder.</param>
        /// <param name="lo">Index of the first element of the range.</param>
        /// <param name="hi">Index of the last element of the range.</param>
        internal void  QuickSort(DocFieldProcessorPerField[] array, int lo, int hi)
        {
            if (lo >= hi)
            {
                return;
            }

            DocFieldProcessorPerField held;

            if (hi == lo + 1)
            {
                // Exactly two elements: a single compare-and-swap suffices
                if (String.CompareOrdinal(array[lo].fieldInfo.name, array[hi].fieldInfo.name) > 0)
                {
                    held      = array[lo];
                    array[lo] = array[hi];
                    array[hi] = held;
                }
                return;
            }

            int mid = Number.URShift((lo + hi), 1);

            // Put the elements at lo, mid and hi in order among themselves
            if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
            {
                held       = array[lo];
                array[lo]  = array[mid];
                array[mid] = held;
            }

            if (String.CompareOrdinal(array[mid].fieldInfo.name, array[hi].fieldInfo.name) > 0)
            {
                held       = array[mid];
                array[mid] = array[hi];
                array[hi]  = held;

                if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
                {
                    held       = array[lo];
                    array[lo]  = array[mid];
                    array[mid] = held;
                }
            }

            int left  = lo + 1;
            int right = hi - 1;

            if (left >= right)
            {
                return;
            }

            // Name of the middle element, used as the partitioning value
            string pivotName = array[mid].fieldInfo.name;

            while (true)
            {
                while (String.CompareOrdinal(array[right].fieldInfo.name, pivotName) > 0)
                {
                    right--;
                }

                while (left < right && String.CompareOrdinal(array[left].fieldInfo.name, pivotName) <= 0)
                {
                    left++;
                }

                if (left >= right)
                {
                    break;
                }

                held         = array[left];
                array[left]  = array[right];
                array[right] = held;
                right--;
            }

            QuickSort(array, lo, left);
            QuickSort(array, left + 1, hi);
        }
コード例 #6
0
        /// <summary>
        /// Processes the document held in <c>docState.doc</c>: starts the
        /// document on the consumer and the stored-fields writer, groups the
        /// document's fields by name into per-field entries (creating or
        /// updating their <c>FieldInfo</c>), passes stored fields to the
        /// stored-fields writer, sorts the per-field entries by name, hands
        /// each group to its consumer and finally finishes the document on
        /// both writers.
        /// </summary>
        /// <returns>
        /// The non-null result of the two <c>FinishDocument()</c> calls, a
        /// <c>PerDoc</c> wrapping both when both are non-null, or null when
        /// both are null.
        /// </returns>
        public override DocumentsWriter.DocWriter ProcessDocument()
        {
            consumer.StartDocument();
            fieldsWriter.StartDocument();

            Document.Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            fieldCount = 0;

            // Generation stamp identifying "this document" on per-field entries
            int currentGen = fieldGen++;

            System.Collections.Generic.IList <IFieldable> fieldList = doc.GetFields();
            int fieldListSize = fieldList.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):
            for (int docIdx = 0; docIdx < fieldListSize; docIdx++)
            {
                IFieldable    current = fieldList[docIdx];
                System.String name    = current.Name;

                // Look the name up in its hash chain
                int bucket = name.GetHashCode() & hashMask;
                DocFieldProcessorPerField perField;
                for (perField = fieldHash[bucket]; perField != null; perField = perField.next)
                {
                    if (perField.fieldInfo.name.Equals(name))
                    {
                        break;
                    }
                }

                if (perField != null)
                {
                    // Known field: merge this instance's flags into its FieldInfo
                    perField.fieldInfo.Update(current.IsIndexed, current.IsTermVectorStored,
                                              current.IsStorePositionWithTermVector, current.IsStoreOffsetWithTermVector,
                                              current.OmitNorms, false, current.OmitTermFreqAndPositions);
                }
                else
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo info = fieldInfos.Add(name, current.IsIndexed, current.IsTermVectorStored,
                                                    current.IsStorePositionWithTermVector, current.IsStoreOffsetWithTermVector,
                                                    current.OmitNorms, false, current.OmitTermFreqAndPositions);

                    // New field: allocate its entry and push it onto the chain
                    perField          = new DocFieldProcessorPerField(this, info);
                    perField.next     = fieldHash[bucket];
                    fieldHash[bucket] = perField;
                    totalFieldCount++;

                    // Grow the table once it is at least half full
                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }

                if (perField.lastGen != currentGen)
                {
                    // First occurrence of this field name in this document
                    perField.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        DocFieldProcessorPerField[] grown = new DocFieldProcessorPerField[fields.Length * 2];
                        Array.Copy(fields, 0, grown, 0, fieldCount);
                        fields = grown;
                    }

                    fields[fieldCount++] = perField;
                    perField.lastGen     = currentGen;
                }

                // Make room for one more instance of this field if needed
                if (perField.fieldCount == perField.fields.Length)
                {
                    IFieldable[] grown = new IFieldable[perField.fields.Length * 2];
                    Array.Copy(perField.fields, 0, grown, 0, perField.fieldCount);
                    perField.fields = grown;
                }

                perField.fields[perField.fieldCount++] = current;

                if (current.IsStored)
                {
                    fieldsWriter.AddField(current, perField.fieldInfo);
                }
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            QuickSort(fields, 0, fieldCount - 1);

            for (int k = 0; k < fieldCount; k++)
            {
                DocFieldProcessorPerField group = fields[k];
                group.consumer.ProcessFields(group.fields, group.fieldCount);
            }

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }

            DocumentsWriter.DocWriter storedResult   = fieldsWriter.FinishDocument();
            DocumentsWriter.DocWriter consumerResult = consumer.FinishDocument();

            if (storedResult == null)
            {
                return consumerResult;
            }

            if (consumerResult == null)
            {
                return storedResult;
            }

            // Both produced a writer: wrap them in a single PerDoc
            PerDoc combined = GetPerDoc();
            combined.docID = docState.docID;
            System.Diagnostics.Debug.Assert(storedResult.docID == docState.docID);
            System.Diagnostics.Debug.Assert(consumerResult.docID == docState.docID);
            combined.one = storedResult;
            combined.two = consumerResult;
            return combined;
        }
コード例 #7
0
		/// <summary>
		/// Recursively sorts <c>array[lo..hi]</c> (both bounds inclusive) in
		/// place, ordering entries by ordinal comparison of
		/// <c>fieldInfo.name</c>.
		/// </summary>
		/// <param name="array">Entries to reorder.</param>
		/// <param name="lo">Index of the first element of the range.</param>
		/// <param name="hi">Index of the last element of the range.</param>
		internal void  QuickSort(DocFieldProcessorPerField[] array, int lo, int hi)
		{
			if (lo >= hi)
			{
				return;
			}

			DocFieldProcessorPerField held;

			if (hi == lo + 1)
			{
				// Exactly two elements: a single compare-and-swap suffices
				if (String.CompareOrdinal(array[lo].fieldInfo.name, array[hi].fieldInfo.name) > 0)
				{
					held = array[lo];
					array[lo] = array[hi];
					array[hi] = held;
				}
				return;
			}

			int mid = Number.URShift((lo + hi), 1);

			// Put the elements at lo, mid and hi in order among themselves
			if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
			{
				held = array[lo];
				array[lo] = array[mid];
				array[mid] = held;
			}

			if (String.CompareOrdinal(array[mid].fieldInfo.name, array[hi].fieldInfo.name) > 0)
			{
				held = array[mid];
				array[mid] = array[hi];
				array[hi] = held;

				if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
				{
					held = array[lo];
					array[lo] = array[mid];
					array[mid] = held;
				}
			}

			int left = lo + 1;
			int right = hi - 1;

			if (left >= right)
			{
				return;
			}

			// Name of the middle element, used as the partitioning value
			string pivotName = array[mid].fieldInfo.name;

			while (true)
			{
				while (String.CompareOrdinal(array[right].fieldInfo.name, pivotName) > 0)
				{
					right--;
				}

				while (left < right && String.CompareOrdinal(array[left].fieldInfo.name, pivotName) <= 0)
				{
					left++;
				}

				if (left >= right)
				{
					break;
				}

				held = array[left];
				array[left] = array[right];
				array[right] = held;
				right--;
			}

			QuickSort(array, lo, left);
			QuickSort(array, left + 1, hi);
		}
コード例 #8
0
		/// <summary>
		/// Processes the document held in <c>docState.doc</c>: starts the
		/// document on the consumer and the stored-fields writer, groups the
		/// document's fields by name into per-field entries (creating or
		/// updating their <c>FieldInfo</c>), passes stored fields to the
		/// stored-fields writer, sorts the per-field entries by name, hands
		/// each group to its consumer and finally finishes the document on
		/// both writers.
		/// </summary>
		/// <returns>
		/// The non-null result of the two <c>FinishDocument()</c> calls, a
		/// <c>PerDoc</c> wrapping both when both are non-null, or null when
		/// both are null.
		/// </returns>
		public override DocumentsWriter.DocWriter ProcessDocument()
		{
			consumer.StartDocument();
			fieldsWriter.StartDocument();

			Document.Document doc = docState.doc;

			System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

			fieldCount = 0;

			// Generation stamp identifying "this document" on per-field entries
			int currentGen = fieldGen++;

			System.Collections.Generic.IList<IFieldable> fieldList = doc.GetFields();
			int fieldListSize = fieldList.Count;

			// Absorb any new fields first seen in this document.
			// Also absorb any changes to fields we had already
			// seen before (eg suddenly turning on norms or
			// vectors, etc.):
			for (int docIdx = 0; docIdx < fieldListSize; docIdx++)
			{
				IFieldable current = fieldList[docIdx];
				System.String name = current.Name;

				// Look the name up in its hash chain
				int bucket = name.GetHashCode() & hashMask;
				DocFieldProcessorPerField perField;
				for (perField = fieldHash[bucket]; perField != null; perField = perField.next)
				{
					if (perField.fieldInfo.name.Equals(name))
					{
						break;
					}
				}

				if (perField != null)
				{
					// Known field: merge this instance's flags into its FieldInfo
					perField.fieldInfo.Update(current.IsIndexed, current.IsTermVectorStored,
						current.IsStorePositionWithTermVector, current.IsStoreOffsetWithTermVector,
						current.OmitNorms, false, current.OmitTermFreqAndPositions);
				}
				else
				{
					// TODO FI: we need to genericize the "flags" that a
					// field holds, and, how these flags are merged; it
					// needs to be more "pluggable" such that if I want
					// to have a new "thing" my Fields can do, I can
					// easily add it
					FieldInfo info = fieldInfos.Add(name, current.IsIndexed, current.IsTermVectorStored,
						current.IsStorePositionWithTermVector, current.IsStoreOffsetWithTermVector,
						current.OmitNorms, false, current.OmitTermFreqAndPositions);

					// New field: allocate its entry and push it onto the chain
					perField = new DocFieldProcessorPerField(this, info);
					perField.next = fieldHash[bucket];
					fieldHash[bucket] = perField;
					totalFieldCount++;

					// Grow the table once it is at least half full
					if (totalFieldCount >= fieldHash.Length / 2)
					{
						Rehash();
					}
				}

				if (perField.lastGen != currentGen)
				{
					// First occurrence of this field name in this document
					perField.fieldCount = 0;

					if (fieldCount == fields.Length)
					{
						DocFieldProcessorPerField[] grown = new DocFieldProcessorPerField[fields.Length * 2];
						Array.Copy(fields, 0, grown, 0, fieldCount);
						fields = grown;
					}

					fields[fieldCount++] = perField;
					perField.lastGen = currentGen;
				}

				// Make room for one more instance of this field if needed
				if (perField.fieldCount == perField.fields.Length)
				{
					IFieldable[] grown = new IFieldable[perField.fields.Length * 2];
					Array.Copy(perField.fields, 0, grown, 0, perField.fieldCount);
					perField.fields = grown;
				}

				perField.fields[perField.fieldCount++] = current;

				if (current.IsStored)
				{
					fieldsWriter.AddField(current, perField.fieldInfo);
				}
			}

			// If we are writing vectors then we must visit
			// fields in sorted order so they are written in
			// sorted order.  TODO: we actually only need to
			// sort the subset of fields that have vectors
			// enabled; we could save [small amount of] CPU
			// here.
			QuickSort(fields, 0, fieldCount - 1);

			for (int k = 0; k < fieldCount; k++)
			{
				DocFieldProcessorPerField group = fields[k];
				group.consumer.ProcessFields(group.fields, group.fieldCount);
			}

			if (docState.maxTermPrefix != null && docState.infoStream != null)
			{
				docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
				docState.maxTermPrefix = null;
			}

			DocumentsWriter.DocWriter storedResult = fieldsWriter.FinishDocument();
			DocumentsWriter.DocWriter consumerResult = consumer.FinishDocument();

			if (storedResult == null)
			{
				return consumerResult;
			}

			if (consumerResult == null)
			{
				return storedResult;
			}

			// Both produced a writer: wrap them in a single PerDoc
			PerDoc combined = GetPerDoc();
			combined.docID = docState.docID;
			System.Diagnostics.Debug.Assert(storedResult.docID == docState.docID);
			System.Diagnostics.Debug.Assert(consumerResult.docID == docState.docID);
			combined.one = storedResult;
			combined.two = consumerResult;
			return combined;
		}
コード例 #9
0
		/// <summary>
		/// Replaces the field hash table with one twice as long and re-links
		/// every per-field entry into the bucket selected by the new mask.
		/// </summary>
		private void  Rehash()
		{
			DocFieldProcessorPerField[] oldTable = fieldHash;
			int grownSize = oldTable.Length * 2;

			// Guards against the doubled length wrapping around
			System.Diagnostics.Debug.Assert(grownSize > oldTable.Length);

			DocFieldProcessorPerField[] grownTable = new DocFieldProcessorPerField[grownSize];

			// The mask is (length - 1); presumably the table length is
			// always a power of two -- confirm where fieldHash is created.
			int grownMask = grownSize - 1;

			foreach (DocFieldProcessorPerField chainHead in oldTable)
			{
				DocFieldProcessorPerField entry = chainHead;
				while (entry != null)
				{
					// Save the successor now; entry.next is overwritten below
					DocFieldProcessorPerField following = entry.next;

					// Push the entry onto the front of its new bucket's chain
					int slot = entry.fieldInfo.name.GetHashCode() & grownMask;
					entry.next = grownTable[slot];
					grownTable[slot] = entry;

					entry = following;
				}
			}

			fieldHash = grownTable;
			hashMask = grownMask;
		}