예제 #1
0
 /// <summary>
 /// Places <paramref name="perDoc"/> in the next open slot of <c>docFreeList</c>
 /// and advances <c>freeCount</c>, while holding the lock on this instance.
 /// </summary>
 /// <param name="perDoc">The instance to store in the free list.</param>
 internal void FreePerDoc(PerDoc perDoc)
 {
     lock (this)
     {
         // Debug builds check that the free list still has an unused slot.
         System.Diagnostics.Debug.Assert(docFreeList.Length > freeCount);

         int slot = freeCount++;
         docFreeList[slot] = perDoc;
     }
 }
예제 #2
0
 /// <summary>
 /// Stores <paramref name="perDoc"/> in the next open slot of <c>docFreeList</c>
 /// and advances <c>freeCount</c>, while holding the lock on this instance.
 /// </summary>
 /// <remarks>
 /// Debug builds assert that the free list has room and that
 /// <paramref name="perDoc"/> reports zero stored fields, a zero-length
 /// <c>fdt</c> and a zero <c>fdt</c> file pointer.
 /// </remarks>
 /// <param name="perDoc">The instance to store in the free list.</param>
 internal void Free(PerDoc perDoc)
 {
     lock (this)
     {
         System.Diagnostics.Debug.Assert(docFreeList.Length > freeCount);
         System.Diagnostics.Debug.Assert(perDoc.numStoredFields == 0);
         System.Diagnostics.Debug.Assert(perDoc.fdt.Length() == 0);
         System.Diagnostics.Debug.Assert(perDoc.fdt.GetFilePointer() == 0);

         int slot = freeCount++;
         docFreeList[slot] = perDoc;
     }
 }
예제 #3
0
        /// <summary>
        /// Appends the stored fields held by <paramref name="perDoc"/> to
        /// <c>fieldsWriter</c>, advances <c>lastDocID</c>, then resets
        /// <paramref name="perDoc"/> and passes it to <c>Free</c>. The whole
        /// sequence runs under the lock on this instance.
        /// </summary>
        /// <param name="perDoc">Supplies <c>docID</c>, <c>numStoredFields</c> and <c>fdt</c> for the document being finished.</param>
        internal void FinishDocument(PerDoc perDoc)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument start"));

                // Preparation: both calls run before anything is appended.
                InitFieldsWriter();
                Fill(perDoc.docID);

                // Hand this document's stored fields to the real FieldsWriter.
                fieldsWriter.FlushDocument(perDoc.numStoredFields, perDoc.fdt);
                lastDocID += 1;

                // Reset the holder before handing it to Free.
                perDoc.Reset();
                Free(perDoc);

                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument end"));
            }
        }
예제 #4
0
        /// <summary>
        /// Writes the term-vector data carried by <paramref name="perDoc"/> to the
        /// <c>tvx</c>, <c>tvd</c> and <c>tvf</c> outputs, advances <c>lastDocID</c>,
        /// then resets <paramref name="perDoc"/> and passes it to <c>Free</c>.
        /// The whole sequence runs under the lock on this instance.
        /// </summary>
        /// <param name="perDoc">Supplies the document id, field numbers, field pointers and the per-document data copied into <c>tvf</c>.</param>
        internal void FinishDocument(PerDoc perDoc)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));

                InitTermVectorsWriter();
                Fill(perDoc.docID);

                // tvx records the current positions of tvd and tvf, in that order.
                tvx.WriteLong(tvd.FilePointer);
                tvx.WriteLong(tvf.FilePointer);

                // The field count is written even when it is zero.
                tvd.WriteVInt(perDoc.numVectorFields);

                if (perDoc.numVectorFields > 0)
                {
                    // First every field number ...
                    int fieldIdx = 0;
                    while (fieldIdx < perDoc.numVectorFields)
                    {
                        tvd.WriteVInt(perDoc.fieldNumbers[fieldIdx]);
                        fieldIdx++;
                    }

                    // ... then each pointer after the first, as a delta from its predecessor.
                    System.Diagnostics.Debug.Assert(perDoc.fieldPointers[0] == 0);
                    long previous = perDoc.fieldPointers[0];
                    for (int ptrIdx = 1; ptrIdx < perDoc.numVectorFields; ptrIdx++)
                    {
                        long current = perDoc.fieldPointers[ptrIdx];
                        tvd.WriteVLong(current - previous);
                        previous = current;
                    }

                    // Copy the per-document data into tvf and zero the field count.
                    perDoc.perDocTvf.WriteTo(tvf);
                    perDoc.numVectorFields = 0;
                }

                System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.DocStoreOffset);

                lastDocID += 1;
                perDoc.Reset();
                Free(perDoc);

                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
            }
        }
예제 #5
0
 /// <summary>
 /// Allocates <c>docFreeList</c> as an array with room for one <c>PerDoc</c>.
 /// </summary>
 private void InitBlock()
 {
     const int initialSlots = 1;
     docFreeList = new PerDoc[initialSlots];
 }
		/// <summary>
		/// Writes <paramref name="perDoc"/> into <c>docFreeList</c> at the index
		/// given by <c>freeCount</c> and then increments <c>freeCount</c>, all
		/// while holding the lock on this instance.
		/// </summary>
		/// <param name="perDoc">The instance to store in the free list.</param>
		internal void FreePerDoc(PerDoc perDoc)
		{
			lock (this)
			{
				// Debug-only capacity check.
				System.Diagnostics.Debug.Assert(docFreeList.Length > freeCount);

				int nextFree = freeCount++;
				docFreeList[nextFree] = perDoc;
			}
		}
		/// <summary>
		/// Sets <c>docFreeList</c> to a new one-element <c>PerDoc</c> array whose
		/// only slot is null.
		/// </summary>
		private void InitBlock()
		{
			docFreeList = new PerDoc[] { null };
		}
예제 #8
0
		/// <summary>
		/// Writes the term-vector data carried by <paramref name="perDoc"/> to the
		/// <c>tvx</c>, <c>tvd</c> and <c>tvf</c> outputs, advances <c>lastDocID</c>,
		/// then resets <paramref name="perDoc"/> and passes it to <c>Free</c>.
		/// The whole sequence runs under the lock on this instance.
		/// </summary>
		/// <param name="perDoc">Supplies the document id, field numbers, field pointers and the per-document data copied into <c>tvf</c>.</param>
		internal void FinishDocument(PerDoc perDoc)
		{
			lock (this)
			{
				System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));

				InitTermVectorsWriter();
				Fill(perDoc.docID);

				// Record where this document's entries begin in tvd and tvf.
				tvx.WriteLong(tvd.GetFilePointer());
				tvx.WriteLong(tvf.GetFilePointer());

				// The field count goes out unconditionally.
				tvd.WriteVInt(perDoc.numVectorFields);

				if (perDoc.numVectorFields > 0)
				{
					// Field numbers are written as one contiguous run.
					for (int numberIdx = 0; numberIdx < perDoc.numVectorFields; numberIdx++)
					{
						tvd.WriteVInt(perDoc.fieldNumbers[numberIdx]);
					}

					// Pointers after the first are written as differences from the preceding pointer.
					System.Diagnostics.Debug.Assert(perDoc.fieldPointers[0] == 0);
					long prior = perDoc.fieldPointers[0];
					int pointerIdx = 1;
					while (pointerIdx < perDoc.numVectorFields)
					{
						long here = perDoc.fieldPointers[pointerIdx];
						tvd.WriteVLong(here - prior);
						prior = here;
						pointerIdx++;
					}

					// Copy the per-document data into tvf and zero the field count.
					perDoc.perDocTvf.WriteTo(tvf);
					perDoc.numVectorFields = 0;
				}

				System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.GetDocStoreOffset());

				lastDocID += 1;
				perDoc.Reset();
				Free(perDoc);

				System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
			}
		}
		/// <summary>
		/// Writes <paramref name="perDoc"/> into <c>docFreeList</c> at the index
		/// given by <c>freeCount</c> and then increments <c>freeCount</c>, all
		/// while holding the lock on this instance.
		/// </summary>
		/// <remarks>
		/// Debug builds assert that the free list has room and that
		/// <paramref name="perDoc"/> reports zero stored fields, a zero
		/// <c>fdt.Length</c> and a zero <c>fdt.FilePointer</c>.
		/// </remarks>
		/// <param name="perDoc">The instance to store in the free list.</param>
		internal void Free(PerDoc perDoc)
		{
			lock (this)
			{
				System.Diagnostics.Debug.Assert(docFreeList.Length > freeCount);
				System.Diagnostics.Debug.Assert(perDoc.numStoredFields == 0);
				System.Diagnostics.Debug.Assert(perDoc.fdt.Length == 0);
				System.Diagnostics.Debug.Assert(perDoc.fdt.FilePointer == 0);

				int nextFree = freeCount++;
				docFreeList[nextFree] = perDoc;
			}
		}
		/// <summary>
		/// Flushes the stored fields held by <paramref name="perDoc"/> through
		/// <c>fieldsWriter.FlushDocument</c>, increments <c>lastDocID</c>, then
		/// resets <paramref name="perDoc"/> and passes it to <c>Free</c>. The
		/// whole sequence runs under the lock on this instance.
		/// </summary>
		/// <param name="perDoc">Supplies <c>docID</c>, <c>numStoredFields</c> and <c>fdt</c> for the document being finished.</param>
		internal void FinishDocument(PerDoc perDoc)
		{
			lock (this)
			{
				System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument start"));

				// Both preparation calls run before the flush.
				InitFieldsWriter();
				Fill(perDoc.docID);

				// Move the stored fields into the real FieldsWriter.
				fieldsWriter.FlushDocument(perDoc.numStoredFields, perDoc.fdt);
				lastDocID += 1;

				// Reset first, then pass the instance to Free.
				perDoc.Reset();
				Free(perDoc);

				System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument end"));
			}
		}
        /// <summary>
        /// Processes the document held in <c>docState.doc</c>: groups its fields by name
        /// into per-field processors, hands each group to that field's consumer, and
        /// finishes both <c>fieldsWriter</c> and <c>consumer</c> for the document.
        /// </summary>
        /// <returns>
        /// The result of <c>consumer.FinishDocument()</c> when <c>fieldsWriter.FinishDocument()</c>
        /// returned null (that result may itself be null); the <c>fieldsWriter</c> result when only
        /// the consumer returned null; otherwise a <c>PerDoc</c> obtained from <c>GetPerDoc()</c>
        /// that references both results.
        /// </returns>
        public override DocumentsWriter.DocWriter ProcessDocument()
        {
            consumer.StartDocument();
            fieldsWriter.StartDocument();

            Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            // Number of distinct field names collected into `fields` for this document.
            fieldCount = 0;

            // Each call takes the next generation number; it is compared with
            // fp.lastGen below to detect the first occurrence of a field name
            // within this document.
            int thisFieldGen = fieldGen++;

            System.Collections.Generic.IList <IFieldable> docFields = doc.GetFields();
            int numDocFields = docFields.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            for (int i = 0; i < numDocFields; i++)
            {
                IFieldable field     = docFields[i];
                string     fieldName = field.Name;

                // Make sure we have a PerField allocated
                // The bucket index is the name's hash code masked with hashMask;
                // the bucket's chain is walked through `next` until the name matches.
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored,
                                                  field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                                                  field.OmitNorms, false, field.OmitTermFreqAndPositions);

                    // Link the new entry in at the head of its bucket's chain.
                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    // Rehash once the entry count reaches half the table length.
                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    // Name already known: update its FieldInfo with this instance's flags.
                    fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored,
                                        field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                                        field.OmitNorms, false, field.OmitTermFreqAndPositions);
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    // Double the `fields` array when it is full.
                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                // Double this name's own instance array when it is full.
                if (fp.fieldCount == fp.fields.Length)
                {
                    IFieldable[] newArray = new IFieldable[fp.fields.Length * 2];
                    Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
                    fp.fields = newArray;
                }

                fp.fields[fp.fieldCount++] = field;
                // Stored fields are additionally passed to fieldsWriter right away.
                if (field.IsStored)
                {
                    fieldsWriter.AddField(field, fp.fieldInfo);
                }
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            QuickSort(fields, 0, fieldCount - 1);

            // Hand each name's collected instances to that field's consumer.
            for (int i = 0; i < fieldCount; i++)
            {
                fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
            }

            // Write the immense-term warning when a prefix was recorded and an
            // info stream is set, then clear the prefix.
            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }

            // Finish both writers; each may return null.
            DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
            DocumentsWriter.DocWriter two = consumer.FinishDocument();
            if (one == null)
            {
                // Also covers the case where both are null (returns null).
                return(two);
            }
            else if (two == null)
            {
                return(one);
            }
            else
            {
                // Both produced a result: return a single PerDoc that references the two.
                PerDoc both = GetPerDoc();
                both.docID = docState.docID;
                System.Diagnostics.Debug.Assert(one.docID == docState.docID);
                System.Diagnostics.Debug.Assert(two.docID == docState.docID);
                both.one = one;
                both.two = two;
                return(both);
            }
        }