Exemple #1
0
        public /*internal*/ void  AddDocument(System.String segment, Document doc)
        {
            // write field names
            fieldInfos = new FieldInfos();
            fieldInfos.Add(doc);
            fieldInfos.Write(directory, segment + ".fnm");

            // write field values
            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                fieldsWriter.AddDocument(doc);
            }
            finally
            {
                fieldsWriter.Close();
            }

            // invert doc into postingTable
            postingTable.Clear();                        // clear postingTable
            fieldLengths   = new int[fieldInfos.Size()]; // init fieldLengths
            fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
            fieldOffsets   = new int[fieldInfos.Size()]; // init fieldOffsets

            fieldBoosts = new float[fieldInfos.Size()];  // init fieldBoosts
            float boost = doc.GetBoost();

            for (int i = 0; i < fieldBoosts.Length; i++)
            {
                fieldBoosts[i] = boost;
            }

            InvertDocument(doc);

            // sort postingTable into an array
            Posting[] postings = SortPostingTable();

            /*
             * for (int i = 0; i < postings.length; i++) {
             * Posting posting = postings[i];
             * System.out.print(posting.term);
             * System.out.print(" freq=" + posting.freq);
             * System.out.print(" pos=");
             * System.out.print(posting.positions[0]);
             * for (int j = 1; j < posting.freq; j++)
             * System.out.print("," + posting.positions[j]);
             * System.out.println("");
             * }
             */

            // write postings
            WritePostings(postings, segment);

            // write norms of indexed fields
            WriteNorms(segment);
        }
        public IndexItem(Lucene.Net.Documents.Document doc, float score)
        {
            ViewRoles  = doc.Get("ViewRoles");
            SiteId     = Convert.ToInt32(doc.Get("SiteID"), CultureInfo.InvariantCulture);
            PageId     = Convert.ToInt32(doc.Get("PageID"), CultureInfo.InvariantCulture);
            PageName   = doc.Get("PageName");
            PageIndex  = Convert.ToInt32(doc.Get("PageIndex"), CultureInfo.InvariantCulture);
            PageNumber = Convert.ToInt32(doc.Get("PageNumber"), CultureInfo.InvariantCulture);

            string fid = doc.Get("FeatureId");

            if ((fid != null) && (fid.Length > 0))
            {
                FeatureId = fid;
            }
            FeatureName         = doc.Get("FeatureName");
            ItemId              = Convert.ToInt32(doc.Get("ItemID"), CultureInfo.InvariantCulture);
            ModuleId            = Convert.ToInt32(doc.Get("ModuleID"), CultureInfo.InvariantCulture);
            ModuleTitle         = doc.Get("ModuleTitle");
            Title               = doc.Get("Title");
            intro               = doc.Get("Intro");
            ViewPage            = doc.Get("ViewPage");
            QueryStringAddendum = doc.Get("QueryStringAddendum");

            DateTime pubBegin = DateTime.MinValue;

            if (DateTime.TryParse(doc.Get("PublishBeginDate"), out pubBegin))
            {
                this.publishBeginDate = pubBegin;
            }

            DateTime pubEnd = DateTime.MaxValue;

            if (DateTime.TryParse(doc.Get("PublishEndDate"), out pubEnd))
            {
                this.publishEndDate = pubEnd;
            }

            bool useQString;

            if (bool.TryParse(doc.Get("UseQueryStringParams"), out useQString))
            {
                this.useQueryStringParams = useQString;
            }


            boost = doc.GetBoost();
        }
Exemple #3
0
		public void  AddDocument(System.String segment, Document doc)
		{
			// write field names
			fieldInfos = new FieldInfos();
			fieldInfos.Add(doc);
			fieldInfos.Write(directory, segment + ".fnm");
			
			// write field values
			FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
			try
			{
				fieldsWriter.AddDocument(doc);
			}
			finally
			{
				fieldsWriter.Close();
			}
			
			// invert doc into postingTable
			postingTable.Clear(); // clear postingTable
			fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
			fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
			fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets
			
			fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts
			float boost = doc.GetBoost();
			for (int i = 0; i < fieldBoosts.Length; i++)
			{
				fieldBoosts[i] = boost;
			}
			
			InvertDocument(doc);
			
			// sort postingTable into an array
			Posting[] postings = SortPostingTable();
			
			/*
			for (int i = 0; i < postings.length; i++) {
			Posting posting = postings[i];
			System.out.print(posting.term);
			System.out.print(" freq=" + posting.freq);
			System.out.print(" pos=");
			System.out.print(posting.positions[0]);
			for (int j = 1; j < posting.freq; j++)
			System.out.print("," + posting.positions[j]);
			System.out.println("");
			}
			*/
			
			// write postings
			WritePostings(postings, segment);
			
			// write norms of indexed fields
			WriteNorms(segment);
		}
			/// <summary>Initializes shared state for this new document </summary>
			internal void  Init(Document doc, int docID)
			{

                System.Diagnostics.Debug.Assert(!isIdle);
                System.Diagnostics.Debug.Assert(Enclosing_Instance.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
				
				this.docID = docID;
				docBoost = doc.GetBoost();
				numStoredFields = 0;
				numFieldData = 0;
				numVectorFields = 0;
				maxTermPrefix = null;
				
				System.Diagnostics.Debug.Assert(0 == fdtLocal.Length());
				System.Diagnostics.Debug.Assert(0 == fdtLocal.GetFilePointer());
				System.Diagnostics.Debug.Assert(0 == tvfLocal.Length());
				System.Diagnostics.Debug.Assert(0 == tvfLocal.GetFilePointer());
				int thisFieldGen = fieldGen++;
				
				System.Collections.IList docFields = doc.GetFields();
				int numDocFields = docFields.Count;
				bool docHasVectors = false;
				
				// Absorb any new fields first seen in this document.
				// Also absorb any changes to fields we had already
				// seen before (eg suddenly turning on norms or
				// vectors, etc.):
				
				for (int i = 0; i < numDocFields; i++)
				{
					Fieldable field = (Fieldable) docFields[i];
					
					FieldInfo fi = Enclosing_Instance.fieldInfos.Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false);
					if (fi.isIndexed && !fi.omitNorms)
					{
						// Maybe grow our buffered norms
						if (Enclosing_Instance.norms.Length <= fi.number)
						{
							int newSize = (int) ((1 + fi.number) * 1.25);
							BufferedNorms[] newNorms = new BufferedNorms[newSize];
							Array.Copy(Enclosing_Instance.norms, 0, newNorms, 0, Enclosing_Instance.norms.Length);
							Enclosing_Instance.norms = newNorms;
						}
						
						if (Enclosing_Instance.norms[fi.number] == null)
							Enclosing_Instance.norms[fi.number] = new BufferedNorms();
						
						Enclosing_Instance.hasNorms = true;
					}
					
					// Make sure we have a FieldData allocated
					int hashPos = fi.name.GetHashCode() & fieldDataHashMask;
					FieldData fp = fieldDataHash[hashPos];
					while (fp != null && !fp.fieldInfo.name.Equals(fi.name))
						fp = fp.next;
					
					if (fp == null)
					{
						
						fp = new FieldData(this, fi);
						fp.next = fieldDataHash[hashPos];
						fieldDataHash[hashPos] = fp;
						
						if (numAllFieldData == allFieldDataArray.Length)
						{
							int newSize = (int) (allFieldDataArray.Length * 1.5);
							int newHashSize = fieldDataHash.Length * 2;
							
							FieldData[] newArray = new FieldData[newSize];
							FieldData[] newHashArray = new FieldData[newHashSize];
							Array.Copy(allFieldDataArray, 0, newArray, 0, numAllFieldData);
							
							// Rehash
							fieldDataHashMask = newSize - 1;
							for (int j = 0; j < fieldDataHash.Length; j++)
							{
								FieldData fp0 = fieldDataHash[j];
								while (fp0 != null)
								{
									hashPos = fp0.fieldInfo.name.GetHashCode() & fieldDataHashMask;
									FieldData nextFP0 = fp0.next;
									fp0.next = newHashArray[hashPos];
									newHashArray[hashPos] = fp0;
									fp0 = nextFP0;
								}
							}
							
							allFieldDataArray = newArray;
							fieldDataHash = newHashArray;
						}
						allFieldDataArray[numAllFieldData++] = fp;
					}
					else
					{
						System.Diagnostics.Debug.Assert(fp.fieldInfo == fi);
					}
					
					if (thisFieldGen != fp.lastGen)
					{
						
						// First time we're seeing this field for this doc
						fp.lastGen = thisFieldGen;
						fp.fieldCount = 0;
						fp.doVectors = fp.doVectorPositions = fp.doVectorOffsets = false;
						fp.doNorms = fi.isIndexed && !fi.omitNorms;
						
						if (numFieldData == fieldDataArray.Length)
						{
							int newSize = fieldDataArray.Length * 2;
							FieldData[] newArray = new FieldData[newSize];
							Array.Copy(fieldDataArray, 0, newArray, 0, numFieldData);
							fieldDataArray = newArray;
						}
						fieldDataArray[numFieldData++] = fp;
					}
					
					if (field.IsTermVectorStored())
					{
						if (!fp.doVectors && numVectorFields++ == vectorFieldPointers.Length)
						{
							int newSize = (int) (numVectorFields * 1.5);
							vectorFieldPointers = new long[newSize];
							vectorFieldNumbers = new int[newSize];
						}
						fp.doVectors = true;
						docHasVectors = true;
						
						fp.doVectorPositions |= field.IsStorePositionWithTermVector();
						fp.doVectorOffsets |= field.IsStoreOffsetWithTermVector();
					}
					
					if (fp.fieldCount == fp.docFields.Length)
					{
						Fieldable[] newArray = new Fieldable[fp.docFields.Length * 2];
						Array.Copy(fp.docFields, 0, newArray, 0, fp.docFields.Length);
						fp.docFields = newArray;
					}
					
					// Lazily allocate arrays for postings:
					if (field.IsIndexed() && fp.postingsHash == null)
						fp.InitPostingArrays();
					
					fp.docFields[fp.fieldCount++] = field;
				}
				
				// Maybe init the local & global fieldsWriter
				if (localFieldsWriter == null)
				{
					if (Enclosing_Instance.fieldsWriter == null)
					{
						System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment == null);
						System.Diagnostics.Debug.Assert(Enclosing_Instance.segment != null);
						Enclosing_Instance.docStoreSegment = Enclosing_Instance.segment;
						// If we hit an exception while init'ing the
						// fieldsWriter, we must abort this segment
						// because those files will be in an unknown
						// state:
						try
						{
							Enclosing_Instance.fieldsWriter = new FieldsWriter(Enclosing_Instance.directory, Enclosing_Instance.docStoreSegment, Enclosing_Instance.fieldInfos);
						}
						catch (System.Exception t)
						{
							throw new AbortException(t, Enclosing_Instance);
						}
						Enclosing_Instance.files = null;
					}
					localFieldsWriter = new FieldsWriter(null, fdtLocal, Enclosing_Instance.fieldInfos);
				}
				
				// First time we see a doc that has field(s) with
				// stored vectors, we init our tvx writer
				if (docHasVectors)
				{
					if (Enclosing_Instance.tvx == null)
					{
						System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment != null);
						// If we hit an exception while init'ing the term
						// vector output files, we must abort this segment
						// because those files will be in an unknown
						// state:
						try
						{
							Enclosing_Instance.tvx = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
							Enclosing_Instance.tvx.WriteInt(TermVectorsReader.FORMAT_VERSION);
							Enclosing_Instance.tvd = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
							Enclosing_Instance.tvd.WriteInt(TermVectorsReader.FORMAT_VERSION);
							Enclosing_Instance.tvf = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
							Enclosing_Instance.tvf.WriteInt(TermVectorsReader.FORMAT_VERSION);
							
							// We must "catch up" for all docs before us
							// that had no vectors:
							for (int i = 0; i < Enclosing_Instance.numDocsInStore; i++)
							{
								Enclosing_Instance.tvx.WriteLong(Enclosing_Instance.tvd.GetFilePointer());
								Enclosing_Instance.tvd.WriteVInt(0);
							}
						}
						catch (System.Exception t)
						{
							throw new AbortException(t, Enclosing_Instance);
						}
						Enclosing_Instance.files = null;
					}
					
					numVectorFields = 0;
				}
			}