/// <summary>
/// Writes one document as the segment named <paramref name="segment"/>:
/// field names, stored field values, postings and norms, in that order.
/// </summary>
/// <param name="segment">Segment name; also used as the base for the ".fnm" file name.</param>
/// <param name="doc">Document to add.</param>
public /*internal*/ void AddDocument(System.String segment, Document doc)
{
    // Collect the field names of this document and persist them.
    fieldInfos = new FieldInfos();
    fieldInfos.Add(doc);
    fieldInfos.Write(directory, segment + ".fnm");

    // Persist the field values; the writer is closed even when adding fails.
    FieldsWriter storedFieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        storedFieldsWriter.AddDocument(doc);
    }
    finally
    {
        storedFieldsWriter.Close();
    }

    // Start from an empty posting table and fresh per-field arrays,
    // each sized by the number of entries in fieldInfos.
    postingTable.Clear();
    int fieldCount = fieldInfos.Size();
    fieldLengths = new int[fieldCount];
    fieldPositions = new int[fieldCount];
    fieldOffsets = new int[fieldCount];
    fieldBoosts = new float[fieldCount];

    // Every field boost starts out as the document boost.
    float documentBoost = doc.GetBoost();
    for (int fieldIndex = 0; fieldIndex < fieldBoosts.Length; fieldIndex++)
    {
        fieldBoosts[fieldIndex] = documentBoost;
    }

    // Invert the document into postingTable.
    InvertDocument(doc);

    // Turn the posting table into a sorted array and write it out.
    Posting[] sortedPostings = SortPostingTable();
    WritePostings(sortedPostings, segment);

    // Finally write the norms of the indexed fields.
    WriteNorms(segment);
}
/// <summary>
/// Populates a new IndexItem from the field values of a Lucene document.
/// </summary>
/// <param name="doc">Document whose fields are read with <c>doc.Get(...)</c>.</param>
/// <param name="score">
/// Not referenced anywhere in this constructor.
/// NOTE(review): confirm whether it was meant to be stored on the item.
/// </param>
public IndexItem(Lucene.Net.Documents.Document doc, float score)
{
    CultureInfo invariant = CultureInfo.InvariantCulture;

    ViewRoles = doc.Get("ViewRoles");
    SiteId = Convert.ToInt32(doc.Get("SiteID"), invariant);
    PageId = Convert.ToInt32(doc.Get("PageID"), invariant);
    PageName = doc.Get("PageName");
    PageIndex = Convert.ToInt32(doc.Get("PageIndex"), invariant);
    PageNumber = Convert.ToInt32(doc.Get("PageNumber"), invariant);

    // FeatureId is only overwritten when the document carries a non-empty value.
    string featureIdText = doc.Get("FeatureId");
    if (!string.IsNullOrEmpty(featureIdText))
    {
        FeatureId = featureIdText;
    }

    FeatureName = doc.Get("FeatureName");
    ItemId = Convert.ToInt32(doc.Get("ItemID"), invariant);
    ModuleId = Convert.ToInt32(doc.Get("ModuleID"), invariant);
    ModuleTitle = doc.Get("ModuleTitle");
    Title = doc.Get("Title");
    intro = doc.Get("Intro");
    ViewPage = doc.Get("ViewPage");
    QueryStringAddendum = doc.Get("QueryStringAddendum");

    // The optional values below are assigned only when parsing succeeds;
    // otherwise the corresponding field keeps whatever value it already had.
    // NOTE(review): these DateTime.TryParse calls use the current culture,
    // unlike the integer fields above - confirm how the dates were written.
    DateTime parsedBegin;
    if (DateTime.TryParse(doc.Get("PublishBeginDate"), out parsedBegin))
    {
        this.publishBeginDate = parsedBegin;
    }

    DateTime parsedEnd;
    if (DateTime.TryParse(doc.Get("PublishEndDate"), out parsedEnd))
    {
        this.publishEndDate = parsedEnd;
    }

    bool parsedUseQueryString;
    if (bool.TryParse(doc.Get("UseQueryStringParams"), out parsedUseQueryString))
    {
        this.useQueryStringParams = parsedUseQueryString;
    }

    boost = doc.GetBoost();
}
/// <summary>
/// Adds a single document under the given segment name. The steps run in a
/// fixed order: field names, stored values, inversion, postings, norms.
/// </summary>
/// <param name="segment">Name of the segment being written.</param>
/// <param name="doc">The document to write.</param>
public void AddDocument(System.String segment, Document doc)
{
    // Step 1: field names go to "<segment>.fnm".
    fieldInfos = new FieldInfos();
    fieldInfos.Add(doc);
    fieldInfos.Write(directory, segment + ".fnm");

    // Step 2: field values. Close() runs whether or not AddDocument throws.
    FieldsWriter valuesWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        valuesWriter.AddDocument(doc);
    }
    finally
    {
        valuesWriter.Close();
    }

    // Step 3: reset the inversion state, then invert the document.
    postingTable.Clear();

    int numFields = fieldInfos.Size();
    fieldLengths = new int[numFields];
    fieldPositions = new int[numFields];
    fieldOffsets = new int[numFields];
    fieldBoosts = new float[numFields];

    // Seed each per-field boost with the boost of the whole document.
    float initialBoost = doc.GetBoost();
    for (int n = 0; n < fieldBoosts.Length; n++)
    {
        fieldBoosts[n] = initialBoost;
    }

    InvertDocument(doc);

    // Step 4: sort the posting table into an array and write the postings.
    Posting[] ordered = SortPostingTable();
    WritePostings(ordered, segment);

    // Step 5: norms of the indexed fields.
    WriteNorms(segment);
}
/// <summary>
/// Initializes shared state for this new document: resets the per-document
/// counters, registers every field of the document with the enclosing
/// instance's fieldInfos, makes sure a FieldData exists for each field, and
/// lazily creates the stored-fields and term-vector outputs.
/// </summary>
/// <param name="doc">Document about to be processed.</param>
/// <param name="docID">Document number stored in this.docID.</param>
/// <exception cref="AbortException">
/// Wraps any exception thrown while creating the shared fields writer or the
/// term-vector output files.
/// </exception>
internal void Init(Document doc, int docID)
{
    System.Diagnostics.Debug.Assert(!isIdle);
    System.Diagnostics.Debug.Assert(Enclosing_Instance.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

    this.docID = docID;
    docBoost = doc.GetBoost();
    numStoredFields = 0;
    numFieldData = 0;
    numVectorFields = 0;
    maxTermPrefix = null;

    // The local buffers are expected to be empty at the start of a document.
    System.Diagnostics.Debug.Assert(0 == fdtLocal.Length());
    System.Diagnostics.Debug.Assert(0 == fdtLocal.GetFilePointer());
    System.Diagnostics.Debug.Assert(0 == tvfLocal.Length());
    System.Diagnostics.Debug.Assert(0 == tvfLocal.GetFilePointer());

    // Generation stamp used below to detect the first occurrence of a field
    // within this document.
    int thisFieldGen = fieldGen++;

    System.Collections.IList docFields = doc.GetFields();
    int numDocFields = docFields.Count;
    bool docHasVectors = false;

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):
    for (int i = 0; i < numDocFields; i++)
    {
        Fieldable field = (Fieldable) docFields[i];

        FieldInfo fi = Enclosing_Instance.fieldInfos.Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false);
        if (fi.isIndexed && !fi.omitNorms)
        {
            // Maybe grow our buffered norms
            if (Enclosing_Instance.norms.Length <= fi.number)
            {
                int newSize = (int) ((1 + fi.number) * 1.25);
                BufferedNorms[] newNorms = new BufferedNorms[newSize];
                Array.Copy(Enclosing_Instance.norms, 0, newNorms, 0, Enclosing_Instance.norms.Length);
                Enclosing_Instance.norms = newNorms;
            }

            if (Enclosing_Instance.norms[fi.number] == null)
            {
                Enclosing_Instance.norms[fi.number] = new BufferedNorms();
            }

            Enclosing_Instance.hasNorms = true;
        }

        // Make sure we have a FieldData allocated: look the field up by name
        // in the chained hash table.
        int hashPos = fi.name.GetHashCode() & fieldDataHashMask;
        FieldData fp = fieldDataHash[hashPos];
        while (fp != null && !fp.fieldInfo.name.Equals(fi.name))
        {
            fp = fp.next;
        }

        if (fp == null)
        {
            // Not seen before: create it and push it on the front of its chain.
            fp = new FieldData(this, fi);
            fp.next = fieldDataHash[hashPos];
            fieldDataHash[hashPos] = fp;

            if (numAllFieldData == allFieldDataArray.Length)
            {
                int newSize = (int) (allFieldDataArray.Length * 1.5);
                int newHashSize = fieldDataHash.Length * 2;

                FieldData[] newArray = new FieldData[newSize];
                FieldData[] newHashArray = new FieldData[newHashSize];
                Array.Copy(allFieldDataArray, 0, newArray, 0, numAllFieldData);

                // Rehash. The mask has to be derived from the new hash table
                // length, not from the new length of allFieldDataArray, so
                // that "hash & mask" is bounded by the table it indexes.
                // NOTE(review): mask-based indexing presumes the table length
                // is a power of two - confirm where fieldDataHash is created.
                fieldDataHashMask = newHashSize - 1;
                for (int j = 0; j < fieldDataHash.Length; j++)
                {
                    FieldData fp0 = fieldDataHash[j];
                    while (fp0 != null)
                    {
                        hashPos = fp0.fieldInfo.name.GetHashCode() & fieldDataHashMask;
                        FieldData nextFP0 = fp0.next;
                        fp0.next = newHashArray[hashPos];
                        newHashArray[hashPos] = fp0;
                        fp0 = nextFP0;
                    }
                }

                allFieldDataArray = newArray;
                fieldDataHash = newHashArray;
            }
            allFieldDataArray[numAllFieldData++] = fp;
        }
        else
        {
            System.Diagnostics.Debug.Assert(fp.fieldInfo == fi);
        }

        if (thisFieldGen != fp.lastGen)
        {
            // First time we're seeing this field for this doc
            fp.lastGen = thisFieldGen;
            fp.fieldCount = 0;
            fp.doVectors = fp.doVectorPositions = fp.doVectorOffsets = false;
            fp.doNorms = fi.isIndexed && !fi.omitNorms;

            if (numFieldData == fieldDataArray.Length)
            {
                int newSize = fieldDataArray.Length * 2;
                FieldData[] newArray = new FieldData[newSize];
                Array.Copy(fieldDataArray, 0, newArray, 0, numFieldData);
                fieldDataArray = newArray;
            }
            fieldDataArray[numFieldData++] = fp;
        }

        if (field.IsTermVectorStored())
        {
            // numVectorFields is incremented only when this field did not
            // already have vectors enabled (short-circuit on !fp.doVectors).
            if (!fp.doVectors && numVectorFields++ == vectorFieldPointers.Length)
            {
                int newSize = (int) (numVectorFields * 1.5);
                vectorFieldPointers = new long[newSize];
                vectorFieldNumbers = new int[newSize];
            }
            fp.doVectors = true;
            docHasVectors = true;

            fp.doVectorPositions |= field.IsStorePositionWithTermVector();
            fp.doVectorOffsets |= field.IsStoreOffsetWithTermVector();
        }

        // Grow the per-field list of occurrences when it is full.
        if (fp.fieldCount == fp.docFields.Length)
        {
            Fieldable[] newArray = new Fieldable[fp.docFields.Length * 2];
            Array.Copy(fp.docFields, 0, newArray, 0, fp.docFields.Length);
            fp.docFields = newArray;
        }

        // Lazily allocate arrays for postings:
        if (field.IsIndexed() && fp.postingsHash == null)
        {
            fp.InitPostingArrays();
        }

        fp.docFields[fp.fieldCount++] = field;
    }

    // Maybe init the local & global fieldsWriter
    if (localFieldsWriter == null)
    {
        if (Enclosing_Instance.fieldsWriter == null)
        {
            System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment == null);
            System.Diagnostics.Debug.Assert(Enclosing_Instance.segment != null);
            Enclosing_Instance.docStoreSegment = Enclosing_Instance.segment;
            // If we hit an exception while init'ing the
            // fieldsWriter, we must abort this segment
            // because those files will be in an unknown
            // state:
            try
            {
                Enclosing_Instance.fieldsWriter = new FieldsWriter(Enclosing_Instance.directory, Enclosing_Instance.docStoreSegment, Enclosing_Instance.fieldInfos);
            }
            catch (System.Exception t)
            {
                throw new AbortException(t, Enclosing_Instance);
            }
            Enclosing_Instance.files = null;
        }
        localFieldsWriter = new FieldsWriter(null, fdtLocal, Enclosing_Instance.fieldInfos);
    }

    // First time we see a doc that has field(s) with
    // stored vectors, we init our tvx writer
    if (docHasVectors)
    {
        if (Enclosing_Instance.tvx == null)
        {
            System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment != null);
            // If we hit an exception while init'ing the term
            // vector output files, we must abort this segment
            // because those files will be in an unknown
            // state:
            try
            {
                Enclosing_Instance.tvx = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
                Enclosing_Instance.tvx.WriteInt(TermVectorsReader.FORMAT_VERSION);
                Enclosing_Instance.tvd = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
                Enclosing_Instance.tvd.WriteInt(TermVectorsReader.FORMAT_VERSION);
                Enclosing_Instance.tvf = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
                Enclosing_Instance.tvf.WriteInt(TermVectorsReader.FORMAT_VERSION);

                // We must "catch up" for all docs before us
                // that had no vectors:
                for (int i = 0; i < Enclosing_Instance.numDocsInStore; i++)
                {
                    Enclosing_Instance.tvx.WriteLong(Enclosing_Instance.tvd.GetFilePointer());
                    Enclosing_Instance.tvd.WriteVInt(0);
                }
            }
            catch (System.Exception t)
            {
                throw new AbortException(t, Enclosing_Instance);
            }
            Enclosing_Instance.files = null;
        }

        numVectorFields = 0;
    }
}