/// <summary>
/// Creates the ".frq" and ".prx" outputs and the term dictionary writer for
/// <c>segment</c>, builds the skip-list writer and the merge queue, and then
/// runs <c>MergeTermInfos()</c>.
/// </summary>
/// <remarks>
/// On the way out every non-null resource field is closed. The closes are
/// chained through nested <c>finally</c> blocks so that a failure while
/// closing one resource no longer leaves the remaining ones open. If more
/// than one step throws, the exception thrown last is the one that
/// propagates to the caller.
/// </remarks>
private void MergeTerms()
{
    try
    {
        freqOutput = directory.CreateOutput(segment + ".frq");
        proxOutput = directory.CreateOutput(segment + ".prx");
        termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);

        // The skip-list writer is configured from the values exposed by
        // the term dictionary writer.
        skipInterval = termInfosWriter.skipInterval;
        maxSkipLevels = termInfosWriter.maxSkipLevels;
        skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);

        queue = new SegmentMergeQueue(readers.Count);

        MergeTermInfos();
    }
    finally
    {
        // Same close order as before (freq, prox, term infos, queue), but
        // each later close is guaranteed to run even if an earlier one throws.
        try
        {
            if (freqOutput != null)
            {
                freqOutput.Close();
            }
        }
        finally
        {
            try
            {
                if (proxOutput != null)
                {
                    proxOutput.Close();
                }
            }
            finally
            {
                try
                {
                    if (termInfosWriter != null)
                    {
                        termInfosWriter.Close();
                    }
                }
                finally
                {
                    if (queue != null)
                    {
                        queue.Close();
                    }
                }
            }
        }
    }
}
/// <summary>Called to complete TermInfos creation. </summary>
/// <remarks>
/// Seeks <c>output</c> to position 4, writes <c>size</c> there and closes
/// <c>output</c>; when this writer is not the index writer
/// (<c>!isIndex</c>) it also closes <c>other</c>.
/// The closes run from <c>finally</c> blocks so that a failure while writing
/// the size (or while closing <c>output</c>) does not leave <c>output</c> or
/// <c>other</c> open. If more than one step throws, the exception thrown
/// last is the one that propagates.
/// </remarks>
public /*internal*/ void Close()
{
    try
    {
        output.Seek(4); // write size after format
        output.WriteLong(size);
    }
    finally
    {
        try
        {
            output.Close();
        }
        finally
        {
            if (!isIndex)
            {
                other.Close();
            }
        }
    }
}
/// <summary>
/// Writes <paramref name="postings"/> to the ".frq" and ".prx" files and the
/// term dictionary of <paramref name="segment"/>, and feeds a
/// <c>TermVectorsWriter</c> for every field whose <c>FieldInfo</c> has
/// <c>storeTermVector</c> set.
/// </summary>
/// <param name="postings">Postings to write, processed in array order.</param>
/// <param name="segment">Segment name used as the file-name prefix.</param>
/// <exception cref="System.IO.IOException">
/// Thrown when closing one of the outputs fails. The first close failure is
/// wrapped (same message, original exception as <c>InnerException</c>) so
/// its message and stack trace are preserved. Because it is thrown from the
/// <c>finally</c> block it replaces any exception raised while writing.
/// </exception>
private void WritePostings(Posting[] postings, System.String segment)
{
    IndexOutput freq = null, prox = null;
    TermInfosWriter tis = null;
    TermVectorsWriter termVectorWriter = null;
    try
    {
        //open files for inverse index storage
        freq = directory.CreateOutput(segment + ".frq");
        prox = directory.CreateOutput(segment + ".prx");
        tis = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
        TermInfo ti = new TermInfo();
        System.String currentField = null;

        for (int i = 0; i < postings.Length; i++)
        {
            Posting posting = postings[i];

            // add an entry to the dictionary with pointers to prox and freq files
            ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
            tis.Add(posting.term, ti);

            // add an entry to the freq file
            int postingFreq = posting.freq;
            if (postingFreq == 1)
            {
                // optimize freq=1
                freq.WriteVInt(1); // set low bit of doc num.
            }
            else
            {
                freq.WriteVInt(0); // the document number
                freq.WriteVInt(postingFreq); // frequency in doc
            }

            int lastPosition = 0; // write positions
            int[] positions = posting.positions;
            for (int j = 0; j < postingFreq; j++)
            {
                // use delta-encoding
                int position = positions[j];
                prox.WriteVInt(position - lastPosition);
                lastPosition = position;
            }

            // check to see if we switched to a new field
            System.String termField = posting.term.Field();
            if (currentField != termField)
            {
                // changing field - see if there is something to save
                currentField = termField;
                FieldInfo fi = fieldInfos.FieldInfo(currentField);
                if (fi.storeTermVector)
                {
                    if (termVectorWriter == null)
                    {
                        // Created lazily on the first field that stores term vectors.
                        termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                        termVectorWriter.OpenDocument();
                    }
                    termVectorWriter.OpenField(currentField);
                }
                else if (termVectorWriter != null)
                {
                    termVectorWriter.CloseField();
                }
            }

            if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
            {
                termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
            }
        }

        if (termVectorWriter != null)
        {
            termVectorWriter.CloseDocument();
        }
    }
    finally
    {
        // make an effort to close all streams we can but remember and re-throw
        // the first exception encountered in this process
        System.IO.IOException keep = null;
        if (freq != null)
        {
            try
            {
                freq.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (prox != null)
        {
            try
            {
                prox.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (tis != null)
        {
            try
            {
                tis.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (termVectorWriter != null)
        {
            try
            {
                termVectorWriter.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (keep != null)
        {
            // Wrap instead of using the stack trace as the message: the
            // original message stays readable and the original exception
            // (with its stack trace) is available as InnerException.
            throw new System.IO.IOException(keep.Message, keep);
        }
    }
}
/// <summary>
/// Creates the ".frq" and ".prx" outputs and the term dictionary writer for
/// <c>segment</c>, creates the merge queue, and then runs
/// <c>MergeTermInfos()</c>.
/// </summary>
/// <remarks>
/// On the way out every non-null resource field is closed. The closes are
/// chained through nested <c>finally</c> blocks so that a failure while
/// closing one resource no longer leaves the remaining ones open. If more
/// than one step throws, the exception thrown last is the one that
/// propagates to the caller.
/// </remarks>
private void MergeTerms()
{
    try
    {
        freqOutput = directory.CreateOutput(segment + ".frq");
        proxOutput = directory.CreateOutput(segment + ".prx");
        termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
        skipInterval = termInfosWriter.skipInterval;
        queue = new SegmentMergeQueue(readers.Count);

        MergeTermInfos();
    }
    finally
    {
        // Same close order as before (freq, prox, term infos, queue), but
        // each later close is guaranteed to run even if an earlier one throws.
        try
        {
            if (freqOutput != null)
            {
                freqOutput.Close();
            }
        }
        finally
        {
            try
            {
                if (proxOutput != null)
                {
                    proxOutput.Close();
                }
            }
            finally
            {
                try
                {
                    if (termInfosWriter != null)
                    {
                        termInfosWriter.Close();
                    }
                }
                finally
                {
                    if (queue != null)
                    {
                        queue.Close();
                    }
                }
            }
        }
    }
}
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...
/// <summary>
/// Writes the buffered postings of every per-field writer found in
/// <paramref name="threadsAndFields"/> to the segment's term dictionary,
/// freq file and (only when <c>fieldInfos.HasProx()</c>) prox file, resets
/// the per-field and per-thread state, and records the written file names
/// in <c>state.flushedFiles</c>.
/// </summary>
/// <param name="threadsAndFields">
/// Map whose keys are cast to <c>FreqProxTermsWriterPerThread</c> and whose
/// values are cast to collections of <c>FreqProxTermsWriterPerField</c>.
/// </param>
/// <param name="state">Flush state supplying the directory, segment name, document count and flushed-file map.</param>
internal override void flush(IDictionary<object, object> threadsAndFields, DocumentsWriter.FlushState state)
{
    // Collect, across all threads, the per-field writers that actually
    // hold postings.
    List<object> fieldsWithPostings = new List<object>();
    foreach (KeyValuePair<object, object> threadEntry in threadsAndFields)
    {
        ICollection<object> threadFields = (ICollection<object>)threadEntry.Value;
        foreach (object candidate in threadFields)
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)candidate;
            if (perField.termsHashPerField.numPostings > 0)
            {
                fieldsWithPostings.Add(perField);
            }
        }
    }

    // Sort by field name (uses the elements' default ordering)
    fieldsWithPostings.Sort();
    int fieldCount = fieldsWithPostings.Count;

    TermInfosWriter termsOut = new TermInfosWriter(state.directory, state.segmentName, fieldInfos, state.docWriter.writer.GetTermIndexInterval());
    IndexOutput freqOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.FREQ_EXTENSION));

    // The prox file only exists when positions are stored.
    IndexOutput proxOut = fieldInfos.HasProx()
        ? state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.PROX_EXTENSION))
        : null;

    DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, state.numDocsInRAM, freqOut, proxOut);

    // Walk the sorted list in runs of entries that share a field name.
    int groupStart = 0;
    while (groupStart < fieldCount)
    {
        FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)fieldsWithPostings[groupStart]).fieldInfo;
        string fieldName = fieldInfo.name;

        int groupEnd = groupStart + 1;
        while (groupEnd < fieldCount && ((FreqProxTermsWriterPerField)fieldsWithPostings[groupEnd]).fieldInfo.name.Equals(fieldName))
        {
            groupEnd++;
        }

        FreqProxTermsWriterPerField[] group = new FreqProxTermsWriterPerField[groupEnd - groupStart];
        for (int k = 0; k < group.Length; k++)
        {
            FreqProxTermsWriterPerField member = (FreqProxTermsWriterPerField)fieldsWithPostings[groupStart + k];
            group[k] = member;

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= member.hasPayloads;
        }

        // If this field has postings then add them to the
        // segment
        AppendPostings(state, group, termsOut, freqOut, proxOut, skipListWriter);

        // Release the in-memory postings of every writer in the run.
        foreach (FreqProxTermsWriterPerField member in group)
        {
            TermsHashPerField hash = member.termsHashPerField;
            int postingCount = hash.numPostings;
            hash.reset();
            hash.shrinkHash(postingCount);
            member.reset();
        }

        groupStart = groupEnd;
    }

    // Reset the per-thread state.
    foreach (KeyValuePair<object, object> threadEntry in threadsAndFields)
    {
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)threadEntry.Key;
        perThread.termsHashPerThread.reset(true);
    }

    freqOut.Close();
    if (proxOut != null)
    {
        state.flushedFiles[state.SegmentFileName(IndexFileNames.PROX_EXTENSION)] = state.SegmentFileName(IndexFileNames.PROX_EXTENSION);
        proxOut.Close();
    }
    termsOut.Close();

    // Record all files we have flushed
    state.flushedFiles[state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION)] = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.FREQ_EXTENSION)] = state.SegmentFileName(IndexFileNames.FREQ_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION);
}
/// <summary>Called when we are done adding everything. </summary>
/// <remarks>
/// Closes <c>termsOut</c> and then <c>termsWriter</c>. The second close runs
/// from a <c>finally</c> block so that <c>termsWriter</c> is still closed
/// when closing <c>termsOut</c> throws; if both throw, the exception from
/// <c>termsWriter.Close()</c> is the one that propagates.
/// </remarks>
internal override void Finish()
{
    try
    {
        termsOut.Close();
    }
    finally
    {
        termsWriter.Close();
    }
}
// FIXME: OG: remove hard-coded file names
/// <summary>
/// Manual round-trip check of the term dictionary writer and reader.
/// Reads one term per line from "words.txt", generates pseudo-random
/// <c>TermInfo</c> values from a fixed seed, writes them under the segment
/// name "words" into the "test.store" directory, then reads them back both
/// sequentially and by lookup, printing the elapsed time of each phase.
/// </summary>
/// <remarks>
/// Elapsed times are converted from <c>DateTime</c> ticks (100 ns units) to
/// milliseconds before printing, so the numbers match their
/// "milliseconds" labels. The word reader, the writer, the reader and the
/// store are closed even when a phase fails.
/// </remarks>
/// <exception cref="System.Exception">
/// Thrown when a term or one of its values read back differs from what was written.
/// </exception>
public static void Test()
{
    System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
    System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

    System.DateTime start = System.DateTime.Now;

    System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

    // One reader over the file stream is enough; disposing it also closes the stream.
    using (System.IO.FileStream ws = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
    using (System.IO.StreamReader wr = new System.IO.StreamReader(ws, System.Text.Encoding.Default))
    {
        for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
        {
            keys.Add(new Term("word", key));
        }
    }

    System.DateTime end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

    start = System.DateTime.Now;

    // Fixed seed so every run generates the same values.
    System.Random gen = new System.Random((System.Int32) 1251971);
    long fp = (gen.Next() & 0xF) + 1;
    long pp = (gen.Next() & 0xF) + 1;

    int[] docFreqs = new int[keys.Count];
    long[] freqPointers = new long[keys.Count];
    long[] proxPointers = new long[keys.Count];
    for (int i = 0; i < keys.Count; i++)
    {
        docFreqs[i] = (gen.Next() & 0xF) + 1;
        freqPointers[i] = fp;
        proxPointers[i] = pp;
        // Pointers grow by 1..16 per term, so they are strictly increasing.
        fp += (gen.Next() & 0xF) + 1;
        pp += (gen.Next() & 0xF) + 1;
    }

    end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to generate values");

    start = System.DateTime.Now;

    Directory store = FSDirectory.GetDirectory("test.store", true);
    try
    {
        FieldInfos fis = new FieldInfos();

        TermInfosWriter writer = new TermInfosWriter(store, "words", fis);
        try
        {
            fis.Add("word", false);

            for (int i = 0; i < keys.Count; i++)
            {
                writer.Add((Term) keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
            }
        }
        finally
        {
            writer.Close();
        }

        end = System.DateTime.Now;

        System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
        System.Console.Out.WriteLine(" milliseconds to write table");

        System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

        start = System.DateTime.Now;

        TermInfosReader reader = new TermInfosReader(store, "words", fis);
        try
        {
            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to open table");

            start = System.DateTime.Now;

            // Sequential pass: the enumerator must return the terms in the
            // order they were written, with the values that were written.
            SegmentTermEnum enumerator = reader.Terms();
            for (int i = 0; i < keys.Count; i++)
            {
                enumerator.Next();
                Term key = (Term) keys[i];
                if (!key.Equals(enumerator.Term()))
                {
                    throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
                }
                TermInfo ti = enumerator.TermInfo();
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

            start = System.DateTime.Now;

            // Lookup pass: fetch every term by key and compare its values.
            for (int i = 0; i < keys.Count; i++)
            {
                Term key = (Term) keys[i];
                TermInfo ti = reader.Get(key);
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            // Elapsed milliseconds divided by the number of lookups.
            System.Console.Out.Write(((end.Ticks - start.Ticks) / (float) System.TimeSpan.TicksPerMillisecond) / keys.Count);
            System.Console.Out.WriteLine(" average milliseconds per lookup");

            TermEnum e = reader.Terms(new Term("word", "azz"));
            System.Console.Out.WriteLine("Word after azz is " + e.Term().text);
        }
        finally
        {
            reader.Close();
        }
    }
    finally
    {
        store.Close();
    }
}
/// <summary>Creates a segment from all Postings in the Postings
/// hashes across all ThreadStates &amp; FieldDatas.
/// </summary>
/// <remarks>
/// Writes the term dictionary, ".frq" and ".prx" files for <c>segment</c>,
/// writes norms when <c>hasNorms</c> is set, optionally logs size
/// statistics to <c>infoStream</c>, resets the in-RAM document state and
/// possibly shrinks <c>postingsFreeList</c>.
/// </remarks>
/// <returns>The names of the files written for the segment.</returns>
private System.Collections.IList WriteSegment()
{
    System.Diagnostics.Debug.Assert(AllThreadsIdle());

    System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);

    System.String segmentName;

    segmentName = segment;

    TermInfosWriter termsOut = new TermInfosWriter(directory, segmentName, fieldInfos, writer.GetTermIndexInterval());

    IndexOutput freqOut = directory.CreateOutput(segmentName + ".frq");
    IndexOutput proxOut = directory.CreateOutput(segmentName + ".prx");

    // Gather all FieldData's that have postings, across all
    // ThreadStates
    System.Collections.ArrayList allFields = new System.Collections.ArrayList();
    System.Diagnostics.Debug.Assert(AllThreadsIdle());
    for (int i = 0; i < threadStates.Length; i++)
    {
        ThreadState state = threadStates[i];
        state.TrimFields();
        int numFields = state.numAllFieldData;
        for (int j = 0; j < numFields; j++)
        {
            ThreadState.FieldData fp = state.allFieldDataArray[j];
            if (fp.numPostings > 0)
            {
                allFields.Add(fp);
            }
        }
    }

    // Sort by field name
    allFields.Sort();
    int numAllFields = allFields.Count;

    skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, numDocsInRAM, freqOut, proxOut);

    // Walk the sorted list in runs of entries that share a field name and
    // append each run's postings together.
    int start = 0;
    while (start < numAllFields)
    {
        System.String fieldName = ((ThreadState.FieldData) allFields[start]).fieldInfo.name;

        int end = start + 1;
        while (end < numAllFields && ((ThreadState.FieldData) allFields[end]).fieldInfo.name.Equals(fieldName))
        {
            end++;
        }

        ThreadState.FieldData[] fields = new ThreadState.FieldData[end - start];
        for (int i = start; i < end; i++)
        {
            fields[i - start] = (ThreadState.FieldData) allFields[i];
        }

        // If this field has postings then add them to the
        // segment
        AppendPostings(fields, termsOut, freqOut, proxOut);

        for (int i = 0; i < fields.Length; i++)
        {
            fields[i].ResetPostingArrays();
        }

        start = end;
    }

    freqOut.Close();
    proxOut.Close();
    termsOut.Close();

    // Record all files we have flushed
    System.Collections.IList flushedFiles = new System.Collections.ArrayList();
    flushedFiles.Add(SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION));
    flushedFiles.Add(SegmentFileName(IndexFileNames.FREQ_EXTENSION));
    flushedFiles.Add(SegmentFileName(IndexFileNames.PROX_EXTENSION));
    flushedFiles.Add(SegmentFileName(IndexFileNames.TERMS_EXTENSION));
    flushedFiles.Add(SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));

    if (hasNorms)
    {
        WriteNorms(segmentName, numDocsInRAM);
        flushedFiles.Add(SegmentFileName(IndexFileNames.NORMS_EXTENSION));
    }

    if (infoStream != null)
    {
        long newSegmentSize = SegmentSize(segmentName);

        // The new/old ratio is computed in floating point. Dividing the two
        // sizes directly truncates to a whole number when numBytesUsed is an
        // integral type (and throws on zero), which made the ratio useless.
        // NOTE(review): the "{3:%}" format has no digit placeholder, so it
        // may print only the percent sign — confirm against how nf formats.
        double newToOldRatio = (double) newSegmentSize / numBytesUsed;

        System.String message = String.Format(nf, " oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}", new Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), newToOldRatio });
        infoStream.WriteLine(message);
    }

    ResetPostingsData();

    nextDocID = 0;
    nextWriteDocID = 0;
    numDocsInRAM = 0;
    files = null;

    // Maybe downsize postingsFreeList array
    if (postingsFreeList.Length > 1.5 * postingsFreeCount)
    {
        // Shrink in 20% steps until the array is at most 1.25x the number
        // of free postings it has to hold.
        int newSize = postingsFreeList.Length;
        while (newSize > 1.25 * postingsFreeCount)
        {
            newSize = (int) (newSize * 0.8);
        }
        Posting[] newArray = new Posting[newSize];
        Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
        postingsFreeList = newArray;
    }

    return flushedFiles;
}
/// <summary>
/// Creates the ".frq" output, the ".prx" output (only when <c>HasProx()</c>
/// is true) and the term dictionary writer for <c>segment</c>, builds the
/// skip-list writer and the merge queue, and then runs
/// <c>MergeTermInfos()</c>.
/// </summary>
/// <remarks>
/// On the way out every non-null resource field is closed. The closes are
/// chained through nested <c>finally</c> blocks so that a failure while
/// closing one resource no longer leaves the remaining ones open. If more
/// than one step throws, the exception thrown last is the one that
/// propagates to the caller.
/// </remarks>
private void MergeTerms()
{
    try
    {
        freqOutput = directory.CreateOutput(segment + ".frq");
        if (HasProx())
        {
            proxOutput = directory.CreateOutput(segment + ".prx");
        }
        termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);

        // The skip-list writer is configured from the values exposed by
        // the term dictionary writer. proxOutput is passed as-is; this
        // method does not assign it when HasProx() is false.
        skipInterval = termInfosWriter.skipInterval;
        maxSkipLevels = termInfosWriter.maxSkipLevels;
        skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);

        queue = new SegmentMergeQueue(readers.Count);

        MergeTermInfos();
    }
    finally
    {
        // Same close order as before (freq, prox, term infos, queue), but
        // each later close is guaranteed to run even if an earlier one throws.
        try
        {
            if (freqOutput != null)
            {
                freqOutput.Close();
            }
        }
        finally
        {
            try
            {
                if (proxOutput != null)
                {
                    proxOutput.Close();
                }
            }
            finally
            {
                try
                {
                    if (termInfosWriter != null)
                    {
                        termInfosWriter.Close();
                    }
                }
                finally
                {
                    if (queue != null)
                    {
                        queue.Close();
                    }
                }
            }
        }
    }
}
// FIXME: OG: remove hard-coded file names
/// <summary>
/// Manual round-trip check of the term dictionary writer and reader.
/// Reads one term per line from "words.txt", generates pseudo-random
/// <c>TermInfo</c> values from a fixed seed, writes them under the segment
/// name "words" into the "test.store" directory, then reads them back both
/// sequentially and by lookup, printing the elapsed time of each phase.
/// </summary>
/// <remarks>
/// Elapsed times are converted from <c>DateTime</c> ticks (100 ns units) to
/// milliseconds before printing, so the numbers match their
/// "milliseconds" labels. The word reader, the writer, the reader and the
/// store are closed even when a phase fails.
/// </remarks>
/// <exception cref="System.Exception">
/// Thrown when a term or one of its values read back differs from what was written.
/// </exception>
public static void Test()
{
    System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
    System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

    System.DateTime start = System.DateTime.Now;

    System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

    // One reader over the file stream is enough; disposing it also closes the stream.
    using (System.IO.FileStream ws = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
    using (System.IO.StreamReader wr = new System.IO.StreamReader(ws, System.Text.Encoding.Default))
    {
        for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
        {
            keys.Add(new Term("word", key));
        }
    }

    System.DateTime end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

    start = System.DateTime.Now;

    // Fixed seed so every run generates the same values.
    System.Random gen = new System.Random((System.Int32) 1251971);
    long fp = (gen.Next() & 0xF) + 1;
    long pp = (gen.Next() & 0xF) + 1;

    int[] docFreqs = new int[keys.Count];
    long[] freqPointers = new long[keys.Count];
    long[] proxPointers = new long[keys.Count];
    for (int i = 0; i < keys.Count; i++)
    {
        docFreqs[i] = (gen.Next() & 0xF) + 1;
        freqPointers[i] = fp;
        proxPointers[i] = pp;
        // Pointers grow by 1..16 per term, so they are strictly increasing.
        fp += (gen.Next() & 0xF) + 1;
        pp += (gen.Next() & 0xF) + 1;
    }

    end = System.DateTime.Now;

    System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine(" milliseconds to generate values");

    start = System.DateTime.Now;

    Directory store = FSDirectory.GetDirectory("test.store", true);
    try
    {
        FieldInfos fis = new FieldInfos();

        TermInfosWriter writer = new TermInfosWriter(store, "words", fis);
        try
        {
            fis.Add("word", false);

            for (int i = 0; i < keys.Count; i++)
            {
                writer.Add((Term) keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
            }
        }
        finally
        {
            writer.Close();
        }

        end = System.DateTime.Now;

        System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
        System.Console.Out.WriteLine(" milliseconds to write table");

        System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

        start = System.DateTime.Now;

        TermInfosReader reader = new TermInfosReader(store, "words", fis);
        try
        {
            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to open table");

            start = System.DateTime.Now;

            // Sequential pass: the enumerator must return the terms in the
            // order they were written, with the values that were written.
            SegmentTermEnum enumerator = reader.Terms();
            for (int i = 0; i < keys.Count; i++)
            {
                enumerator.Next();
                Term key = (Term) keys[i];
                if (!key.Equals(enumerator.Term()))
                {
                    throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
                }
                TermInfo ti = enumerator.TermInfo();
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

            start = System.DateTime.Now;

            // Lookup pass: fetch every term by key and compare its values.
            for (int i = 0; i < keys.Count; i++)
            {
                Term key = (Term) keys[i];
                TermInfo ti = reader.Get(key);
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            // Elapsed milliseconds divided by the number of lookups.
            System.Console.Out.Write(((end.Ticks - start.Ticks) / (float) System.TimeSpan.TicksPerMillisecond) / keys.Count);
            System.Console.Out.WriteLine(" average milliseconds per lookup");

            TermEnum e = reader.Terms(new Term("word", "azz"));
            System.Console.Out.WriteLine("Word after azz is " + e.Term().text);
        }
        finally
        {
            reader.Close();
        }
    }
    finally
    {
        store.Close();
    }
}
/// <summary>
/// Writes <paramref name="postings"/> to the ".frq" and ".prx" files and the
/// term dictionary of <paramref name="segment"/>, and feeds a
/// <c>TermVectorsWriter</c> for every field whose <c>FieldInfo</c> has
/// <c>storeTermVector</c> set.
/// </summary>
/// <param name="postings">Postings to write, processed in array order.</param>
/// <param name="segment">Segment name used as the file-name prefix.</param>
/// <exception cref="System.IO.IOException">
/// Thrown when closing one of the outputs fails. The first close failure is
/// wrapped (same message, original exception as <c>InnerException</c>) so
/// its message and stack trace are preserved. Because it is thrown from the
/// <c>finally</c> block it replaces any exception raised while writing.
/// </exception>
private void WritePostings(Posting[] postings, System.String segment)
{
    IndexOutput freq = null, prox = null;
    TermInfosWriter tis = null;
    TermVectorsWriter termVectorWriter = null;
    try
    {
        //open files for inverse index storage
        freq = directory.CreateOutput(segment + ".frq");
        prox = directory.CreateOutput(segment + ".prx");
        tis = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
        TermInfo ti = new TermInfo();
        System.String currentField = null;

        for (int i = 0; i < postings.Length; i++)
        {
            Posting posting = postings[i];

            // add an entry to the dictionary with pointers to prox and freq files
            ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
            tis.Add(posting.term, ti);

            // add an entry to the freq file
            int postingFreq = posting.freq;
            if (postingFreq == 1)
            {
                // optimize freq=1
                freq.WriteVInt(1); // set low bit of doc num.
            }
            else
            {
                freq.WriteVInt(0); // the document number
                freq.WriteVInt(postingFreq); // frequency in doc
            }

            int lastPosition = 0; // write positions
            int[] positions = posting.positions;
            for (int j = 0; j < postingFreq; j++)
            {
                // use delta-encoding
                int position = positions[j];
                prox.WriteVInt(position - lastPosition);
                lastPosition = position;
            }

            // check to see if we switched to a new field
            System.String termField = posting.term.Field();
            if (currentField != termField)
            {
                // changing field - see if there is something to save
                currentField = termField;
                FieldInfo fi = fieldInfos.FieldInfo(currentField);
                if (fi.storeTermVector)
                {
                    if (termVectorWriter == null)
                    {
                        // Created lazily on the first field that stores term vectors.
                        termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                        termVectorWriter.OpenDocument();
                    }
                    termVectorWriter.OpenField(currentField);
                }
                else if (termVectorWriter != null)
                {
                    termVectorWriter.CloseField();
                }
            }

            if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
            {
                termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
            }
        }

        if (termVectorWriter != null)
        {
            termVectorWriter.CloseDocument();
        }
    }
    finally
    {
        // make an effort to close all streams we can but remember and re-throw
        // the first exception encountered in this process
        System.IO.IOException keep = null;
        if (freq != null)
        {
            try
            {
                freq.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (prox != null)
        {
            try
            {
                prox.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (tis != null)
        {
            try
            {
                tis.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (termVectorWriter != null)
        {
            try
            {
                termVectorWriter.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (keep != null)
        {
            // Wrap instead of using the stack trace as the message: the
            // original message stays readable and the original exception
            // (with its stack trace) is available as InnerException.
            throw new System.IO.IOException(keep.Message, keep);
        }
    }
}
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...
/// <summary>
/// Writes the buffered postings of every per-field writer found in
/// <paramref name="threadsAndFields"/> to the segment's term dictionary,
/// freq file and (only when <c>fieldInfos.HasProx()</c>) prox file, resets
/// the per-field and per-thread state, and records the written file names
/// in <c>state.flushedFiles</c>.
/// </summary>
/// <param name="threadsAndFields">
/// Map whose keys are cast to <c>FreqProxTermsWriterPerThread</c> and whose
/// values are cast to collections of <c>FreqProxTermsWriterPerField</c>.
/// </param>
/// <param name="state">Flush state supplying the directory, segment name, document count and flushed-file map.</param>
internal override void flush(IDictionary<object, object> threadsAndFields, DocumentsWriter.FlushState state)
{
    // Collect, across all threads, the per-field writers that actually
    // hold postings.
    List<object> fieldsWithPostings = new List<object>();
    foreach (KeyValuePair<object, object> threadEntry in threadsAndFields)
    {
        ICollection<object> threadFields = (ICollection<object>)threadEntry.Value;
        foreach (object candidate in threadFields)
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)candidate;
            if (perField.termsHashPerField.numPostings > 0)
            {
                fieldsWithPostings.Add(perField);
            }
        }
    }

    // Sort by field name (uses the elements' default ordering)
    fieldsWithPostings.Sort();
    int fieldCount = fieldsWithPostings.Count;

    TermInfosWriter termsOut = new TermInfosWriter(state.directory, state.segmentName, fieldInfos, state.docWriter.writer.GetTermIndexInterval());
    IndexOutput freqOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.FREQ_EXTENSION));

    // The prox file only exists when positions are stored.
    IndexOutput proxOut = fieldInfos.HasProx()
        ? state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.PROX_EXTENSION))
        : null;

    DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, state.numDocsInRAM, freqOut, proxOut);

    // Walk the sorted list in runs of entries that share a field name.
    int groupStart = 0;
    while (groupStart < fieldCount)
    {
        FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)fieldsWithPostings[groupStart]).fieldInfo;
        string fieldName = fieldInfo.name;

        int groupEnd = groupStart + 1;
        while (groupEnd < fieldCount && ((FreqProxTermsWriterPerField)fieldsWithPostings[groupEnd]).fieldInfo.name.Equals(fieldName))
        {
            groupEnd++;
        }

        FreqProxTermsWriterPerField[] group = new FreqProxTermsWriterPerField[groupEnd - groupStart];
        for (int k = 0; k < group.Length; k++)
        {
            FreqProxTermsWriterPerField member = (FreqProxTermsWriterPerField)fieldsWithPostings[groupStart + k];
            group[k] = member;

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= member.hasPayloads;
        }

        // If this field has postings then add them to the
        // segment
        AppendPostings(state, group, termsOut, freqOut, proxOut, skipListWriter);

        // Release the in-memory postings of every writer in the run.
        foreach (FreqProxTermsWriterPerField member in group)
        {
            TermsHashPerField hash = member.termsHashPerField;
            int postingCount = hash.numPostings;
            hash.reset();
            hash.shrinkHash(postingCount);
            member.reset();
        }

        groupStart = groupEnd;
    }

    // Reset the per-thread state.
    foreach (KeyValuePair<object, object> threadEntry in threadsAndFields)
    {
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)threadEntry.Key;
        perThread.termsHashPerThread.reset(true);
    }

    freqOut.Close();
    if (proxOut != null)
    {
        state.flushedFiles[state.SegmentFileName(IndexFileNames.PROX_EXTENSION)] = state.SegmentFileName(IndexFileNames.PROX_EXTENSION);
        proxOut.Close();
    }
    termsOut.Close();

    // Record all files we have flushed
    state.flushedFiles[state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION)] = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.FREQ_EXTENSION)] = state.SegmentFileName(IndexFileNames.FREQ_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION);
}