private TermInfo termInfo = new TermInfo(); // minimize consing /// <summary>Merge one term found in one or more segments. The array <code>smis</code> /// contains segments that are positioned at the same term. <code>N</code> /// is the number of cells in the array actually occupied. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> private void MergeTermInfo(SegmentMergeInfo[] smis, int n) { long freqPointer = freqOutput.GetFilePointer(); long proxPointer = proxOutput.GetFilePointer(); int df = AppendPostings(smis, n); // append posting data long skipPointer = WriteSkip(); if (df > 0) { // add an entry to the dictionary with pointers to prox and freq files termInfo.Set(df, freqPointer, proxPointer, (int)(skipPointer - freqPointer)); termInfosWriter.Add(smis[0].term, termInfo); } }
/// <summary>Adds a new <Term, TermInfo> pair to the set. /// Term must be lexicographically greater than all previous Terms added. /// TermInfo pointers must be positive and greater than all previous. /// </summary> public /*internal*/ void Add(Term term, TermInfo ti) { if (!isIndex && term.CompareTo(lastTerm) <= 0) { throw new System.IO.IOException("term out of order"); } if (ti.freqPointer < lastTi.freqPointer) { throw new System.IO.IOException("freqPointer out of order"); } if (ti.proxPointer < lastTi.proxPointer) { throw new System.IO.IOException("proxPointer out of order"); } if (!isIndex && size % indexInterval == 0) { other.Add(lastTerm, lastTi); // add an index term } WriteTerm(term); // write term output.WriteVInt(ti.docFreq); // write doc freq output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.WriteVLong(ti.proxPointer - lastTi.proxPointer); if (ti.docFreq >= skipInterval) { output.WriteVInt(ti.skipOffset); } if (isIndex) { output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer); lastIndexPointer = other.output.GetFilePointer(); // write pointer } lastTi.Set(ti); size++; }
private void WritePostings(Posting[] postings, System.String segment) { OutputStream freq = null, prox = null; TermInfosWriter tis = null; TermVectorsWriter termVectorWriter = null; try { //open files for inverse index storage freq = directory.CreateFile(segment + ".frq"); prox = directory.CreateFile(segment + ".prx"); tis = new TermInfosWriter(directory, segment, fieldInfos); TermInfo ti = new TermInfo(); System.String currentField = null; for (int i = 0; i < postings.Length; i++) { Posting posting = postings[i]; // add an entry to the dictionary with pointers to prox and freq files ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1); tis.Add(posting.term, ti); // add an entry to the freq file int postingFreq = posting.freq; if (postingFreq == 1) { // optimize freq=1 freq.WriteVInt(1); } // set low bit of doc num. else { freq.WriteVInt(0); // the document number freq.WriteVInt(postingFreq); // frequency in doc } int lastPosition = 0; // write positions int[] positions = posting.positions; for (int j = 0; j < postingFreq; j++) { // use delta-encoding int position = positions[j]; prox.WriteVInt(position - lastPosition); lastPosition = position; } // check to see if we switched to a new Field System.String termField = posting.term.Field(); if ((System.Object)currentField != (System.Object)termField) { // changing Field - see if there is something to save currentField = termField; FieldInfo fi = fieldInfos.FieldInfo(currentField); if (fi.storeTermVector) { if (termVectorWriter == null) { termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos); termVectorWriter.OpenDocument(); } termVectorWriter.OpenField(currentField); } else if (termVectorWriter != null) { termVectorWriter.CloseField(); } } if (termVectorWriter != null && termVectorWriter.IsFieldOpen()) { termVectorWriter.AddTerm(posting.term.Text(), postingFreq); } } if (termVectorWriter != null) { termVectorWriter.CloseDocument(); } } finally { // make an effort to close all streams we can but remember and re-throw // the first exception encountered in this process System.IO.IOException keep = null; if (freq != null) { try { freq.Close(); } catch (System.IO.IOException e) { if (keep == null) { keep = e; } } } if (prox != null) { try { prox.Close(); } catch (System.IO.IOException e) { if (keep == null) { keep = e; } } } if (tis != null) { try { tis.Close(); } catch (System.IO.IOException e) { if (keep == null) { keep = e; } } } if (termVectorWriter != null) { try { termVectorWriter.Close(); } catch (System.IO.IOException e) { if (keep == null) { keep = e; } } } if (keep != null) { throw new System.IO.IOException(keep.StackTrace); } } }
private void WritePostings(Posting[] postings, System.String segment) { OutputStream freq = null, prox = null; TermInfosWriter tis = null; TermVectorsWriter termVectorWriter = null; try { //open files for inverse index storage freq = directory.CreateFile(segment + ".frq"); prox = directory.CreateFile(segment + ".prx"); tis = new TermInfosWriter(directory, segment, fieldInfos); TermInfo ti = new TermInfo(); System.String currentField = null; for (int i = 0; i < postings.Length; i++) { Posting posting = postings[i]; // add an entry to the dictionary with pointers to prox and freq files ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), - 1); tis.Add(posting.term, ti); // add an entry to the freq file int postingFreq = posting.freq; if (postingFreq == 1) // optimize freq=1 freq.WriteVInt(1); // set low bit of doc num. else { freq.WriteVInt(0); // the document number freq.WriteVInt(postingFreq); // frequency in doc } int lastPosition = 0; // write positions int[] positions = posting.positions; for (int j = 0; j < postingFreq; j++) { // use delta-encoding int position = positions[j]; prox.WriteVInt(position - lastPosition); lastPosition = position; } // check to see if we switched to a new Field System.String termField = posting.term.Field(); if ((System.Object) currentField != (System.Object) termField) { // changing Field - see if there is something to save currentField = termField; FieldInfo fi = fieldInfos.FieldInfo(currentField); if (fi.storeTermVector) { if (termVectorWriter == null) { termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos); termVectorWriter.OpenDocument(); } termVectorWriter.OpenField(currentField); } else if (termVectorWriter != null) { termVectorWriter.CloseField(); } } if (termVectorWriter != null && termVectorWriter.IsFieldOpen()) { termVectorWriter.AddTerm(posting.term.Text(), postingFreq); } } if (termVectorWriter != null) termVectorWriter.CloseDocument(); } finally { // make an effort to close all streams we can but remember and re-throw // the first exception encountered in this process System.IO.IOException keep = null; if (freq != null) try { freq.Close(); } catch (System.IO.IOException e) { if (keep == null) keep = e; } if (prox != null) try { prox.Close(); } catch (System.IO.IOException e) { if (keep == null) keep = e; } if (tis != null) try { tis.Close(); } catch (System.IO.IOException e) { if (keep == null) keep = e; } if (termVectorWriter != null) try { termVectorWriter.Close(); } catch (System.IO.IOException e) { if (keep == null) keep = e; } if (keep != null) { throw new System.IO.IOException(keep.StackTrace); } } }