/// <summary>Repositions this enumerator for the postings described by <c>ti</c>.
/// The base implementation is invoked first. When <c>ti</c> is not null the
/// proximity stream is then moved to <c>ti.proxPointer</c>. The proximity
/// counter is reset to zero in every case.
/// </summary>
/// <param name="ti">term info to seek to; may be null</param>
internal override void Seek(TermInfo ti)
{
    base.Seek(ti);

    if (ti != null)
    {
        proxStream.Seek(ti.proxPointer);
    }

    proxCount = 0;
}
/// <summary>Overwrites this instance's docFreq, freqPointer, proxPointer and
/// skipOffset with the corresponding values read from <c>ti</c>.
/// </summary>
/// <param name="ti">source of the values; dereferenced without a null check</param>
internal void Set(TermInfo ti)
{
    this.docFreq = ti.docFreq;
    this.freqPointer = ti.freqPointer;
    this.proxPointer = ti.proxPointer;
    this.skipOffset = ti.skipOffset;
}
/// <summary>Looks up <c>t</c> through <c>tis</c> and reports its document frequency.</summary>
/// <param name="t">the term to look up</param>
/// <returns>the <c>docFreq</c> of the entry found for <c>t</c>, or 0 when the
/// lookup returns null</returns>
public override int DocFreq(Term t)
{
    TermInfo info = tis.Get(t);
    return (info != null) ? info.docFreq : 0;
}
/// <summary>Resets this enumerator and positions it on the postings described
/// by <c>ti</c>. The running count is always cleared. A null <c>ti</c> only
/// sets the document frequency to zero and leaves everything else untouched.
/// </summary>
/// <param name="ti">term info to seek to; may be null</param>
internal virtual void Seek(TermInfo ti)
{
    count = 0;

    if (ti == null)
    {
        // Nothing to seek to: only the document frequency is zeroed.
        df = 0;
        return;
    }

    df = ti.docFreq;
    doc = 0;
    skipDoc = 0;
    skipCount = 0;
    numSkips = df / skipInterval;

    freqPointer = ti.freqPointer;
    proxPointer = ti.proxPointer;
    // skipOffset is relative to the start of this term's freq data.
    skipPointer = freqPointer + ti.skipOffset;

    freqStream.Seek(freqPointer);
    haveSkipped = false;
}
/// <summary>Resets this enumerator and positions it on the postings described
/// by <c>ti</c>. The running count is always cleared. A null <c>ti</c> only
/// sets the document frequency to zero and leaves everything else untouched.
/// </summary>
/// <param name="ti">term info to seek to; may be null</param>
internal virtual void Seek(TermInfo ti)
{
    count = 0;

    if (ti == null)
    {
        // Nothing to seek to: only the document frequency is zeroed.
        df = 0;
        return;
    }

    df = ti.docFreq;
    doc = 0;
    skipDoc = 0;
    skipCount = 0;
    numSkips = df / skipInterval;

    freqPointer = ti.freqPointer;
    proxPointer = ti.proxPointer;
    // skipOffset is relative to the start of this term's freq data.
    skipPointer = freqPointer + ti.skipOffset;

    freqStream.Seek(freqPointer);
    haveSkipped = false;
}
/// <summary>Appends a (Term, TermInfo) pair to the set.
/// On a non-index writer the term must compare strictly greater than the
/// previously added term. On every writer the freq and prox pointers must
/// not be smaller than those of the previously added TermInfo (equal
/// values are accepted).
/// </summary>
/// <param name="term">the term to append</param>
/// <param name="ti">the info recorded for <c>term</c></param>
/// <exception cref="System.IO.IOException">if the term or either pointer is out of order</exception>
public /*internal*/ void Add(Term term, TermInfo ti)
{
    // Term ordering is only enforced when this is not the index writer.
    if (!isIndex)
    {
        if (term.CompareTo(lastTerm) <= 0)
        {
            throw new System.IO.IOException("term out of order");
        }
    }
    if (lastTi.freqPointer > ti.freqPointer)
    {
        throw new System.IO.IOException("freqPointer out of order");
    }
    if (lastTi.proxPointer > ti.proxPointer)
    {
        throw new System.IO.IOException("proxPointer out of order");
    }

    // Every indexInterval-th entry, hand the previous pair to the other writer.
    if (!isIndex)
    {
        if (size % indexInterval == 0)
        {
            other.Add(lastTerm, lastTi); // add an index term
        }
    }

    WriteTerm(term); // write term
    output.WriteVInt(ti.docFreq); // write doc freq

    // Pointers are stored as differences from the previous entry.
    long freqDelta = ti.freqPointer - lastTi.freqPointer;
    long proxDelta = ti.proxPointer - lastTi.proxPointer;
    output.WriteVLong(freqDelta);
    output.WriteVLong(proxDelta);

    // The skip offset is written only when docFreq reaches skipInterval.
    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }

    if (isIndex)
    {
        // Record how far the other writer's output advanced since the last index entry.
        output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer(); // write pointer
    }

    lastTi.Set(ti);
    size++;
}
/// <summary>Copies the enumeration's current TermInfo into <c>ti</c> by calling
/// <c>ti.Set</c>. The copied value is not meaningful until the enumeration
/// has been advanced for the first time.
/// </summary>
/// <param name="ti">destination that receives the current values</param>
internal void TermInfo(TermInfo ti)
{
    ti.Set(termInfo);
}
/// <summary>Repositions this enumeration: moves the input to <c>pointer</c>,
/// adopts <c>p</c> as the position and <c>t</c> as the current term, clears
/// the previous term, copies <c>ti</c> into the current term info and then
/// calls <c>GrowBuffer</c> with the length of the term's text.
/// </summary>
/// <param name="pointer">offset passed to the input's Seek</param>
/// <param name="p">new position value</param>
/// <param name="t">new current term; its text is dereferenced, so it must not be null</param>
/// <param name="ti">term info copied into this enumeration's term info</param>
internal void Seek(long pointer, int p, Term t, TermInfo ti)
{
    input.Seek(pointer);

    position = p;
    term = t;
    prev = null;
    termInfo.Set(ti);

    // Per the original note this copies the term text into the buffer;
    // NOTE(review): confirm against GrowBuffer's definition.
    int textLength = term.text.Length;
    GrowBuffer(textLength);
}
/// <summary>Appends a (Term, TermInfo) pair to the set.
/// On a non-index writer the term must compare strictly greater than the
/// previously added term. On every writer the freq and prox pointers must
/// not be smaller than those of the previously added TermInfo (equal
/// values are accepted).
/// </summary>
/// <param name="term">the term to append</param>
/// <param name="ti">the info recorded for <c>term</c></param>
/// <exception cref="System.IO.IOException">if the term or either pointer is out of order</exception>
public /*internal*/ void Add(Term term, TermInfo ti)
{
    // Validate ordering before anything is written.
    if (!isIndex && term.CompareTo(lastTerm) <= 0)
    {
        throw new System.IO.IOException("term out of order");
    }
    if (ti.freqPointer < lastTi.freqPointer)
    {
        throw new System.IO.IOException("freqPointer out of order");
    }
    if (ti.proxPointer < lastTi.proxPointer)
    {
        throw new System.IO.IOException("proxPointer out of order");
    }

    if (!isIndex && size % indexInterval == 0)
    {
        // add an index term
        other.Add(lastTerm, lastTi);
    }

    // write term
    WriteTerm(term);

    // write doc freq
    output.WriteVInt(ti.docFreq);

    // write pointers, each as the difference from the previous entry
    long freqPointerDelta = ti.freqPointer - lastTi.freqPointer;
    output.WriteVLong(freqPointerDelta);
    long proxPointerDelta = ti.proxPointer - lastTi.proxPointer;
    output.WriteVLong(proxPointerDelta);

    // The skip offset is stored only when docFreq reaches skipInterval.
    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }

    if (isIndex)
    {
        // write pointer: distance the other writer's output moved since the last index entry
        output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer();
    }

    lastTi.Set(ti);
    size++;
}
/// <summary>Writes <c>postings</c> to the inverted-index files of <c>segment</c>.
/// Creates the <c>.frq</c> and <c>.prx</c> files and a TermInfosWriter, then for
/// each posting adds a dictionary entry, writes its frequency record and its
/// delta-encoded positions, and feeds a TermVectorsWriter for fields whose
/// FieldInfo has <c>storeTermVector</c> set.
/// All opened writers are closed in a finally block; the first IOException
/// raised while closing is rethrown as a new IOException that carries the
/// original message and has the original exception as its InnerException.
/// </summary>
/// <param name="postings">postings to write, processed in array order</param>
/// <param name="segment">segment name used as the file-name prefix</param>
/// <exception cref="System.IO.IOException">if closing any of the writers fails</exception>
private void WritePostings(Posting[] postings, System.String segment)
{
    OutputStream freq = null, prox = null;
    TermInfosWriter tis = null;
    TermVectorsWriter termVectorWriter = null;
    try
    {
        // open files for inverse index storage
        freq = directory.CreateFile(segment + ".frq");
        prox = directory.CreateFile(segment + ".prx");
        tis = new TermInfosWriter(directory, segment, fieldInfos);
        TermInfo ti = new TermInfo();
        System.String currentField = null;

        for (int i = 0; i < postings.Length; i++)
        {
            Posting posting = postings[i];

            // add an entry to the dictionary with pointers to prox and freq files
            // NOTE(review): arguments presumably are (docFreq, freqPointer, proxPointer, skipOffset) - confirm.
            ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
            tis.Add(posting.term, ti);

            // add an entry to the freq file
            int postingFreq = posting.freq;
            if (postingFreq == 1)
            {
                // optimize freq=1: set low bit of doc num.
                freq.WriteVInt(1);
            }
            else
            {
                freq.WriteVInt(0); // the document number
                freq.WriteVInt(postingFreq); // frequency in doc
            }

            // write positions, using delta-encoding
            int lastPosition = 0;
            int[] positions = posting.positions;
            for (int j = 0; j < postingFreq; j++)
            {
                int position = positions[j];
                prox.WriteVInt(position - lastPosition);
                lastPosition = position;
            }

            // check to see if we switched to a new Field
            // (reference comparison; presumably field names are interned - confirm)
            System.String termField = posting.term.Field();
            if ((System.Object) currentField != (System.Object) termField)
            {
                // changing Field - see if there is something to save
                currentField = termField;
                FieldInfo fi = fieldInfos.FieldInfo(currentField);
                if (fi.storeTermVector)
                {
                    if (termVectorWriter == null)
                    {
                        // The vector writer is created lazily, on the first field that needs it.
                        termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                        termVectorWriter.OpenDocument();
                    }
                    termVectorWriter.OpenField(currentField);
                }
                else if (termVectorWriter != null)
                {
                    termVectorWriter.CloseField();
                }
            }

            if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
            {
                termVectorWriter.AddTerm(posting.term.Text(), postingFreq);
            }
        }

        if (termVectorWriter != null)
        {
            termVectorWriter.CloseDocument();
        }
    }
    finally
    {
        // make an effort to close all streams we can but remember and re-throw
        // the first exception encountered in this process
        System.IO.IOException keep = null;
        if (freq != null)
        {
            try
            {
                freq.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (prox != null)
        {
            try
            {
                prox.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (tis != null)
        {
            try
            {
                tis.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (termVectorWriter != null)
        {
            try
            {
                termVectorWriter.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (keep != null)
        {
            // Keep the original message and chain the first failure as the inner
            // exception, instead of using the stack-trace text as the message.
            throw new System.IO.IOException(keep.Message, keep);
        }
    }
}
/// <summary>Looks up <c>term</c> through the parent's <c>tis</c> and seeks to
/// the result via the TermInfo overload of Seek. The lookup result is passed
/// on unchanged, including when it is null.
/// </summary>
/// <param name="term">the term whose postings should be positioned on</param>
public virtual void Seek(Term term)
{
    TermInfo info = parent.tis.Get(term);
    Seek(info);
}
/// <summary>Writes <c>postings</c> to the inverted-index files of <c>segment</c>.
/// Creates the <c>.frq</c> and <c>.prx</c> files and a TermInfosWriter, then for
/// each posting adds a dictionary entry, writes its frequency record and its
/// delta-encoded positions, and feeds a TermVectorsWriter for fields whose
/// FieldInfo has <c>storeTermVector</c> set.
/// All opened writers are closed in a finally block; the first IOException
/// raised while closing is rethrown as a new IOException that carries the
/// original message and has the original exception as its InnerException.
/// </summary>
/// <param name="postings">postings to write, processed in array order</param>
/// <param name="segment">segment name used as the file-name prefix</param>
/// <exception cref="System.IO.IOException">if closing any of the writers fails</exception>
private void WritePostings(Posting[] postings, System.String segment)
{
    OutputStream freq = null, prox = null;
    TermInfosWriter tis = null;
    TermVectorsWriter termVectorWriter = null;
    try
    {
        // open files for inverse index storage
        freq = directory.CreateFile(segment + ".frq");
        prox = directory.CreateFile(segment + ".prx");
        tis = new TermInfosWriter(directory, segment, fieldInfos);
        TermInfo ti = new TermInfo();
        System.String currentField = null;

        for (int i = 0; i < postings.Length; i++)
        {
            Posting posting = postings[i];

            // add an entry to the dictionary with pointers to prox and freq files
            // NOTE(review): arguments presumably are (docFreq, freqPointer, proxPointer, skipOffset) - confirm.
            ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
            tis.Add(posting.term, ti);

            // add an entry to the freq file
            int postingFreq = posting.freq;
            if (postingFreq == 1)
            {
                // optimize freq=1: set low bit of doc num.
                freq.WriteVInt(1);
            }
            else
            {
                freq.WriteVInt(0); // the document number
                freq.WriteVInt(postingFreq); // frequency in doc
            }

            // write positions, using delta-encoding
            int lastPosition = 0;
            int[] positions = posting.positions;
            for (int j = 0; j < postingFreq; j++)
            {
                int position = positions[j];
                prox.WriteVInt(position - lastPosition);
                lastPosition = position;
            }

            // check to see if we switched to a new Field
            // (reference comparison; presumably field names are interned - confirm)
            System.String termField = posting.term.Field();
            if ((System.Object) currentField != (System.Object) termField)
            {
                // changing Field - see if there is something to save
                currentField = termField;
                FieldInfo fi = fieldInfos.FieldInfo(currentField);
                if (fi.storeTermVector)
                {
                    if (termVectorWriter == null)
                    {
                        // The vector writer is created lazily, on the first field that needs it.
                        termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                        termVectorWriter.OpenDocument();
                    }
                    termVectorWriter.OpenField(currentField);
                }
                else if (termVectorWriter != null)
                {
                    termVectorWriter.CloseField();
                }
            }

            if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
            {
                termVectorWriter.AddTerm(posting.term.Text(), postingFreq);
            }
        }

        if (termVectorWriter != null)
        {
            termVectorWriter.CloseDocument();
        }
    }
    finally
    {
        // make an effort to close all streams we can but remember and re-throw
        // the first exception encountered in this process
        System.IO.IOException keep = null;
        if (freq != null)
        {
            try
            {
                freq.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (prox != null)
        {
            try
            {
                prox.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (tis != null)
        {
            try
            {
                tis.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (termVectorWriter != null)
        {
            try
            {
                termVectorWriter.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (keep != null)
        {
            // Keep the original message and chain the first failure as the inner
            // exception, instead of using the stack-trace text as the message.
            throw new System.IO.IOException(keep.Message, keep);
        }
    }
}
/// <summary>Copies the enumeration's current TermInfo into <c>ti</c> by calling
/// <c>ti.Set</c>. The copied value is not meaningful until the enumeration
/// has been advanced for the first time.
/// </summary>
/// <param name="ti">destination that receives the current values</param>
internal void TermInfo(TermInfo ti)
{
    ti.Set(termInfo);
}