// Scratch Term reused as the lookup key so AddPosition does not allocate one per call.
private Term termBuffer = new Term("", "");

// Records one occurrence of the term (field, text) at the given position.
// If postingTable already holds a Posting for the term, the position is
// appended (growing the positions array when full) and freq is incremented;
// otherwise a new Posting is stored under a freshly allocated Term.
private void AddPosition(System.String field, System.String text, int position)
{
    termBuffer.Set(field, text);
    Posting existing = (Posting)postingTable[termBuffer];

    if (existing == null)
    {
        // Term not seen before: the table needs its own key object because
        // termBuffer is overwritten on every call.
        // NOTE(review): the meaning of the 'false' argument is defined by
        // Term's constructor, which is not visible here.
        Term key = new Term(field, text, false);
        postingTable[key] = new Posting(key, position);
        return;
    }

    // Term seen before.
    int count = existing.freq;
    if (existing.positions.Length == count)
    {
        // Positions array is full: double its size and carry over the old entries.
        int[] grown = new int[count * 2];
        System.Array.Copy(existing.positions, 0, grown, 0, count);
        existing.positions = grown;
    }

    existing.positions[count] = position; // append the new position
    existing.freq = count + 1;            // update frequency
}
// Copies every value held in postingTable into a new array and sorts that
// array with QuickSort. Returns the sorted array; postingTable itself is
// only read.
private Posting[] SortPostingTable()
{
    Posting[] sorted = new Posting[postingTable.Count];

    // Snapshot the table's values into the array, in enumeration order.
    System.Collections.IEnumerator cursor = postingTable.Values.GetEnumerator();
    int next = 0;
    while (cursor.MoveNext())
    {
        sorted[next] = (Posting)cursor.Current;
        next++;
    }

    // Sort the whole array in place.
    QuickSort(sorted, 0, sorted.Length - 1);
    return sorted;
}
// Writes the given postings for one segment: a term dictionary entry per
// posting (via TermInfosWriter), frequency data to "<segment>.frq",
// delta-encoded positions to "<segment>.prx", and term vectors for fields
// whose FieldInfo has storeTermVector set.
//
// postings: the postings to write, processed in array order.
// segment:  segment name used as the file-name prefix.
//
// All writers that were opened are closed in the finally block. If closing
// fails, a System.IO.IOException is thrown that carries the message of the
// first close failure and exposes that failure as its InnerException.
private void WritePostings(Posting[] postings, System.String segment)
{
    OutputStream freq = null, prox = null;
    TermInfosWriter tis = null;
    TermVectorsWriter termVectorWriter = null;
    try
    {
        // open files for inverse index storage
        freq = directory.CreateFile(segment + ".frq");
        prox = directory.CreateFile(segment + ".prx");
        tis = new TermInfosWriter(directory, segment, fieldInfos);
        TermInfo ti = new TermInfo(); // reused for every posting
        System.String currentField = null;

        for (int i = 0; i < postings.Length; i++)
        {
            Posting posting = postings[i];

            // add an entry to the dictionary with pointers to prox and freq files
            ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
            tis.Add(posting.term, ti);

            // add an entry to the freq file
            int postingFreq = posting.freq;
            if (postingFreq == 1)
            {
                // optimize freq=1
                freq.WriteVInt(1); // set low bit of doc num.
            }
            else
            {
                freq.WriteVInt(0);           // the document number
                freq.WriteVInt(postingFreq); // frequency in doc
            }

            // write positions, delta-encoded against the previous position
            int lastPosition = 0;
            int[] positions = posting.positions;
            for (int j = 0; j < postingFreq; j++)
            {
                int position = positions[j];
                prox.WriteVInt(position - lastPosition);
                lastPosition = position;
            }

            // check to see if we switched to a new Field
            System.String termField = posting.term.Field();
            // The casts to System.Object make this a reference comparison,
            // not a string value comparison.
            // NOTE(review): presumably field names are interned so equal
            // names share one instance - confirm against Term.
            if ((System.Object)currentField != (System.Object)termField)
            {
                // changing Field - see if there is something to save
                currentField = termField;
                FieldInfo fi = fieldInfos.FieldInfo(currentField);
                if (fi.storeTermVector)
                {
                    if (termVectorWriter == null)
                    {
                        // created lazily, on the first field that stores term vectors
                        termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                        termVectorWriter.OpenDocument();
                    }
                    termVectorWriter.OpenField(currentField);
                }
                else if (termVectorWriter != null)
                {
                    termVectorWriter.CloseField();
                }
            }

            if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
            {
                termVectorWriter.AddTerm(posting.term.Text(), postingFreq);
            }
        }

        if (termVectorWriter != null)
        {
            termVectorWriter.CloseDocument();
        }
    }
    finally
    {
        // make an effort to close all streams we can but remember and re-throw
        // the first exception encountered in this process
        System.IO.IOException keep = null;
        if (freq != null)
        {
            try
            {
                freq.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (prox != null)
        {
            try
            {
                prox.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (tis != null)
        {
            try
            {
                tis.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (termVectorWriter != null)
        {
            try
            {
                termVectorWriter.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (keep != null)
        {
            // Wrap rather than build a new exception from the stack-trace text:
            // this keeps the real message and makes the original failure
            // (with its own stack trace) available as InnerException.
            throw new System.IO.IOException(keep.Message, keep);
        }
    }
}
// Sorts postings[lo..hi] (both bounds inclusive) in place, ordering the
// entries by Term.CompareTo on their term.
//
// The first, middle and last entries of the range are put in order first, so
// the middle entry holds the median of the three and serves as the pivot.
// The range is then partitioned around that pivot and both halves are sorted
// recursively.
private static void QuickSort(Posting[] postings, int lo, int hi)
{
    if (lo >= hi)
    {
        return; // zero or one entry: nothing to sort
    }

    // Overflow-safe midpoint: (lo + hi) / 2 can wrap to a negative index when
    // lo + hi exceeds int.MaxValue; this form yields the same value otherwise.
    int mid = lo + (hi - lo) / 2;

    // Order postings[lo] <= postings[mid] <= postings[hi].
    if (postings[lo].term.CompareTo(postings[mid].term) > 0)
    {
        Posting tmp = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp;
    }
    if (postings[mid].term.CompareTo(postings[hi].term) > 0)
    {
        Posting tmp = postings[mid];
        postings[mid] = postings[hi];
        postings[hi] = tmp;
        // The swap may have moved a smaller entry into mid; re-check against lo.
        if (postings[lo].term.CompareTo(postings[mid].term) > 0)
        {
            Posting tmp2 = postings[lo];
            postings[lo] = postings[mid];
            postings[mid] = tmp2;
        }
    }

    // lo and hi are already on the correct side of the pivot.
    int left = lo + 1;
    int right = hi - 1;
    if (left >= right)
    {
        return; // at most three entries, already ordered above
    }

    Term partition = postings[mid].term;
    for (; ; )
    {
        // Skip entries on the right that are greater than the pivot.
        while (postings[right].term.CompareTo(partition) > 0)
        {
            --right;
        }
        // Skip entries on the left that are not greater than the pivot.
        while (left < right && postings[left].term.CompareTo(partition) <= 0)
        {
            ++left;
        }
        if (left < right)
        {
            Posting tmp = postings[left];
            postings[left] = postings[right];
            postings[right] = tmp;
            --right;
        }
        else
        {
            break;
        }
    }

    QuickSort(postings, lo, left);
    QuickSort(postings, left + 1, hi);
}
// Writes the postings of one segment to the index files: each posting gets a
// term dictionary entry (TermInfosWriter), an entry in "<segment>.frq", its
// delta-encoded positions in "<segment>.prx", and - for fields whose
// FieldInfo has storeTermVector set - a term vector entry.
//
// postings: postings to write, handled in array order.
// segment:  segment name; used as the prefix of the created files.
//
// Every writer that was opened is closed in the finally block. When a close
// fails, a System.IO.IOException is thrown whose message is that of the first
// close failure and whose InnerException is that failure.
private void WritePostings(Posting[] postings, System.String segment)
{
    OutputStream freq = null, prox = null;
    TermInfosWriter tis = null;
    TermVectorsWriter termVectorWriter = null;
    try
    {
        // open files for inverse index storage
        freq = directory.CreateFile(segment + ".frq");
        prox = directory.CreateFile(segment + ".prx");
        tis = new TermInfosWriter(directory, segment, fieldInfos);
        TermInfo ti = new TermInfo(); // one instance, re-populated per posting
        System.String currentField = null;

        for (int i = 0; i < postings.Length; i++)
        {
            Posting posting = postings[i];

            // add an entry to the dictionary with pointers to prox and freq files
            ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
            tis.Add(posting.term, ti);

            // add an entry to the freq file
            int postingFreq = posting.freq;
            if (postingFreq == 1)
            {
                // optimize freq=1
                freq.WriteVInt(1); // set low bit of doc num.
            }
            else
            {
                freq.WriteVInt(0);           // the document number
                freq.WriteVInt(postingFreq); // frequency in doc
            }

            // write positions using delta-encoding
            int lastPosition = 0;
            int[] positions = posting.positions;
            for (int j = 0; j < postingFreq; j++)
            {
                int position = positions[j];
                prox.WriteVInt(position - lastPosition);
                lastPosition = position;
            }

            // check to see if we switched to a new Field
            System.String termField = posting.term.Field();
            // Comparing as System.Object tests reference identity rather than
            // string contents.
            // NOTE(review): this looks like it relies on field names being
            // interned - verify against Term.
            if ((System.Object)currentField != (System.Object)termField)
            {
                // changing Field - see if there is something to save
                currentField = termField;
                FieldInfo fi = fieldInfos.FieldInfo(currentField);
                if (fi.storeTermVector)
                {
                    if (termVectorWriter == null)
                    {
                        // the writer is only created once a field needs it
                        termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                        termVectorWriter.OpenDocument();
                    }
                    termVectorWriter.OpenField(currentField);
                }
                else if (termVectorWriter != null)
                {
                    termVectorWriter.CloseField();
                }
            }

            if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
            {
                termVectorWriter.AddTerm(posting.term.Text(), postingFreq);
            }
        }

        if (termVectorWriter != null)
        {
            termVectorWriter.CloseDocument();
        }
    }
    finally
    {
        // make an effort to close all streams we can but remember and re-throw
        // the first exception encountered in this process
        System.IO.IOException keep = null;
        if (freq != null)
        {
            try
            {
                freq.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (prox != null)
        {
            try
            {
                prox.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (tis != null)
        {
            try
            {
                tis.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (termVectorWriter != null)
        {
            try
            {
                termVectorWriter.Close();
            }
            catch (System.IO.IOException e)
            {
                if (keep == null)
                {
                    keep = e;
                }
            }
        }
        if (keep != null)
        {
            // Previously the stack-trace text was used as the message and the
            // original exception was dropped. Wrapping preserves the real
            // message and exposes the first failure as InnerException.
            throw new System.IO.IOException(keep.Message, keep);
        }
    }
}
// In-place quicksort of postings[lo..hi] (inclusive bounds), ordered by
// Term.CompareTo on each posting's term.
//
// Before partitioning, the entries at lo, mid and hi are arranged in
// ascending order, which leaves the median of those three at mid; its term is
// used as the pivot. Both resulting sub-ranges are sorted recursively.
private static void QuickSort(Posting[] postings, int lo, int hi)
{
    if (lo >= hi)
    {
        return; // empty or single-entry range
    }

    // Computed as lo + (hi - lo) / 2 so the intermediate value cannot exceed
    // int.MaxValue; (lo + hi) / 2 could wrap around for very large indices.
    int mid = lo + (hi - lo) / 2;

    // Arrange the three sample entries in ascending order.
    if (postings[lo].term.CompareTo(postings[mid].term) > 0)
    {
        Posting tmp = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp;
    }
    if (postings[mid].term.CompareTo(postings[hi].term) > 0)
    {
        Posting tmp = postings[mid];
        postings[mid] = postings[hi];
        postings[hi] = tmp;
        // mid changed, so its order relative to lo has to be checked again.
        if (postings[lo].term.CompareTo(postings[mid].term) > 0)
        {
            Posting tmp2 = postings[lo];
            postings[lo] = postings[mid];
            postings[mid] = tmp2;
        }
    }

    // The entries at lo and hi are already placed relative to the pivot.
    int left = lo + 1;
    int right = hi - 1;
    if (left >= right)
    {
        return; // three entries or fewer: the ordering above was sufficient
    }

    Term partition = postings[mid].term;
    for (; ; )
    {
        // Move right down past entries greater than the pivot.
        while (postings[right].term.CompareTo(partition) > 0)
        {
            --right;
        }
        // Move left up past entries less than or equal to the pivot.
        while (left < right && postings[left].term.CompareTo(partition) <= 0)
        {
            ++left;
        }
        if (left < right)
        {
            Posting tmp = postings[left];
            postings[left] = postings[right];
            postings[right] = tmp;
            --right;
        }
        else
        {
            break;
        }
    }

    QuickSort(postings, lo, left);
    QuickSort(postings, left + 1, hi);
}
// Builds an array holding every Posting stored as a value in postingTable,
// sorts it with QuickSort, and returns it. postingTable is not modified.
private Posting[] SortPostingTable()
{
    Posting[] result = new Posting[postingTable.Count];

    // Copy the table's values into the array.
    int filled = 0;
    foreach (System.Object value in postingTable.Values)
    {
        result[filled] = (Posting)value;
        filled++;
    }

    // Sort the array over its full index range.
    QuickSort(result, 0, result.Length - 1);
    return result;
}
// Reusable Term used only as a lookup key, so that AddPosition does not
// create a new Term for every call.
private Term termBuffer = new Term("", "");

// Adds one occurrence of the term (field, text) at the given position to
// postingTable. An existing Posting has the position appended and its freq
// increased by one; an unknown term gets a new Posting keyed by a new Term.
private void AddPosition(System.String field, System.String text, int position)
{
    termBuffer.Set(field, text);
    Posting posting = (Posting)postingTable[termBuffer];
    bool seenBefore = posting != null;

    if (!seenBefore)
    {
        // A dedicated Term is stored as the key, since termBuffer is
        // re-pointed on each call.
        // NOTE(review): what the 'false' argument selects is decided by
        // Term's constructor, not shown here.
        Term term = new Term(field, text, false);
        postingTable[term] = new Posting(term, position);
    }
    else
    {
        int used = posting.freq;
        int[] slots = posting.positions;
        if (slots.Length == used)
        {
            // Array is full: allocate one twice as large and copy the old
            // positions to its start.
            int[] larger = new int[used * 2];
            slots.CopyTo(larger, 0);
            posting.positions = larger;
        }
        posting.positions[used] = position; // add new position
        posting.freq = used + 1;            // update frequency
    }
}