/// <summary>
/// Reads one term vector per entry of <paramref name="fields"/>, pairing each
/// field with the pointer stored at the same index of <paramref name="tvfPointers"/>.
/// </summary>
/// <param name="fields">The fields to read term vectors for.</param>
/// <param name="tvfPointers">The tvf pointers, indexed in parallel with <paramref name="fields"/>.</param>
/// <returns>An array with one term vector per field, in the same order as <paramref name="fields"/>.</returns>
private SegmentTermVector[] ReadTermVectors(System.String[] fields, long[] tvfPointers)
{
    int fieldCount = fields.Length;
    SegmentTermVector[] vectors = new SegmentTermVector[fieldCount];

    for (int idx = 0; idx < fieldCount; idx++)
    {
        // Each field is read individually from its own pointer.
        SegmentTermVector vector = ReadTermVector(fields[idx], tvfPointers[idx]);
        vectors[idx] = vector;
    }

    return vectors;
}
/// <summary>
/// Reads the term vector stored at <paramref name="tvfPointer"/> in the tvf stream
/// and returns it as a <c>SegmentTermVector</c> for <paramref name="field"/>.
/// </summary>
/// <param name="field">The field the term vector belongs to; passed through to the result.</param>
/// <param name="tvfPointer">The pointer within the tvf file where we should start reading.</param>
/// <returns>
/// The term vector located at that position. When the stored term count is zero,
/// the result is constructed with null terms and null frequencies.
/// </returns>
/// <exception cref="System.IO.IOException">
/// Declared by the original documentation; presumably raised by the underlying tvf reads.
/// </exception>
private SegmentTermVector ReadTermVector(System.String field, long tvfPointer)
{
    // We don't need to offset by the FORMAT here since the pointer already includes the offset.
    tvf.Seek(tvfPointer);

    int numTerms = tvf.ReadVInt();

    // If no terms - return an empty term vector.
    if (numTerms == 0)
    {
        return new SegmentTermVector(field, null, null);
    }

    // The value read here is not used by this method, but the read must still
    // happen so the stream is positioned on the first term entry.
    tvf.ReadVInt();

    System.String[] terms = new System.String[numTerms];
    int[] termFreqs = new int[numTerms];
    char[] buffer = new char[0];
    System.String previousString = "";

    for (int i = 0; i < numTerms; i++)
    {
        // start: number of leading chars kept from the buffer (left over from the previous term).
        // deltaLength: number of new chars read from the stream and placed after them.
        int start = tvf.ReadVInt();
        int deltaLength = tvf.ReadVInt();
        int totalLength = start + deltaLength;

        if (buffer.Length < totalLength)
        {
            // Grow the buffer and carry the previous term over so its prefix stays available.
            buffer = new char[totalLength];
            previousString.CopyTo(0, buffer, 0, previousString.Length);
        }

        tvf.ReadChars(buffer, start, deltaLength);
        terms[i] = new System.String(buffer, 0, totalLength);
        previousString = terms[i];
        termFreqs[i] = tvf.ReadVInt();
    }

    return new SegmentTermVector(field, terms, termFreqs);
}
/// <summary>
/// Seeks the tvf stream to <paramref name="tvfPointer"/>, decodes the terms and
/// term frequencies stored there, and wraps them in a <c>SegmentTermVector</c>.
/// </summary>
/// <param name="field">The field to read in; handed unchanged to the returned term vector.</param>
/// <param name="tvfPointer">The pointer within the tvf file where we should start reading.</param>
/// <returns>
/// The term vector located at that position. A stored term count of zero yields
/// a term vector built with null terms and null frequencies.
/// </returns>
/// <exception cref="System.IO.IOException">
/// Declared by the original documentation; presumably raised by the underlying tvf reads.
/// </exception>
private SegmentTermVector ReadTermVector(System.String field, long tvfPointer)
{
    // Now read the data from the specified position. No FORMAT offset is added
    // here since the pointer already includes it.
    tvf.Seek(tvfPointer);

    int numTerms = tvf.ReadVInt();

    // If no terms - return an empty term vector.
    if (numTerms == 0)
    {
        return new SegmentTermVector(field, null, null);
    }

    // This VInt is not needed by the decoding below, but it has to be consumed
    // to keep the stream aligned with the term entries that follow.
    tvf.ReadVInt();

    System.String[] terms = new System.String[numTerms];
    int[] termFreqs = new int[numTerms];
    char[] buffer = new char[0];
    System.String previousString = "";

    for (int i = 0; i < numTerms; i++)
    {
        // The first 'start' chars are reused from the buffer (the previous term);
        // the following 'deltaLength' chars are read from the stream.
        int start = tvf.ReadVInt();
        int deltaLength = tvf.ReadVInt();
        int totalLength = start + deltaLength;

        if (buffer.Length < totalLength)
        {
            // A fresh, larger buffer starts out empty, so copy the previous term's contents into it.
            buffer = new char[totalLength];
            previousString.CopyTo(0, buffer, 0, previousString.Length);
        }

        tvf.ReadChars(buffer, start, deltaLength);
        terms[i] = new System.String(buffer, 0, totalLength);
        previousString = terms[i];
        termFreqs[i] = tvf.ReadVInt();
    }

    return new SegmentTermVector(field, terms, termFreqs);
}
/// <summary>
/// Builds an array of term vectors by reading, for every field, the term vector
/// found at the correspondingly indexed tvf pointer.
/// </summary>
/// <param name="fields">The fields whose term vectors are requested.</param>
/// <param name="tvfPointers">The tvf pointer for each field, at the same index as the field.</param>
/// <returns>The term vectors, ordered like <paramref name="fields"/>.</returns>
private SegmentTermVector[] ReadTermVectors(System.String[] fields, long[] tvfPointers)
{
    SegmentTermVector[] result = new SegmentTermVector[fields.Length];

    int position = 0;
    while (position < fields.Length)
    {
        // Field and pointer are matched up by their shared index.
        result[position] = ReadTermVector(fields[position], tvfPointers[position]);
        position++;
    }

    return result;
}