/// <summary>
/// Reports the term vector of <paramref name="field"/> to <paramref name="mapper"/>:
/// first the expectations, then one <c>Map</c> call per sorted term, walking the
/// sorted terms from the last one to the first.
/// </summary>
/// <param name="docNumber">Not read by this implementation.</param>
/// <param name="field">Field whose terms are reported. Nothing is reported when <c>GetInfo</c> yields null for it.</param>
/// <param name="mapper">Receiver of the expectations and of every term's frequency, offsets and positions.</param>
public override void GetTermFreqVector(int docNumber, String field, TermVectorMapper mapper)
{
    if (DEBUG)
    {
        System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getTermFreqVector");
    }

    Info info = GetInfo(field);
    if (info == null)
    {
        return;
    }

    info.SortTerms();

    int stride = _index.stride;
    // A stride of 1 means no offsets are stored alongside the positions.
    bool hasOffsets = stride != 1;
    mapper.SetExpectations(field, info.SortedTerms.Length, hasOffsets, true);

    for (int i = info.SortedTerms.Length; --i >= 0;)
    {
        ArrayIntList positions = info.SortedTerms[i].Value;

        // Only decode offsets when they exist. With a stride of 1 the decoding loop
        // below would read one element past the end of the list, so the mapper is
        // handed null instead, matching the "no offsets" flag given to SetExpectations.
        TermVectorOffsetInfo[] offsets = null;
        if (hasOffsets)
        {
            int size = positions.Size();
            offsets = new TermVectorOffsetInfo[size / stride];
            // Each group of 'stride' ints holds the start offset at index 1 and the end offset at index 2.
            for (int k = 0, j = 1; j < size; k++, j += stride)
            {
                int start = positions.Get(j);
                int end = positions.Get(j + 1);
                offsets[k] = new TermVectorOffsetInfo(start, end);
            }
        }

        mapper.Map(info.SortedTerms[i].Key, _index.NumPositions(positions), offsets, positions.ToArray(stride));
    }
}
/// <summary>
/// Appends <paramref name="term"/> to <c>terms</c> and, when an <c>offsets</c>
/// collection exists, appends <paramref name="info"/> to it as well.
/// </summary>
/// <param name="term">Term to record.</param>
/// <param name="info">Offset information recorded next to the term when offsets are kept.</param>
internal virtual void addTerm(System.String term, TermVectorOffsetInfo info)
{
    terms.Add(term);

    // No offsets collection: only the term itself is tracked.
    if (offsets == null)
    {
        return;
    }

    offsets.Add(info);
}
/// <summary>
/// Creates an entry that stores the supplied values as-is; the arrays are kept
/// by reference, not copied.
/// </summary>
/// <param name="field">Value stored in the <c>field</c> member.</param>
/// <param name="term">Value stored in the <c>term</c> member.</param>
/// <param name="frequency">Value stored in the <c>frequency</c> member.</param>
/// <param name="offsets">Array stored in the <c>offsets</c> member.</param>
/// <param name="positions">Array stored in the <c>positions</c> member.</param>
public TermVectorEntry(
    System.String field,
    System.String term,
    int frequency,
    TermVectorOffsetInfo[] offsets,
    int[] positions)
{
    this.positions = positions;
    this.offsets = offsets;
    this.frequency = frequency;
    this.term = term;
    this.field = field;
}
/// <summary>
/// Callback for the TermVectorReader. Registers <paramref name="term"/> under every
/// position it occurs at, creating the per-position record in
/// <c>currentPositions</c> the first time a position is seen.
/// </summary>
/// <param name="term">Term being reported.</param>
/// <param name="frequency">Not read by this implementation.</param>
/// <param name="offsets">Offsets parallel to <paramref name="positions"/>; when null,
/// <c>TermVectorOffsetInfo.Null</c> is recorded for each position instead.</param>
/// <param name="positions">Positions of the term; dereferenced without a null check.</param>
public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
{
    int count = positions.Length;
    for (int idx = 0; idx < count; idx++)
    {
        int position = positions[idx];

        TVPositionInfo slot = currentPositions[position];
        if (slot == null)
        {
            slot = new TVPositionInfo(position, storeOffsets);
            currentPositions[position] = slot;
        }

        if (offsets != null)
        {
            slot.addTerm(term, offsets[idx]);
        }
        else
        {
            slot.addTerm(term, TermVectorOffsetInfo.Null);
        }
    }
}
/// <summary>
/// Decodes the start/end offsets stored for the sorted term at <paramref name="index"/>.
/// </summary>
/// <param name="index">Index into <c>sortedTerms</c>.</param>
/// <returns>
/// Null when the index stride is 1 (no offsets stored); otherwise an array of
/// <c>size / stride</c> offset entries.
/// </returns>
public TermVectorOffsetInfo[] GetOffsets(int index)
{
    int stride = _index.stride;
    if (stride == 1)
    {
        return null; // no offsets stored
    }

    ArrayIntList data = sortedTerms[index].Value;
    int count = data.Size();
    var result = new TermVectorOffsetInfo[count / stride];

    // Within each group of 'stride' ints, element 1 is the start and element 2 the end.
    int slot = 0;
    for (int cursor = 1; cursor < count; cursor += stride)
    {
        result[slot] = new TermVectorOffsetInfo(data.Get(cursor), data.Get(cursor + 1));
        slot++;
    }

    return result;
}
/// <summary>
/// Records <paramref name="term"/> in <c>termToTVE</c> and <c>currentSet</c>, or, when the
/// term was already seen, folds the new frequency, offsets and positions into the
/// existing entry.
/// </summary>
/// <param name="term">The term to map</param>
/// <param name="frequency">The frequency of the term</param>
/// <param name="offsets">Offset information, may be null</param>
/// <param name="positions">Position information, may be null</param>
public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
{
    TermVectorEntry entry = termToTVE[term];

    if (entry == null)
    {
        // First mention of the term: keep only the parts this mapper is configured to store.
        TermVectorOffsetInfo[] keptOffsets = storeOffsets ? offsets : null;
        int[] keptPositions = storePositions ? positions : null;

        entry = new TermVectorEntry(ALL, term, frequency, keptOffsets, keptPositions);
        termToTVE[term] = entry;
        currentSet.Add(entry);
        return;
    }

    // We need to combine any previous mentions of the term.
    entry.Frequency += frequency;

    if (storeOffsets)
    {
        TermVectorOffsetInfo[] previousOffsets = entry.GetOffsets();
        bool hasIncoming = offsets != null && offsets.Length > 0;
        if (hasIncoming)
        {
            if (previousOffsets == null)
            {
                entry.SetOffsets(offsets);
            }
            else
            {
                // Existing offsets first, incoming ones appended after them.
                TermVectorOffsetInfo[] merged = new TermVectorOffsetInfo[previousOffsets.Length + offsets.Length];
                previousOffsets.CopyTo(merged, 0);
                offsets.CopyTo(merged, previousOffsets.Length);
                entry.SetOffsets(merged);
            }
        }
        // otherwise nothing new to add: leave the entry alone
    }

    if (storePositions)
    {
        int[] previousPositions = entry.GetPositions();
        bool hasIncoming = positions != null && positions.Length > 0;
        if (hasIncoming)
        {
            if (previousPositions == null)
            {
                entry.SetPositions(positions);
            }
            else
            {
                int[] merged = new int[previousPositions.Length + positions.Length];
                previousPositions.CopyTo(merged, 0);
                positions.CopyTo(merged, previousPositions.Length);
                entry.SetPositions(merged);
            }
        }
    }
}
/// <summary>
/// Builds the offset entries for the sorted term at <paramref name="index"/> from its
/// stored int list.
/// </summary>
/// <param name="index">Index into <c>sortedTerms</c>.</param>
/// <returns>
/// The decoded offsets (<c>size / stride</c> entries), or null when the index
/// stride is 1, i.e. no offsets are stored.
/// </returns>
public TermVectorOffsetInfo[] GetOffsets(int index)
{
    if (_index.stride != 1)
    {
        ArrayIntList entries = sortedTerms[index].Value;
        int total = entries.Size();
        TermVectorOffsetInfo[] decoded = new TermVectorOffsetInfo[total / _index.stride];

        int slot = 0;
        int cursor = 1; // start offset sits at index 1 of each group, end offset right after it
        while (cursor < total)
        {
            int start = entries.Get(cursor);
            int end = entries.Get(cursor + 1);
            decoded[slot] = new TermVectorOffsetInfo(start, end);

            slot++;
            cursor += _index.stride;
        }

        return decoded;
    }

    return null; // no offsets stored
}
/// <summary>
/// Replaces the stored offsets with <paramref name="value"/>; the array is kept by
/// reference, not copied.
/// </summary>
/// <param name="value">New offsets array to store.</param>
internal virtual void SetOffsets(TermVectorOffsetInfo[] value)
{
    offsets = value;
}
/// <summary>
/// Wraps the reported term data in a new <c>TermVectorEntry</c> for
/// <c>currentField</c> and adds it to <c>currentSet</c>.
/// </summary>
/// <param name="term">Term being reported.</param>
/// <param name="frequency">Frequency passed through to the entry.</param>
/// <param name="offsets">Offsets passed through to the entry unchanged.</param>
/// <param name="positions">Positions passed through to the entry unchanged.</param>
public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
{
    currentSet.Add(new TermVectorEntry(currentField, term, frequency, offsets, positions));
}
/// <summary>
/// Creates the vector: <paramref name="field"/>, <paramref name="terms"/> and
/// <paramref name="termFreqs"/> are forwarded to the base constructor, while the
/// position and offset tables are stored on this instance by reference.
/// </summary>
/// <param name="field">Forwarded to the base constructor.</param>
/// <param name="terms">Forwarded to the base constructor.</param>
/// <param name="termFreqs">Forwarded to the base constructor.</param>
/// <param name="positions">Array stored in the <c>positions</c> member.</param>
/// <param name="offsets">Array stored in the <c>offsets</c> member.</param>
public SegmentTermPositionVector(
    System.String field,
    System.String[] terms,
    int[] termFreqs,
    int[][] positions,
    TermVectorOffsetInfo[][] offsets)
    : base(field, terms, termFreqs)
{
    this.positions = positions;
    this.offsets = offsets;
}
/// <summary>
/// Map the Term Vector information into your own structure. Implementations
/// receive one call per reported term.
/// </summary>
/// <param name="term">The term to add to the vector.</param>
/// <param name="frequency">The frequency of the term in the document.</param>
/// <param name="offsets">Null if the offset is not specified, otherwise the offset into the field of the term.</param>
/// <param name="positions">Null if the position is not specified, otherwise the position in the field of the term.</param>
public abstract void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions);