/// <summary>
/// Maps a term into the combined entry set. The first mention of a term creates a new
/// <see cref="TermVectorEntry"/>; later mentions are folded into the existing entry by
/// summing the frequency and appending any offsets/positions that are being stored.
/// </summary>
/// <param name="term">The term to map</param>
/// <param name="frequency">The frequency of the term</param>
/// <param name="offsets">Offset information, may be null</param>
/// <param name="positions">Position information, may be null</param>
public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
{
    TermVectorEntry known = termToTVE[term];
    if (known == null)
    {
        // First mention of this term: keep offsets/positions only when configured to store them.
        TermVectorOffsetInfo[] keptOffsets = storeOffsets ? offsets : null;
        int[] keptPositions = storePositions ? positions : null;
        TermVectorEntry created = new TermVectorEntry(ALL, term, frequency, keptOffsets, keptPositions);
        termToTVE[term] = created;
        currentSet.Add(created);
        return;
    }

    // We need to combine any previous mentions of the term.
    known.Frequency += frequency;

    if (storeOffsets)
    {
        TermVectorOffsetInfo[] previousOffsets = known.GetOffsets();
        bool hasIncomingOffsets = offsets != null && offsets.Length > 0;
        if (hasIncomingOffsets)
        {
            if (previousOffsets == null)
            {
                // Nothing stored yet, so the incoming array is adopted as-is.
                known.SetOffsets(offsets);
            }
            else
            {
                // Existing offsets first, then the new ones appended after them.
                TermVectorOffsetInfo[] mergedOffsets = new TermVectorOffsetInfo[previousOffsets.Length + offsets.Length];
                previousOffsets.CopyTo(mergedOffsets, 0);
                offsets.CopyTo(mergedOffsets, previousOffsets.Length);
                known.SetOffsets(mergedOffsets);
            }
        }
        // No incoming offsets: leave whatever the entry already has.
    }

    if (storePositions)
    {
        int[] previousPositions = known.GetPositions();
        bool hasIncomingPositions = positions != null && positions.Length > 0;
        if (hasIncomingPositions)
        {
            if (previousPositions == null)
            {
                known.SetPositions(positions);
            }
            else
            {
                int[] mergedPositions = new int[previousPositions.Length + positions.Length];
                previousPositions.CopyTo(mergedPositions, 0);
                positions.CopyTo(mergedPositions, previousPositions.Length);
                known.SetPositions(mergedPositions);
            }
        }
    }
}
/// <summary>
/// Orders two <see cref="TermVectorEntry"/> instances by descending frequency; ties are
/// broken by ordinal comparison of the term, and then by ordinal comparison of the field.
/// </summary>
/// <param name="object_Renamed">The first entry; must be a <see cref="TermVectorEntry"/>.</param>
/// <param name="object1">The second entry; must be a <see cref="TermVectorEntry"/>.</param>
/// <returns>
/// A negative value when the first entry sorts before the second, zero when they compare
/// equal, and a positive value when the first entry sorts after the second.
/// </returns>
/// <exception cref="System.InvalidCastException">
/// Either argument is not a <see cref="TermVectorEntry"/>.
/// </exception>
public virtual int Compare(object object_Renamed, object object1)
{
    TermVectorEntry entry = (TermVectorEntry)object_Renamed;
    TermVectorEntry entry1 = (TermVectorEntry)object1;

    // Higher frequency sorts first, hence entry1 is the receiver. CompareTo is used instead
    // of subtracting the two frequencies because a difference can overflow and flip sign.
    int result = entry1.GetFrequency().CompareTo(entry.GetFrequency());
    if (result != 0)
    {
        return result;
    }

    result = String.CompareOrdinal(entry.GetTerm(), entry1.GetTerm());
    if (result != 0)
    {
        return result;
    }

    return String.CompareOrdinal(entry.GetField(), entry1.GetField());
}
/// <summary>
/// Maps a term into the combined entry set. The first mention of a term creates a new
/// <see cref="TermVectorEntry"/>; later mentions are folded into the existing entry by
/// summing the frequency and appending any offsets/positions that are being stored.
/// </summary>
/// <param name="term">The term to map</param>
/// <param name="frequency">The frequency of the term</param>
/// <param name="offsets">Offset information, may be null</param>
/// <param name="positions">Position information, may be null</param>
public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
{
    TermVectorEntry known = (TermVectorEntry) termToTVE[term];
    if (known == null)
    {
        // First mention of this term: keep offsets/positions only when configured to store them.
        TermVectorOffsetInfo[] keptOffsets = storeOffsets ? offsets : null;
        int[] keptPositions = storePositions ? positions : null;
        TermVectorEntry created = new TermVectorEntry(ALL, term, frequency, keptOffsets, keptPositions);
        termToTVE[term] = created;
        // The entry is passed as both arguments of Add.
        currentSet.Add(created, created);
        return;
    }

    // We need to combine any previous mentions of the term.
    int combinedFrequency = known.GetFrequency() + frequency;
    known.SetFrequency(combinedFrequency);

    if (storeOffsets)
    {
        TermVectorOffsetInfo[] previousOffsets = known.GetOffsets();
        bool hasIncomingOffsets = offsets != null && offsets.Length > 0;
        if (hasIncomingOffsets)
        {
            if (previousOffsets == null)
            {
                // Nothing stored yet, so the incoming array is adopted as-is.
                known.SetOffsets(offsets);
            }
            else
            {
                // Existing offsets first, then the new ones appended after them.
                TermVectorOffsetInfo[] mergedOffsets = new TermVectorOffsetInfo[previousOffsets.Length + offsets.Length];
                previousOffsets.CopyTo(mergedOffsets, 0);
                offsets.CopyTo(mergedOffsets, previousOffsets.Length);
                known.SetOffsets(mergedOffsets);
            }
        }
        // No incoming offsets: leave whatever the entry already has.
    }

    if (storePositions)
    {
        int[] previousPositions = known.GetPositions();
        bool hasIncomingPositions = positions != null && positions.Length > 0;
        if (hasIncomingPositions)
        {
            if (previousPositions == null)
            {
                known.SetPositions(positions);
            }
            else
            {
                int[] mergedPositions = new int[previousPositions.Length + positions.Length];
                previousPositions.CopyTo(mergedPositions, 0);
                positions.CopyTo(mergedPositions, previousPositions.Length);
                known.SetPositions(mergedPositions);
            }
        }
    }
}
/// <summary>
/// Determines equality with another object. Two entries are equal when they have the
/// same runtime type and their terms are equal; two null terms count as equal.
/// </summary>
/// <param name="o">The object to compare against; may be null.</param>
/// <returns>true when <paramref name="o"/> is considered equal to this entry.</returns>
public override bool Equals(System.Object o)
{
    if (ReferenceEquals(this, o))
    {
        return true;
    }

    if (o == null || o.GetType() != GetType())
    {
        return false;
    }

    TermVectorEntry other = (TermVectorEntry)o;

    // Only the term takes part in the comparison.
    if (term == null)
    {
        return other.term == null;
    }

    return term.Equals(other.term);
}
/// <summary>
/// Wraps the supplied term data in a new <see cref="TermVectorEntry"/> tagged with
/// <c>currentField</c> and adds it to <c>currentSet</c>.
/// </summary>
/// <param name="term">The term to map</param>
/// <param name="frequency">The frequency of the term</param>
/// <param name="offsets">Offset information, passed through to the entry unchanged</param>
/// <param name="positions">Position information, passed through to the entry unchanged</param>
public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
{
    TermVectorEntry mapped = new TermVectorEntry(currentField, term, frequency, offsets, positions);

    // The entry is passed as both arguments of Add.
    currentSet.Add(mapped, mapped);
}
/// <summary>
/// Exercises the term vector mappers against the test index:
/// <see cref="SortedTermVectorMapper"/> on documents 0 and 1,
/// <see cref="FieldSortedTermVectorMapper"/> with and without offsets/positions being
/// ignored, and the document-number callback through <c>DocNumAwareMapper</c>.
/// </summary>
public virtual void TestMapper()
{
    // NOTE(review): reader is never closed in this test - confirm whether
    // TermVectorsReader needs explicit cleanup.
    TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);

    SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(0, mapper);
    var set_Renamed = mapper.TermVectorEntrySet;
    Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
    //three fields, 4 terms, all terms are the same
    Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
    //Check offsets and positions
    foreach (TermVectorEntry tve in set_Renamed)
    {
        Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
    }

    mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(1, mapper);
    set_Renamed = mapper.TermVectorEntrySet;
    Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
    //three fields, 4 terms, all terms are the same
    Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
    //Should have offsets and positions b/c we are munging all the fields together
    foreach (TermVectorEntry tve in set_Renamed)
    {
        Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
    }

    FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(0, fsMapper);
    var map = fsMapper.FieldToTerms;
    Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
    foreach (var entry in map)
    {
        var sortedSet = entry.Value;
        Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
        foreach (TermVectorEntry tve in sortedSet)
        {
            //Check offsets and positions.
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            System.String field = tve.Field;
            if (field.Equals(testFields[0]))
            {
                //should have offsets
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }
            else if (field.Equals(testFields[1]))
            {
                //should not have offsets
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
        }
    }

    //Try mapper that ignores offs and positions
    fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
    reader.Get(0, fsMapper);
    map = fsMapper.FieldToTerms;
    Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
    foreach (var entry in map)
    {
        var sortedSet = entry.Value;
        Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
        foreach (TermVectorEntry tve in sortedSet)
        {
            //Check offsets and positions.
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            System.String field = tve.Field;
            if (field.Equals(testFields[0]))
            {
                //the mapper ignores offsets and positions, so none are expected even for this field
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
            else if (field.Equals(testFields[1]))
            {
                //should not have offsets
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
        }
    }

    // test setDocumentNumber()
    IndexReader ir = IndexReader.Open(dir, true);
    try
    {
        DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
        Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber());

        ir.GetTermFreqVector(0, docNumAwareMapper);
        Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
        // Reset before each call so the next assertion proves the call set the number again.
        docNumAwareMapper.SetDocumentNumber(-1);

        ir.GetTermFreqVector(1, docNumAwareMapper);
        Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
        docNumAwareMapper.SetDocumentNumber(-1);

        ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
        Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
        docNumAwareMapper.SetDocumentNumber(-1);

        ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
        Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
        docNumAwareMapper.SetDocumentNumber(-1);

        ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
        Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
    }
    finally
    {
        // Close the index reader even when one of the assertions above fails.
        ir.Close();
    }
}