/// <summary> Adds terms and frequencies found in vector into the Map termFreqMap</summary>
/// <param name="termFreqMap">a Map of terms and their frequencies</param>
/// <param name="vector">List of terms and their frequencies for a doc/field</param>
private void AddTermFrequencies(IDictionary termFreqMap, TermFreqVector vector)
{
    String[] termList = vector.GetTerms();
    int[] termCounts = vector.GetTermFrequencies();

    for (int idx = 0; idx < termList.Length; idx++)
    {
        String candidate = termList[idx];
        if (IsNoiseWord(candidate))
        {
            continue;
        }

        // Accumulate the frequency, creating a counter the first time a term is seen.
        Int counter = (Int)termFreqMap[candidate];
        if (counter != null)
        {
            counter.x += termCounts[idx];
        }
        else
        {
            counter = new Int();
            counter.x = termCounts[idx];
            termFreqMap[candidate] = counter;
        }
    }
}
public virtual void TestTermVectors()
{
    try
    {
        TermFreqVector vector = reader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
        Assert.IsTrue(vector != null);

        System.String[] termList = vector.GetTerms();
        int[] freqList = vector.GetTermFrequencies();
        Assert.IsTrue(termList != null && termList.Length == 3 && freqList != null && freqList.Length == 3);

        // Every stored term must occur in the source text with a positive frequency.
        for (int idx = 0; idx < termList.Length; idx++)
        {
            Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(termList[idx]) != -1);
            Assert.IsTrue(freqList[idx] > 0);
        }

        TermFreqVector[] allVectors = reader.GetTermFreqVectors(0);
        Assert.IsTrue(allVectors != null);
        Assert.IsTrue(allVectors.Length == 2);
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
public TermVector(string fieldName, TermFreqVector tfv)
{
    //
    // Required for Windows Form Designer support
    //
    InitializeComponent();

    lblField.Text = fieldName;

    // Pair up the parallel terms/frequencies arrays, then sort for display.
    String[] termList = tfv.GetTerms();
    int[] freqList = tfv.GetTermFrequencies();
    IntPair[] pairs = new IntPair[tfv.Size()];
    for (int idx = 0; idx < termList.Length; idx++)
    {
        pairs[idx] = new IntPair(freqList[idx], termList[idx]);
    }
    Array.Sort(pairs, new IntPair.PairComparator(false, true));

    listViewTVF.BeginUpdate();
    foreach (IntPair pair in pairs)
    {
        listViewTVF.Items.Add(new ListViewItem(new string[] { pair.cnt.ToString(), pair.text }));
    }
    listViewTVF.EndUpdate();
}
/// <summary> Populates the term-vector list view with the terms of the given
/// vector, sorted alphabetically by term.</summary>
/// <param name="fieldName">name of the field the vector belongs to (shown in the label)</param>
/// <param name="tfv">term vector providing parallel terms/frequencies arrays</param>
public TermVector(string fieldName, TermFreqVector tfv)
{
    //
    // Required for Windows Form Designer support
    //
    InitializeComponent();

    lblField.Text = fieldName;

    List<TermFrequency> tvs = new List<TermFrequency>(tfv.Size());
    String[] terms = tfv.GetTerms();
    int[] freqs = tfv.GetTermFrequencies();
    for (int i = 0; i < terms.Length; i++)
    {
        tvs.Add(new TermFrequency(terms[i], freqs[i]));
    }

    // BUGFIX: OrderBy is deferred and returns a new sequence; the old code
    // discarded its result, leaving the list unsorted. Materialize the sort.
    tvs = tvs.OrderBy(p => p.Term).ToList();

    listViewTVF.BeginUpdate();
    foreach (TermFrequency tf in tvs)
    {
        ListViewItem item = new ListViewItem(new string[] { tf.Frequency.ToString(), tf.Term });
        listViewTVF.Items.Add(item);
    }
    listViewTVF.EndUpdate();
}
/// <summary> Populates the term-vector list view with the terms of the given
/// vector, sorted alphabetically by term.</summary>
/// <param name="fieldName">name of the field the vector belongs to (shown in the label)</param>
/// <param name="tfv">term vector providing parallel terms/frequencies arrays</param>
public TermVector(string fieldName, TermFreqVector tfv)
{
    //
    // Required for Windows Form Designer support
    //
    InitializeComponent();

    lblField.Text = fieldName;

    List<TermFrequency> tvs = new List<TermFrequency>(tfv.Size());
    String[] terms = tfv.GetTerms();
    int[] freqs = tfv.GetTermFrequencies();
    for (int i = 0; i < terms.Length; i++)
    {
        tvs.Add(new TermFrequency(terms[i], freqs[i]));
    }

    // BUGFIX: OrderBy is deferred and returns a new sequence; the old code
    // discarded its result, leaving the list unsorted. Materialize the sort.
    tvs = tvs.OrderBy(p => p.Term).ToList();

    listViewTVF.BeginUpdate();
    foreach (TermFrequency tf in tvs)
    {
        ListViewItem item = new ListViewItem(new string[] { tf.Frequency.ToString(), tf.Term });
        listViewTVF.Items.Add(item);
    }
    listViewTVF.EndUpdate();
}
public virtual void TestMerge()
{
    SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment, false);
    merger.Add(reader1);
    merger.Add(reader2);
    try
    {
        int mergedDocCount = merger.Merge();
        merger.CloseReaders();
        Assert.IsTrue(mergedDocCount == 2);

        // A SegmentReader must open cleanly against the freshly merged segment.
        SegmentReader mergedReader = new SegmentReader(new SegmentInfo(mergedSegment, mergedDocCount, mergedDir));
        Assert.IsTrue(mergedReader != null);
        Assert.IsTrue(mergedReader.NumDocs() == 2);

        Document firstDoc = mergedReader.Document(0);
        Assert.IsTrue(firstDoc != null);
        // There are 2 unstored fields on the document.
        Assert.IsTrue(DocHelper.NumFields(firstDoc) == DocHelper.NumFields(doc1) - 2);
        Document secondDoc = mergedReader.Document(1);
        Assert.IsTrue(secondDoc != null);
        Assert.IsTrue(DocHelper.NumFields(secondDoc) == DocHelper.NumFields(doc2) - 2);

        TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field"));
        Assert.IsTrue(termDocs != null);
        Assert.IsTrue(termDocs.Next() == true);

        System.Collections.ICollection stored = mergedReader.GetIndexedFieldNames(true);
        Assert.IsTrue(stored != null);
        Assert.IsTrue(stored.Count == 2);

        TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
        Assert.IsTrue(vector != null);
        System.String[] termList = vector.GetTerms();
        Assert.IsTrue(termList != null);
        Assert.IsTrue(termList.Length == 3);
        int[] freqList = vector.GetTermFrequencies();
        Assert.IsTrue(freqList != null);

        // Every merged term must appear in the source text with its original frequency.
        for (int idx = 0; idx < termList.Length; idx++)
        {
            Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(termList[idx]) != -1);
            Assert.IsTrue(DocHelper.FIELD_2_FREQS[idx] == freqList[idx]);
        }
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
public virtual void TestMerge()
{
    SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment);
    merger.Add(reader1);
    merger.Add(reader2);
    int mergedDocCount = merger.Merge();
    merger.CloseReaders();
    Assert.IsTrue(mergedDocCount == 2);

    // A SegmentReader must open cleanly against the freshly merged segment.
    SegmentReader mergedReader = SegmentReader.Get(new SegmentInfo(mergedSegment, mergedDocCount, mergedDir, false, true));
    Assert.IsTrue(mergedReader != null);
    Assert.IsTrue(mergedReader.NumDocs() == 2);

    Document firstDoc = mergedReader.Document(0);
    Assert.IsTrue(firstDoc != null);
    // The unstored fields must not survive into the merged stored documents.
    Assert.IsTrue(DocHelper.NumFields(firstDoc) == DocHelper.NumFields(doc1) - DocHelper.unstored.Count);
    Document secondDoc = mergedReader.Document(1);
    Assert.IsTrue(secondDoc != null);
    Assert.IsTrue(DocHelper.NumFields(secondDoc) == DocHelper.NumFields(doc2) - DocHelper.unstored.Count);

    TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "field"));
    Assert.IsTrue(termDocs != null);
    Assert.IsTrue(termDocs.Next() == true);

    System.Collections.Generic.ICollection<string> stored = mergedReader.GetFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
    Assert.IsTrue(stored != null);
    Assert.IsTrue(stored.Count == 4, "We do not have 4 fields that were indexed with term vector");

    TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsTrue(vector != null);
    System.String[] termList = vector.GetTerms();
    Assert.IsTrue(termList != null);
    Assert.IsTrue(termList.Length == 3);
    int[] freqList = vector.GetTermFrequencies();
    Assert.IsTrue(freqList != null);
    Assert.IsTrue(vector is TermPositionVector == true);

    // Every merged term must appear in the source text with its original frequency.
    for (int idx = 0; idx < termList.Length; idx++)
    {
        Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(termList[idx]) != -1);
        Assert.IsTrue(DocHelper.FIELD_2_FREQS[idx] == freqList[idx]);
    }

    TestSegmentReader.CheckNorms(mergedReader);
}
/// <summary> Writes the field, terms and frequencies of the given term vector.</summary>
/// <param name="vector">term vector whose field/term/frequency data is appended</param>
private void AddTermFreqVectorInternal(TermFreqVector vector)
{
    OpenField(vector.GetField());
    // Fetch the parallel arrays (and size) once, instead of re-requesting
    // them from the vector on every loop iteration as the old code did.
    System.String[] terms = vector.GetTerms();
    int[] freqs = vector.GetTermFrequencies();
    int size = vector.Size();
    for (int i = 0; i < size; i++)
    {
        AddTermInternal(terms[i], freqs[i]);
    }
    CloseField();
}
/// <summary> Add a complete document specified by all its term vectors. If document has no
/// term vectors, add value for tvx.
///
/// </summary>
/// <param name="vectors">per-field term vectors for one document; may be null
/// </param>
/// <throws> IOException </throws>
public void AddAllDocVectors(TermFreqVector[] vectors)
{
    OpenDocument();
    if (vectors != null)
    {
        for (int i = 0; i < vectors.Length; i++)
        {
            bool storePositionWithTermVector = false;
            bool storeOffsetWithTermVector = false;

            // BUGFIX: use a type test instead of catching InvalidCastException.
            // Exceptions-as-control-flow is slow, and the old catch could also
            // silently mask a genuine cast failure raised inside the loop body.
            TermPositionVector tpVector = vectors[i] as TermPositionVector;
            if (tpVector != null)
            {
                // Positions/offsets are stored only when the first term actually has them.
                if (tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null)
                {
                    storePositionWithTermVector = true;
                }
                if (tpVector.Size() > 0 && tpVector.GetOffsets(0) != null)
                {
                    storeOffsetWithTermVector = true;
                }

                FieldInfo fieldInfo = fieldInfos.FieldInfo(tpVector.GetField());
                OpenField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
                for (int j = 0; j < tpVector.Size(); j++)
                {
                    AddTermInternal(tpVector.GetTerms()[j], tpVector.GetTermFrequencies()[j], tpVector.GetTermPositions(j), tpVector.GetOffsets(j));
                }
                CloseField();
            }
            else
            {
                // Plain frequency vector: no positions or offsets to record.
                TermFreqVector tfVector = vectors[i];
                FieldInfo fieldInfo = fieldInfos.FieldInfo(tfVector.GetField());
                OpenField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
                for (int j = 0; j < tfVector.Size(); j++)
                {
                    AddTermInternal(tfVector.GetTerms()[j], tfVector.GetTermFrequencies()[j], null, null);
                }
                CloseField();
            }
        }
    }
    CloseDocument();
}
/// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
/// <throws> IOException </throws>
private void MergeVectors()
{
    TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
    try
    {
        // Walk every reader and copy the term vectors of each live document.
        for (int r = 0; r < readers.Count; r++)
        {
            Monodoc.Lucene.Net.Index.IndexReader reader = (Monodoc.Lucene.Net.Index.IndexReader) readers[r];
            int maxDoc = reader.MaxDoc();
            for (int docNum = 0; docNum < maxDoc; docNum++)
            {
                // skip deleted docs
                if (reader.IsDeleted(docNum))
                {
                    continue;
                }
                termVectorsWriter.OpenDocument();
                // get all term vectors
                TermFreqVector[] sourceTermVector = reader.GetTermFreqVectors(docNum);
                if (sourceTermVector != null)
                {
                    for (int f = 0; f < sourceTermVector.Length; f++)
                    {
                        // translate Field numbers
                        TermFreqVector termVector = sourceTermVector[f];
                        termVectorsWriter.OpenField(termVector.GetField());
                        System.String[] terms = termVector.GetTerms();
                        int[] freqs = termVector.GetTermFrequencies();
                        for (int t = 0; t < terms.Length; t++)
                        {
                            termVectorsWriter.AddTerm(terms[t], freqs[t]);
                        }
                    }
                    // NOTE(review): CloseDocument() runs only when the doc had term
                    // vectors, yet OpenDocument() runs for every live doc — confirm
                    // the writer tolerates a document opened but never closed.
                    termVectorsWriter.CloseDocument();
                }
            }
        }
    }
    finally
    {
        // Always close the writer, even if merging failed partway through.
        termVectorsWriter.Close();
    }
}
public virtual void TestTermVectors()
{
    TermFreqVector vector = reader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsTrue(vector != null);

    System.String[] termList = vector.GetTerms();
    int[] freqList = vector.GetTermFrequencies();
    Assert.IsTrue(termList != null && termList.Length == 3 && freqList != null && freqList.Length == 3);

    // Every stored term must occur in the source text with a positive frequency.
    for (int idx = 0; idx < termList.Length; idx++)
    {
        Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(termList[idx]) != -1);
        Assert.IsTrue(freqList[idx] > 0);
    }

    TermFreqVector[] allVectors = reader.GetTermFreqVectors(0);
    Assert.IsTrue(allVectors != null);
    Assert.IsTrue(allVectors.Length == 4, "We do not have 4 term freq vectors, we have: " + allVectors.Length);
}
/// <summary> Indexes four known documents and verifies that the stored term vectors
/// agree with TermDocs frequencies and a hand-computed frequency map.</summary>
public virtual void TestKnownSetOfDocuments()
{
    System.String test1 = "eating chocolate in a computer lab"; //6 terms
    System.String test2 = "computer in a computer lab"; //5 terms
    System.String test3 = "a chocolate lab grows old"; //5 terms
    System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
    // Expected term -> frequency map for test4.
    System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
    test4Map["chocolate"] = 3;
    test4Map["lab"] = 2;
    test4Map["eating"] = 1;
    test4Map["computer"] = 1;
    test4Map["with"] = 1;
    test4Map["a"] = 1;
    test4Map["colored"] = 1;
    test4Map["in"] = 1;
    test4Map["an"] = 1;
    test4Map["old"] = 1;

    Document testDoc1 = new Document();
    SetupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    SetupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    SetupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    SetupDoc(testDoc4, test4);

    Directory dir = new RAMDirectory();
    try
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
        Assert.IsTrue(writer != null);
        writer.AddDocument(testDoc1);
        writer.AddDocument(testDoc2);
        writer.AddDocument(testDoc3);
        writer.AddDocument(testDoc4);
        writer.Close();

        IndexSearcher knownSearcher = new IndexSearcher(dir);
        TermEnum termEnum = knownSearcher.reader.Terms();
        TermDocs termDocs = knownSearcher.reader.TermDocs();
        Similarity sim = knownSearcher.GetSimilarity();
        while (termEnum.Next() == true)
        {
            Term term = termEnum.Term();
            termDocs.Seek(term);
            while (termDocs.Next())
            {
                int docId = termDocs.Doc();
                int freq = termDocs.Freq();
                TermFreqVector vector = knownSearcher.reader.GetTermFreqVector(docId, "Field");
                // BUGFIX: assert non-null BEFORE dereferencing; the old code called
                // vector.GetTerms() first, so a null vector crashed with
                // NullReferenceException instead of failing the assertion.
                Assert.IsTrue(vector != null);
                float tf = sim.Tf(freq);
                float idf = sim.Idf(term, knownSearcher);
                //float qNorm = sim.queryNorm() //This is fine since we don't have stop words
                float lNorm = sim.LengthNorm("Field", vector.GetTerms().Length);
                //float coord = sim.coord()
                System.String[] vTerms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();
                // The stored vector frequency must match what TermDocs reports.
                for (int i = 0; i < vTerms.Length; i++)
                {
                    if (term.Text().Equals(vTerms[i]) == true)
                    {
                        Assert.IsTrue(freqs[i] == freq);
                    }
                }
            }
        }

        Query query = new TermQuery(new Term("Field", "chocolate"));
        Hits hits = knownSearcher.Search(query);
        //doc 3 should be the first hit b/c it is the shortest match
        Assert.IsTrue(hits.Length() == 3);
        float score = hits.Score(0);
        Assert.IsTrue(testDoc3.ToString().Equals(hits.Doc(0).ToString()));
        Assert.IsTrue(testDoc4.ToString().Equals(hits.Doc(1).ToString()));
        Assert.IsTrue(testDoc1.ToString().Equals(hits.Doc(2).ToString()));

        TermFreqVector vector2 = knownSearcher.reader.GetTermFreqVector(hits.Id(1), "Field");
        Assert.IsTrue(vector2 != null);
        System.String[] terms = vector2.GetTerms();
        int[] freqs2 = vector2.GetTermFrequencies();
        Assert.IsTrue(terms != null && terms.Length == 10);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            int freq = freqs2[i];
            Assert.IsTrue(test4.IndexOf(term) != -1);
            // BUGFIX: fetch and null-check the map entry before unboxing; the old
            // code unboxed first, so a missing term threw NullReferenceException
            // before its (too-late) null assertion could run.
            System.Object tmpFreqInt = test4Map[term];
            Assert.IsTrue(tmpFreqInt != null);
            System.Int32 freqInt = (System.Int32)tmpFreqInt;
            Assert.IsTrue(freqInt == freq);
        }
        knownSearcher.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
/// <summary> Indexes four known documents and verifies term vectors against TermDocs
/// frequencies, a hand-computed frequency map, and the TermVectorMapper paths.</summary>
public virtual void TestKnownSetOfDocuments()
{
    System.String test1 = "eating chocolate in a computer lab"; //6 terms
    System.String test2 = "computer in a computer lab"; //5 terms
    System.String test3 = "a chocolate lab grows old"; //5 terms
    System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
    // Expected term -> frequency map for test4.
    System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
    test4Map["chocolate"] = 3;
    test4Map["lab"] = 2;
    test4Map["eating"] = 1;
    test4Map["computer"] = 1;
    test4Map["with"] = 1;
    test4Map["a"] = 1;
    test4Map["colored"] = 1;
    test4Map["in"] = 1;
    test4Map["an"] = 1;
    test4Map["old"] = 1;

    Document testDoc1 = new Document();
    SetupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    SetupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    SetupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    SetupDoc(testDoc4, test4);

    Directory dir = new MockRAMDirectory();
    try
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        Assert.IsTrue(writer != null);
        writer.AddDocument(testDoc1);
        writer.AddDocument(testDoc2);
        writer.AddDocument(testDoc3);
        writer.AddDocument(testDoc4);
        writer.Close();

        IndexSearcher knownSearcher = new IndexSearcher(dir);
        TermEnum termEnum = knownSearcher.reader_ForNUnit.Terms();
        TermDocs termDocs = knownSearcher.reader_ForNUnit.TermDocs();
        Similarity sim = knownSearcher.GetSimilarity();
        while (termEnum.Next() == true)
        {
            Term term = termEnum.Term();
            termDocs.Seek(term);
            while (termDocs.Next())
            {
                int docId = termDocs.Doc();
                int freq = termDocs.Freq();
                TermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field");
                // BUGFIX: assert non-null BEFORE dereferencing; the old code called
                // vector.GetTerms() first, so a null vector crashed with
                // NullReferenceException instead of failing the assertion.
                Assert.IsTrue(vector != null);
                float tf = sim.Tf(freq);
                float idf = sim.Idf(term, knownSearcher);
                //float qNorm = sim.queryNorm() //This is fine since we don't have stop words
                float lNorm = sim.LengthNorm("field", vector.GetTerms().Length);
                //float coord = sim.coord()
                System.String[] vTerms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();
                // The stored vector frequency must match what TermDocs reports.
                for (int i = 0; i < vTerms.Length; i++)
                {
                    if (term.Text().Equals(vTerms[i]))
                    {
                        Assert.IsTrue(freqs[i] == freq);
                    }
                }
            }
        }

        Query query = new TermQuery(new Term("field", "chocolate"));
        ScoreDoc[] hits = knownSearcher.Search(query, null, 1000).scoreDocs;
        //doc 3 should be the first hit b/c it is the shortest match
        Assert.IsTrue(hits.Length == 3);
        float score = hits[0].score;
        Assert.IsTrue(hits[0].doc == 2);
        Assert.IsTrue(hits[1].doc == 3);
        Assert.IsTrue(hits[2].doc == 0);

        TermFreqVector vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, "field");
        Assert.IsTrue(vector2 != null);
        System.String[] terms = vector2.GetTerms();
        int[] freqs2 = vector2.GetTermFrequencies();
        Assert.IsTrue(terms != null && terms.Length == 10);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            int freq = freqs2[i];
            Assert.IsTrue(test4.IndexOf(term) != -1);
            // BUGFIX: check for a missing key with a null test instead of using
            // try/catch around the unboxing cast as control flow.
            System.Object rawFreq = test4Map[term];
            Assert.IsTrue(rawFreq != null);
            System.Int32 freqInt = (System.Int32)rawFreq;
            Assert.IsTrue(freqInt == freq);
        }

        SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
        knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, mapper);
        System.Collections.Generic.SortedDictionary<object, object> vectorEntrySet = mapper.GetTermVectorEntrySet();
        Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10);
        TermVectorEntry last = null;
        foreach (TermVectorEntry tve in vectorEntrySet.Keys)
        {
            if (tve != null && last != null)
            {
                Assert.IsTrue(last.GetFrequency() >= tve.GetFrequency(), "terms are not properly sorted");
                System.Int32 expectedFreq = (System.Int32)test4Map[tve.GetTerm()];
                //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
                Assert.IsTrue(tve.GetFrequency() == 2 * expectedFreq, "Frequency is not correct:");
            }
            last = tve;
        }

        FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
        knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, fieldMapper);
        System.Collections.IDictionary map = fieldMapper.GetFieldToTerms();
        Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2);
        vectorEntrySet = (System.Collections.Generic.SortedDictionary<Object, Object>)map["field"];
        Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be");
        Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10);
        knownSearcher.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
/// <summary> Asserts that two per-document arrays of term freq vectors are identical:
/// same fields, terms, frequencies, and — for position vectors — the same
/// positions and offsets.</summary>
/// <param name="d1">expected vectors (may be null)</param>
/// <param name="d2">actual vectors (must match d1's null-ness and contents)</param>
public static void VerifyEquals(TermFreqVector[] d1, TermFreqVector[] d2)
{
    if (d1 == null)
    {
        Assert.IsTrue(d2 == null);
        return;
    }
    Assert.IsTrue(d2 != null);
    Assert.AreEqual(d1.Length, d2.Length);
    for (int i = 0; i < d1.Length; i++)
    {
        TermFreqVector v1 = d1[i];
        TermFreqVector v2 = d2[i];
        if (v1 == null || v2 == null)
        {
            System.Console.Out.WriteLine("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.Length);
            // BUGFIX: fail cleanly instead of falling through to v1.Size() and
            // crashing with NullReferenceException. Two null vectors compare
            // equal; a null/non-null pair is a mismatch.
            Assert.IsTrue(v1 == null && v2 == null, "term freq vector mismatch: one is null, the other is not");
            continue;
        }
        Assert.AreEqual(v1.Size(), v2.Size());
        int numTerms = v1.Size();
        System.String[] terms1 = v1.GetTerms();
        System.String[] terms2 = v2.GetTerms();
        int[] freq1 = v1.GetTermFrequencies();
        int[] freq2 = v2.GetTermFrequencies();
        for (int j = 0; j < numTerms; j++)
        {
            Assert.AreEqual(terms1[j], terms2[j]);
            Assert.AreEqual(freq1[j], freq2[j]);
        }
        if (v1 is TermPositionVector)
        {
            Assert.IsTrue(v2 is TermPositionVector);
            TermPositionVector tpv1 = (TermPositionVector)v1;
            TermPositionVector tpv2 = (TermPositionVector)v2;
            for (int j = 0; j < numTerms; j++)
            {
                int[] pos1 = tpv1.GetTermPositions(j);
                int[] pos2 = tpv2.GetTermPositions(j);
                Assert.AreEqual(pos1.Length, pos2.Length);
                TermVectorOffsetInfo[] offsets1 = tpv1.GetOffsets(j);
                TermVectorOffsetInfo[] offsets2 = tpv2.GetOffsets(j);
                // Offsets must be present in both vectors or absent in both.
                if (offsets1 == null)
                {
                    Assert.IsTrue(offsets2 == null);
                }
                else
                {
                    Assert.IsTrue(offsets2 != null);
                }
                for (int k = 0; k < pos1.Length; k++)
                {
                    Assert.AreEqual(pos1[k], pos2[k]);
                    if (offsets1 != null)
                    {
                        Assert.AreEqual(offsets1[k].GetStartOffset(), offsets2[k].GetStartOffset());
                        Assert.AreEqual(offsets1[k].GetEndOffset(), offsets2[k].GetEndOffset());
                    }
                }
            }
        }
    }
}
/// <summary> Writes the field, terms and frequencies of the given term vector.</summary>
/// <param name="vector">term vector whose field/term/frequency data is appended</param>
private void AddTermFreqVectorInternal(TermFreqVector vector)
{
    OpenField(vector.GetField());
    // Fetch the parallel arrays (and size) once, instead of re-requesting
    // them from the vector on every loop iteration as the old code did.
    System.String[] terms = vector.GetTerms();
    int[] freqs = vector.GetTermFrequencies();
    int size = vector.Size();
    for (int i = 0; i < size; i++)
    {
        AddTermInternal(terms[i], freqs[i]);
    }
    CloseField();
}