/// <summary>
/// Creates the form for a single field: shows the field name in the label and fills the
/// list view with one row per term (frequency, term text) of the given term vector.
/// </summary>
/// <param name="fieldName">Text displayed in the field label.</param>
/// <param name="tfv">Term vector supplying the terms and their frequencies.</param>
public TermVector(string fieldName, TermFreqVector tfv)
{
    //
    // Required for Windows Form Designer support
    //
    InitializeComponent();

    lblField.Text = fieldName;

    // Pair every term with its frequency so both can be sorted together.
    IntPair[] pairs = new IntPair[tfv.Size()];
    String[] termTexts = tfv.GetTerms();
    int[] termCounts = tfv.GetTermFrequencies();
    int index = 0;
    while (index < termTexts.Length)
    {
        pairs[index] = new IntPair(termCounts[index], termTexts[index]);
        index++;
    }

    // Ordering is whatever IntPair.PairComparator(false, true) defines.
    Array.Sort(pairs, new IntPair.PairComparator(false, true));

    // Suspend repainting while the rows are added.
    listViewTVF.BeginUpdate();
    foreach (IntPair pair in pairs)
    {
        string[] columns = new string[] { pair.cnt.ToString(), pair.text };
        listViewTVF.Items.Add(new ListViewItem(columns));
    }
    listViewTVF.EndUpdate();
}
/// <summary>
/// Creates the form for a single field: shows the field name in the label and fills the
/// list view with one row per term (frequency, term text), ordered by term text.
/// </summary>
/// <param name="fieldName">Text displayed in the field label.</param>
/// <param name="tfv">Term vector supplying the terms and their frequencies.</param>
public TermVector(string fieldName, TermFreqVector tfv)
{
    //
    // Required for Windows Form Designer support
    //
    InitializeComponent();

    lblField.Text = fieldName;

    List<TermFrequency> tvs = new List<TermFrequency>(tfv.Size());
    String[] terms = tfv.GetTerms();
    int[] freqs = tfv.GetTermFrequencies();
    for (int i = 0; i < terms.Length; i++)
    {
        tvs.Add(new TermFrequency(terms[i], freqs[i]));
    }

    // Suspend repainting while the rows are added.
    listViewTVF.BeginUpdate();
    // OrderBy does not sort the list in place; it returns a new ordered sequence,
    // so that sequence is what has to be enumerated.
    foreach (TermFrequency tf in tvs.OrderBy(p => p.Term))
    {
        ListViewItem item = new ListViewItem(new string[] { tf.Frequency.ToString(), tf.Term });
        listViewTVF.Items.Add(item);
    }
    listViewTVF.EndUpdate();
}
/// <summary>
/// Creates the form for a single field: shows the field name in the label and fills the
/// list view with one row per term (frequency, term text), ordered by term text.
/// </summary>
/// <param name="fieldName">Text displayed in the field label.</param>
/// <param name="tfv">Term vector supplying the terms and their frequencies.</param>
public TermVector(string fieldName, TermFreqVector tfv)
{
    //
    // Required for Windows Form Designer support
    //
    InitializeComponent();

    lblField.Text = fieldName;

    List<TermFrequency> tvs = new List<TermFrequency>(tfv.Size());
    String[] terms = tfv.GetTerms();
    int[] freqs = tfv.GetTermFrequencies();
    for (int i = 0; i < terms.Length; i++)
    {
        tvs.Add(new TermFrequency(terms[i], freqs[i]));
    }

    // OrderBy leaves its source untouched and returns a new sequence, so the
    // sorted result has to be captured before it is displayed.
    tvs = tvs.OrderBy(p => p.Term).ToList();

    // Suspend repainting while the rows are added.
    listViewTVF.BeginUpdate();
    foreach (TermFrequency tf in tvs)
    {
        ListViewItem item = new ListViewItem(new string[] { tf.Frequency.ToString(), tf.Term });
        listViewTVF.Items.Add(item);
    }
    listViewTVF.EndUpdate();
}
/// <summary>
/// Writes one term vector as a field: opens the field named by the vector, adds every
/// term together with its frequency, then closes the field.
/// </summary>
/// <param name="vector">Term vector whose terms and frequencies are added.</param>
private void AddTermFreqVectorInternal(TermFreqVector vector)
{
    OpenField(vector.GetField());

    // Query the vector once instead of calling its accessors on every iteration.
    int size = vector.Size();
    System.String[] terms = vector.GetTerms();
    int[] freqs = vector.GetTermFrequencies();
    for (int i = 0; i < size; i++)
    {
        AddTermInternal(terms[i], freqs[i]);
    }

    CloseField();
}
/// <summary> Add a complete document specified by all its term vectors. If document has no
/// term vectors, add value for tvx.
/// 
/// </summary>
/// <param name="vectors">the document's term vectors; when null the document is only
/// opened and closed. Vectors that implement TermPositionVector also have their
/// positions and offsets written.
/// </param>
/// <throws>  IOException </throws>
public void AddAllDocVectors(TermFreqVector[] vectors)
{
    OpenDocument();

    if (vectors != null)
    {
        for (int i = 0; i < vectors.Length; i++)
        {
            TermFreqVector vector = vectors[i];

            // Decide with a type test rather than a cast guarded by a catch of
            // InvalidCastException: that catch would also trap an unrelated
            // InvalidCastException raised while the field is being written and
            // then open the same field a second time.
            TermPositionVector tpVector = vector as TermPositionVector;

            bool storePositionWithTermVector = false;
            bool storeOffsetWithTermVector = false;
            if (tpVector != null && tpVector.Size() > 0)
            {
                // The first term tells whether positions / offsets are present.
                storePositionWithTermVector = tpVector.GetTermPositions(0) != null;
                storeOffsetWithTermVector = tpVector.GetOffsets(0) != null;
            }

            FieldInfo fieldInfo = fieldInfos.FieldInfo(vector.GetField());
            OpenField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

            // Query the vector once instead of on every iteration.
            int size = vector.Size();
            System.String[] terms = vector.GetTerms();
            int[] freqs = vector.GetTermFrequencies();
            for (int j = 0; j < size; j++)
            {
                // Plain frequency vectors carry no positions or offsets.
                int[] positions = tpVector != null ? tpVector.GetTermPositions(j) : null;
                TermVectorOffsetInfo[] offsets = tpVector != null ? tpVector.GetOffsets(j) : null;
                AddTermInternal(terms[j], freqs[j], positions, offsets);
            }

            CloseField();
        }
    }

    CloseDocument();
}
/// <summary>
/// Asserts that two arrays of term vectors are equivalent: both null or both non-null,
/// same length, and pairwise equal sizes, terms and frequencies. When a vector from
/// <paramref name="d1"/> is a TermPositionVector, its counterpart must be one too and
/// their positions and offsets are compared as well.
/// </summary>
/// <param name="d1">First array of term vectors; may be null.</param>
/// <param name="d2">Second array of term vectors; must be null exactly when d1 is.</param>
public static void VerifyEquals(TermFreqVector[] d1, TermFreqVector[] d2)
{
    if (d1 == null)
    {
        Assert.IsTrue(d2 == null);
        return;
    }

    Assert.IsTrue(d2 != null);
    Assert.AreEqual(d1.Length, d2.Length);

    for (int i = 0; i < d1.Length; i++)
    {
        TermFreqVector v1 = d1[i];
        TermFreqVector v2 = d2[i];

        // Diagnostic output only; a null vector still fails on the dereference below.
        if (v1 == null || v2 == null)
        {
            System.Console.Out.WriteLine("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.Length);
        }

        Assert.AreEqual(v1.Size(), v2.Size());
        int termCount = v1.Size();
        System.String[] firstTerms = v1.GetTerms();
        System.String[] secondTerms = v2.GetTerms();
        int[] firstFreqs = v1.GetTermFrequencies();
        int[] secondFreqs = v2.GetTermFrequencies();

        for (int j = 0; j < termCount; j++)
        {
            // Only go through the assertion when the terms actually differ.
            if (!firstTerms[j].Equals(secondTerms[j]))
            {
                Assert.AreEqual(firstTerms[j], secondTerms[j]);
            }
            Assert.AreEqual(firstFreqs[j], secondFreqs[j]);
        }

        if (!(v1 is TermPositionVector))
        {
            continue;
        }

        Assert.IsTrue(v2 is TermPositionVector);
        VerifyPositionsAndOffsets((TermPositionVector)v1, (TermPositionVector)v2, termCount);
    }
}

/// <summary>
/// Asserts that two position vectors hold the same positions for each of the first
/// <paramref name="termCount"/> terms and, where the first vector has offsets, the same
/// start and end offsets.
/// </summary>
private static void VerifyPositionsAndOffsets(TermPositionVector tpv1, TermPositionVector tpv2, int termCount)
{
    for (int j = 0; j < termCount; j++)
    {
        int[] firstPositions = tpv1.GetTermPositions(j);
        int[] secondPositions = tpv2.GetTermPositions(j);
        Assert.AreEqual(firstPositions.Length, secondPositions.Length);

        TermVectorOffsetInfo[] firstOffsets = tpv1.GetOffsets(j);
        TermVectorOffsetInfo[] secondOffsets = tpv2.GetOffsets(j);

        // Offsets must be present in both vectors or absent from both.
        Assert.IsTrue((firstOffsets == null) == (secondOffsets == null));

        for (int k = 0; k < firstPositions.Length; k++)
        {
            Assert.AreEqual(firstPositions[k], secondPositions[k]);
            if (firstOffsets == null)
            {
                continue;
            }
            Assert.AreEqual(firstOffsets[k].GetStartOffset(), secondOffsets[k].GetStartOffset());
            Assert.AreEqual(firstOffsets[k].GetEndOffset(), secondOffsets[k].GetEndOffset());
        }
    }
}
/// <summary>
/// Finds posts similar to the post with the given id. The post is looked up through a
/// "postid:" query, the terms of its "exact" term vector are OR-ed into a boolean query,
/// and the hits of that query (excluding the post itself) are returned.
/// </summary>
/// <param name="postid">Id of the post to find similar posts for.</param>
/// <param name="itemsToReturn">Maximum number of posts to return; zero or less yields an empty list.</param>
/// <returns>
/// The similar posts in hit order. The list is empty when the post is not indexed, has no
/// "exact" term vector, or the search fails (failures are traced, not rethrown).
/// </returns>
public List<Post> Similar(int postid, int itemsToReturn)
{
    List<Post> results = new List<Post>();
    IndexSearcher searcher = null;
    IndexReader reader = null;

    if (rd == null)
    {
        BuildIndex();
    }

    // Acquired outside the try so the finally never releases a lock that was not taken.
    lck.AcquireReaderLock(ReaderTimeOut);
    try
    {
        Analyzer analyzer = GetAnalyzer();
        QueryParser parser = GetQueryParser(analyzer);
        parser.SetDefaultOperator(QueryParser.AND_OPERATOR);
        Query q = parser.Parse("postid:" + postid);

        searcher = new IndexSearcher(rd, true);

        //TODO
#pragma warning disable CS0618 // Type or member is obsolete
        Hits hits = searcher.Search(q);
#pragma warning restore CS0618 // Type or member is obsolete

        int docId = -1;
        if (hits != null && hits.Length() > 0)
        {
            docId = hits.Id(0);
        }

        if (docId > -1)
        {
            reader = IndexReader.Open(rd, true);
            TermFreqVector tfv = reader.GetTermFreqVector(docId, "exact");

            // Without a term vector there is nothing to compare against; handle this
            // explicitly rather than through a swallowed NullReferenceException.
            if (tfv != null)
            {
                BooleanQuery booleanQuery = new BooleanQuery();

                // Query the vector once instead of on every iteration.
                int termCount = tfv.Size();
                string[] terms = tfv.GetTerms();
                for (int j = 0; j < termCount; j++)
                {
                    TermQuery tq = new TermQuery(new Term("exact", terms[j]));
                    booleanQuery.Add(tq, BooleanClause.Occur.SHOULD);
                }

                //TODO
#pragma warning disable CS0618 // Type or member is obsolete
                Hits similarhits = searcher.Search(booleanQuery, Sort.RELEVANCE);
#pragma warning restore CS0618 // Type or member is obsolete

                // The limit is tested before each hit so a non-positive limit adds nothing.
                for (int i = 0; i < similarhits.Length() && results.Count < itemsToReturn; i++)
                {
                    // Skip the source post; its document is never loaded.
                    if (similarhits.Id(i) != docId)
                    {
                        results.Add(CreateFromDocument(similarhits.Doc(i), analyzer, null));
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: callers still get whatever was collected, but the failure
        // is traced instead of disappearing silently.
        System.Diagnostics.Trace.TraceError("Similar(" + postid + ") failed: " + ex);
    }
    finally
    {
        if (searcher != null)
        {
            searcher.Close();
        }

        if (reader != null)
        {
            reader.Close();
        }

        lck.ReleaseReaderLock();
    }

    return results;
}