コード例 #1
0
        /// <summary>
        /// Builds the term-vector view for one field: shows the field name and lists
        /// every term of the vector together with its frequency.
        /// </summary>
        /// <param name="fieldName">Text displayed in the field label.</param>
        /// <param name="tfv">Term vector supplying the terms and their frequencies.</param>
        public TermVector(string fieldName, TermFreqVector tfv)
        {
            // Designer-generated controls must be created before they are used below.
            InitializeComponent();

            lblField.Text = fieldName;

            // Pair each term with its frequency so both can be sorted together.
            IntPair[] pairs      = new IntPair[tfv.Size()];
            String[]  termTexts  = tfv.GetTerms();
            int[]     termCounts = tfv.GetTermFrequencies();
            for (int idx = 0; idx < termTexts.Length; idx++)
            {
                pairs[idx] = new IntPair(termCounts[idx], termTexts[idx]);
            }

            // NOTE(review): the meaning of the (false, true) flags is defined by
            // IntPair.PairComparator, which is not visible here - confirm the ordering.
            Array.Sort(pairs, new IntPair.PairComparator(false, true));

            // Suspend repainting while the rows are inserted.
            listViewTVF.BeginUpdate();

            foreach (IntPair pair in pairs)
            {
                string[] columns = new string[] { pair.cnt.ToString(), pair.text };
                listViewTVF.Items.Add(new ListViewItem(columns));
            }

            listViewTVF.EndUpdate();
        }
コード例 #2
0
ファイル: TermVector.cs プロジェクト: mammo/LukeSharp
        /// <summary>
        /// Builds the term-vector view for one field: shows the field name and lists
        /// every term of the vector with its frequency, ordered by term.
        /// </summary>
        /// <param name="fieldName">Text displayed in the field label.</param>
        /// <param name="tfv">Term vector supplying the terms and their frequencies.</param>
        public TermVector(string fieldName, TermFreqVector tfv)
        {
            //
            // Required for Windows Form Designer support
            //
            InitializeComponent();

            lblField.Text = fieldName;

            List<TermFrequency> tvs = new List<TermFrequency>(tfv.Size());

            String[] terms = tfv.GetTerms();
            int[] freqs = tfv.GetTermFrequencies();
            for (int i = 0; i < terms.Length; i++)
            {
                tvs.Add(new TermFrequency(terms[i], freqs[i]));
            }

            // Suspend repainting while the rows are inserted.
            listViewTVF.BeginUpdate();

            // OrderBy does not sort the list in place; it returns a new ordered sequence.
            // The previous code discarded that result, so rows appeared unsorted.
            // Enumerating the ordered sequence directly applies the intended ordering.
            foreach (TermFrequency tf in tvs.OrderBy(p => p.Term))
            {
                ListViewItem item = new ListViewItem(new string[] { tf.Frequency.ToString(), tf.Term });
                listViewTVF.Items.Add(item);
            }

            listViewTVF.EndUpdate();
        }
コード例 #3
0
        /// <summary>
        /// Builds the term-vector view for one field: shows the field name and lists
        /// every term of the vector with its frequency, ordered by term.
        /// </summary>
        /// <param name="fieldName">Text displayed in the field label.</param>
        /// <param name="tfv">Term vector supplying the terms and their frequencies.</param>
        public TermVector(string fieldName, TermFreqVector tfv)
        {
            //
            // Required for Windows Form Designer support
            //
            InitializeComponent();

            lblField.Text = fieldName;

            List <TermFrequency> tvs = new List <TermFrequency>(tfv.Size());

            String[] terms = tfv.GetTerms();
            int[]    freqs = tfv.GetTermFrequencies();
            for (int i = 0; i < terms.Length; i++)
            {
                tvs.Add(new TermFrequency(terms[i], freqs[i]));
            }

            // Suspend repainting while the rows are inserted.
            listViewTVF.BeginUpdate();

            // OrderBy returns a new ordered sequence and leaves the list untouched.
            // The previous code ignored the return value, so nothing was sorted.
            // Iterate the ordered sequence so the rows are shown in term order.
            foreach (TermFrequency tf in tvs.OrderBy(p => p.Term))
            {
                ListViewItem item = new ListViewItem(new string[] { tf.Frequency.ToString(), tf.Term });
                listViewTVF.Items.Add(item);
            }

            listViewTVF.EndUpdate();
        }
コード例 #4
0
 /// <summary>
 /// Writes one term vector as a single field: opens the field named by the vector,
 /// adds each term with its frequency, then closes the field.
 /// </summary>
 /// <param name="vector">Term vector whose terms and frequencies are written.</param>
 private void  AddTermFreqVectorInternal(TermFreqVector vector)
 {
     OpenField(vector.GetField());

     // Fetch the size and both arrays once instead of on every iteration.
     // Assumes the getters return the same data on each call - TODO confirm for custom implementations.
     int      size  = vector.Size();
     string[] terms = vector.GetTerms();
     int[]    freqs = vector.GetTermFrequencies();
     for (int i = 0; i < size; i++)
     {
         AddTermInternal(terms[i], freqs[i]);
     }

     CloseField();
 }
コード例 #5
0
        /// <summary> Add a complete document specified by all its term vectors. If document has no
        /// term vectors, add value for tvx.
        /// </summary>
        /// <param name="vectors">
        /// Term vectors of the document, one per field. May be null, in which case the
        /// document is opened and closed without writing any field.
        /// </param>
        /// <exception cref="System.IO.IOException">
        /// Declared by the original documentation; presumably raised by the underlying
        /// open/add/close calls - confirm against their implementations.
        /// </exception>
        public void  AddAllDocVectors(TermFreqVector[] vectors)
        {
            OpenDocument();

            if (vectors != null)
            {
                for (int i = 0; i < vectors.Length; i++)
                {
                    TermFreqVector vector = vectors[i];

                    // A type test replaces the former cast + catch (InvalidCastException).
                    // The old form would also catch an InvalidCastException thrown by any
                    // other call inside the try block and then write the field a second time.
                    TermPositionVector tpVector = vector as TermPositionVector;

                    // Positions/offsets are stored only when the vector can supply them;
                    // the first term is probed to find out.
                    bool storePositionWithTermVector = false;
                    bool storeOffsetWithTermVector   = false;
                    if (tpVector != null && tpVector.Size() > 0)
                    {
                        storePositionWithTermVector = tpVector.GetTermPositions(0) != null;
                        storeOffsetWithTermVector   = tpVector.GetOffsets(0) != null;
                    }

                    FieldInfo fieldInfo = fieldInfos.FieldInfo(vector.GetField());
                    OpenField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

                    // Fetch the term and frequency arrays once per vector rather than per term.
                    string[] terms = vector.GetTerms();
                    int[]    freqs = vector.GetTermFrequencies();
                    for (int j = 0; j < vector.Size(); j++)
                    {
                        if (tpVector != null)
                        {
                            AddTermInternal(terms[j], freqs[j], tpVector.GetTermPositions(j), tpVector.GetOffsets(j));
                        }
                        else
                        {
                            // Plain frequency vector: no positions or offsets available.
                            AddTermInternal(terms[j], freqs[j], null, null);
                        }
                    }

                    CloseField();
                }
            }

            CloseDocument();
        }
コード例 #6
0
        /// <summary>
        /// Asserts that two arrays of term vectors are equivalent: same length and, per
        /// entry, the same terms and frequencies. When the first vector of a pair is a
        /// <c>TermPositionVector</c>, the second must be one too, and their positions and
        /// (when present) offsets are compared as well.
        /// </summary>
        /// <param name="d1">First set of term vectors; may be null.</param>
        /// <param name="d2">Second set of term vectors; must be null exactly when <paramref name="d1"/> is.</param>
        public static void  VerifyEquals(TermFreqVector[] d1, TermFreqVector[] d2)
        {
            // A missing set only matches another missing set.
            if (d1 == null)
            {
                Assert.IsTrue(d2 == null);
                return;
            }
            Assert.IsTrue(d2 != null);
            Assert.AreEqual(d1.Length, d2.Length);

            for (int vecIdx = 0; vecIdx < d1.Length; vecIdx++)
            {
                TermFreqVector first  = d1[vecIdx];
                TermFreqVector second = d2[vecIdx];

                // Diagnostic output only: a null entry still fails on the Size() call below.
                if (first == null || second == null)
                {
                    System.Console.Out.WriteLine("v1=" + first + " v2=" + second + " i=" + vecIdx + " of " + d1.Length);
                }

                Assert.AreEqual(first.Size(), second.Size());
                int             termCount   = first.Size();
                System.String[] firstTerms  = first.GetTerms();
                System.String[] secondTerms = second.GetTerms();
                int[]           firstFreqs  = first.GetTermFrequencies();
                int[]           secondFreqs = second.GetTermFrequencies();

                // Terms and frequencies must agree position by position.
                for (int t = 0; t < termCount; t++)
                {
                    if (!firstTerms[t].Equals(secondTerms[t]))
                    {
                        Assert.AreEqual(firstTerms[t], secondTerms[t]);
                    }
                    Assert.AreEqual(firstFreqs[t], secondFreqs[t]);
                }

                // Positional checks apply only when the first vector carries positions.
                if (!(first is TermPositionVector))
                {
                    continue;
                }

                Assert.IsTrue(second is TermPositionVector);
                TermPositionVector firstPos  = (TermPositionVector)first;
                TermPositionVector secondPos = (TermPositionVector)second;

                for (int t = 0; t < termCount; t++)
                {
                    int[] positionsA = firstPos.GetTermPositions(t);
                    int[] positionsB = secondPos.GetTermPositions(t);
                    Assert.AreEqual(positionsA.Length, positionsB.Length);

                    TermVectorOffsetInfo[] offsetsA = firstPos.GetOffsets(t);
                    TermVectorOffsetInfo[] offsetsB = secondPos.GetOffsets(t);

                    // Offsets must be present on both sides or absent on both sides.
                    Assert.IsTrue((offsetsA == null) == (offsetsB == null));

                    for (int p = 0; p < positionsA.Length; p++)
                    {
                        Assert.AreEqual(positionsA[p], positionsB[p]);
                        if (offsetsA != null)
                        {
                            Assert.AreEqual(offsetsA[p].GetStartOffset(), offsetsB[p].GetStartOffset());
                            Assert.AreEqual(offsetsA[p].GetEndOffset(), offsetsB[p].GetEndOffset());
                        }
                    }
                }
            }
        }
コード例 #7
0
ファイル: TermVectorsWriter.cs プロジェクト: emtees/old-code
		/// <summary>
		/// Writes one term vector as a single field: opens the field named by the vector,
		/// adds each term with its frequency, then closes the field.
		/// </summary>
		/// <param name="vector">Term vector whose terms and frequencies are written.</param>
		private void  AddTermFreqVectorInternal(TermFreqVector vector)
		{
			OpenField(vector.GetField());

			// Fetch the size and both arrays once instead of on every iteration.
			// Assumes the getters return the same data on each call - TODO confirm for custom implementations.
			int size = vector.Size();
			string[] terms = vector.GetTerms();
			int[] freqs = vector.GetTermFrequencies();
			for (int i = 0; i < size; i++)
			{
				AddTermInternal(terms[i], freqs[i]);
			}

			CloseField();
		}
コード例 #8
0
        /// <summary>
        /// Finds posts similar to the given post. The post is looked up by its
        /// <c>postid</c> field, an OR-query is built from the terms of its "exact" term
        /// vector, and the matching documents (excluding the post itself) are returned.
        /// </summary>
        /// <param name="postid">Id of the post to find similar posts for.</param>
        /// <param name="itemsToReturn">Result count at which collection stops.</param>
        /// <returns>
        /// The similar posts found. Empty when the post is not indexed, has no "exact"
        /// term vector, or the search fails (failures are logged, not rethrown).
        /// </returns>
        public List <Post> Similar(int postid, int itemsToReturn)
        {
            List <Post> TList = new List <Post>();

            int docId = -1;

            IndexSearcher searcher = null;
            IndexReader   reader   = null;

            // NOTE(review): rd presumably is the index directory, built lazily - confirm.
            if (rd == null)
            {
                BuildIndex();
            }

            lck.AcquireReaderLock(ReaderTimeOut);
            try
            {
                Analyzer    analyzer = GetAnalyzer();
                QueryParser parser   = GetQueryParser(analyzer);
                parser.SetDefaultOperator(QueryParser.AND_OPERATOR);

                Query q = parser.Parse("postid:" + postid);

                searcher = new IndexSearcher(rd, true);
                //TODO
#pragma warning disable CS0618 // Type or member is obsolete
                Hits hits = searcher.Search(q);
#pragma warning restore CS0618 // Type or member is obsolete
                if (hits != null && hits.Length() > 0)
                {
                    docId = hits.Id(0);
                }

                if (docId > -1)
                {
                    reader = IndexReader.Open(rd, true);

                    TermFreqVector tfv = reader.GetTermFreqVector(docId, "exact");

                    // Previously a null vector caused a NullReferenceException that the
                    // empty catch block hid; handle it explicitly instead.
                    if (tfv != null)
                    {
                        // Fetch the term array once rather than on every iteration.
                        string[]     terms        = tfv.GetTerms();
                        BooleanQuery booleanQuery = new BooleanQuery();
                        for (int j = 0; j < tfv.Size(); j++)
                        {
                            TermQuery tq = new TermQuery(new Term("exact", terms[j]));
                            booleanQuery.Add(tq, BooleanClause.Occur.SHOULD);
                        }
                        //TODO
#pragma warning disable CS0618 // Type or member is obsolete
                        Hits similarhits = searcher.Search(booleanQuery, Sort.RELEVANCE);
#pragma warning restore CS0618 // Type or member is obsolete

                        for (int i = 0; i < similarhits.Length(); i++)
                        {
                            Document doc = similarhits.Doc(i);
                            // Skip the post the query was built from.
                            if (similarhits.Id(i) != docId)
                            {
                                TList.Add(CreateFromDocument(doc, analyzer, null));
                            }

                            if (TList.Count >= itemsToReturn)
                            {
                                break;
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort lookup: keep returning whatever was collected, but leave a
                // trace of the failure instead of discarding it silently.
                System.Diagnostics.Trace.TraceError("Similar(" + postid + ") failed: " + ex);
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Close();
                }

                if (reader != null)
                {
                    reader.Close();
                }

                lck.ReleaseReaderLock();
            }

            return(TList);
        }