Beispiel #1
0
        /// <summary>
        /// Searches the index for the given words and assigns a TF-IDF weight vector
        /// to every article that is found.
        /// </summary>
        /// <remarks>
        /// The vectors have one entry per element of <c>Dictionary.dictionary</c>, which is
        /// (re)built from the search results via <c>Dictionary.buildDictionary2</c>.
        /// For each article the sibling methods <c>countTermsFrequencies</c>,
        /// <c>countYearFrequencies</c> and <c>countTitleFrequencies</c> are invoked first;
        /// they are presumably what fills <c>art.TF</c> (not visible here - confirm).
        /// <c>art.TF</c> is then read to derive document frequencies, and
        /// <c>art.TF_IDF</c> is overwritten with <c>TF[i] * IDF[i]</c>.
        /// </remarks>
        /// <param name="searchWords">
        /// Words to search for. They are joined into one query string, each word followed
        /// by a single space, and are also forwarded to the frequency-counting helpers.
        /// </param>
        /// <returns>
        /// The list returned by <c>SearchIndex.FindArticles</c>, with <c>TF_IDF</c> set on
        /// every element.
        /// </returns>
        public List<Article> doList(List<string> searchWords)
        {
            // IDF assigned to a term that occurs in every article. log10(1) would be 0;
            // presumably a small positive value is used so such terms keep some weight.
            const double IdfForTermInEveryArticle = 0.02;

            // Build "word1 word2 ... " (trailing space kept on purpose so the query text
            // handed to the index is exactly what it has always been).
            var queryBuilder = new System.Text.StringBuilder();
            foreach (string word in searchWords)
            {
                queryBuilder.Append(word).Append(' ');
            }

            List<Article> articles = SearchIndex.FindArticles(queryBuilder.ToString());

            Dictionary.buildDictionary2(articles);

            int numOfWords = Dictionary.dictionary.Count;

            // Number of articles in which each dictionary term has TF > 0.
            // New arrays are zero-initialised, so no explicit reset is needed.
            int[] documentFrequency = new int[numOfWords];

            foreach (Article art in articles)
            {
                countTermsFrequencies(art, searchWords);
                countYearFrequencies(art);
                countTitleFrequencies(art, searchWords);
                System.Diagnostics.Debug.WriteLine("doc TF: " + String.Join(", ", art.TF));

                for (int i = 0; i < numOfWords; i++)
                {
                    if (art.TF[i] > 0)
                    {
                        documentFrequency[i]++;
                    }
                }
            }

            // Terms that occur in no article keep the default IDF of 0.
            double[] IDF = new double[numOfWords];

            for (int i = 0; i < numOfWords; i++)
            {
                if (documentFrequency[i] < 1)
                {
                    continue;
                }

                // Integer comparison replaces the former floating-point "ratio == 1" test;
                // the two are equivalent because the ratio is exactly 1 only when the
                // counts are equal.
                if (documentFrequency[i] == articles.Count)
                {
                    IDF[i] = IdfForTermInEveryArticle;
                }
                else
                {
                    IDF[i] = Math.Log10((double)articles.Count / documentFrequency[i]);
                }
            }

            System.Diagnostics.Debug.WriteLine("--------------------------");
            System.Diagnostics.Debug.WriteLine("word in how many texts: " + String.Join(", ", documentFrequency));
            System.Diagnostics.Debug.WriteLine("IDF: " + String.Join(", ", IDF));

            // Second pass: IDF depends on all articles, so the weights can only be
            // computed once every article has been counted.
            foreach (Article art in articles)
            {
                art.TF_IDF = new double[numOfWords];

                for (int i = 0; i < numOfWords; i++)
                {
                    art.TF_IDF[i] = art.TF[i] * IDF[i];
                }
            }

            return articles;
        }