Exemplo n.º 1
0
        /// <summary>
        /// Outputs one row per distinct word of the query string with its TF / IDF statistics
        /// for the given tokenized index field.
        /// Expects exactly three parameters: table name, field name and the words to analyse.
        /// Output columns: Word, TF, IDF, T_D, TotalDoucments, TF_IDF.
        /// </summary>
        /// <exception cref="StoredProcException">
        /// Thrown when the parameter count is not 3, when no DB provider is found for the table name,
        /// or when no inverted index is found for the field name.
        /// </exception>
        public void Run()
        {
            if (Parameters.Count != 3)
            {
                throw new StoredProcException("First parameter is table name, second parameter is field name, third parameter is words. SP_GetIDF 'tablename', 'fieldname', 'abc news'");
            }

            string tableName = Parameters[0];
            string fieldName = Parameters[1];

            Data.DBProvider dbProvider = Data.DBProvider.GetDBProvider(tableName);

            if (dbProvider == null)
            {
                throw new StoredProcException(string.Format("Table name {0} does not exist!", tableName));
            }

            Hubble.Core.Index.InvertedIndex invertedIndex = dbProvider.GetInvertedIndex(fieldName);

            if (invertedIndex == null)
            {
                throw new StoredProcException(string.Format("Field name {0} does not exist or is not the tokenized index field!", fieldName));
            }

            string queryStr = Parameters[2];
            List<WordInfo> wordInfos = ParseWhere.GetWordInfoList(queryStr);

            // Maps each distinct query word to its index statistics.
            // A null value marks a word for which the index returned no word index.
            Dictionary<string, WordIndexForQuery> wordIndexDict = new Dictionary<string, WordIndexForQuery>();

            foreach (Hubble.Core.Entity.WordInfo wordInfo in wordInfos)
            {
                WordIndexForQuery wifq;

                if (wordIndexDict.TryGetValue(wordInfo.Word, out wifq))
                {
                    // Repeated word. The cached entry is null when the word was not found
                    // in the index the first time; there is nothing to accumulate in that
                    // case, and dereferencing it would throw a NullReferenceException.
                    if (wifq != null)
                    {
                        wifq.WordRank += wordInfo.Rank;
                        wifq.QueryCount++;
                    }

                    continue;
                }

                Hubble.Core.Index.WordIndexReader wordIndex = invertedIndex.GetWordIndex(wordInfo.Word, false, true); //Only get step doc index

                if (wordIndex == null)
                {
                    // Remember the miss so the word still gets an (all zero) output row.
                    wordIndexDict.Add(wordInfo.Word, null);
                    continue;
                }

                wifq = new WordIndexForQuery(wordIndex,
                                             invertedIndex.DocumentCount, wordInfo.Rank, 1);
                wifq.QueryCount    = 1;
                wifq.FirstPosition = wordInfo.Position;
                wordIndexDict.Add(wordInfo.Word, wifq);
            }

            AddColumn("Word");
            AddColumn("TF");
            AddColumn("IDF");
            AddColumn("T_D");
            AddColumn("TotalDoucments");
            AddColumn("TF_IDF");

            int totalDocuments = invertedIndex.DocumentCount;

            // Enumerate key/value pairs directly instead of Keys + indexer (avoids a second lookup).
            foreach (KeyValuePair<string, WordIndexForQuery> entry in wordIndexDict)
            {
                NewRow();

                WordIndexForQuery wifq = entry.Value;
                OutputValue("Word", entry.Key);

                if (wifq == null)
                {
                    // Word not present in the index: every statistic is reported as 0.
                    OutputValue("TF", 0);
                    OutputValue("T_D", 0);
                    OutputValue("TotalDoucments", 0);
                    OutputValue("IDF", 0);
                    OutputValue("TF_IDF", 0);
                }
                else
                {
                    // IDF = ln(totalDocuments / RelTotalCount).
                    // NOTE(review): if RelTotalCount can be 0 for a non-null word index this
                    // yields Infinity - confirm against WordIndexForQuery.
                    double idf = Math.Log((double)totalDocuments / (double)wifq.RelTotalCount);

                    OutputValue("TF", wifq.QueryCount);
                    OutputValue("T_D", wifq.RelTotalCount);
                    OutputValue("TotalDoucments", totalDocuments);
                    OutputValue("IDF", idf);
                    OutputValue("TF_IDF", wifq.QueryCount * idf);
                }
            }
        }