Exemple #1
0
        /// <summary>
        /// Hands an indexing job to the first usable slot of <c>_IndexPool</c>, polling
        /// until one is available.
        /// </summary>
        /// <remarks>
        /// A slot is usable when it is empty or when its worker reports <c>Finished</c>.
        /// If the finished worker found in a slot carries an <c>Exception</c>, no new
        /// worker is started: the method calls <c>WaitForAllFinished()</c> and returns.
        /// When no slot is usable the calling thread sleeps 50 ms and scans again.
        /// </remarks>
        /// <param name="index">Passed through to the new <c>IndexThread</c>.</param>
        /// <param name="docs">Passed through to the new <c>IndexThread</c>.</param>
        /// <param name="fieldIndex">Passed through to the new <c>IndexThread</c>.</param>
        internal void Index(InvertedIndex index, IList <Document> docs, int fieldIndex)
        {
            while (true)
            {
                for (int slot = 0; slot < _IndexPool.Length; slot++)
                {
                    IndexThread worker = _IndexPool[slot];

                    // Slot is occupied by a worker that is still running: try the next one.
                    if (worker != null && !worker.Finished)
                    {
                        continue;
                    }

                    // A finished worker recorded an exception: wait for the remaining
                    // workers and give up without starting a new one.
                    if (worker != null && worker.Exception != null)
                    {
                        WaitForAllFinished();
                        return;
                    }

                    // Empty slot, or finished worker without an exception: reuse the slot.
                    _IndexPool[slot] = new IndexThread(index, docs, fieldIndex);
                    return;
                }

                // Every slot is busy; back off briefly before rescanning.
                System.Threading.Thread.Sleep(50);
            }
        }
Exemple #2
0
            /// <summary>
            /// Stores the job arguments, marks the job as not finished, and immediately
            /// starts a background thread that runs <c>ThreadProc</c>.
            /// </summary>
            /// <param name="index">Stored in <c>_Index</c>.</param>
            /// <param name="docs">Stored in <c>_Docs</c>.</param>
            /// <param name="fieldIndex">Stored in <c>_FieldIndex</c>.</param>
            internal IndexThread(InvertedIndex index, IList <Document> docs, int fieldIndex)
            {
                // All state is assigned before the thread is started below.
                this._Index      = index;
                this._Docs       = docs;
                this._FieldIndex = fieldIndex;
                this._Finished   = false;

                // Background thread: it does not keep the process alive on its own.
                this._Thread = new System.Threading.Thread(ThreadProc)
                {
                    IsBackground = true
                };

                this._Thread.Start();
            }
Exemple #3
0
        /// <summary>
        /// Outputs one row per distinct word of the query string with the columns
        /// Word, TF, IDF, T_D, TotalDoucments and TF_IDF.
        /// </summary>
        /// <remarks>
        /// Expects exactly three entries in <c>Parameters</c>: table name, field name
        /// and the words to analyse. TF is the number of times the word occurs in the
        /// query, T_D is the word's <c>RelTotalCount</c>, and IDF is the natural
        /// logarithm of (document count / T_D). Words for which the inverted index
        /// returns no word index get a row of zeros.
        /// </remarks>
        /// <exception cref="StoredProcException">
        /// The parameter count is not 3, the table does not exist, or the field has no
        /// inverted index.
        /// </exception>
        public void Run()
        {
            if (Parameters.Count != 3)
            {
                throw new StoredProcException("First parameter is table name, second parameter is field name, third parameter is words. SP_GetIDF 'tablename', 'fieldname', 'abc news'");
            }

            string tableName = Parameters[0];
            string fieldName = Parameters[1];

            Data.DBProvider dbProvider = Data.DBProvider.GetDBProvider(tableName);

            if (dbProvider == null)
            {
                throw new StoredProcException(string.Format("Table name {0} does not exist!", tableName));
            }

            Hubble.Core.Index.InvertedIndex invertedIndex = dbProvider.GetInvertedIndex(fieldName);

            if (invertedIndex == null)
            {
                throw new StoredProcException(string.Format("Field name {0} does not exist or is not the tokenized index field!", fieldName));
            }

            string          queryStr  = Parameters[2];
            List <WordInfo> wordInfos = ParseWhere.GetWordInfoList(queryStr);

            // Maps each distinct query word to its accumulated query info.
            // A null value marks a word that was looked up and has no word index.
            Dictionary <string, WordIndexForQuery> wordIndexDict = new Dictionary <string, WordIndexForQuery>();

            foreach (Hubble.Core.Entity.WordInfo wordInfo in wordInfos)
            {
                WordIndexForQuery wifq;

                if (wordIndexDict.TryGetValue(wordInfo.Word, out wifq))
                {
                    // The word was seen before. TryGetValue also succeeds for the null
                    // placeholder stored for unindexed words, so the entry must be
                    // checked before it is dereferenced; otherwise a repeated
                    // unindexed word throws NullReferenceException.
                    if (wifq != null)
                    {
                        wifq.WordRank += wordInfo.Rank;
                        wifq.QueryCount++;
                    }

                    continue;
                }

                Hubble.Core.Index.WordIndexReader wordIndex = invertedIndex.GetWordIndex(wordInfo.Word, false, true); //Only get step doc index

                if (wordIndex == null)
                {
                    // Remember the miss so the word still gets a (zero) output row.
                    wordIndexDict.Add(wordInfo.Word, null);
                    continue;
                }

                wifq = new WordIndexForQuery(wordIndex,
                                             invertedIndex.DocumentCount, wordInfo.Rank, 1);
                wifq.QueryCount    = 1;
                wifq.FirstPosition = wordInfo.Position;
                wordIndexDict.Add(wordInfo.Word, wifq);
            }

            AddColumn("Word");
            AddColumn("TF");
            AddColumn("IDF");
            AddColumn("T_D");
            AddColumn("TotalDoucments");
            AddColumn("TF_IDF");

            int totalDocuments = invertedIndex.DocumentCount;

            // Enumerate key/value pairs directly instead of Keys + indexer (one lookup
            // per row instead of two; enumeration order is the same).
            foreach (KeyValuePair <string, WordIndexForQuery> entry in wordIndexDict)
            {
                NewRow();

                WordIndexForQuery wifq = entry.Value;
                OutputValue("Word", entry.Key);

                if (wifq == null)
                {
                    // Word has no word index: report zeros in every numeric column.
                    OutputValue("TF", 0);
                    OutputValue("T_D", 0);
                    OutputValue("TotalDoucments", 0);
                    OutputValue("IDF", 0);
                    OutputValue("TF_IDF", 0);
                }
                else
                {
                    // NOTE(review): if RelTotalCount can be 0 for an existing word index,
                    // this yields Infinity/NaN - confirm against WordIndexForQuery.
                    double idf = Math.Log((double)totalDocuments / (double)wifq.RelTotalCount);

                    OutputValue("TF", wifq.QueryCount);
                    OutputValue("T_D", wifq.RelTotalCount);
                    OutputValue("TotalDoucments", totalDocuments);
                    OutputValue("IDF", idf);
                    OutputValue("TF_IDF", wifq.QueryCount * idf);
                }
            }
        }