Пример #1
0
        /// <summary>
        /// Computes the rank contribution of a single word occurrence for a query as
        /// (number of query items matching the word) * (saturated term frequency) * (idf).
        /// </summary>
        /// <param name="occ">Word occurrence being scored; its word id, hit list and the word's document frequency are read.</param>
        /// <param name="query">Query whose items are scanned for the occurrence's word id.</param>
        /// <returns>The tf-idf style score for this occurrence.</returns>
        public double CalcRankFactor(WordOccurrenceNode occ, Query query)
        {
            // How many times the occurrence's word appears among the query items.
            int countTermQuery = 0;
            foreach (QueryItem item in query.QueryItens)
            {
                if (item.WordID == occ.Word.WordID)
                {
                    countTermQuery++;
                }
            }

            int countTermDoc = occ.Hits.Count;
            double docFrequency = (double)occ.Word.QuantityDocFrequency;

            // Saturation curve (k + 1) * tf / (tf + k), with the word's document frequency used as k.
            // The operands are converted to double BEFORE dividing: with integral operands the
            // quotient would be truncated (e.g. 18 / 8 == 2 instead of 2.25) and the product could overflow.
            double bm25_TF = ((docFrequency + 1) * countTermDoc) / (countTermDoc + docFrequency);

            // totalDocQuantity is declared outside this method (presumably the number of indexed documents);
            // the +1 is applied to the numerator before dividing by the document frequency.
            double idf = Math.Log((((double)totalDocQuantity) + 1) / docFrequency);

            return ((double)countTermQuery) * bm25_TF * idf;
        }
Пример #2
0
        /// <summary>
        /// Scores one word occurrence against a query as the product of three factors:
        /// a log factor based on the word's document frequency, a length-normalized
        /// term-frequency factor, and a query-term-frequency factor.
        /// </summary>
        /// <param name="occ">Word occurrence being scored; its word, hit list and owning document are read.</param>
        /// <param name="query">Query whose items are scanned for the occurrence's word id.</param>
        /// <returns>The product of the three factors.</returns>
        public double CalcRankFactor(WordOccurrenceNode occ, Query query)
        {
            // Log factor: log(total documents / documents containing the word).
            // An alternative form, log((N - df + 0.5) / (df + 0.5)), was tried here earlier and is disabled.
            double docFrequency = (double)occ.Word.QuantityDocFrequency;
            double termLogFactor = Math.Log(((double)totalDocQuantity) / docFrequency);

            // Term-frequency factor: hits relative to the document's word count,
            // damped by a normalizer built from k1, b and the document length over avdl.
            // NOTE(review): k1 multiplies only (1 - b) here; textbook BM25 uses
            // k1 * ((1 - b) + b * dl / avdl). Confirm whether this is intentional.
            double relativeFrequency = ((double)occ.Hits.Count / (double)occ.Doc.WordQuantity);
            double damping = ((k1 * (1 - b)) + (b * (occ.Doc.WordQuantity / avdl))) + relativeFrequency;
            double normalizationTermFactor = ((k1 + 1) * relativeFrequency) / damping;

            // Query factor: share of query items equal to this word, saturated with k3.
            int matchingItems = 0;
            for (int i = 0; i < query.QueryItens.Count; i++)
            {
                if (query.QueryItens[i].WordID == occ.Word.WordID)
                {
                    matchingItems++;
                }
            }

            double queryShare = ((double)matchingItems / (double)query.QueryItens.Count);
            double termQueryFactor = ((k3 + 1) * queryShare) / (k3 + queryShare);

            return termLogFactor * normalizationTermFactor * termQueryFactor;
        }
Пример #3
0
        /// <summary>
        /// Looks up the words of a parsed query in the indexer, considering at most
        /// <c>maxSentence</c> leading query items.
        /// </summary>
        /// <param name="parsedQuery">Query whose items supply the word ids to look up.</param>
        /// <returns>The words the indexer returned, in query order; items with no match are skipped.</returns>
        private List<Word> FindWords(Query parsedQuery)
        {
            List<Word> matches = new List<Word>();
            int position = 0;

            // Stop at the end of the query or at the maxSentence cap, whichever comes first.
            while ((position < parsedQuery.QueryItens.Count) && (position < maxSentence))
            {
                Word candidate = indexer.Search(parsedQuery.QueryItens[position].WordID);

                if (candidate != null)
                {
                    matches.Add(candidate);
                }

                position++;
            }

            return matches;
        }
Пример #4
0
        /// <summary>
        /// Runs a query: finds the query's words, accumulates a rank per document over
        /// every occurrence of those words, and returns the documents ordered by rank.
        /// </summary>
        /// <param name="query">Raw query text; parsed with <c>new Query(query)</c>.</param>
        /// <returns>One result per document that contains at least one found word, highest QueryRank first.</returns>
        public List<DocumentResult> Search(string query)
        {
            Query parsedQuery = new Query(query);

            // One DocumentResult per document id, shared by all words that hit the document.
            Hashtable docsById = new Hashtable();

            foreach (Word word in FindWords(parsedQuery))
            {
                List<WordOccurrenceNode> occurrences = InvertedFileManager.Instance.GetWordOccurrencies(word);

                foreach (WordOccurrenceNode occurrence in occurrences)
                {
                    // The Hashtable indexer yields null for an unknown key, so a null here means
                    // this is the first occurrence seen for the document.
                    DocumentResult docResult = docsById[occurrence.Doc.DocID] as DocumentResult;
                    if (docResult == null)
                    {
                        docResult = new DocumentResult(occurrence.Doc);
                        docsById.Add(docResult.DocID, docResult);
                    }

                    docResult.CalculateRank(occurrence, parsedQuery);
                }
            }

            // Flatten the table into a list so it can be sorted.
            List<DocumentResult> ranked = new List<DocumentResult>();
            foreach (DocumentResult docResult in docsById.Values)
            {
                ranked.Add(docResult);
            }

            // Descending by QueryRank: the comparison is made from the right operand to the left.
            ranked.Sort((left, right) => right.QueryRank.CompareTo(left.QueryRank));

            return ranked;
        }
Пример #5
0
        /// <summary>
        /// Adds the rank contribution of one word occurrence to this result's accumulated query rank.
        /// </summary>
        /// <param name="occ">Word occurrence to score.</param>
        /// <param name="query">Query the occurrence is scored against.</param>
        public void CalculateRank(WordOccurrenceNode occ, Query query)
        {
            // The scoring strategy is obtained from the factory on every call.
            this.queryRank += FactoryRankFunction.GetRankFunction().CalcRankFactor(occ, query);
        }
Пример #6
0
        /// <summary>
        /// Runs a query: finds the query's words, walks each word's chain of occurrences,
        /// accumulates a rank per document, and returns the documents ordered by rank.
        /// </summary>
        /// <param name="query">Raw query text; parsed with <c>new Query(query)</c>.</param>
        /// <returns>One result per document reached through a found word, highest QueryRank first.</returns>
        public List<DocumentResult> Search(string query)
        {
            // One DocumentResult per document id, shared by all words that hit the document.
            Hashtable resultHash = new Hashtable();

            Query parsedQuery = new Query(query);

            List<Word> wordFound = FindWords(parsedQuery);

            foreach (Word item in wordFound)
            {
                // Walk the occurrence chain starting at the first node. A word without any
                // occurrence (null first node) contributes nothing instead of raising a
                // NullReferenceException.
                WordOccurrenceNode current = item.FirstOccurrence;

                while (current != null)
                {
                    // The Hashtable indexer yields null for an unknown key, so null means the
                    // document has not been seen yet. Every occurrence is ranked, whether the
                    // document is new or already present.
                    DocumentResult docResult = resultHash[current.Doc.DocID] as DocumentResult;
                    if (docResult == null)
                    {
                        docResult = new DocumentResult(current.Doc);
                        resultHash.Add(docResult.DocID, docResult);
                    }

                    docResult.CalculateRank(current, parsedQuery);

                    current = current.HasNext() ? current.NextOccurrence : null;
                }
            }

            // Convert the hashtable to a list so it can be sorted.
            List<DocumentResult> resultList = new List<DocumentResult>();
            foreach (DictionaryEntry entry in resultHash)
            {
                resultList.Add((DocumentResult)entry.Value);
            }

            // Sort descending by QueryRank: the lambda's parameters are deliberately swapped.
            resultList.Sort((y, x) => x.QueryRank.CompareTo(y.QueryRank));

            return resultList;
        }