コード例 #1
0
        //Computes the TF-IDF weight of each query term, both for the query itself
        //and for every document the corpus returns for that term.
        //query.Item1 is the query size used to normalise each query term's
        //frequency; query.Item2 lists the query terms to weight.
        //Results are written to _queryRank and _documentRank (both keyed by term)
        //and every matched document ID is added to _documents. A term for which
        //the corpus returns no documents is skipped and gets no entry at all.
        private void CompTF_IDF(Tuple <int, List <ITermDocument> > query)
        {
            //Document lengths fetched so far. Shared across all query terms so the
            //corpus is asked for a given document's length at most once per call.
            var lengthByDocument = new Dictionary <String, long>();

            //Total number of documents in the corpus (numerator of the IDF).
            //NOTE(review): the _corpus calls are read through .Result; if they
            //return Tasks this blocks the calling thread - confirm that is intended.
            long noDocs = _corpus.GetNoDocuments().Result;

            //Divisor for the query-side term frequency.
            var querySize = query.Item1;

            foreach (var term in query.Item2)
            {
                //Occurrence count per distinct document ID. Only the first entry
                //seen for a given ID is kept; later duplicates are ignored.
                var occurrences = new Dictionary <String, int>();
                foreach (var doc in _corpus.GetDocuments(term.Term).Result)
                {
                    if (occurrences.ContainsKey(doc.DocID))
                    {
                        continue;
                    }
                    occurrences.Add(doc.DocID, doc.Pos.Count);
                    this._documents.Add(doc.DocID);
                }

                //No document for this term: nothing to rank, and skipping here
                //also avoids dividing by zero in the IDF below.
                if (occurrences.Count < 1)
                {
                    continue;
                }

                //IDF = 1 + ln(N / df), with df the number of distinct documents.
                double idf = 1 + Math.Log(1.0 * noDocs / occurrences.Count);

                //Query-side weight. The count comes from the first entry of
                //term.Docs - presumably the term's positions inside the query
                //(verify against the caller). First() throws if Docs is empty.
                this._queryRank[term.Term] = (1.0 * term.Docs.First().Pos.Count / querySize) * idf;

                var weights = new Dictionary <String, double>();
                foreach (var item in occurrences)
                {
                    //Single lookup: a miss falls through to the corpus and the
                    //answer is remembered for the following terms.
                    long length;
                    if (!lengthByDocument.TryGetValue(item.Key, out length))
                    {
                        length = _corpus.GetDocumentLength(item.Key).Result;
                        lengthByDocument[item.Key] = length;
                    }

                    //A zero-length document cannot yield a term frequency.
                    if (length == 0)
                    {
                        continue;
                    }

                    double tf = 1.0 * item.Value / length;
                    weights[item.Key] = tf * idf;
                }
                _documentRank[term.Term] = weights;
            }
        }